forked from mindspore-Ecosystem/mindspore
!15176 [MS][LITE]Change lite_kernel.h interface of InnerContext
From: @gongdaguo Reviewed-by: @jpc_chenjianping,@zhanghaibo5 Signed-off-by: @jpc_chenjianping
This commit is contained in: commit c71ae4e831
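The patch below has one recurring shape: lite_kernel.h stops depending on the internal lite::InnerContext and stores the public lite::Context instead, and every place that needs internal-only state (thread_pool_, GetNpuInfo(), the fp16 conversion helpers) casts back explicitly. A minimal, self-contained sketch of that pattern, using simplified stand-ins for the real classes in include/context.h and src/inner_context.h (the actual declarations carry many more members):

    #include <iostream>

    namespace lite {
    struct Context {          // public, user-facing context
      int thread_num_ = 2;
      virtual ~Context() = default;
    };
    struct InnerContext : public Context {  // internal context, owns runtime-only state
      void *thread_pool_ = nullptr;
    };
    }  // namespace lite

    class LiteKernel {
     public:
      // After the change the constructor and the stored member use the public type ...
      explicit LiteKernel(const lite::Context *ctx) : context_(ctx) {}
      // ... and call sites downcast when they need internal members.
      void *thread_pool() const {
        return static_cast<const lite::InnerContext *>(context_)->thread_pool_;
      }

     private:
      const lite::Context *context_ = nullptr;
    };

    int main() {
      lite::InnerContext inner;
      LiteKernel kernel(&inner);  // an InnerContext is still what is actually passed in
      std::cout << (kernel.thread_pool() == nullptr) << std::endl;
      return 0;
    }

The static_cast is only sound because every context the runtime hands to a kernel is created as an InnerContext; the creator functions in the diff preserve that invariant by casting once at the boundary.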
@@ -841,7 +841,7 @@ table Rsqrt {
 }

 table QuantDTypeCast {
-    src_t: long; // deprecated
+    src_t: long;
     dst_t: long;
 }

@@ -30,6 +30,7 @@
 #include "src/tensor.h"
 #include "include/errorcode.h"
 #include "schema/model_generated.h"
+#include "include/context.h"

 namespace mindspore::kernel {
 enum KERNEL_ARCH {

@@ -64,7 +65,7 @@ class LiteKernel {
  public:
   LiteKernel() = default;
   LiteKernel(OpParameter *parameter, std::vector<lite::Tensor *> in_tensors, std::vector<lite::Tensor *> out_tensors,
-             const lite::InnerContext *ctx)
+             const lite::Context *ctx)
       : op_parameter_(parameter),
         in_tensors_(std::move(in_tensors)),
         out_tensors_(std::move(out_tensors)),

@@ -175,7 +176,7 @@ class LiteKernel {

   SubGraphType subgraph_type() const { return this->subgraph_type_; }

-  const lite::InnerContext *context() const { return this->context_; }
+  const lite::Context *context() const { return this->context_; }

   virtual std::string ToString() const;

@@ -202,7 +203,7 @@ class LiteKernel {
   // tensor will free in ~lite_session()
   std::vector<lite::Tensor *> in_tensors_;
   std::vector<lite::Tensor *> out_tensors_;
-  const lite::InnerContext *context_ = nullptr;
+  const lite::Context *context_ = nullptr;
   std::vector<LiteKernel *> in_kernels_;
   std::vector<LiteKernel *> out_kernels_;
   bool train_mode_ = false;

@@ -217,13 +218,13 @@ class LiteKernel {

 typedef LiteKernel *(*KernelCreator)(const std::vector<lite::Tensor *> &inputs,
                                      const std::vector<lite::Tensor *> &outputs, OpParameter *parameter,
-                                     const lite::InnerContext *ctx, const KernelKey &desc);
+                                     const lite::Context *ctx, const KernelKey &desc);

 template <class T>
 kernel::LiteKernel *LiteKernelCreator(const std::vector<lite::Tensor *> &inputs,
                                       const std::vector<lite::Tensor *> &outputs, OpParameter *parameter,
-                                      const lite::InnerContext *ctx, const kernel::KernelKey &desc) {
-  auto *kernel = new (std::nothrow) T(parameter, inputs, outputs, ctx);
+                                      const lite::Context *ctx, const kernel::KernelKey &desc) {
+  auto *kernel = new (std::nothrow) T(parameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "kernel: " << parameter->name_ << "is nullptr.";
     free(parameter);
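For registered kernels, LiteKernelCreator<T> is the single choke point where the downcast happens, so the individual kernel classes keep their InnerContext-taking constructors. A self-contained sketch of that flow (OpParameter, the kernel classes, and the creator are simplified stand-ins; REG_KERNEL and KernelKey are omitted):

    #include <iostream>
    #include <new>

    namespace lite {
    struct Context { virtual ~Context() = default; };
    struct InnerContext : public Context { int thread_num_ = 4; };
    }  // namespace lite

    struct OpParameter {
      const char *name_;
    };

    class LiteKernel {
     public:
      LiteKernel(OpParameter *parameter, const lite::InnerContext *ctx)
          : op_parameter_(parameter), context_(ctx) {}
      virtual ~LiteKernel() = default;

     protected:
      OpParameter *op_parameter_ = nullptr;
      const lite::Context *context_ = nullptr;  // stored as the public type
    };

    // The creator accepts the public Context and downcasts once, exactly as the
    // patched template above does.
    template <class T>
    LiteKernel *LiteKernelCreator(OpParameter *parameter, const lite::Context *ctx) {
      auto *kernel = new (std::nothrow) T(parameter, static_cast<const lite::InnerContext *>(ctx));
      if (kernel == nullptr) {
        std::cerr << "kernel: " << parameter->name_ << " is nullptr." << std::endl;
      }
      return kernel;
    }

    struct ReluKernel : public LiteKernel {
      ReluKernel(OpParameter *parameter, const lite::InnerContext *ctx) : LiteKernel(parameter, ctx) {}
    };

    int main() {
      lite::InnerContext ctx;
      OpParameter param{"Relu"};
      LiteKernel *kernel = LiteKernelCreator<ReluKernel>(&param, &ctx);
      delete kernel;
      return 0;
    }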
@@ -206,7 +206,8 @@ int SubGraphNpuKernel::Init() {

   MS_ASSERT(npu_manager_ != nullptr);

-  npu_manager_->AddModel(model_buffer_data, GetOMModelName(), context_->GetNpuInfo().frequency_);
+  npu_manager_->AddModel(model_buffer_data, GetOMModelName(),
+                         static_cast<const lite::InnerContext *>(context_)->GetNpuInfo().frequency_);

   executor_ = new (std::nothrow) mindspore::lite::NPUExecutor(GetOMModelName(), npu_manager_);

@@ -73,7 +73,8 @@ int ConstantOfShapeCPUKernel::Run() {
   int thread_count = MSMIN(op_parameter_->thread_num_, param_->element_size_);
   thread_stride_ = UP_DIV(param_->element_size_, thread_count);

-  auto ret = ParallelLaunch(this->context_->thread_pool_, ConstantOfShapeRun, this, thread_count);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConstantOfShapeRun,
+                            this, thread_count);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConstantOfShapeRun error error_code[" << ret << "]";
     return ret;
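Almost every remaining hunk repeats this one mechanical edit: Run() methods used to reach the pool as this->context_->thread_pool_, which only compiled because context_ was an InnerContext; with context_ now typed as the public lite::Context, each call site casts back before touching thread_pool_, and the longer ParallelLaunch lines are re-wrapped. A self-contained sketch of the call-site pattern (ParallelLaunch, the task signature, and both context types are simplified stand-ins here, not the actual MindSpore declarations):

    #include <cstdio>

    namespace lite {
    struct Context { virtual ~Context() = default; };
    struct InnerContext : public Context { void *thread_pool_ = nullptr; };
    }  // namespace lite

    using TaskFunc = int (*)(void *cdata, int task_id);

    // Serial stand-in for the real thread-pool dispatch.
    int ParallelLaunch(void *thread_pool, TaskFunc func, void *cdata, int task_num) {
      (void)thread_pool;
      for (int i = 0; i < task_num; ++i) {
        int ret = func(cdata, i);
        if (ret != 0) {
          return ret;
        }
      }
      return 0;
    }

    struct DemoKernel {
      const lite::Context *context_ = nullptr;
      static int DemoRun(void *cdata, int task_id) {
        (void)cdata;
        std::printf("task %d\n", task_id);
        return 0;
      }
      int Run() {
        // The cast is the whole patch: context_ is declared as the public type,
        // but thread_pool_ only exists on the internal InnerContext.
        return ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
                              DemoRun, this, 2);
      }
    };

    int main() {
      lite::InnerContext ctx;
      DemoKernel kernel{&ctx};
      return kernel.Run();
    }

Wrapping the cast in a small inner_context() accessor would remove the repetition; the patch instead inlines the cast at every call site, which keeps each change purely local.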
@@ -144,17 +144,7 @@ void DetectionPostProcessBaseCPUKernel::FreeAllocatedBuffer() {
   }
 }

-int DetectionPostProcessBaseCPUKernel::Run() {
-  MS_ASSERT(context_->allocator != nullptr);
-  int status = GetInputData();
-  if (status != RET_OK) {
-    return status;
-  }
-  auto output_boxes = reinterpret_cast<float *>(out_tensors_.at(0)->data_c());
-  auto output_classes = reinterpret_cast<float *>(out_tensors_.at(1)->data_c());
-  auto output_scores = reinterpret_cast<float *>(out_tensors_.at(2)->data_c());
-  auto output_num = reinterpret_cast<float *>(out_tensors_.at(3)->data_c());
-
+int DetectionPostProcessBaseCPUKernel::ParamInit() {
   num_boxes_ = in_tensors_.at(0)->shape().at(1);
   num_classes_with_bg_ = in_tensors_.at(1)->shape().at(2);
   params_->decoded_boxes_ = context_->allocator->Malloc(num_boxes_ * 4 * sizeof(float));

@@ -221,6 +211,24 @@ int DetectionPostProcessBaseCPUKernel::Run() {
       return RET_ERROR;
     }
   }
+  return RET_OK;
+}
+
+int DetectionPostProcessBaseCPUKernel::Run() {
+  MS_ASSERT(context_->allocator != nullptr);
+  int status = GetInputData();
+  if (status != RET_OK) {
+    return status;
+  }
+  auto output_boxes = reinterpret_cast<float *>(out_tensors_.at(0)->data_c());
+  auto output_classes = reinterpret_cast<float *>(out_tensors_.at(1)->data_c());
+  auto output_scores = reinterpret_cast<float *>(out_tensors_.at(2)->data_c());
+  auto output_num = reinterpret_cast<float *>(out_tensors_.at(3)->data_c());
+
+  if (ParamInit() != RET_OK) {
+    MS_LOG(ERROR) << "ParamInit error";
+    return status;
+  }

   status = DecodeBoxes(num_boxes_, input_boxes_, params_->anchors_, params_);
   if (status != RET_OK) {

@@ -238,7 +246,8 @@ int DetectionPostProcessBaseCPUKernel::Run() {
       return status;
     }
   } else {
-    status = ParallelLaunch(this->context_->thread_pool_, NmsMultiClassesFastCoreRun, this, op_parameter_->thread_num_);
+    status = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                            NmsMultiClassesFastCoreRun, this, op_parameter_->thread_num_);
     if (status != RET_OK) {
       MS_LOG(ERROR) << "NmsMultiClassesFastCoreRun error error_code[" << status << "]";
       FreeAllocatedBuffer();
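Reading note for the three hunks above: they are one refactor seen from both sides. The buffer-allocation prologue of the old Run() moves into a new ParamInit() helper (declared in the header hunk below), and Run() is re-added further down, now calling ParamInit() before DecodeBoxes().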
@@ -47,6 +47,7 @@ class DetectionPostProcessBaseCPUKernel : public LiteKernel {

  protected:
   virtual int GetInputData() = 0;
+  int ParamInit();

  private:
   void FreeAllocatedBuffer();

@@ -166,7 +166,8 @@ int RunPriorBox(void *cdata, int task_id) {
 }

 int PriorBoxCPUKernel::Run() {
-  int error_code = ParallelLaunch(this->context_->thread_pool_, RunPriorBox, this, thread_count_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, RunPriorBox,
+                                  this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "PriorBox run error, error_code[" << error_code << "]";
     return RET_ERROR;
@@ -172,7 +172,8 @@ int QuantDTypeCastCPUKernel::Run() {
     uint8_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_[0]->data_c());
   }

-  auto ret = ParallelLaunch(this->context_->thread_pool_, QuantDTypeCastRun, this, thread_n_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, QuantDTypeCastRun,
+                            this, thread_n_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
     if (in_tensors_[0]->data_type() == TypeId::kNumberTypeInt8 &&

@@ -66,7 +66,8 @@ int ReshapeRun(void *cdata, int task_id) {
 int ReshapeBaseCPUKernel::Run() {
   input_ptr_ = reinterpret_cast<uint8_t *>(in_tensors_.at(kInputIndex)->data_c());
   output_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_.at(kOutputIndex)->data_c());
-  auto ret = ParallelLaunch(this->context_->thread_pool_, ReshapeRun, this, context_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ReshapeRun, this,
+                            context_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Reshape run error error_code[" << ret << "]";
     return ret;
@@ -81,7 +81,8 @@ int SliceCPUKernel::Run() {
                       lite::DataTypeSize(in_tensors_.at(0)->data_type()));
     return RET_OK;
   }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, SliceLaunch, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SliceLaunch, this,
+                            op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "slice launch fail!ret: " << ret;
     return RET_ERROR;

@@ -120,7 +120,8 @@ int SplitBaseCPUKernel::Run() {
     output_ptr_.at(i) = output_tensor->data_c();
   }

-  auto ret = ParallelLaunch(this->context_->thread_pool_, SplitRun, this, thread_n_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SplitRun, this,
+                            thread_n_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "split error error_code[" << ret << "]";
   }
@@ -100,7 +100,8 @@ int StackBaseCPUKernel::Run() {
   }
   // run stack
   num_threads_ = MSMIN(UP_DIV(outer_size_, 64), this->context_->thread_num_);
-  auto ret = ParallelLaunch(this->context_->thread_pool_, StackRun, this, num_threads_);
+  auto ret =
+    ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, StackRun, this, num_threads_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "StackBaseCPUKernel Run error: error_code[" << ret << "]";
     return RET_ERROR;

@@ -157,7 +157,8 @@ int StridedSliceCPUKernel::FastRun() {
   }
   input_ptr_ = reinterpret_cast<uint8_t *>(in_tensors_.front()->data_c());
   output_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_.front()->data_c());
-  auto ret = ParallelLaunch(this->context_->thread_pool_, StrideRun, this, context_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, StrideRun, this,
+                            context_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Stride run error error_code[" << ret << "]";
     return ret;
@@ -127,7 +127,8 @@ int TileCPUKernel::SimpleTileImpl(int task_id) {
 }

 int TileCPUKernel::RunSimpleTile() {
-  auto ret = ParallelLaunch(context_->thread_pool_, SimpleTile, this, context_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SimpleTile, this,
+                            context_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "RunSimpleTile error code[" << ret << "]";
     return ret;

@@ -100,7 +100,8 @@ int ActivationFp16CPUKernel::Run() {
   fp16_input_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
   fp16_output_ = reinterpret_cast<float16_t *>(output_tensor->data_c());

-  int error_code = ParallelLaunch(this->context_->thread_pool_, ActivationFp16Run, this, thread_count_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                                  ActivationFp16Run, this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]";
     return RET_ERROR;
@@ -160,15 +160,16 @@ int ArithmeticCompareFP16CPUKernel::Run() {
   is_input0_fp32_ = in_tensors_.at(0)->data_type() == kNumberTypeFloat32;
   is_input1_fp32_ = in_tensors_.at(1)->data_type() == kNumberTypeFloat32;

-  input0_fp16_ = ConvertInputFp32toFp16(in_tensors_.at(0), context_);
-  input1_fp16_ = ConvertInputFp32toFp16(in_tensors_.at(1), context_);
+  input0_fp16_ = ConvertInputFp32toFp16(in_tensors_.at(0), static_cast<const lite::InnerContext *>(this->context_));
+  input1_fp16_ = ConvertInputFp32toFp16(in_tensors_.at(1), static_cast<const lite::InnerContext *>(this->context_));
   output_fp16_ = reinterpret_cast<uint8_t *>(output_tensor->MutableData());
   if (input0_fp16_ == nullptr || input1_fp16_ == nullptr || output_fp16_ == nullptr) {
     MS_LOG(ERROR) << "Memory allocation failed";
     FreeTmpBuffer();
     return RET_ERROR;
   }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticsRunFp16, this, context_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ArithmeticsRunFp16,
+                            this, context_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ArithmeticsRunFp16 run error error_code[" << ret << "]";
   }
@@ -127,13 +127,13 @@ void ArithmeticFP16CPUKernel::InitRunFunction(int primitive_type) {
 int ArithmeticFP16CPUKernel::ConstTensorBroadCast() {
   int ret;
   if (in_tensors_[0]->data_c() != nullptr) {
-    ret = ConvertFp32TensorToFp16(in_tensors_[0], context_);
+    ret = ConvertFp32TensorToFp16(in_tensors_[0], static_cast<const lite::InnerContext *>(this->context_));
     if (ret != RET_OK) {
       return ret;
     }
   }
   if (in_tensors_[1]->data_c() != nullptr) {
-    ret = ConvertFp32TensorToFp16(in_tensors_[1], context_);
+    ret = ConvertFp32TensorToFp16(in_tensors_[1], static_cast<const lite::InnerContext *>(this->context_));
     if (ret != RET_OK) {
       return ret;
     }

@@ -167,18 +167,19 @@ int ArithmeticFP16CPUKernel::Run() {
     return RET_ERROR;
   }
   if (!input0_broadcast_) {
-    input0_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(0), context_);
+    input0_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(0), static_cast<const lite::InnerContext *>(this->context_));
   }
   if (!input1_broadcast_) {
-    input1_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(1), context_);
+    input1_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(1), static_cast<const lite::InnerContext *>(this->context_));
   }
   auto output_tensor = out_tensors_.at(0);
-  output_ptr_ = MallocOutputFp16(output_tensor, context_);
+  output_ptr_ = MallocOutputFp16(output_tensor, static_cast<const lite::InnerContext *>(this->context_));
   if (input0_ptr_ == nullptr || input1_ptr_ == nullptr || output_ptr_ == nullptr) {
     FreeFp16Buffer();
     return RET_ERROR;
   }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticsRun, this, context_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ArithmeticsRun, this,
+                            context_->thread_num_);
   if (out_tensors_.at(0)->data_type() == kNumberTypeFloat32) {
     Float16ToFloat32(static_cast<float16_t *>(output_ptr_), reinterpret_cast<float *>(output_tensor->MutableData()),
                      output_tensor->ElementsNum());
@@ -77,13 +77,14 @@ int ArithmeticSelfFp16CPUKernel::Run() {
   auto output_tensor = out_tensors_.at(0);

   if (input_tensor->data_type() == kNumberTypeFloat32) {
-    input_fp16_ptr_ = ConvertInputFp32toFp16(input_tensor, context_);
+    input_fp16_ptr_ = ConvertInputFp32toFp16(input_tensor, static_cast<const lite::InnerContext *>(this->context_));
   } else {
     input_fp16_ptr_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
   }
   output_fp16_ptr_ = reinterpret_cast<float16_t *>(output_tensor->data_c());

-  auto ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticSelfRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ArithmeticSelfRun,
+                            this, op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]";
   }

@@ -51,15 +51,16 @@ int BatchnormFp16CPUKernel::InitConstTensor() {
 int BatchnormFp16CPUKernel::Run() {
   auto input_tensor = in_tensors_.at(0);
   auto output_tensor = out_tensors_.at(0);
-  input_ = ConvertInputFp32toFp16(input_tensor, context_);
-  output_ = MallocOutputFp16(output_tensor, context_);
+  input_ = ConvertInputFp32toFp16(input_tensor, static_cast<const lite::InnerContext *>(this->context_));
+  output_ = MallocOutputFp16(output_tensor, static_cast<const lite::InnerContext *>(this->context_));
   if (input_ == nullptr || output_ == nullptr) {
     FreeInputAndOutput();
     MS_LOG(ERROR) << "input or output is nullptr";
     return RET_ERROR;
   }

-  auto ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, BatchNormRun, this,
+                            op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
   }
@@ -132,7 +132,8 @@ int CastFp16CPUKernel::Run() {
   if (data_num_ == 0) {
     return RET_OK;
   }
-  return ParallelLaunch(this->context_->thread_pool_, CastFp16Run, this, op_parameter_->thread_num_);
+  return ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, CastFp16Run, this,
+                        op_parameter_->thread_num_);
 }

 REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Cast, LiteKernelCreator<CastFp16CPUKernel>)

@@ -236,14 +236,16 @@ int Convolution1x1FP16CPUKernel::Run() {

   int ret = RET_ERROR;
   if (multi_thread_by_hw_) {
-    ret = ParallelLaunch(this->context_->thread_pool_, Convolution1x1Fp16RunHw, this, thread_count_);
+    ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                         Convolution1x1Fp16RunHw, this, thread_count_);
   } else {
 #ifdef ENABLE_ARM64
     RowMajor2Col16MajorFp16Opt(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_);
 #else
     RowMajor2Col12MajorFp16Opt(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_);
 #endif
-    ret = ParallelLaunch(this->context_->thread_pool_, Convolution1x1Fp16RunOc, this, thread_count_);
+    ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                         Convolution1x1Fp16RunOc, this, thread_count_);
   }
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ParallelLaunch failed.";
@@ -95,10 +95,11 @@ static void SetInputOutputShapeInfo(ConvParameter *conv_param, lite::Tensor *input,
 int ConvolutionDelegateFP16CPUKernel::ReSize() {
   // Update shape info of input and output
   kernel::SetInputOutputShapeInfo(reinterpret_cast<ConvParameter *>(op_parameter_), in_tensors_.front(),
-                                  out_tensors_.front(), context_);
+                                  out_tensors_.front(), static_cast<const lite::InnerContext *>(this->context_));
   if (fp16_conv_kernel_ == nullptr) {
     fp16_conv_kernel_ =
-      CpuConvFp16KernelSelect(in_tensors_, out_tensors_, op_parameter_, context_, origin_weight_, origin_bias_);
+      CpuConvFp16KernelSelect(in_tensors_, out_tensors_, op_parameter_,
+                              static_cast<const lite::InnerContext *>(context_), origin_weight_, origin_bias_);
     if (fp16_conv_kernel_ == nullptr) {
       MS_LOG(ERROR) << "Selecting execute kernel failed for conv_kernel, got a nullptr.";
       return RET_ERROR;

@@ -184,7 +185,7 @@ kernel::LiteKernel *CpuGroupConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
 /* creator func */
 kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                              const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
-                                             const InnerContext *ctx, const kernel::KernelKey &desc) {
+                                             const lite::Context *ctx, const kernel::KernelKey &desc) {
   MS_ASSERT(opParameter != nullptr);
   MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DFusion);

@@ -200,11 +201,12 @@ kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::Tensor *> &
   auto conv_param = reinterpret_cast<ConvParameter *>(opParameter);
   kernel::LiteKernel *kernel = nullptr;
   if (conv_param->group_ == 1) {
-    kernel = new (std::nothrow) kernel::ConvolutionDelegateFP16CPUKernel(opParameter, inputs, outputs, ctx);
+    kernel = new (std::nothrow) kernel::ConvolutionDelegateFP16CPUKernel(opParameter, inputs, outputs,
+                                                                         static_cast<const lite::InnerContext *>(ctx));
   } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) {
-    kernel = CpuConvDwFp16KernelCreator(inputs, outputs, opParameter, ctx);
+    kernel = CpuConvDwFp16KernelCreator(inputs, outputs, opParameter, static_cast<const lite::InnerContext *>(ctx));
   } else {
-    kernel = CpuGroupConvFp16KernelCreator(inputs, outputs, opParameter, ctx);
+    kernel = CpuGroupConvFp16KernelCreator(inputs, outputs, opParameter, static_cast<const lite::InnerContext *>(ctx));
   }

   if (kernel == nullptr) {
@@ -104,7 +104,8 @@ static int ConvDwFp16Run(void *cdata, int task_id) {
 }

 int ConvolutionDepthwiseFp16CPUKernel::Run() {
-  auto ret = ParallelLaunch(this->context_->thread_pool_, ConvDwFp16Run, this, conv_param_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvDwFp16Run, this,
+                            conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvDwFp16Run error: error_code[" << ret << "]";
   }

@@ -155,7 +155,8 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
     packed_output_ = output_ptr;
   }

-  ret = ParallelLaunch(this->context_->thread_pool_, ConvDwSWFp16Run, this, conv_param_->thread_num_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvDwSWFp16Run, this,
+                       conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvDwSWFp16Run error: error_code[" << ret << "]";
   }

@@ -144,7 +144,8 @@ int ConvolutionFP16CPUKernel::Run() {
     return RET_ERROR;
   }

-  ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionFp16Impl, this, thread_count_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvolutionFp16Impl, this,
+                       thread_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "conv fp16 error ret[" << ret << "]";
   }
@@ -213,7 +213,8 @@ int ConvolutionWinogradFP16CPUKernel::Run() {
     return RET_ERROR;
   }

-  ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionWinogradFp16Impl, this, thread_count_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                       ConvolutionWinogradFp16Impl, this, thread_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "conv winograd error error_code[" << ret << "]";
   }

@@ -53,7 +53,8 @@ int CropFp16CPUKernel::Run() {
   input_ptr_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
   output_ptr_ = reinterpret_cast<float16_t *>(output_tensor->data_c());

-  auto ret = ParallelLaunch(this->context_->thread_pool_, CropFp16Run, this, crop_para_->thread_count_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, CropFp16Run, this,
+                            crop_para_->thread_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ParallelLaunch failed: " << ret;
   }
@@ -173,7 +173,8 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
     memset(output_ptr, 0, out_tensors_.at(kOutputIndex)->ElementsNum() * sizeof(float16_t));
     packed_output_ = output_ptr;
   }
-  ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwFp16Run, this, conv_param_->thread_num_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, DeconvDwFp16Run, this,
+                       conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "DeconvDwFp16Run error: error_code[" << ret << "]";
   }

@@ -217,7 +217,8 @@ int DeConvolutionFp16CPUKernel::Run() {

     RowMajor2Col16MajorFp16Opt(batch_input_, pack_input_, input_plane_, conv_param_->input_channel_);

-    error_code = ParallelLaunch(this->context_->thread_pool_, DeConvFp16Run, this, thread_count_);
+    error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, DeConvFp16Run,
+                                this, thread_count_);
     if (error_code != RET_OK) {
       MS_LOG(ERROR) << "deconv fp16 run error! error_code[" << error_code << "]";
     }

@@ -229,7 +230,7 @@ int DeConvolutionFp16CPUKernel::Run() {

 kernel::LiteKernel *CpuDeConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
-                                               const lite::InnerContext *ctx, const kernel::KernelKey &desc) {
+                                               const lite::Context *ctx, const kernel::KernelKey &desc) {
   MS_ASSERT(op_parameter != nullptr);
   MS_ASSERT(desc.type == schema::PrimitiveType_Conv2dTransposeFusion);

@@ -238,12 +239,15 @@ kernel::LiteKernel *CpuDeConvFp16KernelCreator(const std::vector<lite::Tensor *> &
   if (conv_param->group_ == 1) {
     if ((conv_param->stride_h_ != 1 || conv_param->stride_w_ != 1) &&
         (conv_param->dilation_h_ == 1 && conv_param->dilation_w_ == 1)) {
-      kernel = new (std::nothrow) kernel::DeConvWinogradFp16CPUKernel(op_parameter, inputs, outputs, ctx);
+      kernel = new (std::nothrow) kernel::DeConvWinogradFp16CPUKernel(op_parameter, inputs, outputs,
+                                                                      static_cast<const lite::InnerContext *>(ctx));
     } else {
-      kernel = new (std::nothrow) kernel::DeConvolutionFp16CPUKernel(op_parameter, inputs, outputs, ctx);
+      kernel = new (std::nothrow)
+        kernel::DeConvolutionFp16CPUKernel(op_parameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
     }
   } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) {
-    kernel = new (std::nothrow) DeconvolutionDepthwiseFp16CPUKernel(op_parameter, inputs, outputs, ctx);
+    kernel = new (std::nothrow)
+      DeconvolutionDepthwiseFp16CPUKernel(op_parameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
   }

   if (kernel == nullptr) {
@@ -392,10 +392,12 @@ int DeConvWinogradFp16CPUKernel::Run() {
     nhwc_output_ = output_ptr + batch_index * deconv_param_->output_plane_ * conv_param_->output_channel_;

     ::memset(nc4hw4_output_, 0, deconv_param_->output_plane_ * deconv_param_->oc_div4_ * C4NUM * sizeof(float16_t));
-    ParallelLaunch(this->context_->thread_pool_, DeConvWgFp16Run, this, deconv_param_->thread_num_);
+    ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, DeConvWgFp16Run, this,
+                   deconv_param_->thread_num_);

     /*post bias activate and nhwc */
-    ParallelLaunch(this->context_->thread_pool_, DeConvWgPostFp16Run, this, thread_num_hw_);
+    ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, DeConvWgPostFp16Run, this,
+                   thread_num_hw_);
   }

   return RET_OK;

@@ -151,7 +151,8 @@ int GatherFp16CPUKernel::Run() {
       Float32ToFloat16(reinterpret_cast<float *>(input_tensor->data_c()), input_data_, input_tensor->ElementsNum());
     }
   }
-  ret = ParallelLaunch(this->context_->thread_pool_, GatherRunFp16, this, op_parameter_->thread_num_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, GatherRunFp16, this,
+                       op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Gather function error error_code[" << ret << "]";
   }
@@ -109,7 +109,8 @@ int InstanceNormFp16Run(void *cdata, int task_id) {
 int InstanceNormFp16CPUKernel::Run() {
   src_data_ = reinterpret_cast<float16_t *>(in_tensors_[0]->data_c());
   dst_data_ = reinterpret_cast<float16_t *>(out_tensors_[0]->data_c());
-  auto ret = ParallelLaunch(this->context_->thread_pool_, InstanceNormFp16Run, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, InstanceNormFp16Run,
+                            this, op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "InstanceNormFp16Run error error_code[" << ret << "]";
     return ret;

@@ -95,7 +95,8 @@ int LogSoftmaxLastAxisFp16Run(void *cdata, int task_id) {

 int LogSoftmaxFp16CPUKernel::Run() {
   if (in_plane_size_ == 1) {
-    auto ret = ParallelLaunch(this->context_->thread_pool_, LogSoftmaxLastAxisFp16Run, this, context_->thread_num_);
+    auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                              LogSoftmaxLastAxisFp16Run, this, context_->thread_num_);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "LogSoftmaxFp16CPUKernel ParallelLaunch failed, ret: " << ret;
     }
@@ -286,7 +286,8 @@ int MatmulBaseFP16CPUKernel::Run() {
     batch_b_ptr_ = b_pack_ptr_ + i * params_->deep_ * params_->col_align_;
     batch_c_ptr_ = c_ptr + i * params_->row_ * params_->col_;
   }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, MatmulBaseFP16Run, this, thread_count_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, MatmulBaseFP16Run,
+                            this, thread_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "MatmulBaseFloatRun failed";
     return ret;

@@ -89,7 +89,8 @@ int PadFp16CPUKernel::Run() {
         output_[i] = pad_param_->constant_value_;
       }
     }
-    ret = ParallelLaunch(this->context_->thread_pool_, PadImpl, this, op_parameter_->thread_num_);
+    ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, PadImpl, this,
+                         op_parameter_->thread_num_);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
     }

@@ -101,7 +102,8 @@ int PadFp16CPUKernel::Run() {
       return ret;
     }

-    ret = ParallelLaunch(this->context_->thread_pool_, MirrorPadImpl, this, context_->thread_num_);
+    ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, MirrorPadImpl, this,
+                         context_->thread_num_);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "Pad Reflect or Symmetric mode run error, error_code[" << ret << "]";
     }
@@ -90,7 +90,8 @@ int PoolingFp16CPUKernel::Run() {
   fp16_input_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
   fp16_output_ = reinterpret_cast<float16_t *>(output_tensor->data_c());

-  int error_code = ParallelLaunch(this->context_->thread_pool_, PoolingFp16Impl, this, thread_count_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                                  PoolingFp16Impl, this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]";
     return RET_ERROR;

@@ -87,7 +87,8 @@ int PowerFp16CPUKernel::Run() {
       return ret;
     }
   }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, PowerImplFp16, this, thread_count_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, PowerImplFp16, this,
+                            thread_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "PowerFp16CPUKernel error: " << ret;
     return RET_ERROR;
@@ -164,7 +164,8 @@ int QuantDTypeCastFp16CPUKernel::Run() {
     return RET_ERROR;
   }

-  auto ret = ParallelLaunch(this->context_->thread_pool_, QuantDTypeCastFP16Run, this, thread_n_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                            QuantDTypeCastFP16Run, this, thread_n_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
     return RET_ERROR;

@@ -93,7 +93,8 @@ int ReduceFp16CPUKernel::Run() {
     outer_size_ = outer_sizes_.at(i);
     inner_size_ = inner_sizes_.at(i);
     axis_size_ = axis_sizes_.at(i);
-    auto error_code = ParallelLaunch(this->context_->thread_pool_, ReduceFp16Impl, this, context_->thread_num_);
+    auto error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                                     ReduceFp16Impl, this, context_->thread_num_);
     if (error_code != RET_OK) {
       FreeTmpBuffer();
       MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]";

@@ -108,7 +109,8 @@ int ReduceFp16CPUKernel::Run() {
   outer_size_ = outer_sizes_.back();
   inner_size_ = inner_sizes_.back();
   axis_size_ = axis_sizes_.back();
-  auto error_code = ParallelLaunch(this->context_->thread_pool_, ReduceFp16Impl, this, context_->thread_num_);
+  auto error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                                   ReduceFp16Impl, this, context_->thread_num_);
   if (error_code != RET_OK) {
     FreeTmpBuffer();
     MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]";
@@ -115,7 +115,8 @@ int ScaleFp16CPUKernel::Run() {
     return ret;
   }

-  ret = ParallelLaunch(this->context_->thread_pool_, ScaleFp16Run, this, op_parameter_->thread_num_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ScaleFp16Run, this,
+                       op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
     FreeTmpBuffer();

@@ -127,12 +128,12 @@ int ScaleFp16CPUKernel::Run() {
 }

 int ScaleFp16CPUKernel::MallocAssignTmpBuffer() {
-  scale_ = ConvertInputFp32toFp16(in_tensors_.at(1), context_);
+  scale_ = ConvertInputFp32toFp16(in_tensors_.at(1), static_cast<const lite::InnerContext *>(this->context_));
   if (scale_ == nullptr) {
     return RET_ERROR;
   }
   if (in_tensors_.size() == 3) {
-    offset_ = ConvertInputFp32toFp16(in_tensors_.at(2), context_);
+    offset_ = ConvertInputFp32toFp16(in_tensors_.at(2), static_cast<const lite::InnerContext *>(this->context_));
     if (offset_ == nullptr) {
       return RET_ERROR;
     }

@@ -63,7 +63,8 @@ int SliceFp16CPUKernel::Run() {
     DoSliceNoParallel(input_data, out_tensors_.at(0)->data_c(), param_, lite::DataTypeSize(kNumberTypeFloat16));
     return RET_OK;
   }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, SliceFp16Launch, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SliceFp16Launch,
+                            this, op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "fp16 slice launch fail!ret: " << ret;
     return RET_ERROR;
@@ -95,7 +95,8 @@ int SoftmaxLastAxisFp16Run(void *cdata, int task_id) {

 int SoftmaxFp16CPUKernel::Run() {
   if (in_plane_size_ == 1) {
-    auto ret = ParallelLaunch(this->context_->thread_pool_, SoftmaxLastAxisFp16Run, this, context_->thread_num_);
+    auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                              SoftmaxLastAxisFp16Run, this, context_->thread_num_);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "SoftmaxFp16CPUKernel ParallelLaunch failed, ret: " << ret;
     }

@@ -40,14 +40,15 @@ void StackFp16CPUKernel::InitMallocFlags() {
 int StackFp16CPUKernel::MallocAssignBuffer() {
   buffers_.resize(in_tensors_.size(), nullptr);
   for (size_t i = 0; i < in_tensors_.size(); ++i) {
-    buffers_.at(i) = reinterpret_cast<char *>(ConvertInputFp32toFp16(in_tensors_.at(i), context_));
+    buffers_.at(i) = reinterpret_cast<char *>(
+      ConvertInputFp32toFp16(in_tensors_.at(i), static_cast<const lite::InnerContext *>(context_)));
     if (buffers_.at(i) == nullptr) {
       return RET_ERROR;
     }
   }

   out_buffer_ = nullptr;
-  out_buffer_ = MallocOutputFp16(out_tensors_.at(0), context_);
+  out_buffer_ = MallocOutputFp16(out_tensors_.at(0), static_cast<const lite::InnerContext *>(this->context_));
   if (out_buffer_ == nullptr) {
     return RET_ERROR;
   }

@@ -100,7 +101,8 @@ int StackFp16CPUKernel::Run() {
   }
   // run stack
   num_threads_ = MSMIN(UP_DIV(outer_size_, 64), this->context_->thread_num_);
-  ret = ParallelLaunch(this->context_->thread_pool_, StackRun, this, num_threads_);
+  ret =
+    ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, StackRun, this, num_threads_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "StackBaseCPUKernel Run error: error_code[" << ret << "]";
     return RET_ERROR;
@@ -79,7 +79,8 @@ int ActivationGradRunFp16(void *cdata, int task_id) {
 }

 int ActivationGradCPUKernelFp16::Run() {
-  int error_code = ParallelLaunch(this->context_->thread_pool_, ActivationGradRunFp16, this, thread_count_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                                  ActivationGradRunFp16, this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "Activation Grad function error error_code[" << error_code << "]";
     return RET_ERROR;

@@ -73,7 +73,8 @@ int ArithmeticSelfGradFp16Run(void *cdata, int task_id) {
 }

 int ArithmeticSelfGradFp16CPUKernel::Run() {
-  int error_code = ParallelLaunch(this->context_->thread_pool_, ArithmeticSelfGradFp16Run, this, thread_count_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                                  ArithmeticSelfGradFp16Run, this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "Activation Grad function error error_code[" << error_code << "]";
     return RET_ERROR;

@@ -101,7 +101,8 @@ int ActivationRun(void *cdata, int task_id) {
 }

 int ActivationCPUKernel::Run() {
-  int error_code = ParallelLaunch(this->context_->thread_pool_, ActivationRun, this, thread_count_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ActivationRun,
+                                  this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]";
     return RET_ERROR;
@@ -122,7 +122,8 @@ int AdderCPUKernel::Run() {
     return RET_ERROR;
   }

-  int error_code = ParallelLaunch(this->context_->thread_pool_, AdderImpl, this, thread_count_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, AdderImpl,
+                                  this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "adder error error_code[" << error_code << "]";
     FreeTmpBuffer();

@@ -89,7 +89,8 @@ int AddNCPUKernel::Run() {
   in1_addr_ = input0_data;
   in2_addr_ = input1_data;
   out_addr_ = output_data;
-  auto ret = ParallelLaunch(this->context_->thread_pool_, AddNLaunch, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, AddNLaunch, this,
+                            op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "addn launch fail!ret: " << ret;
     return RET_ERROR;

@@ -97,7 +98,8 @@ int AddNCPUKernel::Run() {
   for (size_t i = 2; i < in_tensors_.size(); ++i) {
     in1_addr_ = reinterpret_cast<float *>(in_tensors_[i]->MutableData());
     in2_addr_ = output_data;
-    ret = ParallelLaunch(this->context_->thread_pool_, AddNLaunch, this, op_parameter_->thread_num_);
+    ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, AddNLaunch, this,
+                         op_parameter_->thread_num_);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "addn launch fail!ret: " << ret << ", input index: " << i;
       return RET_ERROR;
@@ -418,7 +418,8 @@ int ArithmeticCPUKernel::Run() {
     input1_ptr_ = in_tensors_[1]->data_c();
   }
   output_ptr_ = out_tensors_[0]->data_c();
-  return ParallelLaunch(this->context_->thread_pool_, ArithmeticsRun, this, context_->thread_num_);
+  return ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ArithmeticsRun, this,
+                        context_->thread_num_);
 }

 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_MulFusion, LiteKernelCreator<ArithmeticCPUKernel>)

@@ -113,7 +113,8 @@ int ArithmeticSelfRun(void *cdata, int task_id) {
 }

 int ArithmeticSelfCPUKernel::Run() {
-  auto ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticSelfRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ArithmeticSelfRun,
+                            this, op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]";
   }

@@ -75,7 +75,8 @@ int BatchnormCPUKernel::InitConstTensor() {
 }

 int BatchnormCPUKernel::Run() {
-  auto ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, BatchNormRun, this,
+                            op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
   }
@@ -141,7 +141,8 @@ int CastCPUKernel::Run() {
   if (data_num_ == 0) {
     return RET_OK;
   }
-  return ParallelLaunch(this->context_->thread_pool_, CastRun, this, op_parameter_->thread_num_);
+  return ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, CastRun, this,
+                        op_parameter_->thread_num_);
 }

 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Cast, LiteKernelCreator<CastCPUKernel>)

@@ -69,7 +69,8 @@ int ConcatRun(void *cdata, int task_id) {
 }

 int ConcatCPUKernel::Run() {
-  int error_code = ParallelLaunch(this->context_->thread_pool_, ConcatRun, this, op_parameter_->thread_num_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConcatRun,
+                                  this, op_parameter_->thread_num_);
   return error_code;
 }

@@ -247,10 +247,12 @@ int Convolution1x1CPUKernel::Run() {
   }

   if (multi_thread_by_hw_) {
-    ParallelLaunch(this->context_->thread_pool_, Convolution1x1RunHw, this, thread_count_);
+    ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, Convolution1x1RunHw, this,
+                   thread_count_);
   } else {
     PackMatmulInput(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_);
-    ParallelLaunch(this->context_->thread_pool_, Convolution1x1Run, this, thread_count_);
+    ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, Convolution1x1Run, this,
+                   thread_count_);
   }
 }

@@ -138,16 +138,19 @@ kernel::LiteKernel *ConvolutionDelegateCPUKernel::CpuConvFp32KernelSelect() {
   kernel::LiteKernel *kernel = nullptr;
   auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter_);
   if (conv_param->kernel_h_ == 1 && conv_param->kernel_w_ == 1) {
-    kernel = new (std::nothrow)
-      kernel::Convolution1x1CPUKernel(op_parameter_, in_tensors_, out_tensors_, context_, origin_weight_, origin_bias_);
+    kernel = new (std::nothrow) kernel::Convolution1x1CPUKernel(op_parameter_, in_tensors_, out_tensors_,
+                                                                static_cast<const lite::InnerContext *>(this->context_),
+                                                                origin_weight_, origin_bias_);
   } else {
     int out_unit;
     if (CheckIfUseWinograd(&out_unit, conv_param)) {
       kernel = new (std::nothrow) kernel::ConvolutionWinogradCPUKernel(
-        op_parameter_, in_tensors_, out_tensors_, context_, out_unit, origin_weight_, origin_bias_);
+        op_parameter_, in_tensors_, out_tensors_, static_cast<const lite::InnerContext *>(this->context_), out_unit,
+        origin_weight_, origin_bias_);
     } else {
-      kernel = new (std::nothrow)
-        kernel::ConvolutionCPUKernel(op_parameter_, in_tensors_, out_tensors_, context_, origin_weight_, origin_bias_);
+      kernel = new (std::nothrow) kernel::ConvolutionCPUKernel(op_parameter_, in_tensors_, out_tensors_,
+                                                               static_cast<const lite::InnerContext *>(this->context_),
+                                                               origin_weight_, origin_bias_);
     }
   }

@@ -214,7 +217,7 @@ kernel::LiteKernel *CpuGroupConvFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
 /* creator func */
 kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                              const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
-                                             const InnerContext *ctx, const kernel::KernelKey &desc) {
+                                             const lite::Context *ctx, const kernel::KernelKey &desc) {
   MS_ASSERT(op_parameter != nullptr);
   MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DFusion);
   MS_ASSERT(desc.data_type == kNumberTypeFloat32);

@@ -222,11 +225,12 @@ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::Tensor *> &
   auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter);
   kernel::LiteKernel *kernel = nullptr;
   if (conv_param->group_ == 1) {
-    kernel = new (std::nothrow) kernel::ConvolutionDelegateCPUKernel(op_parameter, inputs, outputs, ctx);
+    kernel = new (std::nothrow)
+      kernel::ConvolutionDelegateCPUKernel(op_parameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
   } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) {
-    kernel = CpuConvDwFp32KernelCreator(inputs, outputs, op_parameter, ctx);
+    kernel = CpuConvDwFp32KernelCreator(inputs, outputs, op_parameter, static_cast<const lite::InnerContext *>(ctx));
   } else {
-    kernel = CpuGroupConvFp32KernelCreator(inputs, outputs, op_parameter, ctx);
+    kernel = CpuGroupConvFp32KernelCreator(inputs, outputs, op_parameter, static_cast<const lite::InnerContext *>(ctx));
   }

   if (kernel == nullptr) {
@@ -126,7 +126,8 @@ int ConvolutionDepthwise3x3CPUKernel::Run() {
   auto output_tensor = out_tensors_.at(kOutputIndex);
   output_ptr_ = reinterpret_cast<float *>(output_tensor->data_c());

-  auto ret = ParallelLaunch(this->context_->thread_pool_, ConvDw3x3Run, this, conv_param_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvDw3x3Run, this,
+                            conv_param_->thread_num_);
   ctx_->allocator->Free(buffer_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvDw3x3Run error: error_code[" << ret << "]";

@@ -107,7 +107,8 @@ int ConvolutionDepthwiseCPUKernel::Run() {
   auto output_tensor = out_tensors_.at(kOutputIndex);
   output_ptr_ = reinterpret_cast<float *>(output_tensor->MutableData());

-  auto ret = ParallelLaunch(this->context_->thread_pool_, ConvDwRun, this, conv_param_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvDwRun, this,
+                            conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvDwRun error: error_code[" << ret << "]";
     return RET_ERROR;

@@ -194,7 +194,8 @@ int ConvolutionDepthwiseIndirectCPUKernel::Run() {

   ConvDwInitIndirection(indirect_buffer_, packed_input_, zero_ptr_, conv_param_, step_h, step_w);

-  auto ret = ParallelLaunch(this->context_->thread_pool_, ConvDwIndirectRun, this, conv_param_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvDwIndirectRun,
+                            this, conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvDwIndirectRun error: error_code[" << ret << "]";
     return RET_ERROR;
@@ -163,7 +163,8 @@ int ConvolutionDepthwiseSWCPUKernel::Run() {
     packed_output_ = output_ptr;
   }

-  ret = ParallelLaunch(this->context_->thread_pool_, ConvDwSWRun, this, conv_param_->thread_num_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvDwSWRun, this,
+                       conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvDwSWRun error: error_code[" << ret << "]";
   }

@@ -152,7 +152,8 @@ int ConvolutionCPUKernel::Run() {
     PackWeight();
   }

-  ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionImpl, this, thread_count_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvolutionImpl, this,
+                       thread_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "conv error error_code[" << ret << "]";
   }

@@ -219,7 +219,8 @@ int ConvolutionWinogradCPUKernel::Run() {
     InitWeightBias();
   }

-  ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionWinogradImpl, this, thread_count_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ConvolutionWinogradImpl,
+                       this, thread_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "conv winograd error error_code[" << ret << "]";
   }
@@ -151,7 +151,8 @@ int CropAndResizeCPUKernel::Run() {
     return ret;
   }

-  int error_code = ParallelLaunch(this->context_->thread_pool_, CropAndResizeImpl, this, context_->thread_num_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                                  CropAndResizeImpl, this, context_->thread_num_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "CropAndResize run error, error_code[" << error_code << "]";
     FreeTmpBuffer();

@@ -62,7 +62,8 @@ int CropCPUKernel::Run() {
     return RET_OK;
   }

-  auto ret = ParallelLaunch(this->context_->thread_pool_, CropLaunch, this, crop_para_->thread_count_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, CropLaunch, this,
+                            crop_para_->thread_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Crop launch fail!ret: " << ret;
     return RET_ERROR;

@@ -168,7 +168,8 @@ int DeconvolutionDepthwiseCPUKernel::Run() {
     packed_output_ = output_addr;
   }

-  ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwRun, this, conv_param_->thread_num_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, DeconvDwRun, this,
+                       conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "DeconvDwRun error: error_code[" << ret << "]";
   }
@@ -222,7 +222,8 @@ int DeConvolutionCPUKernel::Run() {
     RowMajor2Col12Major(input_ptr_, pack_input_, matmul_param_->row_, matmul_param_->deep_);
 #endif

-    error_code = ParallelLaunch(this->context_->thread_pool_, DeConvFp32Run, this, thread_count_);
+    error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, DeConvFp32Run,
+                                this, thread_count_);
     if (error_code != RET_OK) {
       MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]";
       FreeRunBuf();

@@ -236,7 +237,7 @@ int DeConvolutionCPUKernel::Run() {

 kernel::LiteKernel *CpuDeConvFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
-                                               const lite::InnerContext *ctx, const kernel::KernelKey &desc) {
+                                               const lite::Context *ctx, const kernel::KernelKey &desc) {
   MS_ASSERT(op_parameter != nullptr);
   MS_ASSERT(desc.type == schema::PrimitiveType_Conv2dTransposeFusion);

@@ -245,12 +246,15 @@ kernel::LiteKernel *CpuDeConvFp32KernelCreator(const std::vector<lite::Tensor *> &
   if (conv_param->group_ == 1) {
     if ((conv_param->stride_h_ != 1 || conv_param->stride_w_ != 1) &&
         (conv_param->dilation_w_ == 1 && conv_param->dilation_h_ == 1)) {
-      kernel = new (std::nothrow) kernel::DeConvolutionWinogradCPUKernel(op_parameter, inputs, outputs, ctx);
+      kernel = new (std::nothrow) kernel::DeConvolutionWinogradCPUKernel(op_parameter, inputs, outputs,
+                                                                         static_cast<const lite::InnerContext *>(ctx));
     } else {
-      kernel = new (std::nothrow) kernel::DeConvolutionCPUKernel(op_parameter, inputs, outputs, ctx);
+      kernel = new (std::nothrow)
+        kernel::DeConvolutionCPUKernel(op_parameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
     }
   } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) {
-    kernel = new (std::nothrow) kernel::DeconvolutionDepthwiseCPUKernel(op_parameter, inputs, outputs, ctx);
+    kernel = new (std::nothrow) kernel::DeconvolutionDepthwiseCPUKernel(op_parameter, inputs, outputs,
+                                                                        static_cast<const lite::InnerContext *>(ctx));
   } else {
     MS_LOG(ERROR) << "deconv do not support group deconv!";
     kernel = nullptr;
@@ -411,10 +411,12 @@ int DeConvolutionWinogradCPUKernel::Run() {
     nhwc_output_ = src_out + batch_index * deconv_param_->output_plane_ * conv_param_->output_channel_;

     ::memset(nc4hw4_output_, 0, deconv_param_->output_plane_ * deconv_param_->oc_div4_ * C4NUM * sizeof(float));
-    ParallelLaunch(this->context_->thread_pool_, DeConvWgFp32Run, this, deconv_param_->thread_num_);
+    ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, DeConvWgFp32Run, this,
+                   deconv_param_->thread_num_);

     /*post bias activate and nhwc */
-    ParallelLaunch(this->context_->thread_pool_, DeConvWgPostFp32Run, this, thread_num_hw_);
+    ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, DeConvWgPostFp32Run, this,
+                   thread_num_hw_);
   }

   FreeRunBuf();

@@ -55,7 +55,8 @@ int EluRun(void *cdata, int task_id) {
 }

 int EluCPUKernel::Run() {
-  auto ret = ParallelLaunch(this->context_->thread_pool_, EluRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, EluRun, this,
+                            op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Elu error: error_code[" << ret << "]";
     return RET_ERROR;

@@ -87,7 +87,8 @@ int EmbeddingLookupCPUKernel::Run() {
     memcpy(input_addr_ + dest_loc, input_t, sizeof(float) * in_tensors_.at(i)->ElementsNum());
     dest_loc += in_tensors_.at(i)->ElementsNum();
   }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, EmbeddingLookupRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, EmbeddingLookupRun,
+                            this, op_parameter_->thread_num_);
   FreeRunBuff();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "EmbeddingLookup error: error_code[" << ret << "]";
@@ -73,7 +73,8 @@ int ExpCPUKernel::Run() {
   output_addr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
   exp_parameter_->element_num_ = in_tensors_.front()->ElementsNum();

-  auto ret = ParallelLaunch(this->context_->thread_pool_, ExpRun, this, exp_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ExpRun, this,
+                            exp_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Exp error: error_code[" << ret << "]";
     return RET_ERROR;
@@ -91,7 +91,8 @@ int FillCPUKernel::Run() {
     MS_LOG(ERROR) << "unsupported fill data type " << fill_input->data_type();
     return RET_ERROR;
   }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, FillRun, this, thread_sz_count_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, FillRun, this,
+                            thread_sz_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "FillRun error error_code[" << ret << "]";
     return ret;
@@ -93,7 +93,8 @@ int FusedBatchnormCPUKernel::Run() {

     trained_ = true;  // trained at least once
   }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, BatchNormRun, this,
+                            op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
   }
@@ -128,7 +128,8 @@ int GatherNdCPUKernel::Run() {
   in_ptr_ = reinterpret_cast<float *>(in_tensors_.front()->MutableData());
   out_ptr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
   InitOffset();
-  auto ret = ParallelLaunch(this->context_->thread_pool_, GatherNdRun, this, thread_sz_count_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, GatherNdRun, this,
+                            thread_sz_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "gatherNd error error_code[" << ret << "]";
     return ret;
@@ -92,7 +92,8 @@ int GatherCPUKernel::Run() {
     return ret;
   }

-  ret = ParallelLaunch(this->context_->thread_pool_, GatherRun, this, op_parameter_->thread_num_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, GatherRun, this,
+                       op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Gather function error error_code[" << ret << "]";
   }
@@ -66,7 +66,8 @@ int InstanceNormCPUKernel::Run() {
   gamma_data_ = reinterpret_cast<float *>(in_tensors_.at(1)->data_c());
   beta_data_ = reinterpret_cast<float *>(in_tensors_.at(2)->data_c());
   dst_data_ = reinterpret_cast<float *>(out_tensors_.at(0)->data_c());
-  auto ret = ParallelLaunch(this->context_->thread_pool_, InstanceNormRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, InstanceNormRun,
+                            this, op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "InstanceNormRun error error_code[" << ret << "]";
     return ret;
@@ -146,7 +146,8 @@ int L2NormCPUKernel::Run() {
   output_ptr_ = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData());
   if (l2_norm_param_->axis_num_ == 0 || l2_norm_param_->axis_num_ == input_shape.size()) {
     // all axis
-    auto ret = ParallelLaunch(this->context_->thread_pool_, SquareSumRun, this, context_->thread_num_);
+    auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SquareSumRun, this,
+                              context_->thread_num_);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]";
       return RET_ERROR;
@@ -156,13 +157,15 @@ int L2NormCPUKernel::Run() {
       sum += tmp_sum_[i];
     }
     sqrt_sum_ = sqrt(sum > l2_norm_param_->epsilon_ ? sum : l2_norm_param_->epsilon_);
-    ret = ParallelLaunch(this->context_->thread_pool_, L2NormRun, this, context_->thread_num_);
+    ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, L2NormRun, this,
+                         context_->thread_num_);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]";
       return RET_ERROR;
     }
   } else if (l2_norm_param_->axis_num_ == 1 && l2_norm_param_->axis_[0] == static_cast<int>(input_shape.size()) - 1) {
-    auto ret = ParallelLaunch(this->context_->thread_pool_, L2NormTrailingAxisRun, this, context_->thread_num_);
+    auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                              L2NormTrailingAxisRun, this, context_->thread_num_);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "L2Norm error: error_code[" << ret << "]";
       return RET_ERROR;
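Note in the two L2Norm hunks that thread_num_ is still read straight off context_ with no cast, while thread_pool_ needs the downcast: the thread count belongs to the public Context, the pool is internal state on InnerContext. The patch uses an unchecked static_cast throughout; a checked alternative exists at a cost, as this stand-alone sketch with illustrative types shows:

// Illustrative types only: contrasting the unchecked downcast used in the
// patch with a checked dynamic_cast.
#include <cassert>

struct Context { virtual ~Context() = default; };
struct InnerContext : Context { int pool_handle_ = 42; };

int main() {
  Context *ctx = new InnerContext;
  auto *fast = static_cast<InnerContext *>(ctx);   // free, relies on the caller's guarantee
  auto *safe = dynamic_cast<InnerContext *>(ctx);  // RTTI check, nullptr on mismatch
  assert(fast == safe && safe->pool_handle_ == 42);
  delete ctx;
  return 0;
}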
@@ -92,7 +92,8 @@ int LayerNormCPUKernel::Run() {
     mean_data_ = reinterpret_cast<float *>(context_->allocator->Malloc(param_->norm_outer_size_ * sizeof(float)));
     var_data_ = reinterpret_cast<float *>(context_->allocator->Malloc(param_->norm_outer_size_ * sizeof(float)));
   }
-  ret = ParallelLaunch(this->context_->thread_pool_, LayerNormRun, this, op_parameter_->thread_num_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, LayerNormRun, this,
+                       op_parameter_->thread_num_);
   if (out_tensors_.size() != 3) {
     context_->allocator->Free(mean_data_);
     context_->allocator->Free(var_data_);
@@ -74,7 +74,8 @@ int LocalResponseNormRun(void *cdata, int task_id) {
 }

 int LocalResponseNormCPUKernel::Run() {
-  int error_code = ParallelLaunch(this->context_->thread_pool_, LocalResponseNormRun, this, thread_count_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                                  LocalResponseNormRun, this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "LocalResponseNorm function error error_code[" << error_code << "]";
     return RET_ERROR;
@@ -96,7 +96,8 @@ int LogSoftmaxLastAxisRun(void *cdata, int task_id) {
 int LogSoftmaxCPUKernel::Run() {
   int ret = RET_OK;
   if (in_plane_size_ == 1) {
-    ret = ParallelLaunch(this->context_->thread_pool_, LogSoftmaxLastAxisRun, this, context_->thread_num_);
+    ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, LogSoftmaxLastAxisRun,
+                         this, context_->thread_num_);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "LogSoftmaxCPUKernel ParallelLaunch failed, ret: " << ret;
     }
@@ -61,7 +61,8 @@ int LshProjectionCPUKernel::Run() {
   if (ret != RET_OK) {
     return ret;
   }
-  ret = ParallelLaunch(this->context_->thread_pool_, LshProjectionRun, this, op_parameter_->thread_num_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, LshProjectionRun, this,
+                       op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "LshProjection kernel parallel launch failed";
   }
@@ -332,7 +332,8 @@ int MatmulFp32BaseCPUKernel::Run() {
     batch_b_ptr_ = b_pack_ptr_ + i * params_->deep_ * params_->col_align_;
     batch_c_ptr_ = c_ptr + i * params_->row_ * params_->col_;
   }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, MatmulBaseFloatRun, this, thread_count_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, MatmulBaseFloatRun,
+                            this, thread_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "MatmulBaseFloatRun failed";
     return ret;
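Every ...Run callback handed to ParallelLaunch in these hunks follows the same contract: int Fn(void *cdata, int task_id), with the kernel slicing its own work by task id. A self-contained sketch of that idiom, using a plain thread-per-task stand-in for the real runtime pool (the pool itself is hypothetical here):

// Sketch of the ParallelLaunch callback contract; the launcher below is a
// simplified stand-in, not the MindSpore Lite thread pool.
#include <cstdio>
#include <thread>
#include <vector>

using RunFn = int (*)(void *cdata, int task_id);

// Run `task_num` tasks, one thread per task, then join them all.
int ParallelLaunchSketch(RunFn fn, void *cdata, int task_num) {
  std::vector<std::thread> workers;
  for (int t = 0; t < task_num; ++t) workers.emplace_back([=] { fn(cdata, t); });
  for (auto &w : workers) w.join();
  return 0;
}

struct MatmulLike {
  int rows = 8;
  int threads = 4;
};

int MatmulRun(void *cdata, int task_id) {
  auto *self = static_cast<MatmulLike *>(cdata);
  int stride = (self->rows + self->threads - 1) / self->threads;  // like UP_DIV(rows, threads)
  int begin = task_id * stride;
  int end = begin + stride < self->rows ? begin + stride : self->rows;
  std::printf("task %d handles rows [%d, %d)\n", task_id, begin, end);
  return 0;
}

int main() {
  MatmulLike m;
  return ParallelLaunchSketch(MatmulRun, &m, m.threads);
}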
@@ -181,7 +181,8 @@ int OneHotCPUKernel::GetParams() {
 }

 int OneHotCPUKernel::Run() {
-  int error_code = ParallelLaunch(this->context_->thread_pool_, RunOneHot, this, context_->thread_num_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, RunOneHot,
+                                  this, context_->thread_num_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "OneHot function error error_code[" << error_code << "]";
     return RET_ERROR;
@@ -395,7 +395,8 @@ int PadCPUKernel::Run() {
       output_data[i] = pad_param_->constant_value_;
     }
   }
-  error_code = ParallelLaunch(this->context_->thread_pool_, PadImpl, this, context_->thread_num_);
+  error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, PadImpl, this,
+                              context_->thread_num_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "Pad run error, error_code[" << error_code << "]";
     return RET_ERROR;
@@ -408,7 +409,8 @@ int PadCPUKernel::Run() {
       return error_code;
     }

-    error_code = ParallelLaunch(this->context_->thread_pool_, MirrorPadImpl, this, context_->thread_num_);
+    error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, MirrorPadImpl,
+                                this, context_->thread_num_);
     if (error_code != RET_OK) {
       MS_LOG(ERROR) << "Pad Reflect or Symmetric mode run error, error_code[" << error_code << "]";
       return RET_ERROR;
@@ -85,7 +85,8 @@ int PoolingImpl(void *cdata, int task_id) {
 }

 int PoolingCPUKernel::Run() {
-  int error_code = ParallelLaunch(this->context_->thread_pool_, PoolingImpl, this, thread_count_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, PoolingImpl,
+                                  this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]";
     return RET_ERROR;
@@ -41,7 +41,8 @@ int PowerImpl(void *cdata, int task_id) {
 }

 int PowerCPUKernel::Run() {
-  auto ret = ParallelLaunch(this->context_->thread_pool_, PowerImpl, this, thread_count_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, PowerImpl, this,
+                            thread_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "PowerCPUKernel error: " << ret;
     return RET_ERROR;
@@ -93,7 +93,8 @@ int PReluCPUKernel::Run() {
     auto negative_slope_tensor = in_tensors_.at(1);
     prelu_param_->slope_ = reinterpret_cast<float *>(negative_slope_tensor->data_c());

-  auto ret = ParallelLaunch(this->context_->thread_pool_, PReluRun, this, prelu_param_->op_parameter_.thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, PReluRun, this,
+                            prelu_param_->op_parameter_.thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "PRelu Run error: error_code[" << ret << "]";
     return RET_ERROR;
@@ -117,7 +117,8 @@ int ReduceCPUKernel::Run() {
     outer_size_ = outer_sizes_.at(i);
     inner_size_ = inner_sizes_.at(i);
     axis_size_ = axis_sizes_.at(i);
-    auto error_code = ParallelLaunch(this->context_->thread_pool_, ReduceImpl, this, context_->thread_num_);
+    auto error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ReduceImpl,
+                                     this, context_->thread_num_);
     if (error_code != RET_OK) {
       MS_LOG(ERROR) << "Reduce run error, error_code[" << error_code << "]";
       FreeTmpBuffer();
@@ -205,7 +205,8 @@ int ResizeCPUKernel::RunImpl(int task_id) {
 }

 int ResizeCPUKernel::Run() {
-  int error_code = ParallelLaunch(this->context_->thread_pool_, ResizeImpl, this, context_->thread_num_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ResizeImpl,
+                                  this, context_->thread_num_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "Resize run error, error_code[" << error_code << "]";
     FreeTmpBuffer();
@@ -129,7 +129,8 @@ int ReverseCPUKernel::DoReverse(int task_id) {
 int ReverseCPUKernel::Run() {
   in_ptr_ = reinterpret_cast<float *>(in_tensors_[0]->MutableData());
   out_ptr_ = reinterpret_cast<float *>(out_tensors_[0]->MutableData());
-  auto ret = ParallelLaunch(this->context_->thread_pool_, ReverseRun, this, thread_sz_count_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ReverseRun, this,
+                            thread_sz_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Reverse run error error_code[" << ret << "]";
     return ret;
@@ -101,7 +101,8 @@ int ROIPoolingCPUKernel::Run() {
   in_ptr_ = reinterpret_cast<float *>(in_tensors_.front()->MutableData());
   out_ptr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
   roi_ptr_ = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
-  auto ret = ParallelLaunch(this->context_->thread_pool_, ROIPoolingRun, this, param_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ROIPoolingRun, this,
+                            param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ROIPooling error: error_code[" << ret << "]";
     return ret;
@@ -188,7 +188,8 @@ int ScaleCPUKernel::Run() {
   auto out_tensor = out_tensors_.front();
   output_ptr_ = reinterpret_cast<float *>(out_tensor->MutableData());

-  auto ret = ParallelLaunch(this->context_->thread_pool_, ScaleRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ScaleRun, this,
+                            op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
     return RET_ERROR;
@@ -149,7 +149,8 @@ int ScatterNDRun(void *cdata, int task_id) {
 }

 int ScatterNDCPUKernel::Run() {
-  auto ret = ParallelLaunch(this->context_->thread_pool_, ScatterNDRun, this, thread_n_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, ScatterNDRun, this,
+                            thread_n_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ScatterND error error_code[" << ret << "]";
     return RET_ERROR;
@@ -96,7 +96,8 @@ int SoftmaxLastAxisRun(void *cdata, int task_id) {
 int SoftmaxCPUKernel::Run() {
   int ret = RET_OK;
   if (in_plane_size_ == 1) {
-    ret = ParallelLaunch(this->context_->thread_pool_, SoftmaxLastAxisRun, this, context_->thread_num_);
+    ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SoftmaxLastAxisRun,
+                         this, context_->thread_num_);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "SoftmaxCPUKernel ParallelLaunch failed, ret: " << ret;
     }
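The in_plane_size_ == 1 guard in the Softmax and LogSoftmax hunks appears to select the case where the softmax axis is the innermost, contiguous one, so each task can sweep whole rows independently. A plain sketch of that last-axis computation (illustrative only, not the library's vectorized implementation):

// Numerically stable softmax over the last axis; in the real kernel the row
// loop is split across task_ids by ParallelLaunch.
#include <cmath>
#include <cstdio>
#include <vector>

void SoftmaxLastAxis(const float *in, float *out, int rows, int cols) {
  for (int r = 0; r < rows; ++r) {
    const float *x = in + r * cols;
    float *y = out + r * cols;
    float max_v = x[0];
    for (int c = 1; c < cols; ++c) max_v = std::fmax(max_v, x[c]);
    float sum = 0.0f;
    for (int c = 0; c < cols; ++c) sum += (y[c] = std::exp(x[c] - max_v));
    for (int c = 0; c < cols; ++c) y[c] /= sum;
  }
}

int main() {
  std::vector<float> in{1.0f, 2.0f, 3.0f, 1.0f, 1.0f, 1.0f}, out(6);
  SoftmaxLastAxis(in.data(), out.data(), 2, 3);
  std::printf("%.3f %.3f %.3f\n", out[0], out[1], out[2]);
  return 0;
}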
@@ -102,7 +102,8 @@ int SpaceToBatchCPUKernel::Run() {
     }
   }

-  ParallelLaunch(this->context_->thread_pool_, SpaceToBatchFp32Run, this, op_parameter_->thread_num_);
+  ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SpaceToBatchFp32Run, this,
+                 op_parameter_->thread_num_);

   return RET_OK;
 }
@@ -93,7 +93,8 @@ int SpaceToDepthCPUKernel::Run() {
   input_ptr_ = reinterpret_cast<float *>(in_tensors_.at(0)->data_c());
   output_ptr_ = reinterpret_cast<float *>(out_tensors_.at(0)->data_c());
   if (in_tensors_.at(0)->format() == schema::Format::Format_NHWC) {
-    auto ret = ParallelLaunch(this->context_->thread_pool_, SpaceToDepthRun, this, thread_h_num_);
+    auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SpaceToDepthRun,
+                              this, thread_h_num_);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "SpaceToDepth error error_code[" << ret << "]";
      return ret;
@@ -175,7 +175,8 @@ int SparseToDenseCPUKernel::Run() {
   }
   output_data = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
   count_unit_ = thread_count_ > 1 ? UP_DIV(index_num, thread_count_) : index_num;
-  ret = ParallelLaunch(this->context_->thread_pool_, SparseToDenseRun, this, s2d_param->thread_num_);
+  ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, SparseToDenseRun, this,
+                       s2d_param->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "SparseToDenseRun error: error_code[" << ret << "]";
     return RET_ERROR;
@@ -159,7 +159,8 @@ int TransposeCPUKernel::Run() {
   thread_count_ = op_parameter_->thread_num_;
   GetNHNCTransposeFunc(in_tensor, out_tensor, param_);
   if (NHNCTransposeFunc_ != nullptr) {
-    auto ret = ParallelLaunch(this->context_->thread_pool_, TransposeImpl, this, thread_count_);
+    auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, TransposeImpl,
+                              this, thread_count_);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "NHNCTransposeFunc_ is error!";
     }
@@ -187,7 +188,8 @@ int TransposeCPUKernel::Run() {
   }
   int ret;
   if (dims_ > MAX_TRANSPOSE_DIM_SIZE) {
-    ret = ParallelLaunch(this->context_->thread_pool_, TransposeImpl, this, thread_count_);
+    ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, TransposeImpl, this,
+                         thread_count_);
   } else {
     ret = DoTransposeFp32(in_data_, out_data_, out_shape_, param_);
   }
@@ -133,7 +133,8 @@ int WhereCPUKernel::RunWithTripleInputs() {
     MS_LOG(ERROR) << "Error, inputs' length are zero !!!";
     return RET_ERROR;
   }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, WhereRun, this, where_param_->thread_num_);
+  auto ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, WhereRun, this,
+                            where_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "WhereDwRun error: error_code[" << ret << "]";
     return RET_ERROR;
@@ -98,7 +98,8 @@ int ActivationGradRun(void *cdata, int task_id) {
 }

 int ActivationGradCPUKernel::Run() {
-  int error_code = ParallelLaunch(this->context_->thread_pool_, ActivationGradRun, this, thread_count_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                                  ActivationGradRun, this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "Activation Grad function error error_code[" << error_code << "]";
     return RET_ERROR;
@@ -102,7 +102,8 @@ int AdamRun(void *cdata, int task_id) {
 }

 int AdamCPUKernel::Run() {
-  int error_code = ParallelLaunch(this->context_->thread_pool_, AdamRun, this, thread_count_);
+  int error_code =
+    ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, AdamRun, this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "Adam function error error_code[" << error_code << "]";
     return RET_ERROR;
@@ -145,9 +146,10 @@ int AdamCPUKernel::OptimizerStep() {

 kernel::LiteKernel *CpuAdamFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                              const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
-                                             const lite::InnerContext *ctx, const kernel::KernelKey &desc) {
+                                             const lite::Context *ctx, const kernel::KernelKey &desc) {
   MS_ASSERT(desc.type == schema::PrimitiveType_Adam);
-  auto *kernel = new (std::nothrow) AdamCPUKernel(opParameter, inputs, outputs, ctx);
+  auto *kernel =
+    new (std::nothrow) AdamCPUKernel(opParameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new AdamCPUKernel fail!";
     free(opParameter);
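The creator hunks change only the type of the ctx parameter, because creators must all match one common function-pointer type to be stored in the kernel registry; once that type takes the base lite::Context, every registered creator has to follow. A sketch of the mechanism, where the registry, key, and all types are hypothetical simplifications:

// Stand-in types; only the shape of the creator signature mirrors the patch.
#include <map>
#include <vector>

namespace lite {
struct Context { virtual ~Context() = default; };
struct InnerContext : Context {};
struct Tensor {};
}  // namespace lite

struct OpParameter {};
struct KernelKey {
  int type = 0;
  bool operator<(const KernelKey &other) const { return type < other.type; }
};
struct LiteKernel {};

// All creators share this pointer type, taking the base-class Context.
using KernelCreatorSketch = LiteKernel *(*)(const std::vector<lite::Tensor *> &, const std::vector<lite::Tensor *> &,
                                            OpParameter *, const lite::Context *, const KernelKey &);

std::map<KernelKey, KernelCreatorSketch> g_registry;

LiteKernel *DummyCreator(const std::vector<lite::Tensor *> &, const std::vector<lite::Tensor *> &, OpParameter *,
                         const lite::Context *ctx, const KernelKey &) {
  (void)static_cast<const lite::InnerContext *>(ctx);  // derived pointer recovered inside, as in the patch
  return new LiteKernel;
}

int main() {
  lite::InnerContext inner;
  KernelKey key;
  key.type = 1;
  g_registry[key] = DummyCreator;  // normally done by a registration macro
  LiteKernel *k = g_registry[key]({}, {}, nullptr, &inner, key);
  delete k;
  return 0;
}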
@@ -82,7 +82,8 @@ int ApplyMomentumRun(void *cdata, int task_id) {
 }

 int ApplyMomentumCPUKernel::Run() {
-  int error_code = ParallelLaunch(this->context_->thread_pool_, ApplyMomentumRun, this, thread_count_);
+  int error_code = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_,
+                                  ApplyMomentumRun, this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "Apply Momentum function error error_code[" << error_code << "]";
     return RET_ERROR;
@@ -119,10 +120,11 @@ int ApplyMomentumCPUKernel::OptimizerStep() {

 kernel::LiteKernel *CpuApplyMomentumFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                       const std::vector<lite::Tensor *> &outputs,
-                                                      OpParameter *opParameter, const lite::InnerContext *ctx,
+                                                      OpParameter *opParameter, const lite::Context *ctx,
                                                       const kernel::KernelKey &desc) {
   MS_ASSERT(desc.type == schema::PrimitiveType_ApplyMomentum);
-  auto *kernel = new (std::nothrow) ApplyMomentumCPUKernel(opParameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow)
+    ApplyMomentumCPUKernel(opParameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new ApplyMomentumCPUKernel fail!";
     free(opParameter);