!15886 [MS][LITE] LiteKernel Interface Rectification

From: @gongdaguo
Reviewed-by: 
Signed-off-by:
mindspore-ci-bot 2021-05-10 14:56:30 +08:00 committed by Gitee
commit 98a92db2cb
415 changed files with 2192 additions and 1737 deletions

View File

@ -21,7 +21,7 @@ void SetOutputDtypeFormat(const TensorC *input0, const TensorC *input1, TensorC
output->format_ = input0->format_;
output->data_type_ = input0->data_type_;
// when input0 is const, it is quantized before the quant transform op is inserted, so use input1's data type instead
if (input0->data_ != NULL ||
if (((input0->data_ != NULL) && (input1->data_type_ != kTypeUnknown)) ||
((input0->data_type_ == kNumberTypeInt8) && (input1->data_type_ == kNumberTypeFloat32))) {
output->data_type_ = input1->data_type_;
}

View File

@ -135,7 +135,6 @@ set(LITE_SRC
${LITE_DIR}/src/common/tensor_util.cc
${LITE_DIR}/src/runtime/infer_manager.cc
${LITE_DIR}/src/kernel_interface_registry.cc
${LITE_DIR}/src/kernel_registry.cc
${LITE_DIR}/src/lite_model.cc
${LITE_DIR}/src/tensorlist.cc
${LITE_DIR}/src/tensor.cc

View File

@ -209,7 +209,7 @@ OpParameter *CoderSession::GenParameterAndInfer(const Model::Node *node, const s
MS_CHECK_PTR_RET_NULL(parame_gen);
auto parameter = parame_gen(primitive);
MS_CHECK_PTR_RET_NULL(parameter);
auto ret = KernelInferShape(inputs, outputs, parameter);
auto ret = KernelInferShape(inputs, *outputs, parameter);
if (ret == RET_INFER_INVALID) {
MS_LOG(INFO) << "InferShape shouldn't be done before runtime, name: " << node->name_
<< ", type: " << PrimitiveTypeName(GetPrimitiveType(primitive)) << "flag set to false.";

View File

@ -66,6 +66,7 @@ set(LITE_SRC
${CMAKE_CURRENT_SOURCE_DIR}/register_kernel.cc
${CMAKE_CURRENT_SOURCE_DIR}/kernel_interface.cc
${CMAKE_CURRENT_SOURCE_DIR}/kernel_interface_registry.cc
${CMAKE_CURRENT_SOURCE_DIR}/inner_kernel.cc
${CMAKE_CURRENT_SOURCE_DIR}/lite_kernel.cc
${CMAKE_CURRENT_SOURCE_DIR}/lite_kernel_util.cc
${CMAKE_CURRENT_SOURCE_DIR}/sub_graph_kernel.cc

View File

@ -150,17 +150,17 @@ int TensorListC2TensorList(TensorListC *src, TensorList *dst) {
return RET_OK;
}
int GenerateMergeSwitchOutTensorC(const std::vector<lite::Tensor *> &inputs, std::vector<lite::Tensor *> *outputs,
int GenerateMergeSwitchOutTensorC(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
std::vector<TensorC *> *out_tensor_c) {
int ret = RET_OK;
for (size_t i = 0; i < outputs->size(); i++) {
for (size_t i = 0; i < outputs.size(); i++) {
out_tensor_c->push_back(nullptr);
}
return ret;
}
int GenerateOutTensorC(const OpParameter *const parameter, const std::vector<lite::Tensor *> &inputs,
std::vector<lite::Tensor *> *outputs, std::vector<TensorC *> *out_tensor_c) {
const std::vector<lite::Tensor *> &outputs, std::vector<TensorC *> *out_tensor_c) {
int ret = RET_OK;
if (parameter->type_ == mindspore::schema::PrimitiveType_TensorListFromTensor ||
parameter->type_ == mindspore::schema::PrimitiveType_TensorListReserve ||
@ -176,13 +176,13 @@ int GenerateOutTensorC(const OpParameter *const parameter, const std::vector<lit
parameter->type_ == mindspore::schema::PrimitiveType_Switch) {
ret = GenerateMergeSwitchOutTensorC(inputs, outputs, out_tensor_c);
} else {
ret = OutputTensor2TensorC(*outputs, out_tensor_c);
ret = OutputTensor2TensorC(outputs, out_tensor_c);
}
return ret;
}
int GenerateInTensorC(const OpParameter *const parameter, const std::vector<lite::Tensor *> &inputs,
std::vector<lite::Tensor *> *outputs, std::vector<TensorC *> *in_tensor_c) {
const std::vector<lite::Tensor *> &outputs, std::vector<TensorC *> *in_tensor_c) {
int ret = RET_OK;
for (auto input : inputs) {
if (input->data_type() == kObjectTypeTensorType) {

View File

@ -32,12 +32,12 @@ void Tensor2TensorC(Tensor *src, TensorC *dst);
void TensorC2Tensor(TensorC *src, Tensor *dst);
int TensorList2TensorListC(TensorList *src, TensorListC *dst);
int TensorListC2TensorList(TensorListC *src, TensorList *dst);
int GenerateMergeSwitchOutTensorC(const std::vector<lite::Tensor *> &inputs, std::vector<lite::Tensor *> *outputs,
int GenerateMergeSwitchOutTensorC(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
std::vector<TensorC *> *out_tensor_c);
int GenerateInTensorC(const OpParameter *const parameter, const std::vector<lite::Tensor *> &inputs,
std::vector<lite::Tensor *> *outputs, std::vector<TensorC *> *in_tensor_c);
const std::vector<lite::Tensor *> &outputs, std::vector<TensorC *> *in_tensor_c);
int GenerateOutTensorC(const OpParameter *const parameter, const std::vector<lite::Tensor *> &inputs,
std::vector<lite::Tensor *> *outputs, std::vector<TensorC *> *out_tensor_c);
const std::vector<lite::Tensor *> &outputs, std::vector<TensorC *> *out_tensor_c);
int CheckTensorsInvalid(const std::vector<Tensor *> &tensors);
void Tensor2MSTensor(const std::vector<Tensor *> &&tensors, std::vector<tensor::MSTensor *> *out_tensors);
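A recurring change throughout this patch: helpers that previously took the output vector as an out-parameter (std::vector<lite::Tensor *> *outputs) now take const std::vector<lite::Tensor *> &outputs, since they only read it. A minimal sketch of how a call site adapts (the wrapper function below is hypothetical, shown only to illustrate the new call shape):

// Hypothetical caller, illustrating the new const-reference signatures above.
int InferNodeOutputs(const OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                     const std::vector<lite::Tensor *> &outputs) {
  std::vector<TensorC *> in_tensor_c;
  std::vector<TensorC *> out_tensor_c;
  // Before this patch the helpers took the vector by pointer:
  //   GenerateInTensorC(parameter, inputs, &outputs, &in_tensor_c);
  int ret = GenerateInTensorC(parameter, inputs, outputs, &in_tensor_c);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "GenerateInTensorC failed";
    return ret;
  }
  ret = GenerateOutTensorC(parameter, inputs, outputs, &out_tensor_c);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "GenerateOutTensorC failed";
  }
  return ret;
}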

View File

@ -48,21 +48,11 @@ int Executor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<Ten
auto cur_kernel = kernel_queue.front();
kernel_queue.pop();
MS_ASSERT(nullptr != cur_kernel);
ret = cur_kernel->PreProcess();
if (RET_OK != ret) {
MS_LOG(ERROR) << "PreProcess kernel failed, name: " << cur_kernel->name();
return ret;
}
ret = cur_kernel->Run(before, after);
ret = cur_kernel->Execute(before, after);
if (RET_OK != ret) {
MS_LOG(ERROR) << "run kernel failed, name: " << cur_kernel->name();
return ret;
}
ret = cur_kernel->PostProcess();
if (RET_OK != ret) {
MS_LOG(ERROR) << "PostProcess kernel failed, name: " << cur_kernel->name();
return ret;
}
for (auto &out_kernel : cur_kernel->out_kernels()) {
if (out_kernel->IsReady(out_kernel->in_tensors())) {
kernel_queue.push(out_kernel);

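The executor change above is the core of the rectification: the run loop no longer drives PreProcess, Run and PostProcess separately; a single Execute(before, after) call owns the per-kernel lifecycle (shape inference and output allocation before the compute, ref-count release after it). A condensed sketch of the resulting dispatch loop, following the diff (the callbacks may be nullptr):

while (!kernel_queue.empty()) {
  auto *cur_kernel = kernel_queue.front();
  kernel_queue.pop();
  MS_ASSERT(cur_kernel != nullptr);
  // For builtin kernels, Execute() runs PreProcess -> Run -> PostProcess internally.
  auto ret = cur_kernel->Execute(before, after);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "run kernel failed, name: " << cur_kernel->name();
    return ret;
  }
  for (auto *out_kernel : cur_kernel->out_kernels()) {
    if (out_kernel->IsReady(out_kernel->in_tensors())) {
      kernel_queue.push(out_kernel);
    }
  }
}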
View File

@ -30,7 +30,7 @@ class Executor {
virtual int Prepare(const std::vector<kernel::LiteKernel *> &kernels, const std::vector<Tensor *> &inputs,
const std::vector<Tensor *> &outputs) {
ctx_ = static_cast<const lite::InnerContext *>(kernels[0]->context());
ctx_ = static_cast<const lite::InnerContext *>(kernels[0]->Context());
return RET_OK;
}

View File

@ -0,0 +1,80 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/inner_kernel.h"
#include <algorithm>
#include <set>
#include "src/tensor.h"
#include "src/common/utils.h"
#include "src/runtime/infer_manager.h"
namespace mindspore::kernel {
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
#ifdef SUPPORT_TRAIN
void *InnerKernel::workspace_ = nullptr;
void InnerKernel::AllocWorkspace(size_t size) {
if (size == 0) {
return;
}
workspace_ = malloc(size);
if (workspace_ == nullptr) {
MS_LOG(ERROR) << "fail to alloc " << size;
}
}
void InnerKernel::FreeWorkspace() {
free(workspace_);
workspace_ = nullptr;
}
#endif
int InnerKernel::PreProcess() {
if (!InferShapeDone()) {
auto ret = lite::KernelInferShape(in_tensors_, out_tensors_, op_parameter_);
if (ret != 0) {
MS_LOG(ERROR) << "InferShape fail!";
return ret;
}
ret = ReSize();
if (ret != 0) {
MS_LOG(ERROR) << "ReSize fail!ret: " << ret;
return ret;
}
}
for (auto *output : this->out_tensors()) {
MS_ASSERT(output != nullptr);
if (registry_data_type_ == kNumberTypeFloat16 && output->data_type() == kNumberTypeFloat32) {
output->set_data_type(kNumberTypeFloat16);
}
if (output->ElementsNum() >= MAX_MALLOC_SIZE / static_cast<int>(sizeof(int64_t))) {
MS_LOG(ERROR) << "The size of output tensor is too big";
return RET_ERROR;
}
auto ret = output->MallocData();
if (ret != RET_OK) {
MS_LOG(ERROR) << "MallocData failed";
return ret;
}
}
return RET_OK;
}
} // namespace mindspore::kernel

View File

@ -0,0 +1,209 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_INNER_KERNEL_H_
#define MINDSPORE_LITE_SRC_INNER_KERNEL_H_
#include <string>
#include <vector>
#include <memory>
#include <utility>
#include <algorithm>
#include "src/common/utils.h"
#include "src/common/log_util.h"
#include "nnacl/op_base.h"
#include "src/inner_context.h"
#include "src/tensor.h"
#include "include/errorcode.h"
#include "schema/model_generated.h"
#include "include/context.h"
#include "src/kernel.h"
namespace mindspore::kernel {
class InnerKernel : public Kernel {
public:
InnerKernel() = default;
InnerKernel(OpParameter *parameter, std::vector<lite::Tensor *> in_tensors, std::vector<lite::Tensor *> out_tensors,
const lite::Context *ctx)
: op_parameter_(parameter), in_tensors_(std::move(in_tensors)), out_tensors_(std::move(out_tensors)) {
context_ = ctx;
if (op_parameter_ != nullptr && ctx != nullptr) {
op_parameter_->thread_num_ = ctx->thread_num_;
}
}
virtual ~InnerKernel() {
if (op_parameter_ != nullptr) {
free(op_parameter_);
op_parameter_ = nullptr;
}
}
int Execute() override {
auto ret = PreProcess();
if (lite::RET_OK != ret) {
MS_LOG(ERROR) << "run kernel PreProcess failed, name: " << this->name();
return ret;
}
// Support ZeroShape
size_t zero_shape_num = 0;
for (auto tensor : this->out_tensors()) {
for (size_t i = 0; i < tensor->shape().size(); i++) {
if (tensor->shape()[i] == 0) {
zero_shape_num++;
break;
}
}
}
if (zero_shape_num != this->out_tensors().size()) {
auto ret = Run();
if (lite::RET_OK != ret) {
MS_LOG(ERROR) << "run kernel failed, name: " << this->name();
return ret;
}
}
ret = PostProcess();
if (lite::RET_OK != ret) {
MS_LOG(ERROR) << "run kernel PostProcess failed, name: " << this->name();
return ret;
}
return lite::RET_OK;
}
// called while compiling graph
int Prepare() override { return mindspore::lite::RET_OK; }
virtual int Run() { return mindspore::lite::RET_ERROR; }
int ReSize() override { return mindspore::lite::RET_ERROR; }
// called before Run
virtual int PreProcess();
// called after Run
virtual int PostProcess() {
for (auto *output : this->out_tensors()) {
MS_ASSERT(output != nullptr);
output->ResetRefCount();
}
return FreeInWorkTensor();
}
virtual int FreeInWorkTensor() const {
for (auto &in_tensor : this->in_tensors()) {
MS_ASSERT(in_tensor != nullptr);
if (in_tensor->root_tensor() == in_tensor) {
continue;
}
in_tensor->DecRefCount();
}
return lite::RET_OK;
}
virtual int Init() { return mindspore::lite::RET_OK; }
OpParameter *op_parameter() const { return op_parameter_; }
bool InferShapeDone() const {
auto shape = out_tensors_.front()->shape();
if (std::find(shape.begin(), shape.end(), -1) != shape.end()) {
return false;
}
return true;
}
schema::PrimitiveType type() override {
return (this->op_parameter_ != nullptr) ? schema::PrimitiveType(this->op_parameter_->type_)
: schema::PrimitiveType_NONE;
}
void set_inputs(const std::vector<mindspore::tensor::MSTensor *> &in_tensors) override {
this->in_tensors_.resize(in_tensors.size());
(void)std::transform(in_tensors.begin(), in_tensors.end(), in_tensors_.begin(),
[](mindspore::tensor::MSTensor *tensor) { return static_cast<lite::Tensor *>(tensor); });
}
void set_outputs(const std::vector<mindspore::tensor::MSTensor *> &out_tensors) override {
this->out_tensors_.resize(out_tensors.size());
(void)std::transform(out_tensors.begin(), out_tensors.end(), out_tensors_.begin(),
[](mindspore::tensor::MSTensor *tensor) { return static_cast<lite::Tensor *>(tensor); });
}
const std::vector<mindspore::tensor::MSTensor *> &inputs() override {
inputs_.assign(in_tensors_.begin(), in_tensors_.end());
return inputs_;
}
const std::vector<mindspore::tensor::MSTensor *> &outputs() override {
outputs_.assign(out_tensors_.begin(), out_tensors_.end());
return outputs_;
}
void set_in_tensors(const std::vector<lite::Tensor *> &in_tensors) { this->in_tensors_ = in_tensors; }
void set_out_tensors(const std::vector<lite::Tensor *> &out_tensors) { this->out_tensors_ = out_tensors; }
const std::vector<lite::Tensor *> &in_tensors() const { return in_tensors_; }
const std::vector<lite::Tensor *> &out_tensors() const { return out_tensors_; }
virtual int Train() {
this->train_mode_ = true;
return mindspore::lite::RET_OK;
}
virtual bool IsTrain() const { return this->train_mode_; }
virtual int Eval() {
this->train_mode_ = false;
return mindspore::lite::RET_OK;
}
virtual bool IsEval() const { return !this->train_mode_; }
virtual void set_trainable(bool trainable = true) { this->trainable_ = trainable; }
virtual bool is_trainable() const { return this->trainable_; }
TypeId registry_data_type(void) { return registry_data_type_; }
void set_registry_data_type(TypeId data_type) { registry_data_type_ = data_type; }
#ifdef SUPPORT_TRAIN
void set_workspace_size(size_t value) { workspace_size_ = value; }
size_t workspace_size() { return workspace_size_; }
static void AllocWorkspace(size_t size);
static void FreeWorkspace();
void *workspace() { return workspace_; }
#endif
protected:
OpParameter *op_parameter_ = nullptr;
// tensor will free in ~lite_session()
std::vector<lite::Tensor *> in_tensors_;
std::vector<lite::Tensor *> out_tensors_;
bool train_mode_ = false;
bool trainable_ = false; // parameters of this Kernel are trained in Train Session
TypeId registry_data_type_ = kTypeUnknown;
#ifdef SUPPORT_TRAIN
size_t workspace_size_ = 0;
static void *workspace_;
#endif
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_INNER_KERNEL_H_
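InnerKernel keeps the old builtin-kernel contract (OpParameter, lite::Tensor inputs/outputs, Init/ReSize/Run), which is why the CPU operator headers later in this patch only swap their base class and include. A minimal sketch of a builtin kernel after the migration (MyOpCPUKernel is hypothetical; real kernels such as ArgMinMaxCPUKernel below follow the same shape):

#include "src/inner_kernel.h"

namespace mindspore::kernel {
class MyOpCPUKernel : public InnerKernel {
 public:
  MyOpCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
      : InnerKernel(parameter, inputs, outputs, ctx) {}
  ~MyOpCPUKernel() override = default;

  int Init() override { return lite::RET_OK; }    // one-time setup while the graph is compiled
  int ReSize() override { return lite::RET_OK; }  // re-plan buffers after shapes change
  int Run() override {
    // compute on in_tensors_ / out_tensors_; PreProcess/PostProcess are inherited from InnerKernel
    return lite::RET_OK;
  }
};
}  // namespace mindspore::kernel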

View File

@ -0,0 +1,71 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_KERNEL_H_
#define MINDSPORE_LITE_SRC_KERNEL_H_
#include <vector>
#include <string>
#include <utility>
#include "include/lite_utils.h"
namespace mindspore::kernel {
class Kernel {
public:
Kernel() = default;
Kernel(const std::vector<tensor::MSTensor *> &inputs, const std::vector<tensor::MSTensor *> &outputs,
const schema::Primitive *primitive, const lite::Context *ctx)
: inputs_(std::move(inputs)), outputs_(std::move(outputs)), context_(ctx) {
if (primitive != nullptr) {
type_ = primitive->value_type();
}
}
virtual ~Kernel() = default;
virtual int Prepare() = 0;
virtual int Execute() = 0;
virtual int ReSize() = 0;
virtual schema::PrimitiveType type() { return type_; }
virtual void set_inputs(const std::vector<mindspore::tensor::MSTensor *> &in_tensors) { this->inputs_ = in_tensors; }
virtual void set_outputs(const std::vector<mindspore::tensor::MSTensor *> &out_tensors) {
this->outputs_ = out_tensors;
}
virtual const std::vector<mindspore::tensor::MSTensor *> &inputs() { return this->inputs_; }
virtual const std::vector<mindspore::tensor::MSTensor *> &outputs() { return this->outputs_; }
std::string name() const { return this->name_; }
void set_name(const std::string &name) { this->name_ = name; }
const lite::Context *context() const { return this->context_; }
protected:
std::vector<mindspore::tensor::MSTensor *> inputs_;
std::vector<mindspore::tensor::MSTensor *> outputs_;
schema::PrimitiveType type_ = schema::PrimitiveType_NONE;
std::string name_;
const lite::Context *context_ = nullptr;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_KERNEL_H_
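The new Kernel class is the provider-facing interface: it works on tensor::MSTensor rather than lite::Tensor and exposes only Prepare/Execute/ReSize plus tensor setters and getters. A minimal sketch of a custom (non-builtin) kernel built on it (CustomAddKernel is hypothetical):

#include "include/errorcode.h"
#include "src/kernel.h"

namespace mindspore::kernel {
class CustomAddKernel : public Kernel {
 public:
  CustomAddKernel(const std::vector<tensor::MSTensor *> &inputs, const std::vector<tensor::MSTensor *> &outputs,
                  const schema::Primitive *primitive, const lite::Context *ctx)
      : Kernel(inputs, outputs, primitive, ctx) {}
  ~CustomAddKernel() override = default;

  int Prepare() override { return lite::RET_OK; }  // called once while the graph is compiled
  int ReSize() override { return lite::RET_OK; }   // called when input shapes change
  int Execute() override {
    // read inputs_, write outputs_; no PreProcess/PostProcess here, the runtime wrapper handles ref counts
    return lite::RET_OK;
  }
};
}  // namespace mindspore::kernel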

View File

@ -133,7 +133,7 @@ kernel::KernelCreator KernelRegistry::GetCreator(const KernelKey &desc) {
if (desc.provider == kBuiltin) {
int index = GetCreatorFuncIndex(desc);
if (index >= array_size_ || index < 0) {
MS_LOG(ERROR) << "invalid kernel key, arch " << desc.arch << ", data_type" << desc.data_type << ",op type "
MS_LOG(ERROR) << "invalid kernel key, arch " << desc.arch << ", data_type " << desc.data_type << ",op type "
<< desc.type;
return nullptr;
}
@ -231,10 +231,17 @@ int KernelRegistry::GetKernel(const std::vector<Tensor *> &in_tensors, const std
if (key.provider == kBuiltin) {
auto creator = GetCreator(key);
if (creator != nullptr) {
*kernel = creator(in_tensors, out_tensors, parameter, ctx, key);
if (*kernel != nullptr) {
(*kernel)->set_desc(key);
return RET_OK;
auto inner_kernel = creator(in_tensors, out_tensors, parameter, ctx, key);
if (inner_kernel != nullptr) {
inner_kernel->set_registry_data_type(key.data_type);
auto *lite_kernel = new (std::nothrow) kernel::LiteKernel(inner_kernel);
if (lite_kernel != nullptr) {
lite_kernel->set_desc(key);
*kernel = lite_kernel;
return RET_OK;
} else {
delete inner_kernel;
}
}
return RET_ERROR;
}
@ -247,9 +254,16 @@ int KernelRegistry::GetKernel(const std::vector<Tensor *> &in_tensors, const std
Tensor2MSTensor(std::move(in_tensors), &tensors_in);
std::vector<tensor::MSTensor *> tensors_out;
Tensor2MSTensor(std::move(out_tensors), &tensors_out);
*kernel = creator(tensors_in, tensors_out, static_cast<const schema::Primitive *>(primitive), ctx);
if (*kernel != nullptr) {
return RET_OK;
auto base_kernel = creator(tensors_in, tensors_out, static_cast<const schema::Primitive *>(primitive), ctx);
if (base_kernel != nullptr) {
auto *lite_kernel = new (std::nothrow) kernel::LiteKernel(base_kernel);
if (lite_kernel != nullptr) {
lite_kernel->set_desc(key);
*kernel = lite_kernel;
return RET_OK;
} else {
delete base_kernel;
}
}
return RET_ERROR;
}
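Both registry paths above (builtin creators returning InnerKernel and external providers returning Kernel) now follow the same shape: create the raw kernel, wrap it in a LiteKernel, tag the descriptor, and clean up if the wrapper allocation fails. A hypothetical helper capturing that pattern (WrapKernel is not part of the patch):

// Sketch of the wrap-or-cleanup pattern used in GetKernel() above.
kernel::LiteKernel *WrapKernel(kernel::Kernel *raw_kernel, const kernel::KernelKey &key) {
  if (raw_kernel == nullptr) {
    return nullptr;
  }
  auto *lite_kernel = new (std::nothrow) kernel::LiteKernel(raw_kernel);
  if (lite_kernel == nullptr) {
    delete raw_kernel;  // the wrapper owns the raw kernel only once construction succeeds
    return nullptr;
  }
  lite_kernel->set_desc(key);
  return lite_kernel;
}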

View File

@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -21,40 +21,19 @@
#include "src/common/utils.h"
#include "src/runtime/infer_manager.h"
#include "src/common/version_manager.h"
#include "src/runtime/kernel/arm/base/merge.h"
namespace mindspore::kernel {
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
#ifdef SUPPORT_TRAIN
void *LiteKernel::workspace_ = nullptr;
void LiteKernel::AllocWorkspace(size_t size) {
if (size == 0) {
return;
}
workspace_ = malloc(size);
if (workspace_ == nullptr) {
MS_LOG(ERROR) << "fail to alloc " << size;
}
}
void LiteKernel::FreeWorkspace() {
free(workspace_);
workspace_ = nullptr;
}
int LiteKernel::DecOutTensorRefCount() {
for (auto *tensor : this->out_tensors_) {
tensor->set_ref_count(tensor->ref_count() - 1);
if (0 >= tensor->ref_count()) {
tensor->FreeData();
}
}
return 0;
}
#endif
bool LiteKernel::IsReady(const std::vector<lite::Tensor *> &scope_tensors) {
return std::all_of(this->in_tensors().begin(), this->in_tensors().end(), [&](lite::Tensor *in_tensor) {
MS_ASSERT(kernel_ != nullptr);
if ((desc_.provider == kBuiltin) && (kernel_->type() == schema::PrimitiveType_Merge)) {
return static_cast<MergeCPUKernel *>(kernel_)->IsReady(scope_tensors);
}
auto &in_tensors = this->in_tensors();
return std::all_of(in_tensors.begin(), in_tensors.end(), [&](lite::Tensor *in_tensor) {
if (IsContain(scope_tensors, in_tensor)) {
return in_tensor->IsReady();
} else {
@ -64,120 +43,37 @@ bool LiteKernel::IsReady(const std::vector<lite::Tensor *> &scope_tensors) {
}
void LiteKernel::InitOutTensorInitRefCount() {
for (auto *tensor : this->out_tensors_) {
for (auto *tensor : this->out_tensors()) {
size_t init_ref_count = 0;
for (auto *post_kernel : this->out_kernels_) {
auto &post_in_tensors = post_kernel->in_tensors();
init_ref_count +=
std::count_if(post_kernel->in_tensors_.begin(), post_kernel->in_tensors_.end(),
std::count_if(post_in_tensors.begin(), post_in_tensors.end(),
[&tensor](const lite::Tensor *post_kernel_in_tensor) { return post_kernel_in_tensor == tensor; });
}
tensor->set_init_ref_count(init_ref_count);
}
}
int LiteKernel::FreeInWorkTensor() const {
for (auto &in_tensor : this->in_tensors_) {
MS_ASSERT(in_tensor != nullptr);
if (in_tensor->root_tensor() == in_tensor) {
continue;
}
in_tensor->DecRefCount();
}
return RET_OK;
}
int LiteKernel::PreProcess() {
if (!InferShapeDone()) {
auto ret = lite::KernelInferShape(in_tensors_, &out_tensors_, op_parameter_);
if (ret != 0) {
MS_LOG(ERROR) << "InferShape fail!";
return ret;
}
ret = ReSize();
if (ret != 0) {
MS_LOG(ERROR) << "ReSize fail!ret: " << ret;
return ret;
}
}
for (auto *output : this->out_tensors()) {
MS_ASSERT(output != nullptr);
if (desc_.data_type == kNumberTypeFloat16 && output->data_type() == kNumberTypeFloat32) {
output->set_data_type(kNumberTypeFloat16);
}
if (output->ElementsNum() >= MAX_MALLOC_SIZE / static_cast<int>(sizeof(int64_t))) {
MS_LOG(ERROR) << "The size of output tensor is too big";
return RET_ERROR;
}
auto ret = output->MallocData();
if (ret != RET_OK) {
MS_LOG(ERROR) << "MallocData failed";
return ret;
}
}
return RET_OK;
}
int LiteKernel::PostProcess() {
for (auto *output : this->out_tensors()) {
MS_ASSERT(output != nullptr);
output->ResetRefCount();
}
return FreeInWorkTensor();
}
int LiteKernel::Run(const KernelCallBack &before, const KernelCallBack &after) {
if (before != nullptr) {
if (!before(TensorVectorCast(this->in_tensors_), TensorVectorCast(this->out_tensors_),
{this->name_, this->type_str()})) {
MS_LOG(WARNING) << "run kernel before_callback failed, name: " << this->name_;
}
}
// Support ZeroShape
size_t zero_shape_num = 0;
for (auto tensor : this->out_tensors_) {
for (size_t i = 0; i < tensor->shape().size(); i++) {
if (tensor->shape()[i] == 0) {
zero_shape_num++;
break;
}
}
}
if (zero_shape_num != this->out_tensors_.size()) {
auto ret = Run();
if (RET_OK != ret) {
MS_LOG(ERROR) << "run kernel failed, name: " << this->name_;
return ret;
}
}
if (after != nullptr) {
if (!after(TensorVectorCast(this->in_tensors_), TensorVectorCast(this->out_tensors_),
{this->name_, this->type_str()})) {
MS_LOG(WARNING) << "run kernel after_callback failed, name: " << this->name_;
}
}
return RET_OK;
}
std::string LiteKernel::ToString() const {
std::ostringstream oss;
oss << "LiteKernel: " << this->name_;
oss << "LiteKernel: " << this->name();
oss << ", Type: " << this->type_str();
oss << ", " << this->in_tensors_.size() << " InputTensors:";
for (auto tensor : in_tensors_) {
oss << ", " << this->in_tensors().size() << " InputTensors:";
for (auto tensor : in_tensors()) {
oss << " " << tensor;
}
oss << ", " << this->out_tensors_.size() << " OutputTensors:";
for (auto tensor : out_tensors_) {
oss << ", " << this->out_tensors().size() << " OutputTensors:";
for (auto tensor : out_tensors()) {
oss << " " << tensor;
}
oss << ", " << this->in_kernels_.size() << " InputKernels:";
for (auto in_kernel : in_kernels_) {
oss << " " << in_kernel->name_;
oss << " " << in_kernel->name();
}
oss << ", " << this->out_kernels_.size() << " OutputKernels:";
for (auto out_kernel : out_kernels_) {
oss << " " << out_kernel->name_;
oss << " " << out_kernel->name();
}
return oss.str();
}
@ -187,7 +83,7 @@ void LiteKernel::FindInoutKernels(const std::vector<kernel::LiteKernel *> &scope
this->in_kernels_.clear();
this->out_kernels_.clear();
// find io kernels, need optimize time
for (auto *tensor : this->in_tensors_) {
for (auto *tensor : this->in_tensors()) {
for (auto *scope_kernel : scope_kernels) {
if (scope_kernel == this) {
continue;
@ -198,7 +94,7 @@ void LiteKernel::FindInoutKernels(const std::vector<kernel::LiteKernel *> &scope
}
}
for (auto *tensor : this->out_tensors_) {
for (auto *tensor : this->out_tensors()) {
for (auto *scope_kernel : scope_kernels) {
if (scope_kernel == this) {
continue;

View File

@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -20,6 +20,7 @@
#include <vector>
#include <memory>
#include <utility>
#include <algorithm>
#include "src/common/utils.h"
#include "src/common/log_util.h"
#ifdef ENABLE_ARM
@ -31,6 +32,8 @@
#include "include/errorcode.h"
#include "schema/model_generated.h"
#include "include/context.h"
#include "src/kernel.h"
#include "src/inner_kernel.h"
namespace mindspore::kernel {
enum KERNEL_ARCH { kCPU, kGPU, kAPU, kNPU, kKernelArch_MIN = kCPU, kKernelArch_MAX = kNPU };
@ -62,86 +65,182 @@ enum SubGraphType { kNotSubGraph = 0, kCpuFP32SubGraph, kCpuFP16SubGraph, kGpuSu
class LiteKernel {
public:
LiteKernel() = default;
LiteKernel(OpParameter *parameter, std::vector<lite::Tensor *> in_tensors, std::vector<lite::Tensor *> out_tensors,
const lite::Context *ctx)
: op_parameter_(parameter),
in_tensors_(std::move(in_tensors)),
out_tensors_(std::move(out_tensors)),
context_(ctx) {
if (op_parameter_ != nullptr && ctx != nullptr) {
op_parameter_->thread_num_ = ctx->thread_num_;
}
LiteKernel() {
this->in_kernels_.clear();
this->out_kernels_.clear();
}
explicit LiteKernel(Kernel *kernel) : kernel_(kernel) {
this->in_kernels_.clear();
this->out_kernels_.clear();
}
virtual ~LiteKernel() {
if (op_parameter_ != nullptr) {
free(op_parameter_);
op_parameter_ = nullptr;
if (kernel_ != nullptr) {
free(kernel_);
kernel_ = nullptr;
}
}
virtual int Execute() { return Execute(nullptr, nullptr); }
virtual int Execute(const KernelCallBack &before, const KernelCallBack &after) {
if (before != nullptr) {
if (!before(TensorVectorCast(this->in_tensors()), TensorVectorCast(this->out_tensors()),
{this->name(), schema::EnumNamePrimitiveType(this->type())})) {
MS_LOG(WARNING) << "run kernel before_callback failed, name: " << this->name();
}
}
auto ret = kernel_->Execute();
if ((ret == lite::RET_OK) && (desc_.provider != kBuiltin)) {
for (auto *output : this->out_tensors()) {
MS_ASSERT(output != nullptr);
output->ResetRefCount();
}
for (auto &in_tensor : this->in_tensors()) {
MS_ASSERT(in_tensor != nullptr);
if (in_tensor->root_tensor() == in_tensor) {
continue;
}
in_tensor->DecRefCount();
}
}
if (after != nullptr) {
if (!after(TensorVectorCast(this->in_tensors()), TensorVectorCast(this->out_tensors()),
{this->name(), schema::EnumNamePrimitiveType(this->type())})) {
MS_LOG(WARNING) << "run kernel after_callback failed, name: " << this->name();
}
}
return ret;
}
// called while compiling graph
virtual int Prepare() { return mindspore::lite::RET_OK; }
// called before Run
virtual int PreProcess();
virtual int Prepare() {
MS_ASSERT(kernel_ != nullptr);
return kernel_->Prepare();
}
virtual int Run() { return mindspore::lite::RET_ERROR; }
virtual int Init() { return mindspore::lite::RET_OK; }
virtual int Run(const KernelCallBack &before, const KernelCallBack &after);
// called after Run
virtual int PostProcess();
virtual int ReSize() { return mindspore::lite::RET_ERROR; }
virtual int ReSize() {
MS_ASSERT(kernel_ != nullptr);
return kernel_->ReSize();
}
virtual void FindInoutKernels(const std::vector<kernel::LiteKernel *> &scope_kernels);
virtual int Init() { return mindspore::lite::RET_ERROR; }
OpParameter *op_parameter() const {
MS_ASSERT(kernel_ != nullptr);
return static_cast<InnerKernel *>(kernel_)->op_parameter();
}
OpParameter *op_parameter() const { return op_parameter_; }
std::string name() const {
MS_ASSERT(kernel_ != nullptr);
return kernel_->name();
}
std::string name() const { return this->name_; }
void set_name(const std::string &name) {
MS_ASSERT(kernel_ != nullptr);
kernel_->set_name(name);
}
virtual int Train() {
this->train_mode_ = true;
return mindspore::lite::RET_OK;
MS_ASSERT(kernel_ != nullptr);
return static_cast<InnerKernel *>(kernel_)->Train();
}
virtual bool IsTrain() const { return this->train_mode_; }
virtual bool IsTrain() const {
MS_ASSERT(kernel_ != nullptr);
return static_cast<InnerKernel *>(kernel_)->IsTrain();
}
virtual int Eval() {
this->train_mode_ = false;
return mindspore::lite::RET_OK;
MS_ASSERT(kernel_ != nullptr);
return static_cast<InnerKernel *>(kernel_)->Eval();
}
virtual bool IsEval() const { return !this->train_mode_; }
virtual bool IsEval() const {
MS_ASSERT(kernel_ != nullptr);
return static_cast<InnerKernel *>(kernel_)->IsEval();
}
virtual void set_trainable(bool trainable = true) { this->trainable_ = trainable; }
virtual void set_trainable(bool trainable = true) {
MS_ASSERT(kernel_ != nullptr);
static_cast<InnerKernel *>(kernel_)->set_trainable(trainable);
}
virtual bool is_trainable() const { return this->trainable_; }
void set_name(const std::string &name) { this->name_ = name; }
virtual bool is_trainable() const {
MS_ASSERT(kernel_ != nullptr);
return static_cast<InnerKernel *>(kernel_)->is_trainable();
}
void set_is_model_output(bool is_model_output) { this->is_model_output_ = is_model_output; }
bool is_model_output() const { return this->is_model_output_; }
schema::PrimitiveType Type() const {
return (this->op_parameter_ != nullptr) ? schema::PrimitiveType(this->op_parameter_->type_)
: schema::PrimitiveType_NONE;
bool InferShapeDone() const {
auto shape = out_tensors().front()->shape();
if (std::find(shape.begin(), shape.end(), -1) != shape.end()) {
return false;
}
return true;
}
std::string type_str() const { return schema::EnumNamePrimitiveType(this->Type()); }
schema::PrimitiveType type() const {
MS_ASSERT(kernel_ != nullptr);
return kernel_->type();
}
void set_in_tensors(const std::vector<lite::Tensor *> &in_tensors) { this->in_tensors_ = in_tensors; }
std::string type_str() const { return schema::EnumNamePrimitiveType(this->type()); }
void set_out_tensors(const std::vector<lite::Tensor *> &out_tensors) { this->out_tensors_ = out_tensors; }
void set_in_tensors(const std::vector<lite::Tensor *> &in_tensors) {
MS_ASSERT(kernel_ != nullptr);
if (desc_.provider == kBuiltin) {
static_cast<InnerKernel *>(kernel_)->set_in_tensors(in_tensors);
} else {
std::vector<mindspore::tensor::MSTensor *> ms_tensors(in_tensors.begin(), in_tensors.end());
kernel_->set_inputs(ms_tensors);
}
}
const std::vector<lite::Tensor *> &in_tensors() const { return this->in_tensors_; }
void set_out_tensors(const std::vector<lite::Tensor *> &out_tensors) {
MS_ASSERT(kernel_ != nullptr);
if (desc_.provider == kBuiltin) {
static_cast<InnerKernel *>(kernel_)->set_out_tensors(out_tensors);
} else {
std::vector<mindspore::tensor::MSTensor *> ms_tensors(out_tensors.begin(), out_tensors.end());
kernel_->set_outputs(ms_tensors);
}
}
const std::vector<lite::Tensor *> &out_tensors() const { return this->out_tensors_; }
const std::vector<lite::Tensor *> &in_tensors() const {
MS_ASSERT(kernel_ != nullptr);
if (desc_.provider == kBuiltin) {
return static_cast<InnerKernel *>(kernel_)->in_tensors();
} else {
auto &ms_tensors = kernel_->inputs();
mutable_in_tensors_.resize(ms_tensors.size());
(void)std::transform(ms_tensors.begin(), ms_tensors.end(), mutable_in_tensors_.begin(),
[](mindspore::tensor::MSTensor *tensor) { return static_cast<lite::Tensor *>(tensor); });
return mutable_in_tensors_;
}
}
const std::vector<lite::Tensor *> &out_tensors() const {
MS_ASSERT(kernel_ != nullptr);
if (desc_.provider == kBuiltin) {
return static_cast<InnerKernel *>(kernel_)->out_tensors();
} else {
auto &ms_tensors = kernel_->outputs();
mutable_out_tensors_.resize(ms_tensors.size());
(void)std::transform(ms_tensors.begin(), ms_tensors.end(), mutable_out_tensors_.begin(),
[](mindspore::tensor::MSTensor *tensor) { return static_cast<lite::Tensor *>(tensor); });
return mutable_out_tensors_;
}
}
void AddInKernel(LiteKernel *kernel) {
if (!lite::IsContain(this->in_kernels_, kernel)) {
@ -167,63 +266,41 @@ class LiteKernel {
virtual void InitOutTensorInitRefCount();
virtual int FreeInWorkTensor() const;
KernelKey desc() const { return desc_; }
void set_desc(const KernelKey kernel_key) { desc_ = kernel_key; }
SubGraphType subgraph_type() const { return this->subgraph_type_; }
const lite::Context *context() const { return this->context_; }
const lite::InnerContext *Context() const {
MS_ASSERT(kernel_ != nullptr);
return static_cast<const lite::InnerContext *>(kernel_->context());
}
virtual std::string ToString() const;
#ifdef SUPPORT_TRAIN
void set_workspace_size(size_t value) { workspace_size_ = value; }
size_t workspace_size() { return workspace_size_; }
static void AllocWorkspace(size_t size);
static void FreeWorkspace();
void *workspace() { return workspace_; }
int DecOutTensorRefCount();
#endif
bool InferShapeDone() const {
auto shape = out_tensors_.front()->shape();
if (std::find(shape.begin(), shape.end(), -1) != shape.end()) {
return false;
}
return true;
}
Kernel *kernel() { return kernel_; }
protected:
KernelKey desc_{};
std::string name_;
OpParameter *op_parameter_ = nullptr;
Kernel *kernel_ = nullptr;
KernelKey desc_;
// tensor will free in ~lite_session()
std::vector<lite::Tensor *> in_tensors_;
std::vector<lite::Tensor *> out_tensors_;
const lite::Context *context_ = nullptr;
std::vector<LiteKernel *> in_kernels_;
std::vector<LiteKernel *> out_kernels_;
bool train_mode_ = false;
bool trainable_ = false; // parameters of this Kernel are trained in Train Session
mutable std::vector<lite::Tensor *> mutable_in_tensors_;
mutable std::vector<lite::Tensor *> mutable_out_tensors_;
bool is_model_output_ = false;
SubGraphType subgraph_type_ = kNotSubGraph;
#ifdef SUPPORT_TRAIN
size_t workspace_size_ = 0;
static void *workspace_;
#endif
};
typedef LiteKernel *(*KernelCreator)(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *parameter,
const lite::Context *ctx, const KernelKey &desc);
typedef InnerKernel *(*KernelCreator)(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *parameter,
const lite::Context *ctx, const KernelKey &desc);
template <class T>
kernel::LiteKernel *LiteKernelCreator(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *parameter,
const lite::Context *ctx, const kernel::KernelKey &desc) {
kernel::InnerKernel *LiteKernelCreator(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *parameter,
const lite::Context *ctx, const kernel::KernelKey &desc) {
auto *kernel = new (std::nothrow) T(parameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
if (kernel == nullptr) {
MS_LOG(ERROR) << "kernel: " << parameter->name_ << "is nullptr.";
@ -241,4 +318,4 @@ kernel::LiteKernel *LiteKernelCreator(const std::vector<lite::Tensor *> &inputs,
}
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_LITE_KERNEL_H_
#endif // MINDSPORE_LITE_SRC_INNER_KERNEL_H_
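LiteKernel is now a thin wrapper that forwards to the wrapped Kernel (or to InnerKernel for builtin providers), which drives the caller-side renames in the rest of this patch. Roughly, for scheduler and pass code (illustrative; RET_OK as in the surrounding files):

// kernel->Type()    becomes  kernel->type()     (primitive type comes from the wrapped kernel)
// kernel->context() becomes  kernel->Context()  (returns the wrapped kernel's InnerContext)
// kernel->PreProcess(); kernel->Run(before, after); kernel->PostProcess();
//                   becomes  kernel->Execute(before, after)
auto ret = kernel->Execute(before, after);
if (RET_OK != ret) {
  MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name();
}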

View File

@ -202,9 +202,9 @@ bool LiteKernelUtil::IsSwitchCall(kernel::LiteKernel *kernel) {
return false;
}
for (auto &node : subgraph_kernel->nodes()) {
if (node->Type() == schema::PrimitiveType_Switch &&
if (node->type() == schema::PrimitiveType_Switch &&
InputsContainsSpecificNode(node, schema::PrimitiveType_PartialFusion) && node->out_kernels().size() == 1 &&
node->out_kernels().front()->Type() == schema::PrimitiveType_Call) {
node->out_kernels().front()->type() == schema::PrimitiveType_Call) {
return true;
}
}
@ -215,7 +215,7 @@ bool LiteKernelUtil::IsSwitchCall(kernel::LiteKernel *kernel) {
kernel::LiteKernel *LiteKernelUtil::GetInputsSpecificNode(const kernel::LiteKernel *kernel,
const schema::PrimitiveType &primitive_type) {
for (auto input : kernel->in_kernels()) {
if (input->Type() == primitive_type) {
if (input->type() == primitive_type) {
return input;
}
}

View File

@ -16,8 +16,9 @@
#ifndef MINDSPORE_LITE_SRC_LITE_KERNEL_UTIL_H_
#define MINDSPORE_LITE_SRC_LITE_KERNEL_UTIL_H_
#include "src/lite_kernel.h"
#include <vector>
#include "src/lite_kernel.h"
namespace mindspore::kernel {
class LiteKernelUtil {

View File

@ -59,7 +59,7 @@ int LiteOpActor::CompileArrowThroughPartialCall() {
return RET_OK;
}
for (auto &node : subgraph_kernel->nodes()) {
if (node->Type() != schema::PrimitiveType_Call) {
if (node->type() != schema::PrimitiveType_Call) {
continue;
}
call_node_ = node;
@ -290,7 +290,7 @@ int LiteSwitchOpActor::CompileFalseBranchArrow() {
int LiteSwitchOpActor::GetSwitchAndCallNode(kernel::SubGraphKernel *subgraph_kernel) {
for (auto &node : subgraph_kernel->nodes()) {
if (node->Type() != schema::PrimitiveType_Call) {
if (node->type() != schema::PrimitiveType_Call) {
continue;
}
call_node_ = node;

View File

@ -47,8 +47,8 @@ class LiteOpActor : public OpActor<lite::Tensor> {
return;
}
CpuBindMode cpu_bind_mode = kernel_->context()->device_list_.front().device_info_.cpu_device_info_.cpu_bind_mode_;
BindThreads(static_cast<const lite::InnerContext *>(kernel_->context())->thread_pool_, true, cpu_bind_mode);
CpuBindMode cpu_bind_mode = kernel_->Context()->device_list_.front().device_info_.cpu_device_info_.cpu_bind_mode_;
BindThreads(static_cast<const lite::InnerContext *>(kernel_->Context())->thread_pool_, true, cpu_bind_mode);
int ret = CheckInputData();
if (ret != RET_OK) {
@ -78,7 +78,7 @@ class LiteOpActor : public OpActor<lite::Tensor> {
inputs_data_.clear();
AsyncOutput(context);
BindThreads(static_cast<const lite::InnerContext *>(kernel_->context())->thread_pool_, false, cpu_bind_mode);
BindThreads(static_cast<const lite::InnerContext *>(kernel_->Context())->thread_pool_, false, cpu_bind_mode);
SetOutputData(context);
for (auto &input_data : inputs_data_) {
@ -101,22 +101,11 @@ class LiteOpActor : public OpActor<lite::Tensor> {
}
virtual int CompileArrow();
int RunKernel(const KernelCallBack &before, const KernelCallBack &after) {
int ret = kernel_->PreProcess();
if (RET_OK != ret) {
MS_LOG(ERROR) << "PreProcess kernel failed, name: " << kernel_->name();
return ret;
}
ret = kernel_->Run(before, after);
auto ret = kernel_->Execute(before, after);
if (RET_OK != ret) {
MS_LOG(ERROR) << "run kernel failed, name: " << kernel_->name();
return ret;
}
ret = kernel_->PostProcess();
if (RET_OK != ret) {
MS_LOG(ERROR) << "PostProcess kernel failed, name: " << kernel_->name();
return ret;
}
return ret;
}

View File

@ -366,7 +366,7 @@ void LiteSession::FreePackOpWeight(const std::vector<kernel::LiteKernel *> &kern
for (auto *kernel : kernels) {
MS_ASSERT(kernel != nullptr);
if (kernel->subgraph_type() == kernel::kNotSubGraph) {
if (!IsPackedOp(kernel->Type())) {
if (!IsPackedOp(kernel->type())) {
continue;
}
} else {

View File

@ -85,7 +85,7 @@ int MindrtExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vect
const std::vector<kernel::LiteKernel *> &kernels, mindspore::Allocator *allocator,
const KernelCallBack &before, const KernelCallBack &after) {
MS_ASSERT(nullptr != allocator);
if (kernels.front()->Type() != schema::PrimitiveType_Merge) {
if (kernels.front()->type() != schema::PrimitiveType_Merge) {
auto ret = CheckTensorsInvalid(in_tensors);
if (RET_OK != ret) {
MS_LOG(ERROR) << "CheckInputs failed";

View File

@ -19,13 +19,14 @@
#include <string>
#include <vector>
#include "schema/ops_generated.h"
#include "src/lite_kernel.h"
namespace mindspore {
namespace kernel {
typedef kernel::LiteKernel *(*CreateKernel)(const std::vector<tensor::MSTensor *> &inputs,
const std::vector<tensor::MSTensor *> &outputs,
const schema::Primitive *primitive, const lite::Context *ctx);
typedef kernel::Kernel *(*CreateKernel)(const std::vector<tensor::MSTensor *> &inputs,
const std::vector<tensor::MSTensor *> &outputs,
const schema::Primitive *primitive, const lite::Context *ctx);
class RegisterKernel {
public:
static RegisterKernel *GetInstance();

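With this change a registered creator returns the provider-facing kernel::Kernel; the runtime wraps it in a LiteKernel itself (see kernel_registry.cc above), so register_kernel.h no longer needs src/lite_kernel.h. A sketch of a creator matching the new typedef (CustomAddKernel is the hypothetical class from the kernel.h example above):

kernel::Kernel *CustomAddKernelCreator(const std::vector<tensor::MSTensor *> &inputs,
                                       const std::vector<tensor::MSTensor *> &outputs,
                                       const schema::Primitive *primitive, const lite::Context *ctx) {
  auto *kernel = new (std::nothrow) kernel::CustomAddKernel(inputs, outputs, primitive, ctx);
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "new CustomAddKernel failed";
  }
  return kernel;  // nullptr signals creation failure to the registry
}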
View File

@ -339,7 +339,7 @@ int NPUFusionPass::Run() {
for (size_t i = 0; i < kernels->size(); i++) {
auto kernel = (*kernels)[i];
if (CheckFusion(kernel)) {
switch (kernel->Type()) {
switch (kernel->type()) {
case schema::PrimitiveType_Split:
i -= kernel->in_kernels().size();
SplitFusion(kernel);

View File

@ -48,7 +48,7 @@ std::set<mindspore::schema::PrimitiveType> npu_insert_nodes = {
int NPUInsertTransformPass::GetInsertState(kernel::LiteKernel *kernel) {
// filter out irrelevant kernel
if (npu_insert_nodes.find(kernel->Type()) == npu_insert_nodes.end()) {
if (npu_insert_nodes.find(kernel->type()) == npu_insert_nodes.end()) {
return InsertNone;
}

View File

@ -46,18 +46,24 @@ kernel::LiteKernel *NPUPassUtils::CreateNchw2NhwcKernel(const std::vector<Tensor
transpose_param->perm_[3] = 1;
transpose_param->num_axes_ = 4;
auto kernel = new (std::nothrow)
auto inner_kernel = new (std::nothrow)
kernel::TransposeCPUKernel(reinterpret_cast<OpParameter *>(transpose_param), in_tensors, out_tensors, ctx);
if (kernel != nullptr) {
kernel->set_desc(key);
if (inner_kernel != nullptr) {
auto *kernel = new (std::nothrow) kernel::LiteKernel(inner_kernel);
if (kernel != nullptr) {
kernel->set_desc(key);
kernel->set_name(name);
return kernel;
} else {
free(transpose_param);
delete inner_kernel;
}
} else {
MS_LOG(ERROR) << "New Nchw2Nhwc Kernel failed.";
free(transpose_param);
return nullptr;
}
kernel->set_name(name);
return kernel;
return nullptr;
}
kernel::LiteKernel *NPUPassUtils::CreateNhwc2NchwKernel(const std::vector<Tensor *> &in_tensors,
@ -77,17 +83,24 @@ kernel::LiteKernel *NPUPassUtils::CreateNhwc2NchwKernel(const std::vector<Tensor
transpose_param->perm_[3] = 2;
transpose_param->num_axes_ = 4;
auto kernel = new (std::nothrow)
auto inner_kernel = new (std::nothrow)
kernel::TransposeCPUKernel(reinterpret_cast<OpParameter *>(transpose_param), in_tensors, out_tensors, ctx);
if (kernel != nullptr) {
kernel->set_desc(key);
if (inner_kernel != nullptr) {
auto *kernel = new (std::nothrow) kernel::LiteKernel(inner_kernel);
if (kernel != nullptr) {
kernel->set_desc(key);
kernel->set_name(name);
return kernel;
} else {
free(transpose_param);
delete inner_kernel;
}
} else {
MS_LOG(ERROR) << "New Nhwc2Nchw Kernel failed.";
return nullptr;
free(transpose_param);
}
kernel->set_name(name);
return kernel;
return nullptr;
}
void NPUPassUtils::UpdateKernel(kernel::LiteKernel *kernel, const std::vector<kernel::LiteKernel *> &in_kernels,
@ -187,7 +200,7 @@ bool NPUPassUtils::IsNhwc2Nchw(const kernel::LiteKernel *kernel) {
if (kernel == nullptr) {
return false;
}
if (kernel->Type() != schema::PrimitiveType_Transpose) {
if (kernel->type() != schema::PrimitiveType_Transpose) {
return false;
}
auto parameter = reinterpret_cast<TransposeParameter *>(kernel->op_parameter());
@ -207,7 +220,7 @@ bool NPUPassUtils::IsNchw2Nhwc(const kernel::LiteKernel *kernel) {
if (kernel == nullptr) {
return false;
}
if (kernel->Type() != schema::PrimitiveType_Transpose) {
if (kernel->type() != schema::PrimitiveType_Transpose) {
return false;
}
auto parameter = reinterpret_cast<TransposeParameter *>(kernel->op_parameter());

View File

@ -36,7 +36,7 @@ int NPUTransformPass::InsertPreNodes(kernel::LiteKernel *kernel, std::vector<ker
MS_LOG(ERROR) << "NPU Transform pass does not find in kernel with 4d output";
return RET_ERROR;
}
if (is_input_kernel || (*it)->desc().arch != kNPU || npu_trans_nodes.find((*it)->Type()) == npu_trans_nodes.end()) {
if (is_input_kernel || (*it)->desc().arch != kNPU || npu_trans_nodes.find((*it)->type()) == npu_trans_nodes.end()) {
kernel::LiteKernel *pre_kernel = nullptr;
if (!is_input_kernel) {
pre_kernel = *it;
@ -95,7 +95,7 @@ int NPUTransformPass::InsertPostNodes(kernel::LiteKernel *kernel, std::vector<ke
std::vector<kernel::LiteKernel *> post_non_insert_kernels;
for (int i = 0; i < kernel->out_kernels().size(); i++) {
auto post_kernel = kernel->out_kernels()[i];
if (post_kernel->desc().arch != kNPU || npu_trans_nodes.find(post_kernel->Type()) == npu_trans_nodes.end()) {
if (post_kernel->desc().arch != kNPU || npu_trans_nodes.find(post_kernel->type()) == npu_trans_nodes.end()) {
post_insert_kernels.push_back(post_kernel);
} else {
post_non_insert_kernels.push_back(post_kernel);
@ -186,15 +186,15 @@ int NPUTransformPass::InsertPostNodes(kernel::LiteKernel *kernel, std::vector<ke
int NPUTransformPass::Run() {
for (size_t i = 0; i < all_kernels_->size();) {
auto kernel = (*all_kernels_)[i];
if (kernel->desc().arch != kNPU || npu_trans_nodes.find(kernel->Type()) == npu_trans_nodes.end()) {
if (kernel->desc().arch != kNPU || npu_trans_nodes.find(kernel->type()) == npu_trans_nodes.end()) {
i++;
continue;
}
if (kernel->Type() == schema::PrimitiveType_ScaleFusion && !NPUPassUtils::Scale4dCase(kernel)) {
if (kernel->type() == schema::PrimitiveType_ScaleFusion && !NPUPassUtils::Scale4dCase(kernel)) {
i++;
continue;
}
if (kernel->Type() == schema::PrimitiveType_Resize &&
if (kernel->type() == schema::PrimitiveType_Resize &&
kernel->in_tensors()[0]->Height() > kernel->out_tensors()[0]->Height()) {
i++;
continue;

View File

@ -88,9 +88,9 @@ std::shared_ptr<domi::ModelBufferData> SubGraphNpuKernel::BuildIRModel() {
return om_model_buff;
}
int SubGraphNpuKernel::Run() {
int SubGraphNpuKernel::Execute() {
return reinterpret_cast<lite::NPUExecutor *>(this->executor_)
->Run(in_tensors_, out_tensor_sorted_, in_nodes_, nodes_);
->Run(in_tensors(), out_tensor_sorted_, in_nodes_, nodes_);
}
int SubGraphNpuKernel::BuildNPUInputOp() {
@ -120,7 +120,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
return RET_ERROR;
}
// input come from npu
auto npu_op = reinterpret_cast<NPUKernel *>(in_kernel)->GetNPUOp();
auto npu_op = reinterpret_cast<NPUKernel *>(in_kernel->kernel())->GetNPUOp();
if (npu_op == nullptr) {
MS_LOG(ERROR) << in_kernel->type_str() << "NPU Operator is nullptr.";
return RET_ERROR;
@ -138,7 +138,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
// weight tensor
if (is_weight_tensor) {
if (npu_specific_weight_nodes.find(node->Type()) == npu_specific_weight_nodes.end()) {
if (npu_specific_weight_nodes.find(node->type()) == npu_specific_weight_nodes.end()) {
auto name = node->name() + "_" + std::to_string(count++);
auto weight_const = new (std::nothrow) hiai::op::Const(node->name() + "_" + std::to_string(count++));
if (weight_const == nullptr) {
@ -153,8 +153,8 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
}
}
// set input to NPU
int ret = reinterpret_cast<NPUKernel *>(node)->SetNPUInputs(node->in_tensors(), node->out_tensors(), node_input_op,
index2_multi_out_index);
int ret = reinterpret_cast<NPUKernel *>(node->kernel())
->SetNPUInputs(node->in_tensors(), node->out_tensors(), node_input_op, index2_multi_out_index);
if (ret != RET_OK) {
MS_LOG(ERROR) << node->name() << " set npu inputs failed.";
return RET_ERROR;
@ -170,7 +170,7 @@ std::vector<ge::Operator> SubGraphNpuKernel::GetNPUNodes(const vector<kernel::Li
std::vector<ge::Operator> ops;
ops.reserve(nodes.size());
for (int i = 0; i < nodes.size(); i++) {
ops.push_back(*reinterpret_cast<NPUKernel *>(nodes[i])->GetNPUOp());
ops.push_back(*reinterpret_cast<NPUKernel *>(nodes[i]->kernel())->GetNPUOp());
}
return ops;
}
@ -178,11 +178,12 @@ std::vector<ge::Operator> SubGraphNpuKernel::GetNPUNodes(const vector<kernel::Li
int SubGraphNpuKernel::BuildNPUOutputOp() {
subgraph_output_op_.clear();
subgraph_output_op_ = GetNPUNodes(out_nodes_);
out_tensor_sorted_.resize(out_tensors_.size());
out_tensor_sorted_.resize(out_tensors().size());
int i = 0;
auto out_tensors = this->out_tensors();
for (auto node : out_nodes_) {
for (auto tensor : node->out_tensors()) {
if (std::find(out_tensors_.begin(), out_tensors_.end(), tensor) != out_tensors_.end())
if (std::find(out_tensors.begin(), out_tensors.end(), tensor) != out_tensors.end())
this->out_tensor_sorted_[i++] = tensor;
}
}
@ -193,11 +194,11 @@ int SubGraphNpuKernel::BuildNPUOutputOp() {
return RET_OK;
}
std::string SubGraphNpuKernel::GetOMModelName() { return this->name_ + ".om"; }
std::string SubGraphNpuKernel::GetOMModelName() { return this->name() + ".om"; }
int SubGraphNpuKernel::Init() {
if (!is_compiled_) {
name_ = "kNpuSubGraph" + std::to_string(npu_manager_->index());
this->set_name("kNpuSubGraph" + std::to_string(npu_manager_->index()));
auto model_buffer_data = BuildIRModel();
if (model_buffer_data == nullptr) {
MS_LOG(ERROR) << "Build IR model failed.";
@ -206,7 +207,7 @@ int SubGraphNpuKernel::Init() {
MS_ASSERT(npu_manager_ != nullptr);
int frequency = static_cast<const lite::InnerContext *>(context_)->GetNpuInfo().frequency_;
int frequency = static_cast<const lite::InnerContext *>(this->Context())->GetNpuInfo().frequency_;
if (frequency != hiai::AiModelDescription_Frequency_LOW && frequency != hiai::AiModelDescription_Frequency_MEDIUM &&
frequency != hiai::AiModelDescription_Frequency_HIGH &&
frequency != hiai::AiModelDescription_Frequency_EXTREME) {
@ -226,7 +227,7 @@ int SubGraphNpuKernel::Init() {
}
int SubGraphNpuKernel::Prepare() {
if (executor_->Prepare(nodes_, in_tensors_, out_tensors_) != RET_OK) {
if (executor_->Prepare(nodes_, in_tensors(), out_tensors()) != RET_OK) {
MS_LOG(ERROR) << "NPU executor prepare failed.";
return RET_ERROR;
}

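Delegate code such as the NPU subgraph above (and the OpenCL executor below) can no longer cast the scheduled LiteKernel node itself to the backend kernel type; the backend object now sits behind LiteKernel::kernel(). The access pattern, following the diff:

// node is a kernel::LiteKernel * scheduled onto the NPU.
auto *npu_kernel = reinterpret_cast<NPUKernel *>(node->kernel());
auto *npu_op = npu_kernel->GetNPUOp();
if (npu_op == nullptr) {
  MS_LOG(ERROR) << node->type_str() << " NPU operator is nullptr.";
}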
View File

@ -32,11 +32,10 @@ using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
class SubGraphNpuKernel : public SubGraphKernel {
public:
SubGraphNpuKernel(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
const std::vector<kernel::LiteKernel *> &inKernels,
SubGraphNpuKernel(const std::vector<kernel::LiteKernel *> &inKernels,
const std::vector<kernel::LiteKernel *> &outKernels, const std::vector<kernel::LiteKernel *> &nodes,
const lite::InnerContext *ctx = nullptr, lite::NPUManager *npu_manager = nullptr)
: SubGraphKernel(inputs, outputs, inKernels, outKernels, nodes, ctx), npu_manager_(npu_manager) {
Kernel *kernel, lite::NPUManager *npu_manager = nullptr)
: SubGraphKernel(inKernels, outKernels, nodes, kernel), npu_manager_(npu_manager) {
subgraph_type_ = kNpuSubGraph;
desc_.arch = kernel::KERNEL_ARCH::kNPU;
}
@ -47,13 +46,9 @@ class SubGraphNpuKernel : public SubGraphKernel {
int Prepare() override;
int PreProcess() override { return RET_OK; }
int Execute() override;
int Run() override;
int Run(const KernelCallBack &before, const KernelCallBack &after) override { return this->Run(); }
int PostProcess() override { return RET_OK; }
int Execute(const KernelCallBack &before, const KernelCallBack &after) override { return this->Execute(); }
int ReSize() override {
MS_LOG(ERROR) << "NPU does not support the resize function temporarily.";

View File

@ -49,18 +49,7 @@ int OpenCLExecutor::RunOrTune(const std::vector<Tensor *> &inputs, const std::ve
MS_LOG(ERROR) << "run kernel before_callback failed, name: " << kernel->name();
}
}
auto *op_kernel = reinterpret_cast<kernel::OpenCLKernel *>(kernel);
ret = kernel->PreProcess();
if (RET_OK != ret) {
if (is_tune) {
MS_LOG(WARNING) << "PreProcess kernel failed, name: " << kernel->name() << " in tuning";
opencl_runtime_ins->SetProfiling(profiling_tmp);
return RET_OK;
} else {
MS_LOG(ERROR) << "PreProcess kernel failed, name: " << kernel->name();
return ret;
}
}
auto *op_kernel = reinterpret_cast<kernel::OpenCLKernel *>(kernel->kernel());
// Support ZeroShape
size_t zero_shape_num = 0;
for (auto tensor : kernel->out_tensors()) {
@ -79,7 +68,7 @@ int OpenCLExecutor::RunOrTune(const std::vector<Tensor *> &inputs, const std::ve
return ret;
}
} else {
ret = kernel->Run();
ret = kernel->Execute();
if (ret != RET_OK) {
MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name();
return ret;
@ -92,11 +81,6 @@ int OpenCLExecutor::RunOrTune(const std::vector<Tensor *> &inputs, const std::ve
}
}
}
ret = kernel->PostProcess();
if (ret != RET_OK) {
MS_LOG(ERROR) << "PostProcess kernel failed, name: " << kernel->name();
return ret;
}
if (after != nullptr) {
if (!after(TensorVectorCast(kernel->in_tensors()), TensorVectorCast(kernel->out_tensors()), callbackParam)) {
MS_LOG(ERROR) << "run kernel after_callback failed, name: " << kernel->name();

View File

@ -51,7 +51,7 @@ int KernelInferShape(const std::vector<lite::Tensor *> &inputs, const std::vecto
return RET_ERROR;
}
int KernelInferShape(const std::vector<lite::Tensor *> &inputs, std::vector<lite::Tensor *> *outputs,
int KernelInferShape(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
OpParameter *parameter) {
std::vector<TensorC *> in_tensors;
std::vector<TensorC *> out_tensors;
@ -84,7 +84,7 @@ int KernelInferShape(const std::vector<lite::Tensor *> &inputs, std::vector<lite
}
if (reinterpret_cast<TensorListC *>(out_tensors.at(i))->data_type_ == TypeIdC::kObjectTypeTensorType) {
auto *tensor_list_c = reinterpret_cast<TensorListC *>(out_tensors.at(i));
auto *tensor_list = reinterpret_cast<TensorList *>(outputs->at(i));
auto *tensor_list = reinterpret_cast<TensorList *>(outputs.at(i));
tensor_list->set_shape({static_cast<int>(tensor_list_c->element_num_)});
auto tensor_shape = std::vector<std::vector<int>>(
tensor_list_c->element_num_,
@ -93,10 +93,10 @@ int KernelInferShape(const std::vector<lite::Tensor *> &inputs, std::vector<lite
tensor_list->MallocTensorListData(static_cast<TypeId>(tensor_list_c->data_type_), tensor_shape);
TensorListC2TensorList(tensor_list_c, tensor_list);
} else {
TensorC2Tensor(out_tensors.at(i), outputs->at(i));
TensorC2Tensor(out_tensors.at(i), outputs.at(i));
}
if (ret == NNACL_INFER_INVALID) {
outputs->at(i)->set_shape({-1});
outputs.at(i)->set_shape({-1});
}
}

View File

@ -25,7 +25,7 @@
#include "nnacl/infer/infer.h"
namespace mindspore::lite {
int KernelInferShape(const std::vector<lite::Tensor *> &inputs, std::vector<lite::Tensor *> *outputs,
int KernelInferShape(const std::vector<lite::Tensor *> &tensors_in, const std::vector<lite::Tensor *> &outputs,
OpParameter *parameter);
int KernelInferShape(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
const void *primitive);

View File

@ -23,14 +23,14 @@
#include "nnacl/fp16/arg_min_max_fp16.h"
#endif
#include "nnacl/common_func.h"
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
namespace mindspore::kernel {
class ArgMinMaxCPUKernel : public LiteKernel {
class ArgMinMaxCPUKernel : public InnerKernel {
public:
ArgMinMaxCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {
: InnerKernel(parameter, inputs, outputs, ctx) {
arg_param_ = reinterpret_cast<ArgMinMaxParameter *>(op_parameter_);
}

View File

@ -17,14 +17,14 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_ASSERT_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
namespace mindspore::kernel {
class AssertCPUKernel : public LiteKernel {
class AssertCPUKernel : public InnerKernel {
public:
AssertCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {}
: InnerKernel(parameter, inputs, outputs, ctx) {}
~AssertCPUKernel() override {}
int Init() override;

View File

@ -23,11 +23,11 @@
// this file is useless when move create actor before schedule.
namespace mindspore::kernel {
class CallCPUKernel : public LiteKernel {
class CallCPUKernel : public InnerKernel {
public:
CallCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {}
: InnerKernel(parameter, inputs, outputs, ctx) {}
~CallCPUKernel() override = default;
int Init() override;
int ReSize() override;

View File

@ -17,16 +17,16 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_CARRY_DATA_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "src/tensor.h"
#include "src/tensorlist.h"
namespace mindspore::kernel {
class CarryDataKernel : public LiteKernel {
class CarryDataKernel : public InnerKernel {
public:
CarryDataKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {}
: InnerKernel(parameter, inputs, outputs, ctx) {}
~CarryDataKernel() override = default;
protected:

View File

@ -18,18 +18,18 @@
#include <vector>
#include "include/errorcode.h"
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "include/context.h"
#include "nnacl/constant_of_shape_parameter.h"
#include "nnacl/fp32/constant_of_shape_fp32.h"
#include "nnacl/fp16/constant_of_shape_fp16.h"
namespace mindspore::kernel {
class ConstantOfShapeCPUKernel : public LiteKernel {
class ConstantOfShapeCPUKernel : public InnerKernel {
public:
ConstantOfShapeCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {
: InnerKernel(parameter, inputs, outputs, ctx) {
param_ = reinterpret_cast<ConstantOfShapeParameter *>(parameter);
}
~ConstantOfShapeCPUKernel() override = default;

View File

@ -27,7 +27,7 @@
#include <android/log.h>
#endif
#endif
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "include/context.h"
#include "src/runtime/kernel/arm/base/layout_transform.h"
#include "src/weight_decoder.h"
@ -35,11 +35,11 @@
using mindspore::lite::InnerContext;
namespace mindspore::kernel {
class ConvolutionBaseCPUKernel : public LiteKernel {
class ConvolutionBaseCPUKernel : public InnerKernel {
public:
ConvolutionBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx), ctx_(ctx), thread_count_(ctx->thread_num_) {
: InnerKernel(parameter, inputs, outputs, ctx), ctx_(ctx), thread_count_(ctx->thread_num_) {
op_parameter_->thread_num_ = ctx->thread_num_;
conv_param_ = reinterpret_cast<ConvParameter *>(op_parameter_);
}

View File

@ -18,15 +18,15 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_CROP_BASE_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "nnacl/crop_parameter.h"
namespace mindspore::kernel {
class CropBaseCPUKernel : public LiteKernel {
class CropBaseCPUKernel : public InnerKernel {
public:
CropBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const mindspore::lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {
: InnerKernel(parameter, inputs, outputs, ctx) {
crop_para_ = reinterpret_cast<CropParameter *>(op_parameter_);
crop_para_->thread_count_ = op_parameter_->thread_num_;
}

View File

@ -18,18 +18,18 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_DEPTH_TO_SPACE_BASE_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "include/errorcode.h"
#include "include/context.h"
#include "nnacl/nnacl_common.h"
#include "nnacl/depth_to_space_parameter.h"
namespace mindspore::kernel {
class DepthToSpaceBaseCPUKernel : public LiteKernel {
class DepthToSpaceBaseCPUKernel : public InnerKernel {
public:
DepthToSpaceBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {
: InnerKernel(parameter, inputs, outputs, ctx) {
param_ = reinterpret_cast<DepthToSpaceParameter *>(op_parameter_);
}
virtual ~DepthToSpaceBaseCPUKernel() = default;

View File

@ -132,16 +132,6 @@ void DetectionPostProcessBaseCPUKernel::FreeAllocatedBuffer() {
context_->allocator->Free(params_->selected_);
params_->selected_ = nullptr;
}
if (desc_.data_type == kNumberTypeInt8) {
if (input_boxes_ != nullptr) {
context_->allocator->Free(input_boxes_);
input_boxes_ = nullptr;
}
if (input_scores_ != nullptr) {
context_->allocator->Free(input_scores_);
input_scores_ = nullptr;
}
}
}
int DetectionPostProcessBaseCPUKernel::ParamInit() {

View File

@ -18,18 +18,18 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DETECTION_POST_PROCESS_BASE_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "include/context.h"
#include "nnacl/fp32/detection_post_process_fp32.h"
using mindspore::lite::InnerContext;
namespace mindspore::kernel {
class DetectionPostProcessBaseCPUKernel : public LiteKernel {
class DetectionPostProcessBaseCPUKernel : public InnerKernel {
public:
DetectionPostProcessBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx), thread_num_(ctx->thread_num_) {
: InnerKernel(parameter, inputs, outputs, ctx), thread_num_(ctx->thread_num_) {
params_ = reinterpret_cast<DetectionPostProcessParameter *>(parameter);
}
virtual ~DetectionPostProcessBaseCPUKernel();
@ -37,6 +37,7 @@ class DetectionPostProcessBaseCPUKernel : public LiteKernel {
int Init() override;
int ReSize() override;
int Run() override;
virtual void FreeAllocatedBuffer();
int thread_num_ = 1;
int num_boxes_ = 0;
@ -48,9 +49,6 @@ class DetectionPostProcessBaseCPUKernel : public LiteKernel {
protected:
virtual int GetInputData() = 0;
int ParamInit();
private:
void FreeAllocatedBuffer();
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DETECTION_POST_PROCESS_BASE_H_
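The int8-specific buffer releases deleted from the base implementation above are presumably what motivates making FreeAllocatedBuffer() virtual and public here: a quantized subclass can free its own buffers and then fall back to the base for the shared ones. A hedged sketch of such an override follows; the derived class name and its members are assumptions, not taken from this diff.

// Sketch only: hypothetical int8 override that frees its own dequant buffers first,
// then delegates to the base class for the buffers the base still owns.
void DetectionPostProcessInt8CPUKernel::FreeAllocatedBuffer() {
  if (input_boxes_ != nullptr) {
    context_->allocator->Free(input_boxes_);
    input_boxes_ = nullptr;
  }
  if (input_scores_ != nullptr) {
    context_->allocator->Free(input_scores_);
    input_scores_ = nullptr;
  }
  DetectionPostProcessBaseCPUKernel::FreeAllocatedBuffer();
}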

View File

@ -75,7 +75,7 @@ void GroupConvolutionBaseCPUKernel::FreeSubKernel() {
int GroupConvolutionBaseCPUKernel::PreProcess() {
if (!InferShapeDone()) {
auto ret = lite::KernelInferShape(in_tensors_, &out_tensors_, op_parameter_);
auto ret = lite::KernelInferShape(in_tensors_, out_tensors_, op_parameter_);
if (ret != 0) {
MS_LOG(ERROR) << "InferShape fail!";
return ret;
@ -118,7 +118,7 @@ int GroupConvolutionBaseCPUKernel::PreProcess() {
}
}
auto outputs = this->out_tensors();
auto outputs = this->out_tensors_;
for (auto *output : outputs) {
MS_ASSERT(output != nullptr);
auto ret = output->MallocData();
@ -132,8 +132,8 @@ int GroupConvolutionBaseCPUKernel::PreProcess() {
}
int GroupConvolutionBaseCPUKernel::Run() {
ori_in_data_ = in_tensors().front()->data_c();
ori_out_data_ = out_tensors().front()->data_c();
ori_in_data_ = in_tensors_[0]->data_c();
ori_out_data_ = out_tensors_[0]->data_c();
for (int i = 0; i < group_num_; ++i) {
// first, separate group conv input into several parts. This step must be in runtime stage.
auto ret = SeparateInput(i);

View File

@ -19,7 +19,7 @@
#include <utility>
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "nnacl/op_base.h"
#include "src/runtime/kernel/arm/base/convolution_base.h"
#include "nnacl/fp32/conv_common_fp32.h"
@ -29,7 +29,7 @@ class GroupConvolutionBaseCPUKernel : public ConvolutionBaseCPUKernel {
public:
GroupConvolutionBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
std::vector<kernel::LiteKernel *> group_convs, const int group_num)
std::vector<kernel::InnerKernel *> group_convs, const int group_num)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx),
group_convs_(std::move(group_convs)),
group_num_(group_num) {} // opParameter(in channel, out channel) in this kernel has been split to groups, if
@ -45,7 +45,7 @@ class GroupConvolutionBaseCPUKernel : public ConvolutionBaseCPUKernel {
void FreeSubKernel();
protected:
std::vector<kernel::LiteKernel *> group_convs_;
std::vector<kernel::InnerKernel *> group_convs_;
const int group_num_;
void *ori_in_data_ = nullptr; // do not free
void *ori_out_data_ = nullptr; // do not free

View File

@ -19,7 +19,7 @@
#include <utility>
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "nnacl/conv_parameter.h"
namespace mindspore::kernel {
@ -48,7 +48,7 @@ class GroupConvCreator {
public:
void SetShapeOfTensors();
std::vector<kernel::LiteKernel *> *get_group_conv() { return &group_convs_; }
std::vector<kernel::InnerKernel *> *get_group_conv() { return &group_convs_; }
void CopyQuantParam(std::vector<lite::Tensor *> *tensors);
int GetSingleConvParam(ConvParameter *conv_param, std::vector<lite::Tensor *> *new_inputs,
std::vector<lite::Tensor *> *new_outputs, int group_id);
@ -66,7 +66,7 @@ class GroupConvCreator {
private:
std::vector<lite::Tensor *> origin_inputs_;
std::vector<lite::Tensor *> origin_outputs_;
std::vector<kernel::LiteKernel *> group_convs_;
std::vector<kernel::InnerKernel *> group_convs_;
std::vector<int> input_shape_;
std::vector<int> output_shape_;
std::vector<int> filter_shape_;

View File

@ -75,7 +75,7 @@ int MergeCPUKernel::ReSize() { return RET_OK; }
InputPart MergeCPUKernel::FindReadyPart(const std::vector<lite::Tensor *> &scope_tensors) {
MS_ASSERT(in_tensors_.size() == 2 * out_tensors_.size());
bool is_root_tensor_ready =
std::all_of(this->in_tensors().begin(), this->in_tensors().end(), [&](lite::Tensor *in_tensor) {
std::all_of(this->in_tensors_.begin(), this->in_tensors_.end(), [&](lite::Tensor *in_tensor) {
// if not in scope_tensors, not care
if (!IsContain(scope_tensors, in_tensor)) {
return true;
@ -94,12 +94,12 @@ InputPart MergeCPUKernel::FindReadyPart(const std::vector<lite::Tensor *> &scope
// if not in scope_tensors, not care
// if in scope_tensors, in_tensor need to be ready
if (std::all_of(
this->in_tensors().begin() + in_tensors().size() / 2, this->in_tensors().end(),
this->in_tensors_.begin() + in_tensors().size() / 2, this->in_tensors_.end(),
[&](lite::Tensor *in_tensor) { return !IsContain(scope_tensors, in_tensor) || in_tensor->IsReady(); })) {
return RIGHT_INPUT_PART;
}
if (std::all_of(
this->in_tensors().begin(), this->in_tensors().begin() + in_tensors().size() / 2,
this->in_tensors_.begin(), this->in_tensors_.begin() + in_tensors().size() / 2,
[&](lite::Tensor *in_tensor) { return !IsContain(scope_tensors, in_tensor) || in_tensor->IsReady(); })) {
return LEFT_INPUT_PART;
}

View File

@ -29,7 +29,7 @@ class MergeCPUKernel : public CarryDataKernel {
MergeCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: CarryDataKernel(parameter, inputs, outputs, ctx) {}
bool IsReady(const std::vector<lite::Tensor *> &scope_tensors) override;
bool IsReady(const std::vector<lite::Tensor *> &scope_tensors);
~MergeCPUKernel() override = default;
int FreeInWorkTensor() const override;
int Init() override;

View File

@ -23,11 +23,11 @@
// this file is going to be removed when move create actor before schedule.
namespace mindspore::kernel {
class PartialFusionKernel : public LiteKernel {
class PartialFusionKernel : public InnerKernel {
public:
PartialFusionKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {}
: InnerKernel(parameter, inputs, outputs, ctx) {}
~PartialFusionKernel() override = default;
int Init() override;
int ReSize() override;

View File

@ -18,7 +18,7 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_POOLING_BASE_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "nnacl/fp32/pooling_fp32.h"
#include "include/errorcode.h"
@ -26,11 +26,11 @@ using mindspore::lite::InnerContext;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
namespace mindspore::kernel {
class PoolingBaseCPUKernel : public LiteKernel {
class PoolingBaseCPUKernel : public InnerKernel {
public:
PoolingBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx), ctx_(ctx), thread_count_(ctx->thread_num_) {
: InnerKernel(parameter, inputs, outputs, ctx), ctx_(ctx), thread_count_(ctx->thread_num_) {
pooling_param_ = reinterpret_cast<PoolingParameter *>(op_parameter_);
}
~PoolingBaseCPUKernel() = default;

View File

@ -18,18 +18,18 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_PRIOR_BOX_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "nnacl/reshape_parameter.h"
#include "nnacl/fp32/prior_box_fp32.h"
using mindspore::lite::InnerContext;
namespace mindspore::kernel {
class PriorBoxCPUKernel : public LiteKernel {
class PriorBoxCPUKernel : public InnerKernel {
public:
PriorBoxCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx), ctx_(ctx), thread_count_(ctx->thread_num_) {
: InnerKernel(parameter, inputs, outputs, ctx), ctx_(ctx), thread_count_(ctx->thread_num_) {
prior_box_param_ = reinterpret_cast<PriorBoxParameter *>(op_parameter_);
}
~PriorBoxCPUKernel() = default;

View File

@ -18,14 +18,14 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_QUANTDTYPECAST_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
namespace mindspore::kernel {
class QuantDTypeCastCPUKernel : public LiteKernel {
class QuantDTypeCastCPUKernel : public InnerKernel {
public:
QuantDTypeCastCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx), thread_num_(ctx->thread_num_) {}
: InnerKernel(parameter, inputs, outputs, ctx), thread_num_(ctx->thread_num_) {}
~QuantDTypeCastCPUKernel() = default;
int Init() override;

View File

@ -18,17 +18,17 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_RANDOM_STANDARD_NORMAL_BASE_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "nnacl/random_parameter.h"
using mindspore::lite::InnerContext;
namespace mindspore::kernel {
class RandomStandardNormalCPUKernel : public LiteKernel {
class RandomStandardNormalCPUKernel : public InnerKernel {
public:
RandomStandardNormalCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {
: InnerKernel(parameter, inputs, outputs, ctx) {
param_ = reinterpret_cast<RandomParam *>(parameter);
}
~RandomStandardNormalCPUKernel() override = default;

View File

@ -18,16 +18,16 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_REDUCE_BASE_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "nnacl/reduce_parameter.h"
namespace mindspore::kernel {
class ReduceBaseCPUKernel : public LiteKernel {
class ReduceBaseCPUKernel : public InnerKernel {
public:
ReduceBaseCPUKernel(OpParameter *param, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(param, inputs, outputs, ctx) {}
: InnerKernel(param, inputs, outputs, ctx) {}
virtual ~ReduceBaseCPUKernel() = default;
int Init() override;

View File

@ -17,16 +17,16 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_RESHAPE_BASE_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "include/context.h"
using mindspore::lite::InnerContext;
namespace mindspore::kernel {
class ReshapeBaseCPUKernel : public LiteKernel {
class ReshapeBaseCPUKernel : public InnerKernel {
public:
ReshapeBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {}
: InnerKernel(parameter, inputs, outputs, ctx) {}
~ReshapeBaseCPUKernel() override = default;
int Init() override;

View File

@ -17,18 +17,18 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_RESIZE_BASE_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "nnacl/resize_parameter.h"
using mindspore::schema::PrimitiveType_Resize;
using mindspore::schema::ResizeMethod;
namespace mindspore::kernel {
class ResizeBaseCPUKernel : public LiteKernel {
class ResizeBaseCPUKernel : public InnerKernel {
public:
ResizeBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {}
: InnerKernel(parameter, inputs, outputs, ctx) {}
~ResizeBaseCPUKernel() override = default;

View File

@ -18,7 +18,7 @@
#include <vector>
#include "src/runtime/kernel/arm/base/carry_data.h"
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "src/tensorlist.h"
namespace mindspore::kernel {

View File

@ -17,15 +17,15 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SLICE_BASE_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "nnacl/slice_parameter.h"
namespace mindspore::kernel {
class SliceCPUKernel : public LiteKernel {
class SliceCPUKernel : public InnerKernel {
public:
SliceCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {
: InnerKernel(parameter, inputs, outputs, ctx) {
param_ = reinterpret_cast<SliceParameter *>(op_parameter_);
}
~SliceCPUKernel() = default;

View File

@ -18,15 +18,15 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SOFTMAX_BASE_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "nnacl/softmax_parameter.h"
namespace mindspore::kernel {
class SoftmaxBaseCPUKernel : public LiteKernel {
class SoftmaxBaseCPUKernel : public InnerKernel {
public:
SoftmaxBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx), ctx_(ctx), thread_count_(ctx->thread_num_) {
: InnerKernel(parameter, inputs, outputs, ctx), ctx_(ctx), thread_count_(ctx->thread_num_) {
softmax_param_ = reinterpret_cast<SoftmaxParameter *>(op_parameter_);
}
~SoftmaxBaseCPUKernel() = default;

View File

@ -20,16 +20,16 @@
#include <vector>
#include "include/errorcode.h"
#include "include/context.h"
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "nnacl/split_parameter.h"
#include "nnacl/base/split_base.h"
namespace mindspore::kernel {
class SplitBaseCPUKernel : public LiteKernel {
class SplitBaseCPUKernel : public InnerKernel {
public:
SplitBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {
: InnerKernel(parameter, inputs, outputs, ctx) {
param = reinterpret_cast<SplitParameter *>(op_parameter_);
}
~SplitBaseCPUKernel() override {

View File

@ -25,11 +25,11 @@
#include "nnacl/base/split_with_over_lap_base.h"
namespace mindspore::kernel {
class SplitWithOverlapBaseCPUKernel : public LiteKernel {
class SplitWithOverlapBaseCPUKernel : public InnerKernel {
public:
SplitWithOverlapBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {
: InnerKernel(parameter, inputs, outputs, ctx) {
param = reinterpret_cast<SplitWithOverlapParameter *>(op_parameter_);
}
~SplitWithOverlapBaseCPUKernel() override = default;

View File

@ -17,16 +17,16 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_STACK_BASE_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "nnacl/stack_parameter.h"
using mindspore::lite::InnerContext;
namespace mindspore::kernel {
class StackBaseCPUKernel : public LiteKernel {
class StackBaseCPUKernel : public InnerKernel {
public:
StackBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {}
: InnerKernel(parameter, inputs, outputs, ctx) {}
~StackBaseCPUKernel() override = default;
int Init() override;

View File

@ -19,14 +19,14 @@
#include <vector>
#include "nnacl/fp32/strided_slice_fp32.h"
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
namespace mindspore::kernel {
class StridedSliceCPUKernel : public LiteKernel {
class StridedSliceCPUKernel : public InnerKernel {
public:
StridedSliceCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {
: InnerKernel(parameter, inputs, outputs, ctx) {
param_ = reinterpret_cast<StridedSliceParameter *>(parameter);
}
~StridedSliceCPUKernel() override = default;

View File

@ -18,7 +18,7 @@
#include <vector>
#include "src/runtime/kernel/arm/base/carry_data.h"
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "src/tensorlist.h"
namespace mindspore::kernel {

View File

@ -18,17 +18,17 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TENSORLISTFROMTENSOR_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "src/tensorlist.h"
#include "schema/model_generated.h"
#include "nnacl/tensorlist_parameter.h"
namespace mindspore::kernel {
class TensorListFromTensorCPUKernel : public LiteKernel {
class TensorListFromTensorCPUKernel : public InnerKernel {
public:
TensorListFromTensorCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx),
: InnerKernel(parameter, inputs, outputs, ctx),
dtype_(static_cast<TypeId>(reinterpret_cast<TensorListParameter *>(parameter)->element_dtype_)) {}
~TensorListFromTensorCPUKernel() = default;

View File

@ -18,17 +18,17 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TENSORLISTGETITEM_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "src/tensorlist.h"
#include "schema/model_generated.h"
#include "nnacl/tensorlist_parameter.h"
namespace mindspore::kernel {
class TensorListGetItemCPUKernel : public LiteKernel {
class TensorListGetItemCPUKernel : public InnerKernel {
public:
TensorListGetItemCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx),
: InnerKernel(parameter, inputs, outputs, ctx),
dtype_(reinterpret_cast<TensorListParameter *>(parameter)->element_dtype_) {}
~TensorListGetItemCPUKernel() = default;

View File

@ -18,17 +18,17 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TENSORLISTRESERVE_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "src/tensorlist.h"
#include "schema/model_generated.h"
#include "nnacl/tensorlist_parameter.h"
namespace mindspore::kernel {
class TensorListReserveCPUKernel : public LiteKernel {
class TensorListReserveCPUKernel : public InnerKernel {
public:
TensorListReserveCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx),
: InnerKernel(parameter, inputs, outputs, ctx),
element_dtype_(static_cast<TypeId>(reinterpret_cast<TensorListParameter *>(parameter)->element_dtype_)) {}
~TensorListReserveCPUKernel() = default;

View File

@ -18,17 +18,17 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TENSORLISTSETITEM_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "src/tensorlist.h"
#include "schema/model_generated.h"
#include "nnacl/tensorlist_parameter.h"
namespace mindspore::kernel {
class TensorListSetItemCPUKernel : public LiteKernel {
class TensorListSetItemCPUKernel : public InnerKernel {
public:
TensorListSetItemCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {}
: InnerKernel(parameter, inputs, outputs, ctx) {}
~TensorListSetItemCPUKernel() = default;
int Init() override;

View File

@ -19,17 +19,17 @@
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "src/tensorlist.h"
#include "schema/model_generated.h"
#include "nnacl/tensorlist_parameter.h"
namespace mindspore::kernel {
class TensorListStackCPUKernel : public LiteKernel {
class TensorListStackCPUKernel : public InnerKernel {
public:
TensorListStackCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx),
: InnerKernel(parameter, inputs, outputs, ctx),
num_element_(reinterpret_cast<TensorListParameter *>(parameter)->num_element_),
dtype_(static_cast<TypeId>(reinterpret_cast<TensorListParameter *>(parameter)->element_dtype_)) {}
~TensorListStackCPUKernel() = default;

View File

@ -17,15 +17,15 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_TILE_BASE_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "nnacl/base/tile_base.h"
namespace mindspore::kernel {
class TileCPUKernel : public LiteKernel {
class TileCPUKernel : public InnerKernel {
public:
TileCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {}
: InnerKernel(parameter, inputs, outputs, ctx) {}
~TileCPUKernel() override = default;
int Init() override;

View File

@ -18,16 +18,16 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_ACTIVATION_FP16_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "nnacl/fp32/activation_fp32.h"
#include "nnacl/fp16/activation_fp16.h"
namespace mindspore::kernel {
class ActivationFp16CPUKernel : public LiteKernel {
class ActivationFp16CPUKernel : public InnerKernel {
public:
ActivationFp16CPUKernel(OpParameter *param, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(param, inputs, outputs, ctx), thread_count_(ctx->thread_num_) {
: InnerKernel(param, inputs, outputs, ctx), thread_count_(ctx->thread_num_) {
type_ = (reinterpret_cast<ActivationParameter *>(param))->type_;
alpha_ = (float16_t)((reinterpret_cast<ActivationParameter *>(param))->alpha_);
min_val_ = (reinterpret_cast<ActivationParameter *>(param))->min_val_;

View File

@ -18,7 +18,7 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_ARITHMETIC_COMPARE_FP16_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "nnacl/fp16/arithmetic_fp16.h"
#include "schema/model_generated.h"
@ -34,11 +34,11 @@ typedef struct {
ArithmeticCompareOptFuncFp16 opt_func_;
} ARITHMETIC_COMPARE_FUNC_INFO_FP16;
class ArithmeticCompareFP16CPUKernel : public LiteKernel {
class ArithmeticCompareFP16CPUKernel : public InnerKernel {
public:
ArithmeticCompareFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {
: InnerKernel(parameter, inputs, outputs, ctx) {
param_ = reinterpret_cast<ArithmeticParameter *>(parameter);
}
~ArithmeticCompareFP16CPUKernel() = default;

View File

@ -17,15 +17,15 @@
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_BIASADD_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_BIASADD_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "nnacl/fp16/arithmetic_fp16.h"
namespace mindspore::kernel {
class BiasAddCPUFp16Kernel : public LiteKernel {
class BiasAddCPUFp16Kernel : public InnerKernel {
public:
BiasAddCPUFp16Kernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {
: InnerKernel(parameter, inputs, outputs, ctx) {
bias_param_ = reinterpret_cast<ArithmeticParameter *>(parameter);
}
~BiasAddCPUFp16Kernel() override;

View File

@ -17,17 +17,17 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CAST_FP16_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "nnacl/op_base.h"
#include "nnacl/fp16/cast_fp16.h"
#include "nnacl/base/cast_base.h"
namespace mindspore::kernel {
class CastFp16CPUKernel : public LiteKernel {
class CastFp16CPUKernel : public InnerKernel {
public:
CastFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {}
: InnerKernel(parameter, inputs, outputs, ctx) {}
~CastFp16CPUKernel() = default;

View File

@ -17,7 +17,7 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_COMMON_FP16_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
namespace mindspore::kernel {
float16_t *ConvertInputFp32toFp16(lite::Tensor *input, const lite::InnerContext *ctx);

View File

@ -23,16 +23,16 @@
#include "nnacl/base/concat_base.h"
#include "nnacl/concat_parameter.h"
#include "nnacl/fp16/cast_fp16.h"
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "src/runtime/kernel/arm/fp16/common_fp16.h"
using mindspore::lite::InnerContext;
namespace mindspore::kernel {
class ConcatFp16CPUKernel : public LiteKernel {
class ConcatFp16CPUKernel : public InnerKernel {
public:
ConcatFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {
: InnerKernel(parameter, inputs, outputs, ctx) {
concat_param_ = reinterpret_cast<ConcatParameter *>(op_parameter_);
}
~ConcatFp16CPUKernel() = default;

View File

@ -19,7 +19,7 @@
#include <arm_neon.h>
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "src/runtime/kernel/arm/base/convolution_base.h"
#include "src/common/utils.h"
#include "nnacl/matmul_parameter.h"

View File

@ -110,12 +110,12 @@ int ConvolutionDelegateFP16CPUKernel::ReSize() {
return fp16_conv_kernel_->ReSize();
}
kernel::LiteKernel *CpuConvDwFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
const InnerContext *ctx) {
kernel::InnerKernel *CpuConvDwFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
const InnerContext *ctx) {
MS_ASSERT(opParameter != nullptr);
auto conv_param = reinterpret_cast<ConvParameter *>(opParameter);
kernel::LiteKernel *kernel = nullptr;
kernel::InnerKernel *kernel = nullptr;
if (conv_param->input_channel_ < 32) {
kernel = new (std::nothrow) kernel::ConvolutionDepthwiseSWFp16CPUKernel(opParameter, inputs, outputs, ctx);
} else {
@ -129,14 +129,14 @@ kernel::LiteKernel *CpuConvDwFp16KernelCreator(const std::vector<lite::Tensor *>
return kernel;
}
kernel::LiteKernel *CpuConvFp16KernelSelect(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
const lite::InnerContext *ctx, void *origin_weight, void *origin_bias) {
kernel::InnerKernel *CpuConvFp16KernelSelect(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
const lite::InnerContext *ctx, void *origin_weight, void *origin_bias) {
auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter);
bool use_winograd = false;
int out_unit;
CheckIfUseWinogradFp16(&use_winograd, &out_unit, conv_param);
kernel::LiteKernel *kernel = nullptr;
kernel::InnerKernel *kernel = nullptr;
if (conv_param->kernel_h_ == 1 && conv_param->kernel_w_ == 1) {
kernel = new (std::nothrow)
@ -158,9 +158,9 @@ kernel::LiteKernel *CpuConvFp16KernelSelect(const std::vector<lite::Tensor *> &i
return kernel;
}
kernel::LiteKernel *CpuGroupConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
const InnerContext *ctx) {
kernel::InnerKernel *CpuGroupConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs,
OpParameter *op_parameter, const InnerContext *ctx) {
auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter);
GroupConvCreator group_conv_creator(inputs, outputs, op_parameter, ctx, false, kNumberTypeFloat16);
group_conv_creator.SetShapeOfTensors();
@ -183,9 +183,9 @@ kernel::LiteKernel *CpuGroupConvFp16KernelCreator(const std::vector<lite::Tensor
}
/* creator func */
kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
const lite::Context *ctx, const kernel::KernelKey &desc) {
kernel::InnerKernel *CpuConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
const lite::Context *ctx, const kernel::KernelKey &desc) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DFusion);
@ -199,7 +199,7 @@ kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::Tensor *> &
return nullptr;
}
auto conv_param = reinterpret_cast<ConvParameter *>(opParameter);
kernel::LiteKernel *kernel = nullptr;
kernel::InnerKernel *kernel = nullptr;
if (conv_param->group_ == 1) {
kernel = new (std::nothrow) kernel::ConvolutionDelegateFP16CPUKernel(opParameter, inputs, outputs,
static_cast<const lite::InnerContext *>(ctx));

View File

@ -18,7 +18,7 @@
#include <arm_neon.h>
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "nnacl/conv_parameter.h"
#include "nnacl/op_base.h"
@ -26,11 +26,11 @@
#define BIAS_NEED_FREE 0b10
namespace mindspore::kernel {
class ConvolutionDelegateFP16CPUKernel : public LiteKernel {
class ConvolutionDelegateFP16CPUKernel : public InnerKernel {
public:
ConvolutionDelegateFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {}
: InnerKernel(parameter, inputs, outputs, ctx) {}
~ConvolutionDelegateFP16CPUKernel() override {
FreeCopiedData();
if (fp16_conv_kernel_ != nullptr) {
@ -52,12 +52,12 @@ class ConvolutionDelegateFP16CPUKernel : public LiteKernel {
uint8_t need_free_ = 0b00;
void *origin_weight_ = nullptr;
void *origin_bias_ = nullptr;
kernel::LiteKernel *fp16_conv_kernel_ = nullptr;
kernel::InnerKernel *fp16_conv_kernel_ = nullptr;
};
kernel::LiteKernel *CpuConvFp16KernelSelect(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
const lite::InnerContext *ctx, void *origin_weight, void *origin_bias);
kernel::InnerKernel *CpuConvFp16KernelSelect(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
const lite::InnerContext *ctx, void *origin_weight, void *origin_bias);
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONVOLUTION_DELEGATE_FP16_H_
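Since the FP16 kernel creators above now return kernel::InnerKernel * rather than kernel::LiteKernel *, callers hold the selected kernel through the new base type. A hedged usage sketch built only from the signature shown above; inputs, outputs, op_parameter, ctx, origin_weight, and origin_bias are assumed to exist in the calling scope.

// Sketch only: selecting a concrete FP16 convolution kernel via the rectified creator.
kernel::InnerKernel *conv_kernel =
    CpuConvFp16KernelSelect(inputs, outputs, op_parameter, ctx, origin_weight, origin_bias);
if (conv_kernel == nullptr) {
  MS_LOG(ERROR) << "Select fp16 conv kernel failed.";
  return RET_ERROR;
}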

View File

@ -18,7 +18,7 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONVOLUTION_DEPTHWISE_FP16_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "src/runtime/kernel/arm/base/convolution_base.h"
#include "nnacl/fp16/conv_depthwise_fp16.h"

View File

@ -18,7 +18,7 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONVOLUTION_DEPTHWISE_SW_FP16_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "src/runtime/kernel/arm/base/convolution_base.h"
#include "nnacl/fp16/conv_depthwise_fp16.h"

View File

@ -19,7 +19,7 @@
#include <arm_neon.h>
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "src/runtime/kernel/arm/base/convolution_base.h"
namespace mindspore::kernel {

View File

@ -19,7 +19,7 @@
#include <arm_neon.h>
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "src/runtime/kernel/arm/base/convolution_base.h"
#include "nnacl/fp16/conv_fp16.h"
#include "nnacl/fp16/winograd_utils_fp16.h"

View File

@ -22,7 +22,7 @@
#include "include/errorcode.h"
#include "nnacl/crop_parameter.h"
#include "nnacl/fp16/crop_fp16.h"
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "src/runtime/kernel/arm/base/crop_base.h"
#include "src/runtime/kernel/arm/fp16/common_fp16.h"

View File

@ -18,7 +18,7 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_DECONVOLUTION_DEPTHWISE_FP16_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "src/runtime/kernel/arm/base/convolution_base.h"
#include "nnacl/fp16/conv_depthwise_fp16.h"

View File

@ -229,13 +229,13 @@ int DeConvolutionFp16CPUKernel::Run() {
return error_code;
}
kernel::LiteKernel *CpuDeConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
const lite::Context *ctx, const kernel::KernelKey &desc) {
kernel::InnerKernel *CpuDeConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
const lite::Context *ctx, const kernel::KernelKey &desc) {
MS_ASSERT(op_parameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_Conv2dTransposeFusion);
kernel::LiteKernel *kernel = nullptr;
kernel::InnerKernel *kernel = nullptr;
auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter);
if (conv_param->group_ == 1) {
if ((conv_param->stride_h_ != 1 || conv_param->stride_w_ != 1) &&

View File

@ -56,7 +56,7 @@ int GatherFp16CPUKernel::ReSize() { return RET_OK; }
int GatherFp16CPUKernel::PreProcess() {
if (!InferShapeDone()) {
auto ret = lite::KernelInferShape(in_tensors_, &out_tensors_, op_parameter_);
auto ret = lite::KernelInferShape(in_tensors_, out_tensors_, op_parameter_);
if (ret != 0) {
MS_LOG(ERROR) << "InferShape fail!";
return ret;
@ -68,16 +68,15 @@ int GatherFp16CPUKernel::PreProcess() {
}
out_tensors_[0]->set_data_type(kNumberTypeFloat16);
}
for (auto *output : this->out_tensors()) {
for (auto *output : out_tensors_) {
MS_ASSERT(output != nullptr);
auto ret = output->MallocData();
if (output->ElementsNum() >= MAX_MALLOC_SIZE / static_cast<int>(sizeof(int64_t))) {
MS_LOG(ERROR) << "The size of output tensor is too big";
return RET_ERROR;
}
auto ret = output->MallocData();
if (ret != RET_OK) {
MS_LOG(ERROR) << "MallocData failed";
MS_LOG(ERROR) << "gather out tensor malloc data failed.";
return ret;
}
}

View File

@ -20,16 +20,16 @@
#include <arm_neon.h>
#include <vector>
#include "include/errorcode.h"
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "nnacl/gather_parameter.h"
#include "nnacl/base/gather_base.h"
namespace mindspore::kernel {
class GatherFp16CPUKernel : public LiteKernel {
class GatherFp16CPUKernel : public InnerKernel {
public:
GatherFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {}
: InnerKernel(parameter, inputs, outputs, ctx) {}
~GatherFp16CPUKernel() override;
int Init() override;

View File

@ -26,7 +26,7 @@ int GroupConvolutionFP16CPUKernel::SeparateInput(int group_id) {
int in_plane = in_tensor->Height() * in_tensor->Width() * in_tensor->Batch();
int sub_in_channel = conv_param_->input_channel_;
int ori_in_channel = sub_in_channel * group_num_;
auto sub_in_data = group_convs_.at(group_id)->in_tensors().front()->data_c();
auto sub_in_data = static_cast<lite::Tensor *>(group_convs_.at(group_id)->in_tensors().front())->data_c();
auto in_data_type = in_tensors_.front()->data_type();
auto sub_in_data_type = group_convs_.at(group_id)->in_tensors().front()->data_type();
if (in_data_type != sub_in_data_type) {
@ -67,7 +67,8 @@ int GroupConvolutionFP16CPUKernel::PostConcat(int group_id) {
int out_plane = out_tensor->Height() * out_tensor->Width() * out_tensor->Batch();
int sub_out_channel = conv_param_->output_channel_;
int ori_out_channel = sub_out_channel * group_num_;
auto sub_out_data = reinterpret_cast<float16_t *>(group_convs_.at(group_id)->out_tensors().front()->data_c());
auto sub_out_data = reinterpret_cast<float16_t *>(
static_cast<lite::Tensor *>(group_convs_.at(group_id)->out_tensors().front())->data_c());
MS_ASSERT(sub_out_data);
float16_t *src_ptr = sub_out_data;
float16_t *dst_ptr = reinterpret_cast<float16_t *>(ori_out_data_) + group_id * sub_out_channel;

View File

@ -19,7 +19,7 @@
#include <utility>
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "nnacl/op_base.h"
#include "src/runtime/kernel/arm/base/group_convolution_base.h"
#include "nnacl/fp16/conv_fp16.h"
@ -29,7 +29,7 @@ class GroupConvolutionFP16CPUKernel : public GroupConvolutionBaseCPUKernel {
public:
GroupConvolutionFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
std::vector<kernel::LiteKernel *> group_convs, const int group_num)
std::vector<kernel::InnerKernel *> group_convs, const int group_num)
: GroupConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, std::move(group_convs), group_num) {
} // opParameter(in channel, out channel) in this kernel has been split to groups, if
// you want to get real params, multiply in channel / out channel with group num

View File

@ -16,15 +16,15 @@
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_GRU_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_GRU_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "nnacl/gru_parameter.h"
namespace mindspore::kernel {
class GruFp16CPUKernel : public LiteKernel {
class GruFp16CPUKernel : public InnerKernel {
public:
GruFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {
: InnerKernel(parameter, inputs, outputs, ctx) {
gru_param_ = reinterpret_cast<GruParameter *>(op_parameter_);
}

View File

@ -16,18 +16,18 @@
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_INSTANCE_NORM_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_INSTANCE_NORM_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "include/context.h"
#include "nnacl/instance_norm_parameter.h"
using mindspore::lite::InnerContext;
namespace mindspore::kernel {
class InstanceNormFp16CPUKernel : public LiteKernel {
class InstanceNormFp16CPUKernel : public InnerKernel {
public:
InstanceNormFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {
: InnerKernel(parameter, inputs, outputs, ctx) {
param_ = reinterpret_cast<InstanceNormParameter *>(parameter);
}
~InstanceNormFp16CPUKernel() override { FreeTmpBuffer(); };

View File

@ -19,7 +19,7 @@
#include <arm_neon.h>
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "src/runtime/kernel/arm/base/softmax_base.h"
namespace mindspore::kernel {

View File

@ -18,15 +18,15 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_LSTM_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "nnacl/lstm_parameter.h"
namespace mindspore::kernel {
class LstmFp16CPUKernel : public LiteKernel {
class LstmFp16CPUKernel : public InnerKernel {
public:
LstmFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {
: InnerKernel(parameter, inputs, outputs, ctx) {
lstm_param_ = reinterpret_cast<LstmParameter *>(op_parameter_);
}

View File

@ -21,15 +21,15 @@
#include <arm_neon.h>
#endif
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "nnacl/matmul_parameter.h"
namespace mindspore::kernel {
class MatmulBaseFP16CPUKernel : public LiteKernel {
class MatmulBaseFP16CPUKernel : public InnerKernel {
public:
explicit MatmulBaseFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx) {
: InnerKernel(parameter, inputs, outputs, ctx) {
params_ = reinterpret_cast<MatMulParameter *>(op_parameter_);
}
~MatmulBaseFP16CPUKernel() override;

View File

@ -19,7 +19,7 @@
#include <arm_neon.h>
#include <vector>
#include <cfloat>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "src/runtime/kernel/arm/base/pooling_base.h"
namespace mindspore::kernel {

View File

@ -18,16 +18,16 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_POWER_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "include/context.h"
#include "nnacl/fp16/power_fp16.h"
namespace mindspore::kernel {
class PowerFp16CPUKernel : public LiteKernel {
class PowerFp16CPUKernel : public InnerKernel {
public:
PowerFp16CPUKernel(OpParameter *param, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(param, inputs, outputs, ctx),
: InnerKernel(param, inputs, outputs, ctx),
thread_count_(ctx->thread_num_),
scale_(reinterpret_cast<PowerParameter *>(op_parameter_)->scale_),
shift_(reinterpret_cast<PowerParameter *>(op_parameter_)->shift_) {}

View File

@ -174,10 +174,10 @@ int QuantDTypeCastFp16CPUKernel::Run() {
return RET_OK;
}
kernel::LiteKernel *CpuQuantDTypeCastFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs,
OpParameter *opParameter, const lite::InnerContext *ctx,
const kernel::KernelKey &desc) {
kernel::InnerKernel *CpuQuantDTypeCastFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs,
OpParameter *opParameter, const lite::InnerContext *ctx,
const kernel::KernelKey &desc) {
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Input opParameter is nullptr!";
return nullptr;

View File

@ -19,14 +19,14 @@
#include <arm_neon.h>
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
namespace mindspore::kernel {
class QuantDTypeCastFp16CPUKernel : public LiteKernel {
class QuantDTypeCastFp16CPUKernel : public InnerKernel {
public:
QuantDTypeCastFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
: LiteKernel(parameter, inputs, outputs, ctx), thread_num_(ctx->thread_num_) {}
: InnerKernel(parameter, inputs, outputs, ctx), thread_num_(ctx->thread_num_) {}
~QuantDTypeCastFp16CPUKernel() override = default;
int Init() override;

View File

@ -19,7 +19,7 @@
#include <arm_neon.h>
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "src/runtime/kernel/arm/base/reduce_base.h"
using mindspore::schema::ReduceMode;

View File

@ -18,7 +18,7 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_SCALE_FP16_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "src/runtime/kernel/arm/fp32/scale_fp32.h"
#include "nnacl/scale.h"

View File

@ -18,7 +18,7 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_SLICE_FP16_H_
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "src/runtime/kernel/arm/base/slice_base.h"
namespace mindspore::kernel {

View File

@ -19,7 +19,7 @@
#include <arm_neon.h>
#include <vector>
#include "src/lite_kernel.h"
#include "src/inner_kernel.h"
#include "src/runtime/kernel/arm/base/softmax_base.h"
namespace mindspore::kernel {

Some files were not shown because too many files have changed in this diff.