forked from mindspore-Ecosystem/mindspore

!15886 [MS][LITE] LiteKernel Interface Rectification

From: @gongdaguo
Commit: 98a92db2cb
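Overview (editor's note, inferred from the hunks below): this commit splits the old monolithic LiteKernel into three layers — a minimal abstract kernel::Kernel interface (new src/kernel.h), an InnerKernel base for built-in operators (new src/inner_kernel.h/.cc, which keeps the OpParameter, the training hooks, and the PreProcess/PostProcess logic), and LiteKernel itself, which becomes a scheduler-facing wrapper that owns a Kernel * and delegates to it. The old PreProcess/Run/PostProcess call sequence collapses into a single Execute(before, after) entry point. A minimal sketch of the delegation shape, with the real tensor and callback types elided (this is an illustration, not the literal MindSpore declarations):

// Sketch only: the wrapper/delegation pattern introduced by this commit.
struct Kernel {                        // abstract interface (src/kernel.h)
  virtual ~Kernel() = default;
  virtual int Prepare() = 0;
  virtual int Execute() = 0;           // replaces the old Run()
  virtual int ReSize() = 0;
};

struct InnerKernel : Kernel {          // built-in kernels (src/inner_kernel.h)
  int Prepare() override { return 0; }
  int ReSize() override { return 0; }
  int Execute() override {             // PreProcess -> Run -> PostProcess
    if (int r = PreProcess()) return r;
    if (int r = Run()) return r;
    return PostProcess();
  }
  virtual int PreProcess() { /* infer shapes + malloc outputs */ return 0; }
  virtual int Run() { return -1; }     // actual compute, overridden per op
  virtual int PostProcess() { /* release input ref counts */ return 0; }
};

class LiteKernel {                     // scheduler-facing wrapper
 public:
  explicit LiteKernel(Kernel *kernel) : kernel_(kernel) {}
  int Execute() { return kernel_->Execute(); }

 private:
  Kernel *kernel_ = nullptr;           // built-in InnerKernel or a user Kernel
};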
@@ -21,7 +21,7 @@ void SetOutputDtypeFormat(const TensorC *input0, const TensorC *input1, TensorC
   output->format_ = input0->format_;
   output->data_type_ = input0->data_type_;
   // when input0 is const, it is quanted before insert quant trans op, so use input1 data type instead
-  if (input0->data_ != NULL ||
+  if (((input0->data_ != NULL) && (input1->data_type_ != kTypeUnknown)) ||
       ((input0->data_type_ == kNumberTypeInt8) && (input1->data_type_ == kNumberTypeFloat32))) {
     output->data_type_ = input1->data_type_;
   }
@@ -135,7 +135,6 @@ set(LITE_SRC
         ${LITE_DIR}/src/common/tensor_util.cc
        ${LITE_DIR}/src/runtime/infer_manager.cc
        ${LITE_DIR}/src/kernel_interface_registry.cc
        ${LITE_DIR}/src/kernel_registry.cc
        ${LITE_DIR}/src/lite_model.cc
        ${LITE_DIR}/src/tensorlist.cc
        ${LITE_DIR}/src/tensor.cc
@@ -209,7 +209,7 @@ OpParameter *CoderSession::GenParameterAndInfer(const Model::Node *node, const s
   MS_CHECK_PTR_RET_NULL(parame_gen);
   auto parameter = parame_gen(primitive);
   MS_CHECK_PTR_RET_NULL(parameter);
-  auto ret = KernelInferShape(inputs, outputs, parameter);
+  auto ret = KernelInferShape(inputs, *outputs, parameter);
   if (ret == RET_INFER_INVALID) {
     MS_LOG(INFO) << "InferShape shouldn't be done before runtime, name: " << node->name_
                  << ", type: " << PrimitiveTypeName(GetPrimitiveType(primitive)) << "flag set to false.";
@@ -66,6 +66,7 @@ set(LITE_SRC
         ${CMAKE_CURRENT_SOURCE_DIR}/register_kernel.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/kernel_interface.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/kernel_interface_registry.cc
+        ${CMAKE_CURRENT_SOURCE_DIR}/inner_kernel.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/lite_kernel.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/lite_kernel_util.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/sub_graph_kernel.cc
@@ -150,17 +150,17 @@ int TensorListC2TensorList(TensorListC *src, TensorList *dst) {
   return RET_OK;
 }
 
-int GenerateMergeSwitchOutTensorC(const std::vector<lite::Tensor *> &inputs, std::vector<lite::Tensor *> *outputs,
+int GenerateMergeSwitchOutTensorC(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                                   std::vector<TensorC *> *out_tensor_c) {
   int ret = RET_OK;
-  for (size_t i = 0; i < outputs->size(); i++) {
+  for (size_t i = 0; i < outputs.size(); i++) {
     out_tensor_c->push_back(nullptr);
   }
   return ret;
 }
 
 int GenerateOutTensorC(const OpParameter *const parameter, const std::vector<lite::Tensor *> &inputs,
-                       std::vector<lite::Tensor *> *outputs, std::vector<TensorC *> *out_tensor_c) {
+                       const std::vector<lite::Tensor *> &outputs, std::vector<TensorC *> *out_tensor_c) {
   int ret = RET_OK;
   if (parameter->type_ == mindspore::schema::PrimitiveType_TensorListFromTensor ||
       parameter->type_ == mindspore::schema::PrimitiveType_TensorListReserve ||
@@ -176,13 +176,13 @@ int GenerateOutTensorC(const OpParameter *const parameter, const std::vector<lit
       parameter->type_ == mindspore::schema::PrimitiveType_Switch) {
     ret = GenerateMergeSwitchOutTensorC(inputs, outputs, out_tensor_c);
   } else {
-    ret = OutputTensor2TensorC(*outputs, out_tensor_c);
+    ret = OutputTensor2TensorC(outputs, out_tensor_c);
   }
   return ret;
 }
 
 int GenerateInTensorC(const OpParameter *const parameter, const std::vector<lite::Tensor *> &inputs,
-                      std::vector<lite::Tensor *> *outputs, std::vector<TensorC *> *in_tensor_c) {
+                      const std::vector<lite::Tensor *> &outputs, std::vector<TensorC *> *in_tensor_c) {
   int ret = RET_OK;
   for (auto input : inputs) {
     if (input->data_type() == kObjectTypeTensorType) {
@@ -32,12 +32,12 @@ void Tensor2TensorC(Tensor *src, TensorC *dst);
 void TensorC2Tensor(TensorC *src, Tensor *dst);
 int TensorList2TensorListC(TensorList *src, TensorListC *dst);
 int TensorListC2TensorList(TensorListC *src, TensorList *dst);
-int GenerateMergeSwitchOutTensorC(const std::vector<lite::Tensor *> &inputs, std::vector<lite::Tensor *> *outputs,
+int GenerateMergeSwitchOutTensorC(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                                   std::vector<TensorC *> *out_tensor_c);
 int GenerateInTensorC(const OpParameter *const parameter, const std::vector<lite::Tensor *> &inputs,
-                      std::vector<lite::Tensor *> *outputs, std::vector<TensorC *> *in_tensor_c);
+                      const std::vector<lite::Tensor *> &outputs, std::vector<TensorC *> *in_tensor_c);
 int GenerateOutTensorC(const OpParameter *const parameter, const std::vector<lite::Tensor *> &inputs,
-                       std::vector<lite::Tensor *> *outputs, std::vector<TensorC *> *out_tensor_c);
+                       const std::vector<lite::Tensor *> &outputs, std::vector<TensorC *> *out_tensor_c);
 
 int CheckTensorsInvalid(const std::vector<Tensor *> &tensors);
 void Tensor2MSTensor(const std::vector<Tensor *> &&tensors, std::vector<tensor::MSTensor *> *out_tensors);
@@ -48,21 +48,11 @@ int Executor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<Ten
     auto cur_kernel = kernel_queue.front();
     kernel_queue.pop();
     MS_ASSERT(nullptr != cur_kernel);
-    ret = cur_kernel->PreProcess();
-    if (RET_OK != ret) {
-      MS_LOG(ERROR) << "PreProcess kernel failed, name: " << cur_kernel->name();
-      return ret;
-    }
-    ret = cur_kernel->Run(before, after);
+    ret = cur_kernel->Execute(before, after);
     if (RET_OK != ret) {
       MS_LOG(ERROR) << "run kernel failed, name: " << cur_kernel->name();
       return ret;
     }
-    ret = cur_kernel->PostProcess();
-    if (RET_OK != ret) {
-      MS_LOG(ERROR) << "PostProcess kernel failed, name: " << cur_kernel->name();
-      return ret;
-    }
     for (auto &out_kernel : cur_kernel->out_kernels()) {
       if (out_kernel->IsReady(out_kernel->in_tensors())) {
         kernel_queue.push(out_kernel);
@@ -30,7 +30,7 @@ class Executor {
 
   virtual int Prepare(const std::vector<kernel::LiteKernel *> &kernels, const std::vector<Tensor *> &inputs,
                       const std::vector<Tensor *> &outputs) {
-    ctx_ = static_cast<const lite::InnerContext *>(kernels[0]->context());
+    ctx_ = static_cast<const lite::InnerContext *>(kernels[0]->Context());
     return RET_OK;
   }
 
@@ -0,0 +1,80 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/inner_kernel.h"
+#include <algorithm>
+#include <set>
+#include "src/tensor.h"
+#include "src/common/utils.h"
+#include "src/runtime/infer_manager.h"
+
+namespace mindspore::kernel {
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+
+#ifdef SUPPORT_TRAIN
+void *InnerKernel::workspace_ = nullptr;
+
+void InnerKernel::AllocWorkspace(size_t size) {
+  if (size == 0) {
+    return;
+  }
+  workspace_ = malloc(size);
+  if (workspace_ == nullptr) {
+    MS_LOG(ERROR) << "fail to alloc " << size;
+  }
+}
+
+void InnerKernel::FreeWorkspace() {
+  free(workspace_);
+  workspace_ = nullptr;
+}
+#endif
+
+int InnerKernel::PreProcess() {
+  if (!InferShapeDone()) {
+    auto ret = lite::KernelInferShape(in_tensors_, out_tensors_, op_parameter_);
+    if (ret != 0) {
+      MS_LOG(ERROR) << "InferShape fail!";
+      return ret;
+    }
+    ret = ReSize();
+    if (ret != 0) {
+      MS_LOG(ERROR) << "ReSize fail!ret: " << ret;
+      return ret;
+    }
+  }
+
+  for (auto *output : this->out_tensors()) {
+    MS_ASSERT(output != nullptr);
+    if (registry_data_type_ == kNumberTypeFloat16 && output->data_type() == kNumberTypeFloat32) {
+      output->set_data_type(kNumberTypeFloat16);
+    }
+
+    if (output->ElementsNum() >= MAX_MALLOC_SIZE / static_cast<int>(sizeof(int64_t))) {
+      MS_LOG(ERROR) << "The size of output tensor is too big";
+      return RET_ERROR;
+    }
+    auto ret = output->MallocData();
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "MallocData failed";
+      return ret;
+    }
+  }
+  return RET_OK;
+}
+}  // namespace mindspore::kernel
@@ -0,0 +1,209 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_INNER_KERNEL_H_
+#define MINDSPORE_LITE_SRC_INNER_KERNEL_H_
+#include <string>
+#include <vector>
+#include <memory>
+#include <utility>
+#include <algorithm>
+#include "src/common/utils.h"
+#include "src/common/log_util.h"
+#include "nnacl/op_base.h"
+#include "src/inner_context.h"
+#include "src/tensor.h"
+#include "include/errorcode.h"
+#include "schema/model_generated.h"
+#include "include/context.h"
+#include "src/kernel.h"
+
+namespace mindspore::kernel {
+class InnerKernel : public Kernel {
+ public:
+  InnerKernel() = default;
+
+  InnerKernel(OpParameter *parameter, std::vector<lite::Tensor *> in_tensors, std::vector<lite::Tensor *> out_tensors,
+              const lite::Context *ctx)
+      : op_parameter_(parameter), in_tensors_(std::move(in_tensors)), out_tensors_(std::move(out_tensors)) {
+    context_ = ctx;
+    if (op_parameter_ != nullptr && ctx != nullptr) {
+      op_parameter_->thread_num_ = ctx->thread_num_;
+    }
+  }
+
+  virtual ~InnerKernel() {
+    if (op_parameter_ != nullptr) {
+      free(op_parameter_);
+      op_parameter_ = nullptr;
+    }
+  }
+
+  int Execute() override {
+    auto ret = PreProcess();
+    if (lite::RET_OK != ret) {
+      MS_LOG(ERROR) << "run kernel PreProcess failed, name: " << this->name();
+      return ret;
+    }
+
+    // Support ZeroShape
+    size_t zero_shape_num = 0;
+    for (auto tensor : this->out_tensors()) {
+      for (size_t i = 0; i < tensor->shape().size(); i++) {
+        if (tensor->shape()[i] == 0) {
+          zero_shape_num++;
+          break;
+        }
+      }
+    }
+
+    if (zero_shape_num != this->out_tensors().size()) {
+      auto ret = Run();
+      if (lite::RET_OK != ret) {
+        MS_LOG(ERROR) << "run kernel failed, name: " << this->name();
+        return ret;
+      }
+    }
+
+    ret = PostProcess();
+    if (lite::RET_OK != ret) {
+      MS_LOG(ERROR) << "run kernel PreProcess failed, name: " << this->name();
+      return ret;
+    }
+    return lite::RET_OK;
+  }
+
+  // called while compiling graph
+  int Prepare() override { return mindspore::lite::RET_OK; }
+  virtual int Run() { return mindspore::lite::RET_ERROR; }
+  int ReSize() override { return mindspore::lite::RET_ERROR; }
+
+  // called before Run
+  virtual int PreProcess();
+  // called after Run
+  virtual int PostProcess() {
+    for (auto *output : this->out_tensors()) {
+      MS_ASSERT(output != nullptr);
+      output->ResetRefCount();
+    }
+
+    return FreeInWorkTensor();
+  }
+
+  virtual int FreeInWorkTensor() const {
+    for (auto &in_tensor : this->in_tensors()) {
+      MS_ASSERT(in_tensor != nullptr);
+      if (in_tensor->root_tensor() == in_tensor) {
+        continue;
+      }
+      in_tensor->DecRefCount();
+    }
+    return lite::RET_OK;
+  }
+
+  virtual int Init() { return mindspore::lite::RET_OK; }
+
+  OpParameter *op_parameter() const { return op_parameter_; }
+
+  bool InferShapeDone() const {
+    auto shape = out_tensors_.front()->shape();
+    if (std::find(shape.begin(), shape.end(), -1) != shape.end()) {
+      return false;
+    }
+    return true;
+  }
+
+  schema::PrimitiveType type() override {
+    return (this->op_parameter_ != nullptr) ? schema::PrimitiveType(this->op_parameter_->type_)
+                                            : schema::PrimitiveType_NONE;
+  }
+
+  void set_inputs(const std::vector<mindspore::tensor::MSTensor *> &in_tensors) override {
+    this->in_tensors_.resize(in_tensors.size());
+    (void)std::transform(in_tensors.begin(), in_tensors.end(), in_tensors_.begin(),
+                         [](mindspore::tensor::MSTensor *tensor) { return static_cast<lite::Tensor *>(tensor); });
+  }
+
+  void set_outputs(const std::vector<mindspore::tensor::MSTensor *> &out_tensors) override {
+    this->out_tensors_.resize(out_tensors.size());
+    (void)std::transform(out_tensors.begin(), out_tensors.end(), out_tensors_.begin(),
+                         [](mindspore::tensor::MSTensor *tensor) { return static_cast<lite::Tensor *>(tensor); });
+  }
+
+  const std::vector<mindspore::tensor::MSTensor *> &inputs() override {
+    inputs_.assign(in_tensors_.begin(), in_tensors_.end());
+    return inputs_;
+  }
+
+  const std::vector<mindspore::tensor::MSTensor *> &outputs() override {
+    outputs_.assign(out_tensors_.begin(), out_tensors_.end());
+    return outputs_;
+  }
+
+  void set_in_tensors(const std::vector<lite::Tensor *> &in_tensors) { this->in_tensors_ = in_tensors; }
+
+  void set_out_tensors(const std::vector<lite::Tensor *> &out_tensors) { this->out_tensors_ = out_tensors; }
+
+  const std::vector<lite::Tensor *> &in_tensors() const { return in_tensors_; }
+
+  const std::vector<lite::Tensor *> &out_tensors() const { return out_tensors_; }
+
+  virtual int Train() {
+    this->train_mode_ = true;
+    return mindspore::lite::RET_OK;
+  }
+
+  virtual bool IsTrain() const { return this->train_mode_; }
+
+  virtual int Eval() {
+    this->train_mode_ = false;
+    return mindspore::lite::RET_OK;
+  }
+
+  virtual bool IsEval() const { return !this->train_mode_; }
+
+  virtual void set_trainable(bool trainable = true) { this->trainable_ = trainable; }
+
+  virtual bool is_trainable() const { return this->trainable_; }
+
+  TypeId registry_data_type(void) { return registry_data_type_; }
+
+  void set_registry_data_type(TypeId data_type) { registry_data_type_ = data_type; }
+
+#ifdef SUPPORT_TRAIN
+  void set_workspace_size(size_t value) { workspace_size_ = value; }
+  size_t workspace_size() { return workspace_size_; }
+  static void AllocWorkspace(size_t size);
+  static void FreeWorkspace();
+  void *workspace() { return workspace_; }
+#endif
+
+ protected:
+  OpParameter *op_parameter_ = nullptr;
+  // tensor will free in ~lite_session()
+  std::vector<lite::Tensor *> in_tensors_;
+  std::vector<lite::Tensor *> out_tensors_;
+  bool train_mode_ = false;
+  bool trainable_ = false;  // parameters of this Kernel are trained in Train Session
+  TypeId registry_data_type_ = kTypeUnknown;
+#ifdef SUPPORT_TRAIN
+  size_t workspace_size_ = 0;
+  static void *workspace_;
+#endif
+};
+}  // namespace mindspore::kernel
+
+#endif  // MINDSPORE_LITE_SRC_INNER_KERNEL_H_
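For built-in operators the pattern stays close to the old LiteKernel: derive from InnerKernel, override Run(), and let the base class drive shape inference and output allocation in PreProcess(). A hypothetical sketch against the interface above — NotGateCPUKernel is invented for illustration, and the lite::Tensor accessors (data_c(), ElementsNum()) are assumed from the surrounding diff, not verified signatures:

// Hypothetical built-in op on the new InnerKernel interface. Real ops live
// under src/runtime/kernel/arm and are created through LiteKernelCreator<T>.
class NotGateCPUKernel : public mindspore::kernel::InnerKernel {
 public:
  using InnerKernel::InnerKernel;  // (OpParameter *, in_tensors, out_tensors, ctx)

  int Prepare() override { return mindspore::lite::RET_OK; }
  int ReSize() override { return mindspore::lite::RET_OK; }

  int Run() override {
    // By the time Run() is called, PreProcess() has inferred the output
    // shape and malloc'ed the output buffer.
    auto *in = reinterpret_cast<bool *>(in_tensors_.front()->data_c());
    auto *out = reinterpret_cast<bool *>(out_tensors_.front()->data_c());
    for (int i = 0; i < in_tensors_.front()->ElementsNum(); ++i) {
      out[i] = !in[i];  // element-wise logical NOT
    }
    return mindspore::lite::RET_OK;
  }
};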
@@ -0,0 +1,71 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_KERNEL_H_
+#define MINDSPORE_LITE_SRC_KERNEL_H_
+#include <vector>
+#include <string>
+#include <utility>
+#include "include/lite_utils.h"
+
+namespace mindspore::kernel {
+class Kernel {
+ public:
+  Kernel() = default;
+
+  Kernel(const std::vector<tensor::MSTensor *> &inputs, const std::vector<tensor::MSTensor *> &outputs,
+         const schema::Primitive *primitive, const lite::Context *ctx)
+      : inputs_(std::move(inputs)), outputs_(std::move(outputs)), context_(ctx) {
+    if (primitive != nullptr) {
+      type_ = primitive->value_type();
+    }
+  }
+
+  virtual ~Kernel() = default;
+
+  virtual int Prepare() = 0;
+
+  virtual int Execute() = 0;
+
+  virtual int ReSize() = 0;
+
+  virtual schema::PrimitiveType type() { return type_; }
+
+  virtual void set_inputs(const std::vector<mindspore::tensor::MSTensor *> &in_tensors) { this->inputs_ = in_tensors; }
+
+  virtual void set_outputs(const std::vector<mindspore::tensor::MSTensor *> &out_tensors) {
+    this->outputs_ = out_tensors;
+  }
+
+  virtual const std::vector<mindspore::tensor::MSTensor *> &inputs() { return this->inputs_; }
+
+  virtual const std::vector<mindspore::tensor::MSTensor *> &outputs() { return this->outputs_; }
+
+  std::string name() const { return this->name_; }
+
+  void set_name(const std::string &name) { this->name_ = name; }
+  const lite::Context *context() const { return this->context_; }
+
+ protected:
+  std::vector<mindspore::tensor::MSTensor *> inputs_;
+  std::vector<mindspore::tensor::MSTensor *> outputs_;
+  schema::PrimitiveType type_ = schema::PrimitiveType_NONE;
+  std::string name_;
+  const lite::Context *context_ = nullptr;
+};
+}  // namespace mindspore::kernel
+
+#endif  // MINDSPORE_LITE_SRC_KERNEL_H_
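Third-party (custom provider) kernels now implement this Kernel interface directly, working on tensor::MSTensor rather than lite::Tensor. A minimal sketch of such an implementation — the class and its behavior are invented for illustration, only the base-class signatures come from the header above:

// Hypothetical custom kernel built on the new kernel::Kernel base.
class MyCustomAdd : public mindspore::kernel::Kernel {
 public:
  MyCustomAdd(const std::vector<mindspore::tensor::MSTensor *> &inputs,
              const std::vector<mindspore::tensor::MSTensor *> &outputs,
              const mindspore::schema::Primitive *primitive, const mindspore::lite::Context *ctx)
      : Kernel(inputs, outputs, primitive, ctx) {}

  int Prepare() override { return 0; }  // one-time setup at graph compile (0 == RET_OK)
  int ReSize() override { return 0; }   // react to input shape changes

  int Execute() override {
    // Unlike InnerKernel, a plain Kernel has no PreProcess/PostProcess; the
    // LiteKernel wrapper releases tensor ref counts for non-builtin
    // providers after Execute() succeeds (see LiteKernel::Execute below).
    /* ... element-wise add over inputs_[0], inputs_[1] into outputs_[0] ... */
    return 0;
  }
};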
@@ -133,7 +133,7 @@ kernel::KernelCreator KernelRegistry::GetCreator(const KernelKey &desc) {
   if (desc.provider == kBuiltin) {
     int index = GetCreatorFuncIndex(desc);
     if (index >= array_size_ || index < 0) {
-      MS_LOG(ERROR) << "invalid kernel key, arch " << desc.arch << ", data_type" << desc.data_type << ",op type "
+      MS_LOG(ERROR) << "invalid kernel key, arch " << desc.arch << ", data_type " << desc.data_type << ",op type "
                     << desc.type;
       return nullptr;
     }
@@ -231,10 +231,17 @@ int KernelRegistry::GetKernel(const std::vector<Tensor *> &in_tensors, const std
   if (key.provider == kBuiltin) {
     auto creator = GetCreator(key);
     if (creator != nullptr) {
-      *kernel = creator(in_tensors, out_tensors, parameter, ctx, key);
-      if (*kernel != nullptr) {
-        (*kernel)->set_desc(key);
-        return RET_OK;
+      auto inner_kernel = creator(in_tensors, out_tensors, parameter, ctx, key);
+      if (inner_kernel != nullptr) {
+        inner_kernel->set_registry_data_type(key.data_type);
+        auto *lite_kernel = new (std::nothrow) kernel::LiteKernel(inner_kernel);
+        if (lite_kernel != nullptr) {
+          lite_kernel->set_desc(key);
+          *kernel = lite_kernel;
+          return RET_OK;
+        } else {
+          delete inner_kernel;
+        }
       }
       return RET_ERROR;
     }
@@ -247,9 +254,16 @@ int KernelRegistry::GetKernel(const std::vector<Tensor *> &in_tensors, const std
     Tensor2MSTensor(std::move(in_tensors), &tensors_in);
     std::vector<tensor::MSTensor *> tensors_out;
     Tensor2MSTensor(std::move(out_tensors), &tensors_out);
-    *kernel = creator(tensors_in, tensors_out, static_cast<const schema::Primitive *>(primitive), ctx);
-    if (*kernel != nullptr) {
-      return RET_OK;
+    auto base_kernel = creator(tensors_in, tensors_out, static_cast<const schema::Primitive *>(primitive), ctx);
+    if (base_kernel != nullptr) {
+      auto *lite_kernel = new (std::nothrow) kernel::LiteKernel(base_kernel);
+      if (lite_kernel != nullptr) {
+        lite_kernel->set_desc(key);
+        *kernel = lite_kernel;
+        return RET_OK;
+      } else {
+        delete base_kernel;
+      }
     }
     return RET_ERROR;
   }
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2021 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -21,40 +21,19 @@
 #include "src/common/utils.h"
-#include "src/runtime/infer_manager.h"
+#include "src/common/version_manager.h"
+#include "src/runtime/kernel/arm/base/merge.h"
 
 namespace mindspore::kernel {
 using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
-#ifdef SUPPORT_TRAIN
-void *LiteKernel::workspace_ = nullptr;
-
-void LiteKernel::AllocWorkspace(size_t size) {
-  if (size == 0) {
-    return;
-  }
-  workspace_ = malloc(size);
-  if (workspace_ == nullptr) {
-    MS_LOG(ERROR) << "fail to alloc " << size;
-  }
-}
-
-void LiteKernel::FreeWorkspace() {
-  free(workspace_);
-  workspace_ = nullptr;
-}
-
-int LiteKernel::DecOutTensorRefCount() {
-  for (auto *tensor : this->out_tensors_) {
-    tensor->set_ref_count(tensor->ref_count() - 1);
-    if (0 >= tensor->ref_count()) {
-      tensor->FreeData();
-    }
-  }
-  return 0;
-}
-#endif
 bool LiteKernel::IsReady(const std::vector<lite::Tensor *> &scope_tensors) {
-  return std::all_of(this->in_tensors().begin(), this->in_tensors().end(), [&](lite::Tensor *in_tensor) {
+  MS_ASSERT(kernel_ != nullptr);
+  if ((desc_.provider == kBuiltin) && (kernel_->type() == schema::PrimitiveType_Merge)) {
+    return static_cast<MergeCPUKernel *>(kernel_)->IsReady(scope_tensors);
+  }
+  auto &in_tensors = this->in_tensors();
+  return std::all_of(in_tensors.begin(), in_tensors.end(), [&](lite::Tensor *in_tensor) {
     if (IsContain(scope_tensors, in_tensor)) {
       return in_tensor->IsReady();
     } else {
@@ -64,120 +43,37 @@ bool LiteKernel::IsReady(const std::vector<lite::Tensor *> &scope_tensors) {
 }
 
 void LiteKernel::InitOutTensorInitRefCount() {
-  for (auto *tensor : this->out_tensors_) {
+  for (auto *tensor : this->out_tensors()) {
     size_t init_ref_count = 0;
     for (auto *post_kernel : this->out_kernels_) {
+      auto &post_in_tensors = post_kernel->in_tensors();
       init_ref_count +=
-        std::count_if(post_kernel->in_tensors_.begin(), post_kernel->in_tensors_.end(),
+        std::count_if(post_in_tensors.begin(), post_in_tensors.end(),
                       [&tensor](const lite::Tensor *post_kernel_in_tensor) { return post_kernel_in_tensor == tensor; });
     }
     tensor->set_init_ref_count(init_ref_count);
   }
 }
 
-int LiteKernel::FreeInWorkTensor() const {
-  for (auto &in_tensor : this->in_tensors_) {
-    MS_ASSERT(in_tensor != nullptr);
-    if (in_tensor->root_tensor() == in_tensor) {
-      continue;
-    }
-    in_tensor->DecRefCount();
-  }
-  return RET_OK;
-}
-
-int LiteKernel::PreProcess() {
-  if (!InferShapeDone()) {
-    auto ret = lite::KernelInferShape(in_tensors_, &out_tensors_, op_parameter_);
-    if (ret != 0) {
-      MS_LOG(ERROR) << "InferShape fail!";
-      return ret;
-    }
-    ret = ReSize();
-    if (ret != 0) {
-      MS_LOG(ERROR) << "ReSize fail!ret: " << ret;
-      return ret;
-    }
-  }
-
-  for (auto *output : this->out_tensors()) {
-    MS_ASSERT(output != nullptr);
-    if (desc_.data_type == kNumberTypeFloat16 && output->data_type() == kNumberTypeFloat32) {
-      output->set_data_type(kNumberTypeFloat16);
-    }
-    if (output->ElementsNum() >= MAX_MALLOC_SIZE / static_cast<int>(sizeof(int64_t))) {
-      MS_LOG(ERROR) << "The size of output tensor is too big";
-      return RET_ERROR;
-    }
-    auto ret = output->MallocData();
-    if (ret != RET_OK) {
-      MS_LOG(ERROR) << "MallocData failed";
-      return ret;
-    }
-  }
-  return RET_OK;
-}
-
-int LiteKernel::PostProcess() {
-  for (auto *output : this->out_tensors()) {
-    MS_ASSERT(output != nullptr);
-    output->ResetRefCount();
-  }
-  return FreeInWorkTensor();
-}
-
-int LiteKernel::Run(const KernelCallBack &before, const KernelCallBack &after) {
-  if (before != nullptr) {
-    if (!before(TensorVectorCast(this->in_tensors_), TensorVectorCast(this->out_tensors_),
-                {this->name_, this->type_str()})) {
-      MS_LOG(WARNING) << "run kernel before_callback failed, name: " << this->name_;
-    }
-  }
-  // Support ZeroShape
-  size_t zero_shape_num = 0;
-  for (auto tensor : this->out_tensors_) {
-    for (size_t i = 0; i < tensor->shape().size(); i++) {
-      if (tensor->shape()[i] == 0) {
-        zero_shape_num++;
-        break;
-      }
-    }
-  }
-  if (zero_shape_num != this->out_tensors_.size()) {
-    auto ret = Run();
-    if (RET_OK != ret) {
-      MS_LOG(ERROR) << "run kernel failed, name: " << this->name_;
-      return ret;
-    }
-  }
-  if (after != nullptr) {
-    if (!after(TensorVectorCast(this->in_tensors_), TensorVectorCast(this->out_tensors_),
-               {this->name_, this->type_str()})) {
-      MS_LOG(WARNING) << "run kernel after_callback failed, name: " << this->name_;
-    }
-  }
-  return RET_OK;
-}
-
 std::string LiteKernel::ToString() const {
   std::ostringstream oss;
-  oss << "LiteKernel: " << this->name_;
+  oss << "LiteKernel: " << this->name();
   oss << ", Type: " << this->type_str();
-  oss << ", " << this->in_tensors_.size() << " InputTensors:";
-  for (auto tensor : in_tensors_) {
+  oss << ", " << this->in_tensors().size() << " InputTensors:";
+  for (auto tensor : in_tensors()) {
     oss << " " << tensor;
   }
-  oss << ", " << this->out_tensors_.size() << " OutputTensors:";
-  for (auto tensor : out_tensors_) {
+  oss << ", " << this->out_tensors().size() << " OutputTensors:";
+  for (auto tensor : out_tensors()) {
     oss << " " << tensor;
   }
   oss << ", " << this->in_kernels_.size() << " InputKernels:";
   for (auto in_kernel : in_kernels_) {
-    oss << " " << in_kernel->name_;
+    oss << " " << in_kernel->name();
   }
   oss << ", " << this->out_kernels_.size() << " OutputKernels:";
   for (auto out_kernel : out_kernels_) {
-    oss << " " << out_kernel->name_;
+    oss << " " << out_kernel->name();
   }
   return oss.str();
 }
@@ -187,7 +83,7 @@ void LiteKernel::FindInoutKernels(const std::vector<kernel::LiteKernel *> &scope
   this->in_kernels_.clear();
   this->out_kernels_.clear();
   // find io kernels, need optimize time
-  for (auto *tensor : this->in_tensors_) {
+  for (auto *tensor : this->in_tensors()) {
     for (auto *scope_kernel : scope_kernels) {
       if (scope_kernel == this) {
         continue;
@@ -198,7 +94,7 @@ void LiteKernel::FindInoutKernels(const std::vector<kernel::LiteKernel *> &scope
     }
   }
 
-  for (auto *tensor : this->out_tensors_) {
+  for (auto *tensor : this->out_tensors()) {
     for (auto *scope_kernel : scope_kernels) {
       if (scope_kernel == this) {
         continue;
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2021 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -20,6 +20,7 @@
 #include <vector>
 #include <memory>
 #include <utility>
 #include <algorithm>
 #include "src/common/utils.h"
+#include "src/common/log_util.h"
 #ifdef ENABLE_ARM
@@ -31,6 +32,8 @@
 #include "include/errorcode.h"
 #include "schema/model_generated.h"
 #include "include/context.h"
+#include "src/kernel.h"
+#include "src/inner_kernel.h"
 
 namespace mindspore::kernel {
 enum KERNEL_ARCH { kCPU, kGPU, kAPU, kNPU, kKernelArch_MIN = kCPU, kKernelArch_MAX = kNPU };
@@ -62,86 +65,182 @@ enum SubGraphType { kNotSubGraph = 0, kCpuFP32SubGraph, kCpuFP16SubGraph, kGpuSu
 
 class LiteKernel {
  public:
-  LiteKernel() = default;
-  LiteKernel(OpParameter *parameter, std::vector<lite::Tensor *> in_tensors, std::vector<lite::Tensor *> out_tensors,
-             const lite::Context *ctx)
-      : op_parameter_(parameter),
-        in_tensors_(std::move(in_tensors)),
-        out_tensors_(std::move(out_tensors)),
-        context_(ctx) {
-    if (op_parameter_ != nullptr && ctx != nullptr) {
-      op_parameter_->thread_num_ = ctx->thread_num_;
-    }
+  LiteKernel() {
+    this->in_kernels_.clear();
+    this->out_kernels_.clear();
+  }
+
+  explicit LiteKernel(Kernel *kernel) : kernel_(kernel) {
+    this->in_kernels_.clear();
+    this->out_kernels_.clear();
   }
 
   virtual ~LiteKernel() {
-    if (op_parameter_ != nullptr) {
-      free(op_parameter_);
-      op_parameter_ = nullptr;
+    if (kernel_ != nullptr) {
+      free(kernel_);
+      kernel_ = nullptr;
     }
   }
 
+  virtual int Execute() { return Execute(nullptr, nullptr); }
+
+  virtual int Execute(const KernelCallBack &before, const KernelCallBack &after) {
+    if (before != nullptr) {
+      if (!before(TensorVectorCast(this->in_tensors()), TensorVectorCast(this->out_tensors()),
+                  {this->name(), schema::EnumNamePrimitiveType(this->type())})) {
+        MS_LOG(WARNING) << "run kernel before_callback failed, name: " << this->name();
+      }
+    }
+
+    auto ret = kernel_->Execute();
+    if ((ret == lite::RET_OK) && (desc_.provider != kBuiltin)) {
+      for (auto *output : this->out_tensors()) {
+        MS_ASSERT(output != nullptr);
+        output->ResetRefCount();
+      }
+      for (auto &in_tensor : this->in_tensors()) {
+        MS_ASSERT(in_tensor != nullptr);
+        if (in_tensor->root_tensor() == in_tensor) {
+          continue;
+        }
+        in_tensor->DecRefCount();
+      }
+    }
+
+    if (after != nullptr) {
+      if (!after(TensorVectorCast(this->in_tensors()), TensorVectorCast(this->out_tensors()),
+                 {this->name(), schema::EnumNamePrimitiveType(this->type())})) {
+        MS_LOG(WARNING) << "run kernel after_callback failed, name: " << this->name();
+      }
+    }
+    return ret;
+  }
+
   // called while compiling graph
-  virtual int Prepare() { return mindspore::lite::RET_OK; }
-  // called before Run
-  virtual int PreProcess();
-  virtual int Run() { return mindspore::lite::RET_ERROR; }
-  virtual int Run(const KernelCallBack &before, const KernelCallBack &after);
-  // called after Run
-  virtual int PostProcess();
-  virtual int ReSize() { return mindspore::lite::RET_ERROR; }
-  virtual int Init() { return mindspore::lite::RET_ERROR; }
+  virtual int Prepare() {
+    MS_ASSERT(kernel_ != nullptr);
+    return kernel_->Prepare();
+  }
+
+  virtual int Init() { return mindspore::lite::RET_OK; }
+
+  virtual int ReSize() {
+    MS_ASSERT(kernel_ != nullptr);
+    return kernel_->ReSize();
+  }
 
   virtual void FindInoutKernels(const std::vector<kernel::LiteKernel *> &scope_kernels);
 
-  OpParameter *op_parameter() const { return op_parameter_; }
+  OpParameter *op_parameter() const {
+    MS_ASSERT(kernel_ != nullptr);
+    return static_cast<InnerKernel *>(kernel_)->op_parameter();
+  }
 
-  std::string name() const { return this->name_; }
+  std::string name() const {
+    MS_ASSERT(kernel_ != nullptr);
+    return kernel_->name();
+  }
+
+  void set_name(const std::string &name) {
+    MS_ASSERT(kernel_ != nullptr);
+    kernel_->set_name(name);
+  }
 
   virtual int Train() {
-    this->train_mode_ = true;
-    return mindspore::lite::RET_OK;
+    MS_ASSERT(kernel_ != nullptr);
+    return static_cast<InnerKernel *>(kernel_)->Train();
   }
 
-  virtual bool IsTrain() const { return this->train_mode_; }
+  virtual bool IsTrain() const {
+    MS_ASSERT(kernel_ != nullptr);
+    return static_cast<InnerKernel *>(kernel_)->IsTrain();
+  }
 
   virtual int Eval() {
-    this->train_mode_ = false;
-    return mindspore::lite::RET_OK;
+    MS_ASSERT(kernel_ != nullptr);
+    return static_cast<InnerKernel *>(kernel_)->Eval();
   }
 
-  virtual bool IsEval() const { return !this->train_mode_; }
+  virtual bool IsEval() const {
+    MS_ASSERT(kernel_ != nullptr);
+    return static_cast<InnerKernel *>(kernel_)->IsEval();
+  }
 
-  virtual void set_trainable(bool trainable = true) { this->trainable_ = trainable; }
+  virtual void set_trainable(bool trainable = true) {
+    MS_ASSERT(kernel_ != nullptr);
+    static_cast<InnerKernel *>(kernel_)->set_trainable(trainable);
+  }
 
-  virtual bool is_trainable() const { return this->trainable_; }
-
-  void set_name(const std::string &name) { this->name_ = name; }
+  virtual bool is_trainable() const {
+    MS_ASSERT(kernel_ != nullptr);
+    return static_cast<InnerKernel *>(kernel_)->is_trainable();
+  }
 
   void set_is_model_output(bool is_model_output) { this->is_model_output_ = is_model_output; }
 
   bool is_model_output() const { return this->is_model_output_; }
 
-  schema::PrimitiveType Type() const {
-    return (this->op_parameter_ != nullptr) ? schema::PrimitiveType(this->op_parameter_->type_)
-                                            : schema::PrimitiveType_NONE;
+  bool InferShapeDone() const {
+    auto shape = out_tensors().front()->shape();
+    if (std::find(shape.begin(), shape.end(), -1) != shape.end()) {
+      return false;
+    }
+    return true;
   }
 
-  std::string type_str() const { return schema::EnumNamePrimitiveType(this->Type()); }
+  schema::PrimitiveType type() const {
+    MS_ASSERT(kernel_ != nullptr);
+    return kernel_->type();
+  }
 
-  void set_in_tensors(const std::vector<lite::Tensor *> &in_tensors) { this->in_tensors_ = in_tensors; }
+  std::string type_str() const { return schema::EnumNamePrimitiveType(this->type()); }
 
-  void set_out_tensors(const std::vector<lite::Tensor *> &out_tensors) { this->out_tensors_ = out_tensors; }
+  void set_in_tensors(const std::vector<lite::Tensor *> &in_tensors) {
+    MS_ASSERT(kernel_ != nullptr);
+    if (desc_.provider == kBuiltin) {
+      static_cast<InnerKernel *>(kernel_)->set_in_tensors(in_tensors);
+    } else {
+      std::vector<mindspore::tensor::MSTensor *> ms_tensors(in_tensors.begin(), in_tensors.end());
+      kernel_->set_inputs(ms_tensors);
+    }
+  }
 
-  const std::vector<lite::Tensor *> &in_tensors() const { return this->in_tensors_; }
+  void set_out_tensors(const std::vector<lite::Tensor *> &out_tensors) {
+    MS_ASSERT(kernel_ != nullptr);
+    if (desc_.provider == kBuiltin) {
+      static_cast<InnerKernel *>(kernel_)->set_out_tensors(out_tensors);
+    } else {
+      std::vector<mindspore::tensor::MSTensor *> ms_tensors(out_tensors.begin(), out_tensors.end());
+      kernel_->set_outputs(ms_tensors);
+    }
+  }
 
-  const std::vector<lite::Tensor *> &out_tensors() const { return this->out_tensors_; }
+  const std::vector<lite::Tensor *> &in_tensors() const {
+    MS_ASSERT(kernel_ != nullptr);
+    if (desc_.provider == kBuiltin) {
+      return static_cast<InnerKernel *>(kernel_)->in_tensors();
+    } else {
+      auto &ms_tensors = kernel_->inputs();
+      mutable_in_tensors_.resize(ms_tensors.size());
+      (void)std::transform(ms_tensors.begin(), ms_tensors.end(), mutable_in_tensors_.begin(),
+                           [](mindspore::tensor::MSTensor *tensor) { return static_cast<lite::Tensor *>(tensor); });
+      return mutable_in_tensors_;
+    }
+  }
+
+  const std::vector<lite::Tensor *> &out_tensors() const {
+    MS_ASSERT(kernel_ != nullptr);
+    if (desc_.provider == kBuiltin) {
+      return static_cast<InnerKernel *>(kernel_)->out_tensors();
+    } else {
+      auto &ms_tensors = kernel_->outputs();
+      mutable_out_tensors_.resize(ms_tensors.size());
+      (void)std::transform(ms_tensors.begin(), ms_tensors.end(), mutable_out_tensors_.begin(),
+                           [](mindspore::tensor::MSTensor *tensor) { return static_cast<lite::Tensor *>(tensor); });
+      return mutable_out_tensors_;
+    }
+  }
 
   void AddInKernel(LiteKernel *kernel) {
     if (!lite::IsContain(this->in_kernels_, kernel)) {
@@ -167,63 +266,41 @@ class LiteKernel {
 
   virtual void InitOutTensorInitRefCount();
 
-  virtual int FreeInWorkTensor() const;
-
   KernelKey desc() const { return desc_; }
 
   void set_desc(const KernelKey kernel_key) { desc_ = kernel_key; }
 
   SubGraphType subgraph_type() const { return this->subgraph_type_; }
 
-  const lite::Context *context() const { return this->context_; }
+  const lite::InnerContext *Context() const {
+    MS_ASSERT(kernel_ != nullptr);
+    return static_cast<const lite::InnerContext *>(kernel_->context());
+  }
 
   virtual std::string ToString() const;
 
-#ifdef SUPPORT_TRAIN
-  void set_workspace_size(size_t value) { workspace_size_ = value; }
-  size_t workspace_size() { return workspace_size_; }
-  static void AllocWorkspace(size_t size);
-  static void FreeWorkspace();
-  void *workspace() { return workspace_; }
-  int DecOutTensorRefCount();
-#endif
-
-  bool InferShapeDone() const {
-    auto shape = out_tensors_.front()->shape();
-    if (std::find(shape.begin(), shape.end(), -1) != shape.end()) {
-      return false;
-    }
-    return true;
-  }
+  Kernel *kernel() { return kernel_; }
 
  protected:
-  KernelKey desc_{};
-  std::string name_;
-  OpParameter *op_parameter_ = nullptr;
+  Kernel *kernel_ = nullptr;
+  KernelKey desc_;
   // tensor will free in ~lite_session()
-  std::vector<lite::Tensor *> in_tensors_;
-  std::vector<lite::Tensor *> out_tensors_;
-  const lite::Context *context_ = nullptr;
   std::vector<LiteKernel *> in_kernels_;
   std::vector<LiteKernel *> out_kernels_;
-  bool train_mode_ = false;
-  bool trainable_ = false;  // parameters of this Kernel are trained in Train Session
+  mutable std::vector<lite::Tensor *> mutable_in_tensors_;
+  mutable std::vector<lite::Tensor *> mutable_out_tensors_;
   bool is_model_output_ = false;
   SubGraphType subgraph_type_ = kNotSubGraph;
-#ifdef SUPPORT_TRAIN
-  size_t workspace_size_ = 0;
-  static void *workspace_;
-#endif
 };
 
-typedef LiteKernel *(*KernelCreator)(const std::vector<lite::Tensor *> &inputs,
-                                     const std::vector<lite::Tensor *> &outputs, OpParameter *parameter,
-                                     const lite::Context *ctx, const KernelKey &desc);
+typedef InnerKernel *(*KernelCreator)(const std::vector<lite::Tensor *> &inputs,
+                                      const std::vector<lite::Tensor *> &outputs, OpParameter *parameter,
+                                      const lite::Context *ctx, const KernelKey &desc);
 
 template <class T>
-kernel::LiteKernel *LiteKernelCreator(const std::vector<lite::Tensor *> &inputs,
-                                      const std::vector<lite::Tensor *> &outputs, OpParameter *parameter,
-                                      const lite::Context *ctx, const kernel::KernelKey &desc) {
+kernel::InnerKernel *LiteKernelCreator(const std::vector<lite::Tensor *> &inputs,
                                       const std::vector<lite::Tensor *> &outputs, OpParameter *parameter,
                                       const lite::Context *ctx, const kernel::KernelKey &desc) {
   auto *kernel = new (std::nothrow) T(parameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "kernel: " << parameter->name_ << "is nullptr.";
@@ -241,4 +318,4 @@ kernel::LiteKernel *LiteKernelCreator(const std::vector<lite::Tensor *> &inputs,
 }
 }  // namespace mindspore::kernel
 
-#endif  // MINDSPORE_LITE_SRC_LITE_KERNEL_H_
+#endif  // MINDSPORE_LITE_SRC_INNER_KERNEL_H_
@@ -202,9 +202,9 @@ bool LiteKernelUtil::IsSwitchCall(kernel::LiteKernel *kernel) {
     return false;
   }
   for (auto &node : subgraph_kernel->nodes()) {
-    if (node->Type() == schema::PrimitiveType_Switch &&
+    if (node->type() == schema::PrimitiveType_Switch &&
         InputsContainsSpecificNode(node, schema::PrimitiveType_PartialFusion) && node->out_kernels().size() == 1 &&
-        node->out_kernels().front()->Type() == schema::PrimitiveType_Call) {
+        node->out_kernels().front()->type() == schema::PrimitiveType_Call) {
       return true;
     }
   }
@@ -215,7 +215,7 @@ bool LiteKernelUtil::IsSwitchCall(kernel::LiteKernel *kernel) {
 kernel::LiteKernel *LiteKernelUtil::GetInputsSpecificNode(const kernel::LiteKernel *kernel,
                                                           const schema::PrimitiveType &primitive_type) {
   for (auto input : kernel->in_kernels()) {
-    if (input->Type() == primitive_type) {
+    if (input->type() == primitive_type) {
       return input;
     }
   }
@@ -16,8 +16,9 @@
 
 #ifndef MINDSPORE_LITE_SRC_LITE_KERNEL_UTIL_H_
 #define MINDSPORE_LITE_SRC_LITE_KERNEL_UTIL_H_
-#include "src/lite_kernel.h"
+#include <vector>
+#include "src/lite_kernel.h"
 
 namespace mindspore::kernel {
 
 class LiteKernelUtil {
@@ -59,7 +59,7 @@ int LiteOpActor::CompileArrowThroughPartialCall() {
     return RET_OK;
   }
   for (auto &node : subgraph_kernel->nodes()) {
-    if (node->Type() != schema::PrimitiveType_Call) {
+    if (node->type() != schema::PrimitiveType_Call) {
       continue;
     }
    call_node_ = node;
@@ -290,7 +290,7 @@ int LiteSwitchOpActor::CompileFalseBranchArrow() {
 
 int LiteSwitchOpActor::GetSwitchAndCallNode(kernel::SubGraphKernel *subgraph_kernel) {
   for (auto &node : subgraph_kernel->nodes()) {
-    if (node->Type() != schema::PrimitiveType_Call) {
+    if (node->type() != schema::PrimitiveType_Call) {
      continue;
    }
    call_node_ = node;
@@ -47,8 +47,8 @@ class LiteOpActor : public OpActor<lite::Tensor> {
       return;
     }
 
-    CpuBindMode cpu_bind_mode = kernel_->context()->device_list_.front().device_info_.cpu_device_info_.cpu_bind_mode_;
-    BindThreads(static_cast<const lite::InnerContext *>(kernel_->context())->thread_pool_, true, cpu_bind_mode);
+    CpuBindMode cpu_bind_mode = kernel_->Context()->device_list_.front().device_info_.cpu_device_info_.cpu_bind_mode_;
+    BindThreads(static_cast<const lite::InnerContext *>(kernel_->Context())->thread_pool_, true, cpu_bind_mode);
 
     int ret = CheckInputData();
     if (ret != RET_OK) {
@@ -78,7 +78,7 @@ class LiteOpActor : public OpActor<lite::Tensor> {
     inputs_data_.clear();
     AsyncOutput(context);
 
-    BindThreads(static_cast<const lite::InnerContext *>(kernel_->context())->thread_pool_, false, cpu_bind_mode);
+    BindThreads(static_cast<const lite::InnerContext *>(kernel_->Context())->thread_pool_, false, cpu_bind_mode);
     SetOutputData(context);
 
     for (auto &input_data : inputs_data_) {
@@ -101,22 +101,11 @@ class LiteOpActor : public OpActor<lite::Tensor> {
   }
   virtual int CompileArrow();
   int RunKernel(const KernelCallBack &before, const KernelCallBack &after) {
-    int ret = kernel_->PreProcess();
-    if (RET_OK != ret) {
-      MS_LOG(ERROR) << "PreProcess kernel failed, name: " << kernel_->name();
-      return ret;
-    }
-    ret = kernel_->Run(before, after);
+    auto ret = kernel_->Execute(before, after);
     if (RET_OK != ret) {
       MS_LOG(ERROR) << "run kernel failed, name: " << kernel_->name();
       return ret;
     }
-    ret = kernel_->PostProcess();
-    if (RET_OK != ret) {
-      MS_LOG(ERROR) << "PostProcess kernel failed, name: " << kernel_->name();
-      return ret;
-    }
 
     return ret;
   }
|
@ -366,7 +366,7 @@ void LiteSession::FreePackOpWeight(const std::vector<kernel::LiteKernel *> &kern
|
|||
for (auto *kernel : kernels) {
|
||||
MS_ASSERT(kernel != nullptr);
|
||||
if (kernel->subgraph_type() == kernel::kNotSubGraph) {
|
||||
if (!IsPackedOp(kernel->Type())) {
|
||||
if (!IsPackedOp(kernel->type())) {
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
|
|
|
@@ -85,7 +85,7 @@ int MindrtExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vect
                         const std::vector<kernel::LiteKernel *> &kernels, mindspore::Allocator *allocator,
                         const KernelCallBack &before, const KernelCallBack &after) {
   MS_ASSERT(nullptr != allocator);
-  if (kernels.front()->Type() != schema::PrimitiveType_Merge) {
+  if (kernels.front()->type() != schema::PrimitiveType_Merge) {
     auto ret = CheckTensorsInvalid(in_tensors);
     if (RET_OK != ret) {
       MS_LOG(ERROR) << "CheckInputs failed";
@@ -19,13 +19,14 @@
 
 #include <string>
 #include <vector>
 #include "schema/ops_generated.h"
 #include "src/lite_kernel.h"
 
 namespace mindspore {
 namespace kernel {
-typedef kernel::LiteKernel *(*CreateKernel)(const std::vector<tensor::MSTensor *> &inputs,
-                                            const std::vector<tensor::MSTensor *> &outputs,
-                                            const schema::Primitive *primitive, const lite::Context *ctx);
+typedef kernel::Kernel *(*CreateKernel)(const std::vector<tensor::MSTensor *> &inputs,
+                                        const std::vector<tensor::MSTensor *> &outputs,
+                                        const schema::Primitive *primitive, const lite::Context *ctx);
 class RegisterKernel {
  public:
  static RegisterKernel *GetInstance();
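With this typedef change, a registered creator returns a kernel::Kernel; the registry then wraps it in a LiteKernel, as shown in the KernelRegistry::GetKernel hunk above. A hedged sketch of the creator side — the exact registration method on RegisterKernel beyond GetInstance() is not shown in this diff, so the RegKernel call below is an assumption:

// Creator matching the new CreateKernel signature. MyCustomAdd is the
// hypothetical Kernel subclass sketched earlier.
mindspore::kernel::Kernel *CreateMyCustomAdd(const std::vector<mindspore::tensor::MSTensor *> &inputs,
                                             const std::vector<mindspore::tensor::MSTensor *> &outputs,
                                             const mindspore::schema::Primitive *primitive,
                                             const mindspore::lite::Context *ctx) {
  return new (std::nothrow) MyCustomAdd(inputs, outputs, primitive, ctx);
}

// Assumed registration call -- the actual RegisterKernel API beyond
// GetInstance() is not part of this diff:
// RegisterKernel::GetInstance()->RegKernel(arch, provider, data_type, op_type, CreateMyCustomAdd);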
@@ -339,7 +339,7 @@ int NPUFusionPass::Run() {
   for (size_t i = 0; i < kernels->size(); i++) {
     auto kernel = (*kernels)[i];
     if (CheckFusion(kernel)) {
-      switch (kernel->Type()) {
+      switch (kernel->type()) {
         case schema::PrimitiveType_Split:
           i -= kernel->in_kernels().size();
           SplitFusion(kernel);
@@ -48,7 +48,7 @@ std::set<mindspore::schema::PrimitiveType> npu_insert_nodes = {
 
 int NPUInsertTransformPass::GetInsertState(kernel::LiteKernel *kernel) {
   // filter out irrelevant kernel
-  if (npu_insert_nodes.find(kernel->Type()) == npu_insert_nodes.end()) {
+  if (npu_insert_nodes.find(kernel->type()) == npu_insert_nodes.end()) {
     return InsertNone;
   }
 
@@ -46,18 +46,24 @@ kernel::LiteKernel *NPUPassUtils::CreateNchw2NhwcKernel(const std::vector<Tensor
   transpose_param->perm_[3] = 1;
   transpose_param->num_axes_ = 4;
 
-  auto kernel = new (std::nothrow)
+  auto inner_kernel = new (std::nothrow)
     kernel::TransposeCPUKernel(reinterpret_cast<OpParameter *>(transpose_param), in_tensors, out_tensors, ctx);
-  if (kernel != nullptr) {
-    kernel->set_desc(key);
+
+  if (inner_kernel != nullptr) {
+    auto *kernel = new (std::nothrow) kernel::LiteKernel(inner_kernel);
+    if (kernel != nullptr) {
+      kernel->set_desc(key);
+      kernel->set_name(name);
+      return kernel;
+    } else {
+      free(transpose_param);
+      delete inner_kernel;
+    }
   } else {
     MS_LOG(ERROR) << "New Nchw2Nhwc Kernel failed.";
-    return nullptr;
+    free(transpose_param);
   }
-
-  kernel->set_name(name);
-  return kernel;
+  return nullptr;
 }
 
 kernel::LiteKernel *NPUPassUtils::CreateNhwc2NchwKernel(const std::vector<Tensor *> &in_tensors,
@@ -77,17 +83,24 @@ kernel::LiteKernel *NPUPassUtils::CreateNhwc2NchwKernel(const std::vector<Tensor
   transpose_param->perm_[3] = 2;
   transpose_param->num_axes_ = 4;
 
-  auto kernel = new (std::nothrow)
+  auto inner_kernel = new (std::nothrow)
     kernel::TransposeCPUKernel(reinterpret_cast<OpParameter *>(transpose_param), in_tensors, out_tensors, ctx);
-  if (kernel != nullptr) {
-    kernel->set_desc(key);
+
+  if (inner_kernel != nullptr) {
+    auto *kernel = new (std::nothrow) kernel::LiteKernel(inner_kernel);
+    if (kernel != nullptr) {
+      kernel->set_desc(key);
+      kernel->set_name(name);
+      return kernel;
+    } else {
+      free(transpose_param);
+      delete inner_kernel;
+    }
   } else {
     MS_LOG(ERROR) << "New Nhwc2Nchw Kernel failed.";
-    return nullptr;
+    free(transpose_param);
   }
-
-  kernel->set_name(name);
-  return kernel;
+  return nullptr;
 }
 
 void NPUPassUtils::UpdateKernel(kernel::LiteKernel *kernel, const std::vector<kernel::LiteKernel *> &in_kernels,
@@ -187,7 +200,7 @@ bool NPUPassUtils::IsNhwc2Nchw(const kernel::LiteKernel *kernel) {
   if (kernel == nullptr) {
     return false;
   }
-  if (kernel->Type() != schema::PrimitiveType_Transpose) {
+  if (kernel->type() != schema::PrimitiveType_Transpose) {
     return false;
   }
   auto parameter = reinterpret_cast<TransposeParameter *>(kernel->op_parameter());
@@ -207,7 +220,7 @@ bool NPUPassUtils::IsNchw2Nhwc(const kernel::LiteKernel *kernel) {
   if (kernel == nullptr) {
     return false;
   }
-  if (kernel->Type() != schema::PrimitiveType_Transpose) {
+  if (kernel->type() != schema::PrimitiveType_Transpose) {
     return false;
   }
   auto parameter = reinterpret_cast<TransposeParameter *>(kernel->op_parameter());
@@ -36,7 +36,7 @@ int NPUTransformPass::InsertPreNodes(kernel::LiteKernel *kernel, std::vector<ker
     MS_LOG(ERROR) << "NPU Transform pass does not find in kernel with 4d output";
     return RET_ERROR;
   }
-  if (is_input_kernel || (*it)->desc().arch != kNPU || npu_trans_nodes.find((*it)->Type()) == npu_trans_nodes.end()) {
+  if (is_input_kernel || (*it)->desc().arch != kNPU || npu_trans_nodes.find((*it)->type()) == npu_trans_nodes.end()) {
     kernel::LiteKernel *pre_kernel = nullptr;
     if (!is_input_kernel) {
       pre_kernel = *it;
@@ -95,7 +95,7 @@ int NPUTransformPass::InsertPostNodes(kernel::LiteKernel *kernel, std::vector<ke
   std::vector<kernel::LiteKernel *> post_non_insert_kernels;
   for (int i = 0; i < kernel->out_kernels().size(); i++) {
     auto post_kernel = kernel->out_kernels()[i];
-    if (post_kernel->desc().arch != kNPU || npu_trans_nodes.find(post_kernel->Type()) == npu_trans_nodes.end()) {
+    if (post_kernel->desc().arch != kNPU || npu_trans_nodes.find(post_kernel->type()) == npu_trans_nodes.end()) {
      post_insert_kernels.push_back(post_kernel);
    } else {
      post_non_insert_kernels.push_back(post_kernel);
@@ -186,15 +186,15 @@ int NPUTransformPass::InsertPostNodes(kernel::LiteKernel *kernel, std::vector<ke
 int NPUTransformPass::Run() {
   for (size_t i = 0; i < all_kernels_->size();) {
     auto kernel = (*all_kernels_)[i];
-    if (kernel->desc().arch != kNPU || npu_trans_nodes.find(kernel->Type()) == npu_trans_nodes.end()) {
+    if (kernel->desc().arch != kNPU || npu_trans_nodes.find(kernel->type()) == npu_trans_nodes.end()) {
       i++;
       continue;
     }
-    if (kernel->Type() == schema::PrimitiveType_ScaleFusion && !NPUPassUtils::Scale4dCase(kernel)) {
+    if (kernel->type() == schema::PrimitiveType_ScaleFusion && !NPUPassUtils::Scale4dCase(kernel)) {
       i++;
       continue;
     }
-    if (kernel->Type() == schema::PrimitiveType_Resize &&
+    if (kernel->type() == schema::PrimitiveType_Resize &&
         kernel->in_tensors()[0]->Height() > kernel->out_tensors()[0]->Height()) {
       i++;
       continue;
@@ -88,9 +88,9 @@ std::shared_ptr<domi::ModelBufferData> SubGraphNpuKernel::BuildIRModel() {
   return om_model_buff;
 }
 
-int SubGraphNpuKernel::Run() {
+int SubGraphNpuKernel::Execute() {
   return reinterpret_cast<lite::NPUExecutor *>(this->executor_)
-    ->Run(in_tensors_, out_tensor_sorted_, in_nodes_, nodes_);
+    ->Run(in_tensors(), out_tensor_sorted_, in_nodes_, nodes_);
 }
 
 int SubGraphNpuKernel::BuildNPUInputOp() {
@@ -120,7 +120,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
       return RET_ERROR;
     }
     // input come from npu
-    auto npu_op = reinterpret_cast<NPUKernel *>(in_kernel)->GetNPUOp();
+    auto npu_op = reinterpret_cast<NPUKernel *>(in_kernel->kernel())->GetNPUOp();
     if (npu_op == nullptr) {
       MS_LOG(ERROR) << in_kernel->type_str() << "NPU Operator is nullptr.";
       return RET_ERROR;
@@ -138,7 +138,7 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
 
     // weight tensor
     if (is_weight_tensor) {
-      if (npu_specific_weight_nodes.find(node->Type()) == npu_specific_weight_nodes.end()) {
+      if (npu_specific_weight_nodes.find(node->type()) == npu_specific_weight_nodes.end()) {
         auto name = node->name() + "_" + std::to_string(count++);
         auto weight_const = new (std::nothrow) hiai::op::Const(node->name() + "_" + std::to_string(count++));
         if (weight_const == nullptr) {
@@ -153,8 +153,8 @@ int SubGraphNpuKernel::BuildNPUInputOp() {
       }
     }
     // set input to NPU
-    int ret = reinterpret_cast<NPUKernel *>(node)->SetNPUInputs(node->in_tensors(), node->out_tensors(), node_input_op,
-                                                                index2_multi_out_index);
+    int ret = reinterpret_cast<NPUKernel *>(node->kernel())
+                ->SetNPUInputs(node->in_tensors(), node->out_tensors(), node_input_op, index2_multi_out_index);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << node->name() << " set npu inputs failed.";
       return RET_ERROR;
@@ -170,7 +170,7 @@ std::vector<ge::Operator> SubGraphNpuKernel::GetNPUNodes(const vector<kernel::Li
   std::vector<ge::Operator> ops;
   ops.reserve(nodes.size());
   for (int i = 0; i < nodes.size(); i++) {
-    ops.push_back(*reinterpret_cast<NPUKernel *>(nodes[i])->GetNPUOp());
+    ops.push_back(*reinterpret_cast<NPUKernel *>(nodes[i]->kernel())->GetNPUOp());
   }
   return ops;
 }
@@ -178,11 +178,12 @@ std::vector<ge::Operator> SubGraphNpuKernel::GetNPUNodes(const vector<kernel::Li
 int SubGraphNpuKernel::BuildNPUOutputOp() {
   subgraph_output_op_.clear();
   subgraph_output_op_ = GetNPUNodes(out_nodes_);
-  out_tensor_sorted_.resize(out_tensors_.size());
+  out_tensor_sorted_.resize(out_tensors().size());
   int i = 0;
+  auto out_tensors = this->out_tensors();
   for (auto node : out_nodes_) {
     for (auto tensor : node->out_tensors()) {
-      if (std::find(out_tensors_.begin(), out_tensors_.end(), tensor) != out_tensors_.end())
+      if (std::find(out_tensors.begin(), out_tensors.end(), tensor) != out_tensors.end())
         this->out_tensor_sorted_[i++] = tensor;
     }
   }
@ -193,11 +194,11 @@ int SubGraphNpuKernel::BuildNPUOutputOp() {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
std::string SubGraphNpuKernel::GetOMModelName() { return this->name_ + ".om"; }
|
||||
std::string SubGraphNpuKernel::GetOMModelName() { return this->name() + ".om"; }
|
||||
|
||||
int SubGraphNpuKernel::Init() {
|
||||
if (!is_compiled_) {
|
||||
name_ = "kNpuSubGraph" + std::to_string(npu_manager_->index());
|
||||
this->set_name("kNpuSubGraph" + std::to_string(npu_manager_->index()));
|
||||
auto model_buffer_data = BuildIRModel();
|
||||
if (model_buffer_data == nullptr) {
|
||||
MS_LOG(ERROR) << "Build IR model failed.";
|
||||
|
@ -206,7 +207,7 @@ int SubGraphNpuKernel::Init() {
|
|||
|
||||
MS_ASSERT(npu_manager_ != nullptr);
|
||||
|
||||
int frequency = static_cast<const lite::InnerContext *>(context_)->GetNpuInfo().frequency_;
|
||||
int frequency = static_cast<const lite::InnerContext *>(this->Context())->GetNpuInfo().frequency_;
|
||||
if (frequency != hiai::AiModelDescription_Frequency_LOW && frequency != hiai::AiModelDescription_Frequency_MEDIUM &&
|
||||
frequency != hiai::AiModelDescription_Frequency_HIGH &&
|
||||
frequency != hiai::AiModelDescription_Frequency_EXTREME) {
|
||||
|
@ -226,7 +227,7 @@ int SubGraphNpuKernel::Init() {
|
|||
}
|
||||
|
||||
int SubGraphNpuKernel::Prepare() {
|
||||
if (executor_->Prepare(nodes_, in_tensors_, out_tensors_) != RET_OK) {
|
||||
if (executor_->Prepare(nodes_, in_tensors(), out_tensors()) != RET_OK) {
|
||||
MS_LOG(ERROR) << "NPU executor prepare failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
|
|
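The repeated reinterpret_cast<NPUKernel *>(node->kernel()) pattern above indicates that after this rectification a scheduling-level LiteKernel wraps an inner kernel object, and device code unwraps it before casting. A self-contained sketch of that wrapper shape; only the kernel() accessor is taken from the diff, the other names are invented:

#include <memory>
#include <utility>

struct InnerKernelSketch {  // stand-in for the wrapped kernel object
  virtual ~InnerKernelSketch() = default;
};

struct NpuKernelSketch : InnerKernelSketch {
  void *GetNPUOp() { return nullptr; }  // placeholder for the hiai op handle
};

class LiteKernelWrapperSketch {  // stand-in for the scheduling-level kernel
 public:
  explicit LiteKernelWrapperSketch(std::shared_ptr<InnerKernelSketch> k) : kernel_(std::move(k)) {}
  InnerKernelSketch *kernel() const { return kernel_.get(); }  // the unwrap used above

 private:
  std::shared_ptr<InnerKernelSketch> kernel_;
};

// Device code now casts the wrapped object, not the wrapper itself, e.g.:
//   auto npu_op = static_cast<NpuKernelSketch *>(node->kernel())->GetNPUOp();
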
@@ -32,11 +32,10 @@ using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
 class SubGraphNpuKernel : public SubGraphKernel {
  public:
-  SubGraphNpuKernel(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
-                    const std::vector<kernel::LiteKernel *> &inKernels,
+  SubGraphNpuKernel(const std::vector<kernel::LiteKernel *> &inKernels,
                     const std::vector<kernel::LiteKernel *> &outKernels, const std::vector<kernel::LiteKernel *> &nodes,
-                    const lite::InnerContext *ctx = nullptr, lite::NPUManager *npu_manager = nullptr)
-      : SubGraphKernel(inputs, outputs, inKernels, outKernels, nodes, ctx), npu_manager_(npu_manager) {
+                    Kernel *kernel, lite::NPUManager *npu_manager = nullptr)
+      : SubGraphKernel(inKernels, outKernels, nodes, kernel), npu_manager_(npu_manager) {
     subgraph_type_ = kNpuSubGraph;
     desc_.arch = kernel::KERNEL_ARCH::kNPU;
   }
@@ -47,13 +46,9 @@ class SubGraphNpuKernel : public SubGraphKernel {
 
   int Prepare() override;
 
-  int PreProcess() override { return RET_OK; }
+  int Execute() override;
 
-  int Run() override;
-
-  int Run(const KernelCallBack &before, const KernelCallBack &after) override { return this->Run(); }
-
-  int PostProcess() override { return RET_OK; }
+  int Execute(const KernelCallBack &before, const KernelCallBack &after) override { return this->Execute(); }
 
   int ReSize() override {
     MS_LOG(ERROR) << "NPU does not support the resize function temporarily.";

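The header hunk above drops the PreProcess/Run/PostProcess trio in favor of two Execute overloads, with the callback overload forwarding to the plain one. A sketch of the resulting interface shape; KernelCallBack is simplified to a stand-in type:

#include <functional>

using KernelCallBackSketch = std::function<bool()>;  // simplified stand-in

class SubGraphSketch {
 public:
  virtual ~SubGraphSketch() = default;
  virtual int Execute() { return 0; }
  // The callback overload simply forwards, mirroring the NPU header above.
  virtual int Execute(const KernelCallBackSketch &before, const KernelCallBackSketch &after) {
    (void)before;
    (void)after;
    return Execute();
  }
};
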
@@ -49,18 +49,7 @@ int OpenCLExecutor::RunOrTune(const std::vector<Tensor *> &inputs, const std::ve
         MS_LOG(ERROR) << "run kernel before_callback failed, name: " << kernel->name();
       }
     }
-    auto *op_kernel = reinterpret_cast<kernel::OpenCLKernel *>(kernel);
-    ret = kernel->PreProcess();
-    if (RET_OK != ret) {
-      if (is_tune) {
-        MS_LOG(WARNING) << "PreProcess kernel failed, name: " << kernel->name() << " in tuning";
-        opencl_runtime_ins->SetProfiling(profiling_tmp);
-        return RET_OK;
-      } else {
-        MS_LOG(ERROR) << "PreProcess kernel failed, name: " << kernel->name();
-        return ret;
-      }
-    }
+    auto *op_kernel = reinterpret_cast<kernel::OpenCLKernel *>(kernel->kernel());
     // Support ZeroShape
     size_t zero_shape_num = 0;
     for (auto tensor : kernel->out_tensors()) {
@@ -79,7 +68,7 @@ int OpenCLExecutor::RunOrTune(const std::vector<Tensor *> &inputs, const std::ve
         return ret;
       }
     } else {
-      ret = kernel->Run();
+      ret = kernel->Execute();
       if (ret != RET_OK) {
         MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name();
         return ret;
@@ -92,11 +81,6 @@ int OpenCLExecutor::RunOrTune(const std::vector<Tensor *> &inputs, const std::ve
       }
     }
   }
-  ret = kernel->PostProcess();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "PostProcess kernel failed, name: " << kernel->name();
-    return ret;
-  }
   if (after != nullptr) {
     if (!after(TensorVectorCast(kernel->in_tensors()), TensorVectorCast(kernel->out_tensors()), callbackParam)) {
       MS_LOG(ERROR) << "run kernel after_callback failed, name: " << kernel->name();

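Worth noting in the executor hunks above: the explicit PreProcess()/PostProcess() calls disappear from RunOrTune while kernel->Run() becomes kernel->Execute(). One plausible reading, sketched below as an assumption rather than a fact taken from the diff, is that Execute() now bundles the three phases so every executor gets them uniformly:

// Hypothetical composition; the diff only shows the call sites moving.
class KernelPhasesSketch {
 public:
  int Execute() {
    int ret = PreProcess();  // formerly invoked by each executor
    if (ret != 0) {
      return ret;
    }
    ret = Run();  // the device-specific body
    if (ret != 0) {
      return ret;
    }
    return PostProcess();  // formerly invoked by each executor
  }

 private:
  int PreProcess() { return 0; }
  int Run() { return 0; }
  int PostProcess() { return 0; }
};
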
@@ -51,7 +51,7 @@ int KernelInferShape(const std::vector<lite::Tensor *> &inputs, const std::vecto
   return RET_ERROR;
 }
 
-int KernelInferShape(const std::vector<lite::Tensor *> &inputs, std::vector<lite::Tensor *> *outputs,
+int KernelInferShape(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                      OpParameter *parameter) {
   std::vector<TensorC *> in_tensors;
   std::vector<TensorC *> out_tensors;
@@ -84,7 +84,7 @@ int KernelInferShape(const std::vector<lite::Tensor *> &inputs, std::vector<lite
     }
     if (reinterpret_cast<TensorListC *>(out_tensors.at(i))->data_type_ == TypeIdC::kObjectTypeTensorType) {
       auto *tensor_list_c = reinterpret_cast<TensorListC *>(out_tensors.at(i));
-      auto *tensor_list = reinterpret_cast<TensorList *>(outputs->at(i));
+      auto *tensor_list = reinterpret_cast<TensorList *>(outputs.at(i));
       tensor_list->set_shape({static_cast<int>(tensor_list_c->element_num_)});
       auto tensor_shape = std::vector<std::vector<int>>(
         tensor_list_c->element_num_,
@@ -93,10 +93,10 @@ int KernelInferShape(const std::vector<lite::Tensor *> &inputs, std::vector<lite
       tensor_list->MallocTensorListData(static_cast<TypeId>(tensor_list_c->data_type_), tensor_shape);
       TensorListC2TensorList(tensor_list_c, tensor_list);
     } else {
-      TensorC2Tensor(out_tensors.at(i), outputs->at(i));
+      TensorC2Tensor(out_tensors.at(i), outputs.at(i));
     }
     if (ret == NNACL_INFER_INVALID) {
-      outputs->at(i)->set_shape({-1});
+      outputs.at(i)->set_shape({-1});
     }
   }
 

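KernelInferShape now takes the output vector by const reference instead of by pointer. Only the vector itself is const; the tensors it points at stay mutable, which is why set_shape({-1}) can still be applied on an invalid infer. A distilled sketch of that convention, with TensorSketch standing in for lite::Tensor:

#include <vector>

struct TensorSketch {
  int shape = 0;
};

// Const reference to the vector, mutable pointees: infer results can
// still be written back, as in the RET_INFER_INVALID branch above.
int InferSketch(const std::vector<TensorSketch *> &outputs) {
  for (auto *t : outputs) {
    t->shape = -1;
  }
  return 0;
}
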
@@ -25,7 +25,7 @@
 #include "nnacl/infer/infer.h"
 
 namespace mindspore::lite {
-int KernelInferShape(const std::vector<lite::Tensor *> &inputs, std::vector<lite::Tensor *> *outputs,
+int KernelInferShape(const std::vector<lite::Tensor *> &tensors_in, const std::vector<lite::Tensor *> &outputs,
                      OpParameter *parameter);
 int KernelInferShape(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                      const void *primitive);

@@ -23,14 +23,14 @@
 #include "nnacl/fp16/arg_min_max_fp16.h"
 #endif
 #include "nnacl/common_func.h"
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 
 namespace mindspore::kernel {
-class ArgMinMaxCPUKernel : public LiteKernel {
+class ArgMinMaxCPUKernel : public InnerKernel {
  public:
   ArgMinMaxCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                      const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {
+      : InnerKernel(parameter, inputs, outputs, ctx) {
     arg_param_ = reinterpret_cast<ArgMinMaxParameter *>(op_parameter_);
   }
 

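From here on the commit applies one mechanical recipe to every CPU kernel header: include src/inner_kernel.h instead of src/lite_kernel.h, derive from InnerKernel, and forward the same four constructor arguments. A sketch of the recipe on an invented kernel; every *Sketch type and FooCPUKernel are illustrative, only the four-argument shape comes from the diff:

#include <vector>

struct OpParameterSketch {
  int thread_num_ = 1;
};
struct TensorSketch {};
struct ContextSketch {
  int thread_num_ = 1;
};

class InnerKernelSketch {  // stand-in for the new base in src/inner_kernel.h
 public:
  InnerKernelSketch(OpParameterSketch *p, const std::vector<TensorSketch *> &in,
                    const std::vector<TensorSketch *> &out, const ContextSketch *ctx)
      : op_parameter_(p), in_tensors_(in), out_tensors_(out), context_(ctx) {}
  virtual ~InnerKernelSketch() = default;

 protected:
  OpParameterSketch *op_parameter_;
  std::vector<TensorSketch *> in_tensors_;
  std::vector<TensorSketch *> out_tensors_;
  const ContextSketch *context_;
};

class FooCPUKernel : public InnerKernelSketch {  // was: public LiteKernel
 public:
  using InnerKernelSketch::InnerKernelSketch;  // same four-argument forwarding
};
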
@@ -17,14 +17,14 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_ASSERT_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 
 namespace mindspore::kernel {
-class AssertCPUKernel : public LiteKernel {
+class AssertCPUKernel : public InnerKernel {
  public:
   AssertCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                   const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {}
+      : InnerKernel(parameter, inputs, outputs, ctx) {}
   ~AssertCPUKernel() override {}
 
   int Init() override;

@@ -23,11 +23,11 @@
 
 // this file is useless when move create actor before schedule.
 namespace mindspore::kernel {
-class CallCPUKernel : public LiteKernel {
+class CallCPUKernel : public InnerKernel {
  public:
   CallCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                 const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {}
+      : InnerKernel(parameter, inputs, outputs, ctx) {}
   ~CallCPUKernel() override = default;
   int Init() override;
   int ReSize() override;

@@ -17,16 +17,16 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_CARRY_DATA_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "src/tensor.h"
 #include "src/tensorlist.h"
 
 namespace mindspore::kernel {
-class CarryDataKernel : public LiteKernel {
+class CarryDataKernel : public InnerKernel {
  public:
   CarryDataKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                   const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {}
+      : InnerKernel(parameter, inputs, outputs, ctx) {}
   ~CarryDataKernel() override = default;
 
  protected:

@@ -18,18 +18,18 @@
 
 #include <vector>
 #include "include/errorcode.h"
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "include/context.h"
 #include "nnacl/constant_of_shape_parameter.h"
 #include "nnacl/fp32/constant_of_shape_fp32.h"
 #include "nnacl/fp16/constant_of_shape_fp16.h"
 
 namespace mindspore::kernel {
-class ConstantOfShapeCPUKernel : public LiteKernel {
+class ConstantOfShapeCPUKernel : public InnerKernel {
  public:
   ConstantOfShapeCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                            const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {
+      : InnerKernel(parameter, inputs, outputs, ctx) {
     param_ = reinterpret_cast<ConstantOfShapeParameter *>(parameter);
   }
   ~ConstantOfShapeCPUKernel() override = default;

@@ -27,7 +27,7 @@
 #include <android/log.h>
 #endif
 #endif
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "include/context.h"
 #include "src/runtime/kernel/arm/base/layout_transform.h"
 #include "src/weight_decoder.h"
@@ -35,11 +35,11 @@
 using mindspore::lite::InnerContext;
 
 namespace mindspore::kernel {
-class ConvolutionBaseCPUKernel : public LiteKernel {
+class ConvolutionBaseCPUKernel : public InnerKernel {
  public:
   ConvolutionBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                            const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx), ctx_(ctx), thread_count_(ctx->thread_num_) {
+      : InnerKernel(parameter, inputs, outputs, ctx), ctx_(ctx), thread_count_(ctx->thread_num_) {
     op_parameter_->thread_num_ = ctx->thread_num_;
     conv_param_ = reinterpret_cast<ConvParameter *>(op_parameter_);
   }

@@ -18,15 +18,15 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_CROP_BASE_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "nnacl/crop_parameter.h"
 
 namespace mindspore::kernel {
-class CropBaseCPUKernel : public LiteKernel {
+class CropBaseCPUKernel : public InnerKernel {
  public:
   CropBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                     const std::vector<lite::Tensor *> &outputs, const mindspore::lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {
+      : InnerKernel(parameter, inputs, outputs, ctx) {
     crop_para_ = reinterpret_cast<CropParameter *>(op_parameter_);
     crop_para_->thread_count_ = op_parameter_->thread_num_;
   }

@@ -18,18 +18,18 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_DEPTH_TO_SPACE_BASE_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "include/errorcode.h"
 #include "include/context.h"
 #include "nnacl/nnacl_common.h"
 #include "nnacl/depth_to_space_parameter.h"
 
 namespace mindspore::kernel {
-class DepthToSpaceBaseCPUKernel : public LiteKernel {
+class DepthToSpaceBaseCPUKernel : public InnerKernel {
  public:
   DepthToSpaceBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                             const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {
+      : InnerKernel(parameter, inputs, outputs, ctx) {
     param_ = reinterpret_cast<DepthToSpaceParameter *>(op_parameter_);
   }
   virtual ~DepthToSpaceBaseCPUKernel() = default;

@@ -132,16 +132,6 @@ void DetectionPostProcessBaseCPUKernel::FreeAllocatedBuffer() {
     context_->allocator->Free(params_->selected_);
     params_->selected_ = nullptr;
   }
-  if (desc_.data_type == kNumberTypeInt8) {
-    if (input_boxes_ != nullptr) {
-      context_->allocator->Free(input_boxes_);
-      input_boxes_ = nullptr;
-    }
-    if (input_scores_ != nullptr) {
-      context_->allocator->Free(input_scores_);
-      input_scores_ = nullptr;
-    }
-  }
 }
 
 int DetectionPostProcessBaseCPUKernel::ParamInit() {

@@ -18,18 +18,18 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DETECTION_POST_PROCESS_BASE_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "include/context.h"
 #include "nnacl/fp32/detection_post_process_fp32.h"
 
 using mindspore::lite::InnerContext;
 
 namespace mindspore::kernel {
-class DetectionPostProcessBaseCPUKernel : public LiteKernel {
+class DetectionPostProcessBaseCPUKernel : public InnerKernel {
  public:
   DetectionPostProcessBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                     const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx), thread_num_(ctx->thread_num_) {
+      : InnerKernel(parameter, inputs, outputs, ctx), thread_num_(ctx->thread_num_) {
     params_ = reinterpret_cast<DetectionPostProcessParameter *>(parameter);
   }
   virtual ~DetectionPostProcessBaseCPUKernel();
@@ -37,6 +37,7 @@ class DetectionPostProcessBaseCPUKernel : public LiteKernel {
   int Init() override;
   int ReSize() override;
   int Run() override;
+  virtual void FreeAllocatedBuffer();
 
   int thread_num_ = 1;
   int num_boxes_ = 0;
@@ -48,9 +49,6 @@ class DetectionPostProcessBaseCPUKernel : public LiteKernel {
  protected:
   virtual int GetInputData() = 0;
   int ParamInit();
-
- private:
-  void FreeAllocatedBuffer();
 };
 } // namespace mindspore::kernel
 #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DETECTION_POST_PROCESS_BASE_H_

@@ -75,7 +75,7 @@ void GroupConvolutionBaseCPUKernel::FreeSubKernel() {
 
 int GroupConvolutionBaseCPUKernel::PreProcess() {
   if (!InferShapeDone()) {
-    auto ret = lite::KernelInferShape(in_tensors_, &out_tensors_, op_parameter_);
+    auto ret = lite::KernelInferShape(in_tensors_, out_tensors_, op_parameter_);
     if (ret != 0) {
       MS_LOG(ERROR) << "InferShape fail!";
       return ret;
@@ -118,7 +118,7 @@ int GroupConvolutionBaseCPUKernel::PreProcess() {
     }
   }
 
-  auto outputs = this->out_tensors();
+  auto outputs = this->out_tensors_;
   for (auto *output : outputs) {
     MS_ASSERT(output != nullptr);
     auto ret = output->MallocData();
@@ -132,8 +132,8 @@ int GroupConvolutionBaseCPUKernel::PreProcess() {
 }
 
 int GroupConvolutionBaseCPUKernel::Run() {
-  ori_in_data_ = in_tensors().front()->data_c();
-  ori_out_data_ = out_tensors().front()->data_c();
+  ori_in_data_ = in_tensors_[0]->data_c();
+  ori_out_data_ = out_tensors_[0]->data_c();
   for (int i = 0; i < group_num_; ++i) {
     // first, separate group conv input into several parts. This step must be in runtime stage.
     auto ret = SeparateInput(i);

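GroupConvolutionBaseCPUKernel switches from the in_tensors()/out_tensors() accessors to the protected in_tensors_/out_tensors_ members. Read together with the static_cast<lite::Tensor *> casts added in the fp16 group-conv file further down, this suggests the public accessors no longer yield lite::Tensor pointers directly; internal code either reads the members or casts the accessor result. A sketch of the two access paths; all types are stand-ins and the base/derived split is an assumption:

#include <vector>

struct BaseTensorSketch {
  virtual ~BaseTensorSketch() = default;
};
struct LiteTensorSketch : BaseTensorSketch {
  void *data_c() { return data_; }  // placeholder data accessor
  void *data_ = nullptr;
};

class KernelAccessSketch {
 public:
  KernelAccessSketch() : in_tensors_{&storage_} {}
  // Accessor assumed to surface only the base type after this commit.
  std::vector<BaseTensorSketch *> in_tensors() const {
    return {in_tensors_.begin(), in_tensors_.end()};
  }
  void *FirstInputData() {
    void *direct = in_tensors_[0]->data_c();  // member path, as in Run() above
    void *via_accessor =
        static_cast<LiteTensorSketch *>(in_tensors().front())->data_c();  // cast path
    return direct != nullptr ? direct : via_accessor;
  }

 protected:
  LiteTensorSketch storage_;
  std::vector<LiteTensorSketch *> in_tensors_;
};
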
@@ -19,7 +19,7 @@
 
 #include <utility>
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "nnacl/op_base.h"
 #include "src/runtime/kernel/arm/base/convolution_base.h"
 #include "nnacl/fp32/conv_common_fp32.h"
@@ -29,7 +29,7 @@ class GroupConvolutionBaseCPUKernel : public ConvolutionBaseCPUKernel {
  public:
   GroupConvolutionBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                 const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
-                                std::vector<kernel::LiteKernel *> group_convs, const int group_num)
+                                std::vector<kernel::InnerKernel *> group_convs, const int group_num)
       : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx),
         group_convs_(std::move(group_convs)),
         group_num_(group_num) {} // opParameter(in channel, out channel) in this kernel has been split to groups, if
@@ -45,7 +45,7 @@ class GroupConvolutionBaseCPUKernel : public ConvolutionBaseCPUKernel {
   void FreeSubKernel();
 
  protected:
-  std::vector<kernel::LiteKernel *> group_convs_;
+  std::vector<kernel::InnerKernel *> group_convs_;
   const int group_num_;
   void *ori_in_data_ = nullptr; // do not free
   void *ori_out_data_ = nullptr; // do not free

@@ -19,7 +19,7 @@
 
 #include <utility>
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "nnacl/conv_parameter.h"
 
 namespace mindspore::kernel {
@@ -48,7 +48,7 @@ class GroupConvCreator {
 
  public:
   void SetShapeOfTensors();
-  std::vector<kernel::LiteKernel *> *get_group_conv() { return &group_convs_; }
+  std::vector<kernel::InnerKernel *> *get_group_conv() { return &group_convs_; }
   void CopyQuantParam(std::vector<lite::Tensor *> *tensors);
   int GetSingleConvParam(ConvParameter *conv_param, std::vector<lite::Tensor *> *new_inputs,
                          std::vector<lite::Tensor *> *new_outputs, int group_id);
@@ -66,7 +66,7 @@ class GroupConvCreator {
  private:
   std::vector<lite::Tensor *> origin_inputs_;
   std::vector<lite::Tensor *> origin_outputs_;
-  std::vector<kernel::LiteKernel *> group_convs_;
+  std::vector<kernel::InnerKernel *> group_convs_;
   std::vector<int> input_shape_;
   std::vector<int> output_shape_;
   std::vector<int> filter_shape_;

@@ -75,7 +75,7 @@ int MergeCPUKernel::ReSize() { return RET_OK; }
 InputPart MergeCPUKernel::FindReadyPart(const std::vector<lite::Tensor *> &scope_tensors) {
   MS_ASSERT(in_tensors_.size() == 2 * out_tensors_.size());
   bool is_root_tensor_ready =
-    std::all_of(this->in_tensors().begin(), this->in_tensors().end(), [&](lite::Tensor *in_tensor) {
+    std::all_of(this->in_tensors_.begin(), this->in_tensors_.end(), [&](lite::Tensor *in_tensor) {
       // if not in scope_tensors, not care
       if (!IsContain(scope_tensors, in_tensor)) {
         return true;
@@ -94,12 +94,12 @@ InputPart MergeCPUKernel::FindReadyPart(const std::vector<lite::Tensor *> &scope
   // if not in scope_tensors, not care
   // if in scope_tensors, in_tensor need to be ready
   if (std::all_of(
-        this->in_tensors().begin() + in_tensors().size() / 2, this->in_tensors().end(),
+        this->in_tensors_.begin() + in_tensors().size() / 2, this->in_tensors_.end(),
         [&](lite::Tensor *in_tensor) { return !IsContain(scope_tensors, in_tensor) || in_tensor->IsReady(); })) {
     return RIGHT_INPUT_PART;
   }
   if (std::all_of(
-        this->in_tensors().begin(), this->in_tensors().begin() + in_tensors().size() / 2,
+        this->in_tensors_.begin(), this->in_tensors_.begin() + in_tensors().size() / 2,
         [&](lite::Tensor *in_tensor) { return !IsContain(scope_tensors, in_tensor) || in_tensor->IsReady(); })) {
     return LEFT_INPUT_PART;
   }

@@ -29,7 +29,7 @@ class MergeCPUKernel : public CarryDataKernel {
   MergeCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                  const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
       : CarryDataKernel(parameter, inputs, outputs, ctx) {}
-  bool IsReady(const std::vector<lite::Tensor *> &scope_tensors) override;
+  bool IsReady(const std::vector<lite::Tensor *> &scope_tensors);
   ~MergeCPUKernel() override = default;
   int FreeInWorkTensor() const override;
   int Init() override;

@@ -23,11 +23,11 @@
 
 // this file is going to be removed when move create actor before schedule.
 namespace mindspore::kernel {
-class PartialFusionKernel : public LiteKernel {
+class PartialFusionKernel : public InnerKernel {
  public:
   PartialFusionKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                       const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {}
+      : InnerKernel(parameter, inputs, outputs, ctx) {}
   ~PartialFusionKernel() override = default;
   int Init() override;
   int ReSize() override;

@@ -18,7 +18,7 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_POOLING_BASE_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "nnacl/fp32/pooling_fp32.h"
 #include "include/errorcode.h"
 
@@ -26,11 +26,11 @@ using mindspore::lite::InnerContext;
 using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
 namespace mindspore::kernel {
-class PoolingBaseCPUKernel : public LiteKernel {
+class PoolingBaseCPUKernel : public InnerKernel {
  public:
   PoolingBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                        const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx), ctx_(ctx), thread_count_(ctx->thread_num_) {
+      : InnerKernel(parameter, inputs, outputs, ctx), ctx_(ctx), thread_count_(ctx->thread_num_) {
     pooling_param_ = reinterpret_cast<PoolingParameter *>(op_parameter_);
   }
   ~PoolingBaseCPUKernel() = default;

@@ -18,18 +18,18 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_PRIOR_BOX_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "nnacl/reshape_parameter.h"
 #include "nnacl/fp32/prior_box_fp32.h"
 
 using mindspore::lite::InnerContext;
 
 namespace mindspore::kernel {
-class PriorBoxCPUKernel : public LiteKernel {
+class PriorBoxCPUKernel : public InnerKernel {
  public:
   PriorBoxCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                     const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx), ctx_(ctx), thread_count_(ctx->thread_num_) {
+      : InnerKernel(parameter, inputs, outputs, ctx), ctx_(ctx), thread_count_(ctx->thread_num_) {
     prior_box_param_ = reinterpret_cast<PriorBoxParameter *>(op_parameter_);
   }
   ~PriorBoxCPUKernel() = default;

@@ -18,14 +18,14 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_QUANTDTYPECAST_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 
 namespace mindspore::kernel {
-class QuantDTypeCastCPUKernel : public LiteKernel {
+class QuantDTypeCastCPUKernel : public InnerKernel {
  public:
   QuantDTypeCastCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                           const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx), thread_num_(ctx->thread_num_) {}
+      : InnerKernel(parameter, inputs, outputs, ctx), thread_num_(ctx->thread_num_) {}
   ~QuantDTypeCastCPUKernel() = default;
 
   int Init() override;

@@ -18,17 +18,17 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_RANDOM_STANDARD_NORMAL_BASE_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "nnacl/random_parameter.h"
 
 using mindspore::lite::InnerContext;
 
 namespace mindspore::kernel {
-class RandomStandardNormalCPUKernel : public LiteKernel {
+class RandomStandardNormalCPUKernel : public InnerKernel {
  public:
   RandomStandardNormalCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                 const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {
+      : InnerKernel(parameter, inputs, outputs, ctx) {
     param_ = reinterpret_cast<RandomParam *>(parameter);
   }
   ~RandomStandardNormalCPUKernel() override = default;

@@ -18,16 +18,16 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_REDUCE_BASE_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 
 #include "nnacl/reduce_parameter.h"
 
 namespace mindspore::kernel {
-class ReduceBaseCPUKernel : public LiteKernel {
+class ReduceBaseCPUKernel : public InnerKernel {
  public:
   ReduceBaseCPUKernel(OpParameter *param, const std::vector<lite::Tensor *> &inputs,
                       const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(param, inputs, outputs, ctx) {}
+      : InnerKernel(param, inputs, outputs, ctx) {}
   virtual ~ReduceBaseCPUKernel() = default;
 
   int Init() override;

@@ -17,16 +17,16 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_RESHAPE_BASE_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "include/context.h"
 
 using mindspore::lite::InnerContext;
 namespace mindspore::kernel {
-class ReshapeBaseCPUKernel : public LiteKernel {
+class ReshapeBaseCPUKernel : public InnerKernel {
  public:
   ReshapeBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                        const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {}
+      : InnerKernel(parameter, inputs, outputs, ctx) {}
   ~ReshapeBaseCPUKernel() override = default;
 
   int Init() override;

@@ -17,18 +17,18 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_RESIZE_BASE_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "nnacl/resize_parameter.h"
 
 using mindspore::schema::PrimitiveType_Resize;
 using mindspore::schema::ResizeMethod;
 
 namespace mindspore::kernel {
-class ResizeBaseCPUKernel : public LiteKernel {
+class ResizeBaseCPUKernel : public InnerKernel {
  public:
   ResizeBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                       const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {}
+      : InnerKernel(parameter, inputs, outputs, ctx) {}
 
   ~ResizeBaseCPUKernel() override = default;
 

@@ -18,7 +18,7 @@
 
 #include <vector>
 #include "src/runtime/kernel/arm/base/carry_data.h"
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "src/tensorlist.h"
 
 namespace mindspore::kernel {

@@ -17,15 +17,15 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SLICE_BASE_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "nnacl/slice_parameter.h"
 
 namespace mindspore::kernel {
-class SliceCPUKernel : public LiteKernel {
+class SliceCPUKernel : public InnerKernel {
  public:
   SliceCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                  const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {
+      : InnerKernel(parameter, inputs, outputs, ctx) {
     param_ = reinterpret_cast<SliceParameter *>(op_parameter_);
   }
   ~SliceCPUKernel() = default;

@@ -18,15 +18,15 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SOFTMAX_BASE_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "nnacl/softmax_parameter.h"
 
 namespace mindspore::kernel {
-class SoftmaxBaseCPUKernel : public LiteKernel {
+class SoftmaxBaseCPUKernel : public InnerKernel {
  public:
   SoftmaxBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                        const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx), ctx_(ctx), thread_count_(ctx->thread_num_) {
+      : InnerKernel(parameter, inputs, outputs, ctx), ctx_(ctx), thread_count_(ctx->thread_num_) {
     softmax_param_ = reinterpret_cast<SoftmaxParameter *>(op_parameter_);
   }
   ~SoftmaxBaseCPUKernel() = default;

@@ -20,16 +20,16 @@
 #include <vector>
 #include "include/errorcode.h"
 #include "include/context.h"
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "nnacl/split_parameter.h"
 #include "nnacl/base/split_base.h"
 
 namespace mindspore::kernel {
-class SplitBaseCPUKernel : public LiteKernel {
+class SplitBaseCPUKernel : public InnerKernel {
  public:
   SplitBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                      const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {
+      : InnerKernel(parameter, inputs, outputs, ctx) {
     param = reinterpret_cast<SplitParameter *>(op_parameter_);
   }
   ~SplitBaseCPUKernel() override {

@@ -25,11 +25,11 @@
 #include "nnacl/base/split_with_over_lap_base.h"
 
 namespace mindspore::kernel {
-class SplitWithOverlapBaseCPUKernel : public LiteKernel {
+class SplitWithOverlapBaseCPUKernel : public InnerKernel {
  public:
   SplitWithOverlapBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                 const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {
+      : InnerKernel(parameter, inputs, outputs, ctx) {
     param = reinterpret_cast<SplitWithOverlapParameter *>(op_parameter_);
   }
   ~SplitWithOverlapBaseCPUKernel() override = default;

@@ -17,16 +17,16 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_STACK_BASE_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "nnacl/stack_parameter.h"
 
 using mindspore::lite::InnerContext;
 namespace mindspore::kernel {
-class StackBaseCPUKernel : public LiteKernel {
+class StackBaseCPUKernel : public InnerKernel {
  public:
   StackBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                      const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {}
+      : InnerKernel(parameter, inputs, outputs, ctx) {}
   ~StackBaseCPUKernel() override = default;
 
   int Init() override;

@@ -19,14 +19,14 @@
 
 #include <vector>
 #include "nnacl/fp32/strided_slice_fp32.h"
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 
 namespace mindspore::kernel {
-class StridedSliceCPUKernel : public LiteKernel {
+class StridedSliceCPUKernel : public InnerKernel {
  public:
   StridedSliceCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                         const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {
+      : InnerKernel(parameter, inputs, outputs, ctx) {
     param_ = reinterpret_cast<StridedSliceParameter *>(parameter);
   }
   ~StridedSliceCPUKernel() override = default;

@@ -18,7 +18,7 @@
 
 #include <vector>
 #include "src/runtime/kernel/arm/base/carry_data.h"
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "src/tensorlist.h"
 
 namespace mindspore::kernel {

@@ -18,17 +18,17 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TENSORLISTFROMTENSOR_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "src/tensorlist.h"
 #include "schema/model_generated.h"
 #include "nnacl/tensorlist_parameter.h"
 
 namespace mindspore::kernel {
-class TensorListFromTensorCPUKernel : public LiteKernel {
+class TensorListFromTensorCPUKernel : public InnerKernel {
  public:
   TensorListFromTensorCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                 const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx),
+      : InnerKernel(parameter, inputs, outputs, ctx),
         dtype_(static_cast<TypeId>(reinterpret_cast<TensorListParameter *>(parameter)->element_dtype_)) {}
   ~TensorListFromTensorCPUKernel() = default;
 

@@ -18,17 +18,17 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TENSORLISTGETITEM_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "src/tensorlist.h"
 #include "schema/model_generated.h"
 #include "nnacl/tensorlist_parameter.h"
 
 namespace mindspore::kernel {
-class TensorListGetItemCPUKernel : public LiteKernel {
+class TensorListGetItemCPUKernel : public InnerKernel {
  public:
   TensorListGetItemCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                              const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx),
+      : InnerKernel(parameter, inputs, outputs, ctx),
         dtype_(reinterpret_cast<TensorListParameter *>(parameter)->element_dtype_) {}
   ~TensorListGetItemCPUKernel() = default;
 

@@ -18,17 +18,17 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TENSORLISTRESERVE_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "src/tensorlist.h"
 #include "schema/model_generated.h"
 #include "nnacl/tensorlist_parameter.h"
 
 namespace mindspore::kernel {
-class TensorListReserveCPUKernel : public LiteKernel {
+class TensorListReserveCPUKernel : public InnerKernel {
  public:
   TensorListReserveCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                              const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx),
+      : InnerKernel(parameter, inputs, outputs, ctx),
         element_dtype_(static_cast<TypeId>(reinterpret_cast<TensorListParameter *>(parameter)->element_dtype_)) {}
   ~TensorListReserveCPUKernel() = default;
 

@@ -18,17 +18,17 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_TENSORLISTSETITEM_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "src/tensorlist.h"
 #include "schema/model_generated.h"
 #include "nnacl/tensorlist_parameter.h"
 
 namespace mindspore::kernel {
-class TensorListSetItemCPUKernel : public LiteKernel {
+class TensorListSetItemCPUKernel : public InnerKernel {
  public:
   TensorListSetItemCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                              const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {}
+      : InnerKernel(parameter, inputs, outputs, ctx) {}
   ~TensorListSetItemCPUKernel() = default;
 
   int Init() override;

@@ -19,17 +19,17 @@
 
 #include <vector>
 
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "src/tensorlist.h"
 #include "schema/model_generated.h"
 #include "nnacl/tensorlist_parameter.h"
 
 namespace mindspore::kernel {
-class TensorListStackCPUKernel : public LiteKernel {
+class TensorListStackCPUKernel : public InnerKernel {
  public:
   TensorListStackCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                            const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx),
+      : InnerKernel(parameter, inputs, outputs, ctx),
         num_element_(reinterpret_cast<TensorListParameter *>(parameter)->num_element_),
         dtype_(static_cast<TypeId>(reinterpret_cast<TensorListParameter *>(parameter)->element_dtype_)) {}
   ~TensorListStackCPUKernel() = default;

@@ -17,15 +17,15 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_TILE_BASE_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "nnacl/base/tile_base.h"
 
 namespace mindspore::kernel {
-class TileCPUKernel : public LiteKernel {
+class TileCPUKernel : public InnerKernel {
  public:
   TileCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                 const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {}
+      : InnerKernel(parameter, inputs, outputs, ctx) {}
   ~TileCPUKernel() override = default;
 
   int Init() override;

@@ -18,16 +18,16 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_ACTIVATION_FP16_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "nnacl/fp32/activation_fp32.h"
 #include "nnacl/fp16/activation_fp16.h"
 
 namespace mindspore::kernel {
-class ActivationFp16CPUKernel : public LiteKernel {
+class ActivationFp16CPUKernel : public InnerKernel {
  public:
   ActivationFp16CPUKernel(OpParameter *param, const std::vector<lite::Tensor *> &inputs,
                           const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(param, inputs, outputs, ctx), thread_count_(ctx->thread_num_) {
+      : InnerKernel(param, inputs, outputs, ctx), thread_count_(ctx->thread_num_) {
     type_ = (reinterpret_cast<ActivationParameter *>(param))->type_;
     alpha_ = (float16_t)((reinterpret_cast<ActivationParameter *>(param))->alpha_);
     min_val_ = (reinterpret_cast<ActivationParameter *>(param))->min_val_;

@@ -18,7 +18,7 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_ARITHMETIC_COMPARE_FP16_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "nnacl/fp16/arithmetic_fp16.h"
 #include "schema/model_generated.h"
 
@@ -34,11 +34,11 @@ typedef struct {
   ArithmeticCompareOptFuncFp16 opt_func_;
 } ARITHMETIC_COMPARE_FUNC_INFO_FP16;
 
-class ArithmeticCompareFP16CPUKernel : public LiteKernel {
+class ArithmeticCompareFP16CPUKernel : public InnerKernel {
  public:
   ArithmeticCompareFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                  const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {
+      : InnerKernel(parameter, inputs, outputs, ctx) {
     param_ = reinterpret_cast<ArithmeticParameter *>(parameter);
   }
   ~ArithmeticCompareFP16CPUKernel() = default;

@@ -17,15 +17,15 @@
 #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_BIASADD_H_
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_BIASADD_H_
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "nnacl/fp16/arithmetic_fp16.h"
 
 namespace mindspore::kernel {
-class BiasAddCPUFp16Kernel : public LiteKernel {
+class BiasAddCPUFp16Kernel : public InnerKernel {
  public:
   BiasAddCPUFp16Kernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                        const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {
+      : InnerKernel(parameter, inputs, outputs, ctx) {
     bias_param_ = reinterpret_cast<ArithmeticParameter *>(parameter);
   }
   ~BiasAddCPUFp16Kernel() override;

@@ -17,17 +17,17 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CAST_FP16_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "nnacl/op_base.h"
 #include "nnacl/fp16/cast_fp16.h"
 #include "nnacl/base/cast_base.h"
 
 namespace mindspore::kernel {
-class CastFp16CPUKernel : public LiteKernel {
+class CastFp16CPUKernel : public InnerKernel {
  public:
   CastFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                     const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {}
+      : InnerKernel(parameter, inputs, outputs, ctx) {}
 
   ~CastFp16CPUKernel() = default;
 

@@ -17,7 +17,7 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_COMMON_FP16_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 
 namespace mindspore::kernel {
 float16_t *ConvertInputFp32toFp16(lite::Tensor *input, const lite::InnerContext *ctx);

@@ -23,16 +23,16 @@
 #include "nnacl/base/concat_base.h"
 #include "nnacl/concat_parameter.h"
 #include "nnacl/fp16/cast_fp16.h"
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "src/runtime/kernel/arm/fp16/common_fp16.h"
 
 using mindspore::lite::InnerContext;
 namespace mindspore::kernel {
-class ConcatFp16CPUKernel : public LiteKernel {
+class ConcatFp16CPUKernel : public InnerKernel {
  public:
   ConcatFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                       const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {
+      : InnerKernel(parameter, inputs, outputs, ctx) {
     concat_param_ = reinterpret_cast<ConcatParameter *>(op_parameter_);
   }
   ~ConcatFp16CPUKernel() = default;

@@ -19,7 +19,7 @@
 
 #include <arm_neon.h>
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "src/runtime/kernel/arm/base/convolution_base.h"
 #include "src/common/utils.h"
 #include "nnacl/matmul_parameter.h"

@@ -110,12 +110,12 @@ int ConvolutionDelegateFP16CPUKernel::ReSize() {
   return fp16_conv_kernel_->ReSize();
 }
 
-kernel::LiteKernel *CpuConvDwFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
-                                               const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
-                                               const InnerContext *ctx) {
+kernel::InnerKernel *CpuConvDwFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
+                                                const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
+                                                const InnerContext *ctx) {
   MS_ASSERT(opParameter != nullptr);
   auto conv_param = reinterpret_cast<ConvParameter *>(opParameter);
-  kernel::LiteKernel *kernel = nullptr;
+  kernel::InnerKernel *kernel = nullptr;
   if (conv_param->input_channel_ < 32) {
     kernel = new (std::nothrow) kernel::ConvolutionDepthwiseSWFp16CPUKernel(opParameter, inputs, outputs, ctx);
   } else {
@@ -129,14 +129,14 @@ kernel::LiteKernel *CpuConvDwFp16KernelCreator(const std::vector<lite::Tensor *>
   return kernel;
 }
 
-kernel::LiteKernel *CpuConvFp16KernelSelect(const std::vector<lite::Tensor *> &inputs,
-                                            const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
-                                            const lite::InnerContext *ctx, void *origin_weight, void *origin_bias) {
+kernel::InnerKernel *CpuConvFp16KernelSelect(const std::vector<lite::Tensor *> &inputs,
+                                             const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
+                                             const lite::InnerContext *ctx, void *origin_weight, void *origin_bias) {
   auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter);
   bool use_winograd = false;
   int out_unit;
   CheckIfUseWinogradFp16(&use_winograd, &out_unit, conv_param);
-  kernel::LiteKernel *kernel = nullptr;
+  kernel::InnerKernel *kernel = nullptr;
 
   if (conv_param->kernel_h_ == 1 && conv_param->kernel_w_ == 1) {
     kernel = new (std::nothrow)
@@ -158,9 +158,9 @@ kernel::LiteKernel *CpuConvFp16KernelSelect(const std::vector<lite::Tensor *> &i
   return kernel;
 }
 
-kernel::LiteKernel *CpuGroupConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
-                                                  const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
-                                                  const InnerContext *ctx) {
+kernel::InnerKernel *CpuGroupConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
+                                                   const std::vector<lite::Tensor *> &outputs,
+                                                   OpParameter *op_parameter, const InnerContext *ctx) {
   auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter);
   GroupConvCreator group_conv_creator(inputs, outputs, op_parameter, ctx, false, kNumberTypeFloat16);
   group_conv_creator.SetShapeOfTensors();
@@ -183,9 +183,9 @@ kernel::LiteKernel *CpuGroupConvFp16KernelCreator(const std::vector<lite::Tensor
 }
 
 /* creator func */
-kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
-                                             const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
-                                             const lite::Context *ctx, const kernel::KernelKey &desc) {
+kernel::InnerKernel *CpuConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
+                                              const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
+                                              const lite::Context *ctx, const kernel::KernelKey &desc) {
   MS_ASSERT(opParameter != nullptr);
   MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DFusion);
 
@@ -199,7 +199,7 @@ kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::Tensor *> &
     return nullptr;
   }
   auto conv_param = reinterpret_cast<ConvParameter *>(opParameter);
-  kernel::LiteKernel *kernel = nullptr;
+  kernel::InnerKernel *kernel = nullptr;
   if (conv_param->group_ == 1) {
     kernel = new (std::nothrow) kernel::ConvolutionDelegateFP16CPUKernel(opParameter, inputs, outputs,
                                                                          static_cast<const lite::InnerContext *>(ctx));

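Every fp16 creator above changes its return type from kernel::LiteKernel * to kernel::InnerKernel *, so kernel construction now yields the inner object. A compact sketch of the repatterned creator shape; the types are invented and only the nothrow-new selection structure mirrors CpuConvDwFp16KernelCreator:

#include <new>

struct InnerKernelSketch {
  virtual ~InnerKernelSketch() = default;
};
struct DepthwiseSketch : InnerKernelSketch {};
struct GenericConvSketch : InnerKernelSketch {};

// Pick a concrete kernel, return it through the inner-kernel base type.
InnerKernelSketch *CreateConvSketch(int input_channel) {
  InnerKernelSketch *kernel = nullptr;  // was kernel::LiteKernel *
  if (input_channel < 32) {
    kernel = new (std::nothrow) DepthwiseSketch();
  } else {
    kernel = new (std::nothrow) GenericConvSketch();
  }
  return kernel;
}
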
@@ -18,7 +18,7 @@
 
 #include <arm_neon.h>
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "nnacl/conv_parameter.h"
 #include "nnacl/op_base.h"
 
@@ -26,11 +26,11 @@
 #define BIAS_NEED_FREE 0b10
 
 namespace mindspore::kernel {
-class ConvolutionDelegateFP16CPUKernel : public LiteKernel {
+class ConvolutionDelegateFP16CPUKernel : public InnerKernel {
  public:
   ConvolutionDelegateFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                    const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {}
+      : InnerKernel(parameter, inputs, outputs, ctx) {}
   ~ConvolutionDelegateFP16CPUKernel() override {
     FreeCopiedData();
     if (fp16_conv_kernel_ != nullptr) {
@@ -52,12 +52,12 @@ class ConvolutionDelegateFP16CPUKernel : public LiteKernel {
   uint8_t need_free_ = 0b00;
   void *origin_weight_ = nullptr;
   void *origin_bias_ = nullptr;
-  kernel::LiteKernel *fp16_conv_kernel_ = nullptr;
+  kernel::InnerKernel *fp16_conv_kernel_ = nullptr;
 };
 
-kernel::LiteKernel *CpuConvFp16KernelSelect(const std::vector<lite::Tensor *> &inputs,
-                                            const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
-                                            const lite::InnerContext *ctx, void *origin_weight, void *origin_bias);
+kernel::InnerKernel *CpuConvFp16KernelSelect(const std::vector<lite::Tensor *> &inputs,
+                                             const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
+                                             const lite::InnerContext *ctx, void *origin_weight, void *origin_bias);
 } // namespace mindspore::kernel
 
 #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONVOLUTION_DELEGATE_FP16_H_

@@ -18,7 +18,7 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONVOLUTION_DEPTHWISE_FP16_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "src/runtime/kernel/arm/base/convolution_base.h"
 #include "nnacl/fp16/conv_depthwise_fp16.h"
 

@@ -18,7 +18,7 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONVOLUTION_DEPTHWISE_SW_FP16_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "src/runtime/kernel/arm/base/convolution_base.h"
 #include "nnacl/fp16/conv_depthwise_fp16.h"
 

@@ -19,7 +19,7 @@
 
 #include <arm_neon.h>
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "src/runtime/kernel/arm/base/convolution_base.h"
 
 namespace mindspore::kernel {

@@ -19,7 +19,7 @@
 
 #include <arm_neon.h>
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "src/runtime/kernel/arm/base/convolution_base.h"
 #include "nnacl/fp16/conv_fp16.h"
 #include "nnacl/fp16/winograd_utils_fp16.h"

@@ -22,7 +22,7 @@
 #include "include/errorcode.h"
 #include "nnacl/crop_parameter.h"
 #include "nnacl/fp16/crop_fp16.h"
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "src/runtime/kernel/arm/base/crop_base.h"
 #include "src/runtime/kernel/arm/fp16/common_fp16.h"
 

@@ -18,7 +18,7 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_DECONVOLUTION_DEPTHWISE_FP16_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "src/runtime/kernel/arm/base/convolution_base.h"
 #include "nnacl/fp16/conv_depthwise_fp16.h"
 

@@ -229,13 +229,13 @@ int DeConvolutionFp16CPUKernel::Run() {
   return error_code;
 }
 
-kernel::LiteKernel *CpuDeConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
-                                               const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
-                                               const lite::Context *ctx, const kernel::KernelKey &desc) {
+kernel::InnerKernel *CpuDeConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
+                                                const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
+                                                const lite::Context *ctx, const kernel::KernelKey &desc) {
   MS_ASSERT(op_parameter != nullptr);
   MS_ASSERT(desc.type == schema::PrimitiveType_Conv2dTransposeFusion);
 
-  kernel::LiteKernel *kernel = nullptr;
+  kernel::InnerKernel *kernel = nullptr;
   auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter);
   if (conv_param->group_ == 1) {
     if ((conv_param->stride_h_ != 1 || conv_param->stride_w_ != 1) &&

@@ -56,7 +56,7 @@ int GatherFp16CPUKernel::ReSize() { return RET_OK; }
 
 int GatherFp16CPUKernel::PreProcess() {
   if (!InferShapeDone()) {
-    auto ret = lite::KernelInferShape(in_tensors_, &out_tensors_, op_parameter_);
+    auto ret = lite::KernelInferShape(in_tensors_, out_tensors_, op_parameter_);
     if (ret != 0) {
       MS_LOG(ERROR) << "InferShape fail!";
       return ret;
@@ -68,16 +68,15 @@ int GatherFp16CPUKernel::PreProcess() {
     }
     out_tensors_[0]->set_data_type(kNumberTypeFloat16);
   }
-
-  for (auto *output : this->out_tensors()) {
+  for (auto *output : out_tensors_) {
     MS_ASSERT(output != nullptr);
-    auto ret = output->MallocData();
     if (output->ElementsNum() >= MAX_MALLOC_SIZE / static_cast<int>(sizeof(int64_t))) {
       MS_LOG(ERROR) << "The size of output tensor is too big";
       return RET_ERROR;
     }
+    auto ret = output->MallocData();
     if (ret != RET_OK) {
-      MS_LOG(ERROR) << "MallocData failed";
+      MS_LOG(ERROR) << "gather out tensor malloc data failed.";
       return ret;
     }
   }

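Besides swapping the accessor for the member, the gather hunk reorders PreProcess so the output-size guard runs before MallocData instead of after the allocation has already happened. A distilled sketch of the corrected ordering; the limit and helper are stand-ins for MAX_MALLOC_SIZE and the tensor call:

#include <cstdio>
#include <cstdlib>

constexpr long kMaxMallocSizeSketch = 1L << 30;  // stand-in for MAX_MALLOC_SIZE

// Validate first, allocate second: the order the hunk establishes.
int AllocOutputSketch(long element_num, void **out) {
  if (element_num >= kMaxMallocSizeSketch / static_cast<long>(sizeof(long))) {
    std::fprintf(stderr, "The size of output tensor is too big\n");
    return -1;
  }
  *out = std::malloc(static_cast<std::size_t>(element_num) * sizeof(long));
  if (*out == nullptr) {
    std::fprintf(stderr, "gather out tensor malloc data failed.\n");
    return -1;
  }
  return 0;
}
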
@@ -20,16 +20,16 @@
 #include <arm_neon.h>
 #include <vector>
 #include "include/errorcode.h"
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "nnacl/gather_parameter.h"
 #include "nnacl/base/gather_base.h"

 namespace mindspore::kernel {
-class GatherFp16CPUKernel : public LiteKernel {
+class GatherFp16CPUKernel : public InnerKernel {
  public:
   GatherFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                       const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {}
+      : InnerKernel(parameter, inputs, outputs, ctx) {}
   ~GatherFp16CPUKernel() override;

   int Init() override;
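Every fp16 kernel header touched by this diff applies the same substitution: derive from InnerKernel instead of LiteKernel and forward the same four constructor arguments to the new base. A condensed, hypothetical example of the pattern (MyFp16CPUKernel and the trivial method bodies are placeholders; the surrounding Lite headers are assumed):

class MyFp16CPUKernel : public InnerKernel {
 public:
  MyFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                  const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
      : InnerKernel(parameter, inputs, outputs, ctx) {}
  ~MyFp16CPUKernel() override = default;

  int Init() override { return RET_OK; }    // one-time setup
  int ReSize() override { return RET_OK; }  // shape-dependent setup
  int Run() override { return RET_OK; }     // per-inference compute
};
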
@@ -26,7 +26,7 @@ int GroupConvolutionFP16CPUKernel::SeparateInput(int group_id) {
   int in_plane = in_tensor->Height() * in_tensor->Width() * in_tensor->Batch();
   int sub_in_channel = conv_param_->input_channel_;
   int ori_in_channel = sub_in_channel * group_num_;
-  auto sub_in_data = group_convs_.at(group_id)->in_tensors().front()->data_c();
+  auto sub_in_data = static_cast<lite::Tensor *>(group_convs_.at(group_id)->in_tensors().front())->data_c();
   auto in_data_type = in_tensors_.front()->data_type();
   auto sub_in_data_type = group_convs_.at(group_id)->in_tensors().front()->data_type();
   if (in_data_type != sub_in_data_type) {
@@ -67,7 +67,8 @@ int GroupConvolutionFP16CPUKernel::PostConcat(int group_id) {
   int out_plane = out_tensor->Height() * out_tensor->Width() * out_tensor->Batch();
   int sub_out_channel = conv_param_->output_channel_;
   int ori_out_channel = sub_out_channel * group_num_;
-  auto sub_out_data = reinterpret_cast<float16_t *>(group_convs_.at(group_id)->out_tensors().front()->data_c());
+  auto sub_out_data = reinterpret_cast<float16_t *>(
+    static_cast<lite::Tensor *>(group_convs_.at(group_id)->out_tensors().front())->data_c());
   MS_ASSERT(sub_out_data);
   float16_t *src_ptr = sub_out_data;
   float16_t *dst_ptr = reinterpret_cast<float16_t *>(ori_out_data_) + group_id * sub_out_channel;
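Both group-convolution hunks add a static_cast before calling data_c(). The likely reason, stated as an assumption: after the interface rectification, in_tensors()/out_tensors() expose tensors through a base type that does not itself offer data_c(), so callers must recover the concrete lite::Tensor first. A self-contained illustration with hypothetical types:

struct BaseTensor { virtual ~BaseTensor() = default; };
struct ConcreteTensor : BaseTensor {
  void *data_c() { return data_; }
  void *data_ = nullptr;
};

// Containers hold BaseTensor*, so recover the concrete type before
// touching the raw buffer -- the same shape as the casts above.
void *RawData(BaseTensor *t) { return static_cast<ConcreteTensor *>(t)->data_c(); }
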
@@ -19,7 +19,7 @@

 #include <utility>
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "nnacl/op_base.h"
 #include "src/runtime/kernel/arm/base/group_convolution_base.h"
 #include "nnacl/fp16/conv_fp16.h"

@@ -29,7 +29,7 @@ class GroupConvolutionFP16CPUKernel : public GroupConvolutionBaseCPUKernel {
  public:
   GroupConvolutionFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                 const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
-                                std::vector<kernel::LiteKernel *> group_convs, const int group_num)
+                                std::vector<kernel::InnerKernel *> group_convs, const int group_num)
       : GroupConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, std::move(group_convs), group_num) {
   }  // opParameter(in channel, out channel) in this kernel has been split to groups, if
      // you want to get real params, multiply in channel / out channel with group num

@@ -16,15 +16,15 @@
 #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_GRU_H_
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_GRU_H_
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "nnacl/gru_parameter.h"

 namespace mindspore::kernel {
-class GruFp16CPUKernel : public LiteKernel {
+class GruFp16CPUKernel : public InnerKernel {
  public:
   GruFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                    const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {
+      : InnerKernel(parameter, inputs, outputs, ctx) {
     gru_param_ = reinterpret_cast<GruParameter *>(op_parameter_);
   }

@@ -16,18 +16,18 @@
 #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_INSTANCE_NORM_H_
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_INSTANCE_NORM_H_
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "include/context.h"
 #include "nnacl/instance_norm_parameter.h"

 using mindspore::lite::InnerContext;

 namespace mindspore::kernel {
-class InstanceNormFp16CPUKernel : public LiteKernel {
+class InstanceNormFp16CPUKernel : public InnerKernel {
  public:
   InstanceNormFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                             const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {
+      : InnerKernel(parameter, inputs, outputs, ctx) {
     param_ = reinterpret_cast<InstanceNormParameter *>(parameter);
   }
   ~InstanceNormFp16CPUKernel() override { FreeTmpBuffer(); };

@@ -19,7 +19,7 @@

 #include <arm_neon.h>
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "src/runtime/kernel/arm/base/softmax_base.h"

 namespace mindspore::kernel {

@@ -18,15 +18,15 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_LSTM_H_

 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "nnacl/lstm_parameter.h"

 namespace mindspore::kernel {
-class LstmFp16CPUKernel : public LiteKernel {
+class LstmFp16CPUKernel : public InnerKernel {
  public:
   LstmFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                     const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {
+      : InnerKernel(parameter, inputs, outputs, ctx) {
     lstm_param_ = reinterpret_cast<LstmParameter *>(op_parameter_);
   }

@@ -21,15 +21,15 @@
 #include <arm_neon.h>
 #endif
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "nnacl/matmul_parameter.h"

 namespace mindspore::kernel {
-class MatmulBaseFP16CPUKernel : public LiteKernel {
+class MatmulBaseFP16CPUKernel : public InnerKernel {
  public:
   explicit MatmulBaseFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                    const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {
+      : InnerKernel(parameter, inputs, outputs, ctx) {
     params_ = reinterpret_cast<MatMulParameter *>(op_parameter_);
   }
   ~MatmulBaseFP16CPUKernel() override;

@@ -19,7 +19,7 @@
 #include <arm_neon.h>
 #include <vector>
 #include <cfloat>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "src/runtime/kernel/arm/base/pooling_base.h"

 namespace mindspore::kernel {

@@ -18,16 +18,16 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_POWER_H_

 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "include/context.h"
 #include "nnacl/fp16/power_fp16.h"

 namespace mindspore::kernel {
-class PowerFp16CPUKernel : public LiteKernel {
+class PowerFp16CPUKernel : public InnerKernel {
  public:
   PowerFp16CPUKernel(OpParameter *param, const std::vector<lite::Tensor *> &inputs,
                      const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(param, inputs, outputs, ctx),
+      : InnerKernel(param, inputs, outputs, ctx),
         thread_count_(ctx->thread_num_),
         scale_(reinterpret_cast<PowerParameter *>(op_parameter_)->scale_),
         shift_(reinterpret_cast<PowerParameter *>(op_parameter_)->shift_) {}

@@ -174,10 +174,10 @@ int QuantDTypeCastFp16CPUKernel::Run() {
   return RET_OK;
 }

-kernel::LiteKernel *CpuQuantDTypeCastFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
-                                                       const std::vector<lite::Tensor *> &outputs,
-                                                       OpParameter *opParameter, const lite::InnerContext *ctx,
-                                                       const kernel::KernelKey &desc) {
+kernel::InnerKernel *CpuQuantDTypeCastFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
+                                                        const std::vector<lite::Tensor *> &outputs,
+                                                        OpParameter *opParameter, const lite::InnerContext *ctx,
+                                                        const kernel::KernelKey &desc) {
   if (opParameter == nullptr) {
     MS_LOG(ERROR) << "Input opParameter is nullptr!";
     return nullptr;

@@ -19,14 +19,14 @@

 #include <arm_neon.h>
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"

 namespace mindspore::kernel {
-class QuantDTypeCastFp16CPUKernel : public LiteKernel {
+class QuantDTypeCastFp16CPUKernel : public InnerKernel {
  public:
   QuantDTypeCastFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                               const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx), thread_num_(ctx->thread_num_) {}
+      : InnerKernel(parameter, inputs, outputs, ctx), thread_num_(ctx->thread_num_) {}
   ~QuantDTypeCastFp16CPUKernel() override = default;

   int Init() override;

@@ -19,7 +19,7 @@

 #include <arm_neon.h>
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "src/runtime/kernel/arm/base/reduce_base.h"

 using mindspore::schema::ReduceMode;

@@ -18,7 +18,7 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_SCALE_FP16_H_

 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "src/runtime/kernel/arm/fp32/scale_fp32.h"
 #include "nnacl/scale.h"

@@ -18,7 +18,7 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_SLICE_FP16_H_

 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "src/runtime/kernel/arm/base/slice_base.h"

 namespace mindspore::kernel {

@@ -19,7 +19,7 @@

 #include <arm_neon.h>
 #include <vector>
-#include "src/lite_kernel.h"
+#include "src/inner_kernel.h"
 #include "src/runtime/kernel/arm/base/softmax_base.h"

 namespace mindspore::kernel {
Some files were not shown because too many files have changed in this diff.