forked from mindspore-Ecosystem/mindspore
[MSLITE][DEVELOP] add delegate api for third_party AI framework
This commit is contained in:
parent
3ee381fb67
commit
1f4f62636c
|
@ -38,7 +38,8 @@ int TransOut2InputDimIndex(int out_dim_index, int left_pad, int in_dim, int offs
|
|||
if (out_dim_index < left_pad) {
|
||||
// left pad
|
||||
const int index_sum = left_pad + offset - 1;
|
||||
return MSMAX(index_sum - out_dim_index, offset);
|
||||
int in_index = MSMAX(index_sum - out_dim_index, offset);
|
||||
return MSMIN(in_index, in_dim - 1);
|
||||
}
|
||||
out_dim_index -= left_pad;
|
||||
if (out_dim_index < in_dim) {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
* Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
@ -66,6 +66,7 @@ struct Context {
|
|||
#else
|
||||
DeviceContextVector device_list_;
|
||||
#endif // NOT_USE_STL
|
||||
DelegatePtr delegate = nullptr;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_INCLUDE_CONTEXT_H_
|
||||
|
|
|
@ -0,0 +1,60 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_DELEGATE_DELEGATE_H_
|
||||
#define MINDSPORE_LITE_DELEGATE_DELEGATE_H_
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include "include/ms_tensor.h"
|
||||
#include "include/context.h"
|
||||
#include "include/kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
using KernelIter = std::vector<kernel::Kernel *>::iterator;
|
||||
class DelegateModel {
|
||||
public:
|
||||
DelegateModel(std::vector<kernel::Kernel *> *kernels,
|
||||
std::map<kernel::Kernel *, const schema::Primitive *> primitives)
|
||||
: kernels_(kernels), primitives_(primitives) {}
|
||||
|
||||
~DelegateModel() = default;
|
||||
|
||||
const schema::Primitive *GetPrimitive(kernel::Kernel *kernel);
|
||||
|
||||
KernelIter BeginKernelIterator();
|
||||
|
||||
KernelIter EndKernelIterator();
|
||||
|
||||
KernelIter Replace(KernelIter from, KernelIter end, kernel::Kernel *graph_kernel);
|
||||
|
||||
protected:
|
||||
std::vector<kernel::Kernel *> *kernels_;
|
||||
std::map<kernel::Kernel *, const schema::Primitive *> primitives_;
|
||||
};
|
||||
|
||||
class Delegate {
|
||||
public:
|
||||
Delegate() = default;
|
||||
|
||||
virtual ~Delegate() = default;
|
||||
|
||||
virtual int Init() = 0;
|
||||
|
||||
virtual int Build(DelegateModel *model) = 0;
|
||||
};
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_LITE_DELEGATE_DELEGATE_H_
|
|
@ -19,8 +19,10 @@
|
|||
#include <vector>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include "schema/model_generated.h"
|
||||
#include "include/lite_utils.h"
|
||||
#include "src/common/log_adapter.h"
|
||||
#include "include/context.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
class Kernel {
|
||||
|
@ -43,7 +45,7 @@ class Kernel {
|
|||
|
||||
virtual int ReSize() = 0;
|
||||
|
||||
virtual schema::PrimitiveType type() { return type_; }
|
||||
virtual schema::PrimitiveType type() const { return type_; }
|
||||
|
||||
virtual void set_inputs(const std::vector<mindspore::tensor::MSTensor *> &in_tensors) { this->inputs_ = in_tensors; }
|
||||
virtual void set_input(mindspore::tensor::MSTensor *in_tensor, int index) {
|
||||
|
|
|
@ -604,6 +604,8 @@ using TensorPtrVector = Vector<mindspore::schema::Tensor *>;
|
|||
using Uint32Vector = Vector<uint32_t>;
|
||||
class Allocator;
|
||||
using AllocatorPtr = void *;
|
||||
class Delegate;
|
||||
using DelegatePtr = void *;
|
||||
using DeviceContextVector = Vector<lite::DeviceContext>;
|
||||
using KernelCallBack = void (*)(void *, void *);
|
||||
#else
|
||||
|
@ -613,6 +615,9 @@ using KernelCallBack = void (*)(void *, void *);
|
|||
class Allocator;
|
||||
using AllocatorPtr = std::shared_ptr<Allocator>;
|
||||
|
||||
class Delegate;
|
||||
using DelegatePtr = std::shared_ptr<Delegate>;
|
||||
|
||||
using TensorPtrVector = std::vector<mindspore::schema::Tensor *>;
|
||||
using Uint32Vector = std::vector<uint32_t>;
|
||||
template <typename T>
|
||||
|
|
|
@ -52,6 +52,7 @@ set(LITE_SRC
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/common/prim_util.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/common/tensor_util.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/common/dynamic_library_loader.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/delegate/delegate.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/common/quant_utils.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/runtime/inner_allocator.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/runtime/infer_manager.cc
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "include/delegate.h"
|
||||
namespace mindspore {
|
||||
const schema::Primitive *DelegateModel::GetPrimitive(kernel::Kernel *kernel) {
|
||||
if (primitives_.find(kernel) != primitives_.end()) {
|
||||
return primitives_[kernel];
|
||||
} else {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
KernelIter DelegateModel::BeginKernelIterator() { return kernels_->begin(); }
|
||||
|
||||
KernelIter DelegateModel::EndKernelIterator() { return kernels_->end(); }
|
||||
|
||||
KernelIter DelegateModel::Replace(KernelIter from, KernelIter end, kernel::Kernel *graph_kernel) {
|
||||
int insert_index = from - BeginKernelIterator();
|
||||
kernels_->erase(from, end);
|
||||
kernels_->insert(BeginKernelIterator() + insert_index, graph_kernel);
|
||||
return BeginKernelIterator() + insert_index + 1;
|
||||
}
|
||||
} // namespace mindspore
|
|
@ -29,8 +29,8 @@ class Executor {
|
|||
virtual ~Executor() = default;
|
||||
|
||||
virtual int Prepare(const std::vector<kernel::LiteKernel *> &kernels, const std::vector<Tensor *> &inputs,
|
||||
const std::vector<Tensor *> &outputs) {
|
||||
ctx_ = static_cast<const lite::InnerContext *>(kernels[0]->Context());
|
||||
const std::vector<Tensor *> &outputs, const lite::InnerContext *ctx) {
|
||||
ctx_ = ctx;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -130,7 +130,7 @@ class InnerKernel : public Kernel {
|
|||
return true;
|
||||
}
|
||||
|
||||
schema::PrimitiveType type() override {
|
||||
schema::PrimitiveType type() const override {
|
||||
return (this->op_parameter_ != nullptr) ? schema::PrimitiveType(this->op_parameter_->type_)
|
||||
: schema::PrimitiveType_NONE;
|
||||
}
|
||||
|
|
|
@ -34,9 +34,10 @@
|
|||
#include "include/context.h"
|
||||
#include "include/kernel.h"
|
||||
#include "src/inner_kernel.h"
|
||||
#include "include/delegate.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
enum KERNEL_ARCH { kCPU, kGPU, kAPU, kNPU, kCustom, kKernelArch_MIN = kCPU, kKernelArch_MAX = kNPU };
|
||||
enum KERNEL_ARCH { kCPU, kGPU, kAPU, kNPU, kCustom, kDelegate, kKernelArch_MIN = kCPU, kKernelArch_MAX = kDelegate };
|
||||
static const char *const kBuiltin = "Builtin";
|
||||
|
||||
struct KernelKey {
|
||||
|
@ -45,6 +46,7 @@ struct KernelKey {
|
|||
int type;
|
||||
std::string kernel_arch;
|
||||
std::string provider{kBuiltin};
|
||||
std::shared_ptr<Delegate> delegate = nullptr;
|
||||
|
||||
bool operator<(const KernelKey &dst) const {
|
||||
if (provider != dst.provider) {
|
||||
|
@ -125,7 +127,13 @@ class LiteKernel {
|
|||
return kernel_->Prepare();
|
||||
}
|
||||
|
||||
virtual int Init() { return mindspore::lite::RET_OK; }
|
||||
virtual int Init() {
|
||||
MS_ASSERT(kernel_ != nullptr);
|
||||
if (desc_.provider == kBuiltin) {
|
||||
return std::static_pointer_cast<InnerKernel>(kernel_)->Init();
|
||||
}
|
||||
return mindspore::lite::RET_OK;
|
||||
}
|
||||
|
||||
virtual int ReSize() {
|
||||
MS_ASSERT(kernel_ != nullptr);
|
||||
|
@ -136,7 +144,10 @@ class LiteKernel {
|
|||
|
||||
OpParameter *op_parameter() const {
|
||||
MS_ASSERT(kernel_ != nullptr);
|
||||
return std::static_pointer_cast<InnerKernel>(kernel_)->op_parameter();
|
||||
if (desc_.provider == kBuiltin) {
|
||||
return std::static_pointer_cast<InnerKernel>(kernel_)->op_parameter();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::string name() const {
|
||||
|
@ -151,32 +162,49 @@ class LiteKernel {
|
|||
|
||||
virtual int Train() {
|
||||
MS_ASSERT(kernel_ != nullptr);
|
||||
return std::static_pointer_cast<InnerKernel>(kernel_)->Train();
|
||||
if (desc_.provider == kBuiltin) {
|
||||
return std::static_pointer_cast<InnerKernel>(kernel_)->Train();
|
||||
}
|
||||
return mindspore::lite::RET_OK;
|
||||
}
|
||||
|
||||
virtual bool IsTrain() const {
|
||||
MS_ASSERT(kernel_ != nullptr);
|
||||
return std::static_pointer_cast<InnerKernel>(kernel_)->IsTrain();
|
||||
if (desc_.provider == kBuiltin) {
|
||||
return std::static_pointer_cast<InnerKernel>(kernel_)->IsTrain();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual int Eval() {
|
||||
MS_ASSERT(kernel_ != nullptr);
|
||||
return std::static_pointer_cast<InnerKernel>(kernel_)->Eval();
|
||||
if (desc_.provider == kBuiltin) {
|
||||
return std::static_pointer_cast<InnerKernel>(kernel_)->Eval();
|
||||
}
|
||||
return mindspore::lite::RET_OK;
|
||||
}
|
||||
|
||||
virtual bool IsEval() const {
|
||||
MS_ASSERT(kernel_ != nullptr);
|
||||
return std::static_pointer_cast<InnerKernel>(kernel_)->IsEval();
|
||||
if (desc_.provider == kBuiltin) {
|
||||
return std::static_pointer_cast<InnerKernel>(kernel_)->IsEval();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual void set_trainable(bool trainable = true) {
|
||||
MS_ASSERT(kernel_ != nullptr);
|
||||
std::static_pointer_cast<InnerKernel>(kernel_)->set_trainable(trainable);
|
||||
if (desc_.provider == kBuiltin) {
|
||||
std::static_pointer_cast<InnerKernel>(kernel_)->set_trainable(trainable);
|
||||
}
|
||||
}
|
||||
|
||||
virtual bool is_trainable() const {
|
||||
MS_ASSERT(kernel_ != nullptr);
|
||||
return std::static_pointer_cast<InnerKernel>(kernel_)->is_trainable();
|
||||
if (desc_.provider == kBuiltin) {
|
||||
return std::static_pointer_cast<InnerKernel>(kernel_)->is_trainable();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void set_is_model_output(bool is_model_output) { this->is_model_output_ = is_model_output; }
|
||||
|
@ -330,13 +358,6 @@ kernel::InnerKernel *LiteKernelCreator(const std::vector<lite::Tensor *> &inputs
|
|||
free(parameter);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto ret = kernel->Init();
|
||||
if (ret != lite::RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << parameter->name_;
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
} // namespace mindspore::kernel
|
||||
|
|
|
@ -91,11 +91,15 @@ void LiteOpActor::IsolateInputData(std::vector<std::shared_ptr<LiteOpActor>> *ac
|
|||
|
||||
int ref_count = 0;
|
||||
/* set op input for calculate */
|
||||
for (auto in_node : reinterpret_cast<kernel::SubGraphKernel *>(kernel_)->in_nodes()) {
|
||||
for (size_t node_in_index = 0; node_in_index < in_node->in_tensors().size(); node_in_index++) {
|
||||
if (old_tensor == in_node->in_tensors()[node_in_index]) {
|
||||
in_node->set_in_tensor(new_tensor, node_in_index);
|
||||
ref_count++;
|
||||
if (kernel_->desc().delegate != nullptr) {
|
||||
ref_count++;
|
||||
} else {
|
||||
for (auto in_node : reinterpret_cast<kernel::SubGraphKernel *>(kernel_)->in_nodes()) {
|
||||
for (size_t node_in_index = 0; node_in_index < in_node->in_tensors().size(); node_in_index++) {
|
||||
if (old_tensor == in_node->in_tensors()[node_in_index]) {
|
||||
in_node->set_in_tensor(new_tensor, node_in_index);
|
||||
ref_count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -162,6 +166,10 @@ int LiteOpActor::CompileArrowThroughOutputKernels() {
|
|||
}
|
||||
|
||||
int LiteOpActor::CompileArrowThroughPartialCall() {
|
||||
if (kernel_->desc().delegate != nullptr) {
|
||||
MS_LOG(INFO) << "kernel is delegate subgraph kernel.";
|
||||
return RET_OK;
|
||||
}
|
||||
auto *subgraph_kernel = reinterpret_cast<kernel::SubGraphKernel *>(kernel_);
|
||||
if (subgraph_kernel == nullptr) {
|
||||
MS_LOG(INFO) << "kernel is not subgraph kernel, no partial call.";
|
||||
|
@ -330,10 +338,11 @@ int LiteOpActor::PrepareOutputData() {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<LiteOpActor>> CreateOpActor(const std::vector<kernel::LiteKernel *> &kernels) {
|
||||
std::vector<std::shared_ptr<LiteOpActor>> CreateOpActor(const std::vector<kernel::LiteKernel *> &kernels,
|
||||
const lite::InnerContext *ctx) {
|
||||
std::vector<std::shared_ptr<LiteOpActor>> actors;
|
||||
std::unordered_map<size_t, AID> partial_map{};
|
||||
auto thread_pool = kernels[0]->Context()->thread_pool_;
|
||||
auto thread_pool = ctx->thread_pool_;
|
||||
if (thread_pool == nullptr) {
|
||||
MS_LOG(ERROR) << "thread pool is nullptr";
|
||||
return actors;
|
||||
|
|
|
@ -185,6 +185,7 @@ class LiteSwitchOpActor : public LiteOpActor {
|
|||
int MindrtInit(bool subgraph_split = false);
|
||||
void MindrtTerminate(const std::vector<std::shared_ptr<LiteOpActor>> &);
|
||||
|
||||
std::vector<std::shared_ptr<LiteOpActor>> CreateOpActor(const std::vector<kernel::LiteKernel *> &kernels);
|
||||
std::vector<std::shared_ptr<LiteOpActor>> CreateOpActor(const std::vector<kernel::LiteKernel *> &kernels,
|
||||
const lite::InnerContext *ctx);
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_LITE_MINDRT_H_
|
||||
|
|
|
@ -383,6 +383,21 @@ void LiteSession::IsolateOutputTensor() {
|
|||
|
||||
/* set new tensor for calculate */
|
||||
for (auto subgraph : kernels_) {
|
||||
/* subgraph input and output */
|
||||
for (size_t i = 0; i < subgraph->in_tensors().size(); i++) {
|
||||
if (subgraph->in_tensors()[i] == src_tensor) {
|
||||
subgraph->set_in_tensor(new_tensor, i);
|
||||
}
|
||||
}
|
||||
for (size_t i = 0; i < subgraph->out_tensors().size(); i++) {
|
||||
if (subgraph->out_tensors()[i] == src_tensor) {
|
||||
subgraph->set_out_tensor(new_tensor, i);
|
||||
}
|
||||
}
|
||||
|
||||
if (subgraph->desc().delegate != nullptr) {
|
||||
continue;
|
||||
}
|
||||
/* node input and output */
|
||||
auto nodes = reinterpret_cast<kernel::SubGraphKernel *>(subgraph)->nodes();
|
||||
for (size_t i = 0; i < nodes.size(); i++) {
|
||||
|
@ -398,18 +413,6 @@ void LiteSession::IsolateOutputTensor() {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* subgraph input and output */
|
||||
for (size_t i = 0; i < subgraph->in_tensors().size(); i++) {
|
||||
if (subgraph->in_tensors()[i] == src_tensor) {
|
||||
subgraph->set_in_tensor(new_tensor, i);
|
||||
}
|
||||
}
|
||||
for (size_t i = 0; i < subgraph->out_tensors().size(); i++) {
|
||||
if (subgraph->out_tensors()[i] == src_tensor) {
|
||||
subgraph->set_out_tensor(new_tensor, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
|
@ -450,6 +453,9 @@ bool LiteSession::IfUseMindrtExecutor() {
|
|||
#endif
|
||||
|
||||
for (auto kernel : kernels_) {
|
||||
if (kernel->desc().delegate != nullptr) {
|
||||
continue;
|
||||
}
|
||||
auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
|
||||
if (sub_graph->nodes()[0]->type() == schema::PrimitiveType_Merge) {
|
||||
use_mindrt_run = false; /* control-flow model */
|
||||
|
@ -489,9 +495,9 @@ int LiteSession::CompileGraph(Model *model) {
|
|||
}
|
||||
// scheduler kernels
|
||||
#if SUPPORT_NPU
|
||||
Scheduler scheduler(context_, model, &tensors_, is_train_session_, npu_manager_, npu_pass_manager_);
|
||||
Scheduler scheduler(context_, model, &tensors_, is_train_session_, npu_manager_, npu_pass_manager_, delegate_);
|
||||
#else
|
||||
Scheduler scheduler(context_, model, &tensors_, is_train_session_);
|
||||
Scheduler scheduler(context_, model, &tensors_, is_train_session_, delegate_);
|
||||
#endif
|
||||
ret = scheduler.Schedule(&kernels_);
|
||||
if (ret != RET_OK) {
|
||||
|
@ -529,14 +535,13 @@ int LiteSession::CompileGraph(Model *model) {
|
|||
#ifdef ENABLE_MINDRT
|
||||
}
|
||||
#endif
|
||||
|
||||
if (nullptr == executor_) {
|
||||
if (executor_ == nullptr) {
|
||||
MS_LOG(ERROR) << "New Executor failed";
|
||||
is_running_.store(false);
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
ret = executor_->Prepare(this->kernels_, this->inputs_, this->outputs_);
|
||||
ret = executor_->Prepare(this->kernels_, this->inputs_, this->outputs_, context_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Prepare executor failed: " << ret;
|
||||
is_running_.store(false);
|
||||
|
@ -555,10 +560,14 @@ int LiteSession::PrepareKernels(Model *model, bool use_mindrt_run) {
|
|||
// find in_kernels and out_kernels for subgraphs
|
||||
for (auto kernel : this->kernels_) {
|
||||
kernel->FindInoutKernels(this->kernels_);
|
||||
auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
|
||||
MS_ASSERT(sub_graph != nullptr);
|
||||
auto kernel_in_subgraph = sub_graph->nodes();
|
||||
all_kernels.insert(all_kernels.end(), kernel_in_subgraph.begin(), kernel_in_subgraph.end());
|
||||
if (kernel->desc().delegate != nullptr) {
|
||||
all_kernels.push_back(kernel);
|
||||
} else {
|
||||
auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
|
||||
MS_ASSERT(sub_graph != nullptr);
|
||||
auto kernel_in_subgraph = sub_graph->nodes();
|
||||
all_kernels.insert(all_kernels.end(), kernel_in_subgraph.begin(), kernel_in_subgraph.end());
|
||||
}
|
||||
}
|
||||
|
||||
if (!use_mindrt_run) {
|
||||
|
@ -646,6 +655,16 @@ int LiteSession::Init(const Context *context) {
|
|||
is_running_.store(false);
|
||||
return ret;
|
||||
}
|
||||
if (context->delegate != nullptr) {
|
||||
delegate_ = context->delegate;
|
||||
}
|
||||
if (delegate_ != nullptr) {
|
||||
auto delegate_ret = delegate_->Init();
|
||||
if (delegate_ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Delegate init failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
ret = KernelRegistry::GetInstance()->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "KernelRegistry Init Failed.";
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#include "src/executor.h"
|
||||
#include "src/tensor.h"
|
||||
#include "src/tensorlist.h"
|
||||
#include "include/delegate.h"
|
||||
#if SUPPORT_NPU
|
||||
#include "src/runtime/agent/npu/npu_manager.h"
|
||||
#include "src/runtime/agent/npu/optimizer/npu_pass_manager.h"
|
||||
|
@ -150,6 +151,7 @@ class LiteSession : public session::LiteSession {
|
|||
#elif GPU_VULKAN
|
||||
gpu::GpuRuntimeWrapper<vulkan::VulkanRuntime> *vk_runtime_wrap_{nullptr};
|
||||
#endif
|
||||
std::shared_ptr<Delegate> delegate_ = nullptr;
|
||||
};
|
||||
} // namespace lite
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -76,15 +76,15 @@ int MindrtExecutor::Resize(const std::vector<mindspore::tensor::MSTensor *> &inp
|
|||
}
|
||||
|
||||
int MindrtExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels, const std::vector<Tensor *> &inputs,
|
||||
const std::vector<Tensor *> &outputs) {
|
||||
const std::vector<Tensor *> &outputs, const lite::InnerContext *ctx) {
|
||||
MS_ASSERT(kernels.size() != 0);
|
||||
|
||||
auto ret = MindrtInit(kernels[0]->Context()->enable_parallel_);
|
||||
auto ret = MindrtInit(ctx->enable_parallel_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "MindrtInit failed";
|
||||
return ret;
|
||||
}
|
||||
op_actors_ = CreateOpActor(kernels);
|
||||
op_actors_ = CreateOpActor(kernels, ctx);
|
||||
if (op_actors_.size() != kernels.size()) {
|
||||
MS_LOG(ERROR) << "CreateOpActor failed";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -32,15 +32,15 @@ class MindrtExecutor : public Executor {
|
|||
explicit MindrtExecutor(std::unordered_map<Tensor *, Tensor *> *output_map) : output_tensor_map_(output_map) {}
|
||||
virtual ~MindrtExecutor() { MindrtTerminate(op_actors_); }
|
||||
|
||||
virtual int Prepare(const std::vector<kernel::LiteKernel *> &kernels, const std::vector<Tensor *> &inputs,
|
||||
const std::vector<Tensor *> &outputs);
|
||||
int Prepare(const std::vector<kernel::LiteKernel *> &kernels, const std::vector<Tensor *> &inputs,
|
||||
const std::vector<Tensor *> &outputs, const lite::InnerContext *ctx) override;
|
||||
|
||||
virtual int Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
|
||||
const std::vector<kernel::LiteKernel *> &kernels, mindspore::Allocator *allocator = nullptr,
|
||||
const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr);
|
||||
int Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
|
||||
const std::vector<kernel::LiteKernel *> &kernels, mindspore::Allocator *allocator = nullptr,
|
||||
const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr) override;
|
||||
|
||||
virtual int Resize(const std::vector<mindspore::tensor::MSTensor *> &inputs,
|
||||
const std::vector<std::vector<int>> &dims);
|
||||
int Resize(const std::vector<mindspore::tensor::MSTensor *> &inputs,
|
||||
const std::vector<std::vector<int>> &dims) override;
|
||||
|
||||
private:
|
||||
void TransferGraphOutput();
|
||||
|
|
|
@ -33,7 +33,7 @@ NPUExecutor::~NPUExecutor() {
|
|||
}
|
||||
|
||||
int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels, const std::vector<Tensor *> &inputs,
|
||||
const std::vector<Tensor *> &outputs) {
|
||||
const std::vector<Tensor *> &outputs, const lite::InnerContext *ctx) {
|
||||
MS_ASSERT(npu_manager_ != nullptr);
|
||||
this->client_ = npu_manager_->GetClient(model_name_);
|
||||
if (this->client_ == nullptr) {
|
||||
|
|
|
@ -34,7 +34,7 @@ class NPUExecutor : public Executor {
|
|||
: model_name_(model_name), npu_manager_(npu_manager) {}
|
||||
~NPUExecutor() override;
|
||||
int Prepare(const std::vector<kernel::LiteKernel *> &kernels, const std::vector<Tensor *> &inputs,
|
||||
const std::vector<Tensor *> &outputs) override;
|
||||
const std::vector<Tensor *> &outputs, const lite::InnerContext *ctx) override;
|
||||
|
||||
int Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
|
||||
const std::vector<kernel::LiteKernel *> &in_kernels, const std::vector<kernel::LiteKernel *> &kernels,
|
||||
|
|
|
@ -219,7 +219,7 @@ int SubGraphNpuKernel::Init() {
|
|||
}
|
||||
|
||||
int SubGraphNpuKernel::Prepare() {
|
||||
if (executor_->Prepare(nodes_, in_tensors(), out_tensors()) != RET_OK) {
|
||||
if (executor_->Prepare(nodes_, in_tensors(), out_tensors(), nullptr) != RET_OK) {
|
||||
MS_LOG(ERROR) << "NPU executor prepare failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
|
|
@ -32,7 +32,7 @@ class OpenCLExecutor : public Executor {
|
|||
~OpenCLExecutor() override = default;
|
||||
|
||||
int Prepare(const std::vector<kernel::LiteKernel *> &kernels, const std::vector<Tensor *> &inputs,
|
||||
const std::vector<Tensor *> &outputs) override {
|
||||
const std::vector<Tensor *> &outputs, const lite::InnerContext *ctx) override {
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -109,5 +109,34 @@ int ActivationFp16CPUKernel::Run() {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Activation, LiteKernelCreator<ActivationFp16CPUKernel>)
|
||||
/* creator func */
|
||||
kernel::InnerKernel *CpuActivationFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs,
|
||||
OpParameter *opParameter, const lite::Context *ctx,
|
||||
const kernel::KernelKey &desc) {
|
||||
MS_ASSERT(opParameter != nullptr);
|
||||
MS_ASSERT(desc.type == schema::PrimitiveType_Activation);
|
||||
|
||||
auto act_param = reinterpret_cast<ActivationParameter *>(opParameter);
|
||||
auto type = act_param->type_;
|
||||
if (type != schema::ActivationType_RELU && type != schema::ActivationType_RELU6 &&
|
||||
type != schema::ActivationType_LEAKY_RELU && type != schema::ActivationType_SIGMOID &&
|
||||
type != schema::ActivationType_TANH && type != schema::ActivationType_HSWISH &&
|
||||
type != schema::ActivationType_SWISH && type != schema::ActivationType_HARD_TANH) {
|
||||
MS_LOG(ERROR) << "Activation fp16 not support type: " << type;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
kernel::InnerKernel *kernel = nullptr;
|
||||
kernel = new (std::nothrow)
|
||||
kernel::ActivationFp16CPUKernel(opParameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
|
||||
if (kernel == nullptr) {
|
||||
MS_LOG(DEBUG) << "Create activation fp16 kernel failed.";
|
||||
free(opParameter);
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Activation, CpuActivationFp16KernelCreator)
|
||||
} // namespace mindspore::kernel
|
||||
|
|
|
@ -213,14 +213,6 @@ kernel::InnerKernel *CpuConvFp16KernelCreator(const std::vector<lite::Tensor *>
|
|||
free(opParameter);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(INFO) << "Init fp16 kernel failed, name: " << opParameter->name_
|
||||
<< ", type: " << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -255,13 +255,6 @@ kernel::InnerKernel *CpuDeConvFp16KernelCreator(const std::vector<lite::Tensor *
|
|||
free(op_parameter);
|
||||
return nullptr;
|
||||
}
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << op_parameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(op_parameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -175,26 +175,54 @@ int QuantDTypeCastFp16CPUKernel::Run() {
|
|||
|
||||
kernel::InnerKernel *CpuQuantDTypeCastFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs,
|
||||
OpParameter *opParameter, const lite::InnerContext *ctx,
|
||||
OpParameter *opParameter, const lite::Context *ctx,
|
||||
const kernel::KernelKey &desc) {
|
||||
MS_ASSERT(opParameter != nullptr);
|
||||
MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DFusion);
|
||||
|
||||
if (opParameter == nullptr) {
|
||||
MS_LOG(ERROR) << "Input opParameter is nullptr!";
|
||||
MS_LOG(ERROR) << "opParameter is nullptr!";
|
||||
return nullptr;
|
||||
}
|
||||
auto *kernel = new (std::nothrow) QuantDTypeCastFp16CPUKernel(opParameter, inputs, outputs, ctx);
|
||||
auto in_tensor = inputs.front();
|
||||
auto out_tensor = outputs.front();
|
||||
auto param = reinterpret_cast<QuantDTypeCastParameter *>(opParameter);
|
||||
if (param->dstT == kNumberTypeInt8) {
|
||||
if (in_tensor->data_type() != kNumberTypeFloat16 || out_tensor->data_type() != kNumberTypeInt8) {
|
||||
MS_LOG(ERROR) << "param data type and tensor data type do not match.";
|
||||
return nullptr;
|
||||
}
|
||||
} else if (param->srcT == kNumberTypeInt8) {
|
||||
if (in_tensor->data_type() != kNumberTypeInt8 || out_tensor->data_type() != kNumberTypeFloat16) {
|
||||
MS_LOG(ERROR) << "param data type and tensor data type do not match.";
|
||||
return nullptr;
|
||||
}
|
||||
} else if (param->dstT == kNumberTypeUInt8) {
|
||||
if (in_tensor->data_type() != kNumberTypeFloat16 || out_tensor->data_type() != kNumberTypeUInt8) {
|
||||
MS_LOG(ERROR) << "param data type and tensor data type do not match.";
|
||||
return nullptr;
|
||||
}
|
||||
} else if (param->srcT == kNumberTypeUInt8) {
|
||||
if (in_tensor->data_type() != kNumberTypeUInt8 || out_tensor->data_type() != kNumberTypeFloat16) {
|
||||
MS_LOG(ERROR) << "param data type and tensor data type do not match.";
|
||||
return nullptr;
|
||||
}
|
||||
} else {
|
||||
MS_LOG(ERROR) << "param data type not supported:"
|
||||
<< " src: " << param->srcT << " dst: " << param->dstT;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
kernel::InnerKernel *kernel = nullptr;
|
||||
kernel = new (std::nothrow)
|
||||
QuantDTypeCastFp16CPUKernel(opParameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
|
||||
if (kernel == nullptr) {
|
||||
MS_LOG(ERROR) << "new QuantDTypeCastFp16CPUKernel fail!";
|
||||
free(opParameter);
|
||||
return nullptr;
|
||||
}
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed! name: " << opParameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_QuantDTypeCast, LiteKernelCreator<QuantDTypeCastFp16CPUKernel>)
|
||||
|
||||
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_QuantDTypeCast, CpuQuantDTypeCastFp16KernelCreator)
|
||||
} // namespace mindspore::kernel
|
||||
|
|
|
@ -142,5 +142,29 @@ int ReduceFp16CPUKernel::MallocTmpBuffer() {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_ReduceFusion, LiteKernelCreator<ReduceFp16CPUKernel>)
|
||||
/* creator func */
|
||||
kernel::InnerKernel *CpuReduceFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
|
||||
const lite::Context *ctx, const kernel::KernelKey &desc) {
|
||||
MS_ASSERT(opParameter != nullptr);
|
||||
MS_ASSERT(desc.type == schema::PrimitiveType_ReduceFusion);
|
||||
|
||||
auto reduce_param = reinterpret_cast<ReduceParameter *>(opParameter);
|
||||
if (reduce_param->mode_ != ReduceMode_ReduceMean && reduce_param->mode_ != ReduceMode_ReduceMax) {
|
||||
MS_LOG(ERROR) << "Reduce unsupported reduce mode: " << reduce_param->mode_;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
kernel::InnerKernel *kernel = nullptr;
|
||||
kernel = new (std::nothrow)
|
||||
kernel::ReduceFp16CPUKernel(opParameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
|
||||
if (kernel == nullptr) {
|
||||
MS_LOG(DEBUG) << "Create reduce fp16 kernel failed.";
|
||||
free(opParameter);
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_ReduceFusion, CpuReduceFp16KernelCreator)
|
||||
} // namespace mindspore::kernel
|
||||
|
|
|
@ -282,14 +282,6 @@ kernel::InnerKernel *CpuConvFp32KernelCreator(const std::vector<lite::Tensor *>
|
|||
free(op_parameter);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK && ret != RET_INFER_INVALID) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << op_parameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(op_parameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -269,13 +269,6 @@ kernel::InnerKernel *CpuDeConvFp32KernelCreator(const std::vector<lite::Tensor *
|
|||
free(op_parameter);
|
||||
return nullptr;
|
||||
}
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << op_parameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(op_parameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -153,14 +153,6 @@ kernel::InnerKernel *CpuAdamFp32KernelCreator(const std::vector<lite::Tensor *>
|
|||
free(opParameter);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -129,14 +129,6 @@ kernel::InnerKernel *CpuApplyMomentumFp32KernelCreator(const std::vector<lite::T
|
|||
free(opParameter);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -249,14 +249,6 @@ kernel::InnerKernel *CpuArithmeticGradFp32KernelCreator(const std::vector<lite::
|
|||
free(opParameter);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -105,14 +105,6 @@ kernel::InnerKernel *CpuArithmeticSelfGradFp32KernelCreator(const std::vector<li
|
|||
free(param);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << param->name_
|
||||
<< ", type: " << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(param->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -79,14 +79,6 @@ kernel::InnerKernel *CpuAssignFp32KernelCreator(const std::vector<lite::Tensor *
|
|||
free(opParameter);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -102,14 +102,6 @@ kernel::InnerKernel *CpuBiasGradFp32KernelCreator(const std::vector<lite::Tensor
|
|||
free(opParameter);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -173,13 +173,6 @@ kernel::InnerKernel *CpuBNGradFp32KernelCreator(const std::vector<lite::Tensor *
|
|||
free(opParameter);
|
||||
return nullptr;
|
||||
}
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -189,14 +189,6 @@ kernel::InnerKernel *CpuConvTrainFp32KernelCreator(const std::vector<lite::Tenso
|
|||
free(opParameter);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
} // namespace mindspore::kernel
|
||||
|
|
|
@ -218,14 +218,6 @@ kernel::InnerKernel *CpuConvGradFilterFp32KernelCreator(const std::vector<lite::
|
|||
free(opParameter);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -144,14 +144,6 @@ kernel::InnerKernel *CpuDeConvGradFilterFp32KernelCreator(const std::vector<lite
|
|||
free(opParameter);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -125,13 +125,6 @@ kernel::InnerKernel *CpuDropoutFp32KernelCreator(const std::vector<lite::Tensor
|
|||
MS_LOG(ERROR) << "Dropout new kernel failed.";
|
||||
return nullptr;
|
||||
}
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
delete kernel;
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -107,13 +107,6 @@ kernel::InnerKernel *CpuDropoutGradFp32KernelCreator(const std::vector<lite::Ten
|
|||
MS_LOG(ERROR) << "DropoutGrad new kernel failed.";
|
||||
return nullptr;
|
||||
}
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
delete kernel;
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -79,14 +79,6 @@ kernel::InnerKernel *CpuNegGradFp32KernelCreator(const std::vector<lite::Tensor
|
|||
free(param);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << param->name_
|
||||
<< ", type: " << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(param->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -120,14 +120,6 @@ kernel::InnerKernel *CpuPoolingGradFp32KernelCreator(const std::vector<lite::Ten
|
|||
free(opParameter);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto ret = kernel->Init();
|
||||
if (RET_OK != ret) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -96,14 +96,6 @@ kernel::InnerKernel *CpuPowerGradFp32KernelCreator(const std::vector<lite::Tenso
|
|||
free(opParameter);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -209,15 +209,6 @@ kernel::InnerKernel *CpuSgdFp32KernelCreator(const std::vector<lite::Tensor *> &
|
|||
free(opParameter);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -83,13 +83,6 @@ kernel::InnerKernel *CpuSigmoidCrossEntropyWithLogitsFp32KernelCreator(const std
|
|||
MS_LOG(ERROR) << "new SigmoidCrossEntropyWithLogits failed";
|
||||
return nullptr;
|
||||
}
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -83,13 +83,6 @@ kernel::InnerKernel *CpuSigmoidCrossEntropyWithLogitsGradFp32KernelCreator(const
|
|||
MS_LOG(ERROR) << "new SigmoidCrossEntropyWithLogitsGradWithLogitsCPUKernel failed";
|
||||
return nullptr;
|
||||
}
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -94,13 +94,6 @@ kernel::InnerKernel *CpuSmoothL1LossFp32KernelCreator(const std::vector<lite::Te
|
|||
MS_LOG(ERROR) << "new SmoothL1Loss failed";
|
||||
return nullptr;
|
||||
}
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -91,13 +91,6 @@ kernel::InnerKernel *CpuSmoothL1LossGradFp32KernelCreator(const std::vector<lite
|
|||
MS_LOG(ERROR) << "new SmoothL1LossGradWithLogitsCPUKernel failed";
|
||||
return nullptr;
|
||||
}
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -138,13 +138,6 @@ kernel::InnerKernel *CpuSoftmaxCrossEntropyFp32KernelCreator(const std::vector<l
|
|||
free(opParameter);
|
||||
return nullptr;
|
||||
}
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -100,13 +100,6 @@ kernel::InnerKernel *CpuSoftmaxGradFp32KernelCreator(const std::vector<lite::Ten
|
|||
free(opParameter);
|
||||
return nullptr;
|
||||
}
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
} // namespace mindspore::kernel
|
||||
|
|
|
@ -73,13 +73,6 @@ kernel::InnerKernel *CpuActivationInt8KernelCreator(const std::vector<lite::Tens
|
|||
free(parameter);
|
||||
return nullptr;
|
||||
}
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << parameter->name_
|
||||
<< ", type: " << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(parameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -181,13 +181,6 @@ kernel::InnerKernel *CpuArithmeticInt8KernelCreator(const std::vector<lite::Tens
|
|||
free(parameter);
|
||||
return nullptr;
|
||||
}
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << parameter->name_
|
||||
<< ", type: " << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(parameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -129,13 +129,6 @@ kernel::InnerKernel *CpuConvInt8KernelCreator(const std::vector<lite::Tensor *>
|
|||
free(op_parameter);
|
||||
return nullptr;
|
||||
}
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << op_parameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(op_parameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -303,13 +303,6 @@ kernel::InnerKernel *CpuDeConvInt8KernelCreator(const std::vector<lite::Tensor *
|
|||
free(op_parameter);
|
||||
return nullptr;
|
||||
}
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << op_parameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(op_parameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -81,13 +81,6 @@ kernel::InnerKernel *CpuExtractFeatureKernelCreator(const std::vector<lite::Tens
|
|||
free(parameter);
|
||||
return nullptr;
|
||||
}
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << parameter->name_
|
||||
<< ", type: " << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(parameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -80,13 +80,6 @@ kernel::InnerKernel *CpuHashtableLookupKernelCreator(const std::vector<lite::Ten
|
|||
free(parameter);
|
||||
return nullptr;
|
||||
}
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << parameter->name_
|
||||
<< ", type: " << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(parameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -148,13 +148,6 @@ kernel::InnerKernel *CpuNormalizeKernelCreator(const std::vector<lite::Tensor *>
|
|||
free(parameter);
|
||||
return nullptr;
|
||||
}
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << parameter->name_
|
||||
<< ", type: " << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(parameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -109,13 +109,6 @@ kernel::InnerKernel *CpuPredictKernelCreator(const std::vector<lite::Tensor *> &
|
|||
free(parameter);
|
||||
return nullptr;
|
||||
}
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << parameter->name_
|
||||
<< ", type: " << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(parameter->type_));
|
||||
delete kernel;
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
|
|
@ -21,7 +21,8 @@
|
|||
namespace mindspore::lite {
|
||||
ParallelExecutor::~ParallelExecutor() { delete thread_pool_; }
|
||||
int ParallelExecutor::Prepare(const std::vector<mindspore::kernel::LiteKernel *> &kernels,
|
||||
const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
|
||||
const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs,
|
||||
const lite::InnerContext *ctx) {
|
||||
thread_pool_ = InterThreadPool::CreateThreadPool(1, max_thread_num_);
|
||||
if (thread_pool_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Memory error: fail to new ThreadPool";
|
||||
|
@ -39,7 +40,6 @@ static int RunKernel(void *data, int index, float lhs_scale, float rhs_scale) {
|
|||
MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name();
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@ class ParallelExecutor : public Executor {
|
|||
~ParallelExecutor() override;
|
||||
|
||||
int Prepare(const std::vector<kernel::LiteKernel *> &kernels, const std::vector<Tensor *> &inputs,
|
||||
const std::vector<Tensor *> &outputs) override;
|
||||
const std::vector<Tensor *> &outputs, const lite::InnerContext *ctx) override;
|
||||
|
||||
int Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
|
||||
const std::vector<kernel::LiteKernel *> &kernels, mindspore::Allocator *allocator = nullptr,
|
||||
|
|
|
@ -103,13 +103,27 @@ int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) {
|
|||
MS_LOG(ERROR) << "Schedule main subgraph to kernels failed.";
|
||||
return ret;
|
||||
}
|
||||
if (delegate_ != nullptr) {
|
||||
ret = ReplaceDelegateKernels(dst_kernels);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Repalce delegate kernels failed.";
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
FindAllInoutKernels(*dst_kernels);
|
||||
// origin kernel init
|
||||
for (size_t i = 0; i < dst_kernels->size(); i++) {
|
||||
ret = (*dst_kernels)[i]->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Kernel " << (*dst_kernels)[i]->name() << " Init failed.";
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
ret = RunPass(dst_kernels);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Schedule run pass failed.";
|
||||
return ret;
|
||||
}
|
||||
|
||||
auto src_kernel = *dst_kernels;
|
||||
dst_kernels->clear();
|
||||
std::map<const kernel::LiteKernel *, bool> is_kernel_finish;
|
||||
|
@ -122,6 +136,61 @@ int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int Scheduler::ReplaceDelegateKernels(std::vector<kernel::LiteKernel *> *dst_kernels) {
|
||||
std::vector<kernel::Kernel *> kernels;
|
||||
for (size_t i = 0; i < dst_kernels->size(); i++) {
|
||||
kernels.push_back((*dst_kernels)[i]->kernel());
|
||||
}
|
||||
DelegateModel *model = new (std::nothrow) DelegateModel(&kernels, primitives_);
|
||||
if (model == nullptr) {
|
||||
MS_LOG(ERROR) << "New delegate model failed.";
|
||||
return RET_NULL_PTR;
|
||||
}
|
||||
auto ret = delegate_->Build(model);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Delegate prepare kernels failed.";
|
||||
return ret;
|
||||
}
|
||||
|
||||
auto src_kernels = *dst_kernels;
|
||||
dst_kernels->clear();
|
||||
std::map<const kernel::LiteKernel *, bool> delegate_support;
|
||||
for (auto kernel : src_kernels) {
|
||||
delegate_support[kernel] = true;
|
||||
}
|
||||
for (auto kernel : kernels) {
|
||||
size_t index = 0;
|
||||
for (; index < src_kernels.size(); index++) {
|
||||
if (kernel == src_kernels[index]->kernel()) {
|
||||
// Kernels that the delegate does not support keep the original backend
|
||||
dst_kernels->push_back(src_kernels[index]);
|
||||
delegate_support[src_kernels[index]] = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (index == src_kernels.size()) {
|
||||
// New liteKernel to save delegate subgraph
|
||||
std::shared_ptr<kernel::Kernel> shared_kernel(kernel);
|
||||
auto lite_kernel = new (std::nothrow) kernel::LiteKernel(shared_kernel);
|
||||
if (lite_kernel == nullptr) {
|
||||
MS_LOG(ERROR) << "New LiteKernel for delegate subgraph failed.";
|
||||
return RET_NULL_PTR;
|
||||
}
|
||||
kernel::KernelKey delegate_desc{
|
||||
kernel::kDelegate, kernel->inputs()[0]->data_type(), schema::PrimitiveType_NONE, "", "", delegate_};
|
||||
lite_kernel->set_desc(delegate_desc);
|
||||
dst_kernels->push_back(lite_kernel);
|
||||
}
|
||||
}
|
||||
// Release the cpu kernel that has been replace by delegate subgraph
|
||||
for (auto kernel : src_kernels) {
|
||||
if (delegate_support[kernel] == true) {
|
||||
delete kernel;
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
void Scheduler::FindNodeInoutTensors(const lite::Model::Node &node, std::vector<Tensor *> *inputs,
|
||||
std::vector<Tensor *> *outputs) {
|
||||
MS_ASSERT(inputs != nullptr);
|
||||
|
@ -393,7 +462,7 @@ int Scheduler::FindCpuKernel(const std::vector<Tensor *> &in_tensors, const std:
|
|||
RestoreTensorData(&restored_origin_tensors);
|
||||
}
|
||||
return ret;
|
||||
} // namespace mindspore::lite
|
||||
}
|
||||
|
||||
int Scheduler::FindGpuKernel(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
|
||||
OpParameter *op_parameter, const kernel::KernelKey &desc, kernel::LiteKernel **kernel) {
|
||||
|
@ -610,6 +679,13 @@ kernel::LiteKernel *Scheduler::SchedulePartialToKernel(const lite::Model::Node *
|
|||
MS_LOG(ERROR) << "Schedule partial failed, name: " << src_node->name_;
|
||||
return nullptr;
|
||||
}
|
||||
for (auto kernel : sub_kernels) {
|
||||
ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Schedule partial kernel init failed, name: " << kernel->name();
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
FindAllInoutKernels(sub_kernels);
|
||||
ret = RunPass(&sub_kernels);
|
||||
|
@ -667,6 +743,7 @@ int Scheduler::ScheduleSubGraphToKernels(size_t subgraph_index, std::vector<kern
|
|||
}
|
||||
kernel->set_is_model_output(IsContain(graph_output_node_indexes_, size_t(node_index)));
|
||||
dst_kernels->emplace_back(kernel);
|
||||
primitives_.emplace(kernel->kernel(), static_cast<const schema::Primitive *>(primitive));
|
||||
}
|
||||
if (in_tensors != nullptr) {
|
||||
std::transform(subgraph->input_indices_.begin(), subgraph->input_indices_.end(), std::back_inserter(*in_tensors),
|
||||
|
@ -791,25 +868,34 @@ int Scheduler::ConstructSubGraphs(std::vector<kernel::LiteKernel *> src_kernel,
|
|||
|
||||
head_kernels.push_back(head_kernel);
|
||||
|
||||
auto cur_sub_graph_type = mindspore::lite::Scheduler::GetKernelSubGraphType(head_kernels[0]);
|
||||
auto sub_kernels = FindAllSubGraphKernels(head_kernels, is_kernel_finish);
|
||||
auto subgraph = CreateSubGraphKernel(sub_kernels, nullptr, nullptr, cur_sub_graph_type);
|
||||
if (subgraph == nullptr) {
|
||||
MS_LOG(ERROR) << "Create SubGraphKernel failed";
|
||||
return RET_ERROR;
|
||||
auto subgraph_delegate = head_kernel->desc().delegate;
|
||||
if (subgraph_delegate != nullptr) {
|
||||
dst_kernel->emplace_back(head_kernel);
|
||||
(*is_kernel_finish)[head_kernel] = true;
|
||||
} else {
|
||||
auto cur_sub_graph_type = mindspore::lite::Scheduler::GetKernelSubGraphType(head_kernels[0]);
|
||||
auto sub_kernels = FindAllSubGraphKernels(head_kernels, is_kernel_finish);
|
||||
auto subgraph = CreateSubGraphKernel(sub_kernels, nullptr, nullptr, cur_sub_graph_type);
|
||||
if (subgraph == nullptr) {
|
||||
MS_LOG(ERROR) << "Create SubGraphKernel failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
dst_kernel->emplace_back(subgraph);
|
||||
}
|
||||
dst_kernel->emplace_back(subgraph);
|
||||
} /* end when all kernel converted */
|
||||
|
||||
for (auto *subgraph : *dst_kernel) {
|
||||
auto ret = subgraph->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init SubGraph failed: " << ret;
|
||||
return ret;
|
||||
auto subgraph_delegate = subgraph->desc().delegate;
|
||||
if (subgraph_delegate == nullptr) {
|
||||
auto ret = subgraph->Init();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Init SubGraph failed: " << ret;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
} // namespace mindspore::lite
|
||||
}
|
||||
|
||||
bool Scheduler::MergeOpIsReady(const kernel::LiteKernel *kernel,
|
||||
std::map<const kernel::LiteKernel *, bool> is_kernel_finish) {
|
||||
|
|
|
@ -19,6 +19,7 @@

#include <utility>
#include <vector>
#include <memory>
#include <map>
#include "src/sub_graph_kernel.h"
#include "src/inner_context.h"

@ -26,21 +27,29 @@
#if SUPPORT_NPU
#include "src/runtime/agent/npu/optimizer/npu_pass_manager.h"
#endif
#include "include/delegate.h"

namespace mindspore::lite {
class Scheduler {
public:
Scheduler(const InnerContext *ctx, Model *src_model, std::vector<Tensor *> *src_tensors, bool is_train_session)
: context_(ctx), src_model_(src_model), src_tensors_(src_tensors), is_train_session_(is_train_session) {}
Scheduler(const InnerContext *ctx, Model *src_model, std::vector<Tensor *> *src_tensors, bool is_train_session,
std::shared_ptr<Delegate> delegate = nullptr)
: context_(ctx),
src_model_(src_model),
src_tensors_(src_tensors),
is_train_session_(is_train_session),
delegate_(delegate) {}
#if SUPPORT_NPU
Scheduler(const InnerContext *ctx, Model *src_model, std::vector<Tensor *> *src_tensors, bool is_train_session,
NPUManager *npu_manager = nullptr, NPUPassManager *npu_pass_manager = nullptr)
NPUManager *npu_manager = nullptr, NPUPassManager *npu_pass_manager = nullptr,
std::shared_ptr<Delegate> delegate = nullptr)
: context_(ctx),
src_model_(src_model),
src_tensors_(src_tensors),
npu_manager_(npu_manager),
npu_pass_manager_(npu_pass_manager),
is_train_session_(is_train_session) {}
is_train_session_(is_train_session),
delegate_(delegate) {}
#endif
~Scheduler() = default;

@ -71,6 +80,9 @@ class Scheduler {

int FindProviderKernel(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
const Model::Node *node, TypeId data_type, kernel::LiteKernel **kernel);

int ReplaceDelegateKernels(std::vector<kernel::LiteKernel *> *dst_kernels);

// schedule a partial node to a subgraph_kernel
kernel::LiteKernel *SchedulePartialToKernel(const lite::Model::Node *src_node);
// schedule a node to a kernel

@ -120,6 +132,8 @@ class Scheduler {
std::vector<size_t> graph_output_node_indexes_;
std::map<int, OpParameter *> op_parameters_;
bool is_train_session_ = false;
std::map<kernel::Kernel *, const schema::Primitive *> primitives_;
std::shared_ptr<Delegate> delegate_ = nullptr;
};
} // namespace mindspore::lite
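For readers new to the delegate hook added here: a third-party framework subclasses Delegate, receives the DelegateModel built from the scheduled kernels, and replaces the spans it can execute with its own graph kernel. The sketch below is an illustration under stated assumptions, not code from this commit; MyGraphKernel and IsOpSupported are hypothetical names, and only the Delegate/DelegateModel interfaces introduced by the commit are assumed.

#include "include/delegate.h"
#include "include/errorcode.h"

// Sketch of a hypothetical third-party delegate. It scans the kernel list,
// takes over the longest supported prefix of ops, and leaves the rest to lite.
class MyDelegate : public mindspore::Delegate {
 public:
  int Init() override { return mindspore::lite::RET_OK; }  // e.g. bring up the vendor runtime

  int Build(mindspore::DelegateModel *model) override {
    auto from = model->BeginKernelIterator();
    auto end = from;
    // IsOpSupported() stands in for the framework's own capability check,
    // driven by the flatbuffer op definition exposed through GetPrimitive().
    while (end != model->EndKernelIterator() && IsOpSupported(model->GetPrimitive(*end))) {
      ++end;
    }
    if (from == end) {
      return mindspore::lite::RET_OK;  // nothing supported, lite keeps all kernels
    }
    // MyGraphKernel is a hypothetical kernel::Kernel subclass wrapping the
    // framework's compiled graph; Replace() splices it in for the [from, end) span.
    auto *graph_kernel = new (std::nothrow) MyGraphKernel(from, end);
    if (graph_kernel == nullptr) {
      return mindspore::lite::RET_ERROR;
    }
    model->Replace(from, end, graph_kernel);
    return mindspore::lite::RET_OK;
  }

 private:
  bool IsOpSupported(const mindspore::schema::Primitive *primitive);  // hypothetical check
};

Such a delegate would then be handed to the runtime, for example through the extended Scheduler constructor shown above, e.g. Scheduler(context, model, &tensors, false, std::make_shared<MyDelegate>()).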
@ -78,7 +78,9 @@ TEST_F(TestStridedSlice, StridedSlice) {
ASSERT_EQ(lite::RET_OK, ctx->Init());
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(parameter), ctx.get(), desc);
ASSERT_NE(kernel, nullptr);
auto ret = kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);
float expect[2] = {0.2390374, 0.05051243};
ASSERT_NEAR(output_data[0], expect[0], 0.001);
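The remaining hunks in this commit apply the same mechanical change across the operator unit tests: every kernel obtained from a registry creator is now expected to pass Init() before Run(), and both return codes are checked. A hedged illustration of the pattern the tests converge on follows; InitAndRun is a hypothetical helper for this document, not part of the commit, and the concrete kernel type is whatever the creator returns in each test.

// Hypothetical helper expressing the lifecycle the updated tests expect:
// a creator-produced kernel must Init() successfully before it may Run().
template <typename KernelT>
int InitAndRun(KernelT *kernel) {
  auto ret = kernel->Init();
  if (ret != 0) {
    return ret;  // surface the Init failure instead of running an unprepared kernel
  }
  return kernel->Run();
}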
@ -120,7 +122,9 @@ TEST_F(TestStridedSlice, 7d) {
|
|||
ASSERT_EQ(lite::RET_OK, ctx->Init());
|
||||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(parameter), ctx.get(), desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
float expect[2] = {0.92039955, 0.49574447};
|
||||
ASSERT_NEAR(output_data[0], expect[0], 0.001);
|
||||
|
@ -163,7 +167,9 @@ TEST_F(TestStridedSlice, 8d) {
|
|||
ASSERT_EQ(lite::RET_OK, ctx->Init());
|
||||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(parameter), ctx.get(), desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
int8_t expect[4] = {-9, -7};
|
||||
for (unsigned int i = 0; i < sizeof(expect); ++i) {
|
||||
|
@ -206,7 +212,9 @@ TEST_F(TestStridedSlice, FastRun7d) {
|
|||
ASSERT_EQ(lite::RET_OK, ctx->Init());
|
||||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(parameter), ctx.get(), desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
float expect[4] = {0.92039955, 0.49574447, 0.02647042, 0.4566604};
|
||||
ASSERT_NEAR(output_data[0], expect[0], 0.001);
|
||||
|
@ -251,7 +259,9 @@ TEST_F(TestStridedSlice, FastRun7dSingleThread) {
|
|||
ASSERT_EQ(lite::RET_OK, ctx->Init());
|
||||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(parameter), ctx.get(), desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
float expect[4] = {0.92039955, 0.49574447, 0.02647042, 0.4566604};
|
||||
ASSERT_NEAR(output_data[0], expect[0], 0.001);
|
||||
|
@ -295,7 +305,9 @@ TEST_F(TestStridedSlice, StridedSliceInt8) {
|
|||
ASSERT_EQ(lite::RET_OK, ctx->Init());
|
||||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(parameter), ctx.get(), desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
int8_t expect[4] = {-6, -5, 7, 8};
|
||||
for (unsigned int i = 0; i < sizeof(expect); ++i) {
|
||||
|
@ -76,6 +76,8 @@ void TestReduceFp16::Prepare(const std::vector<int> &input_shape, const std::vec
ASSERT_NE(creator_, nullptr);
kernel_ = creator_(inputs_, outputs_, reinterpret_cast<OpParameter *>(&param_), &ctx_, desc);
ASSERT_NE(kernel_, nullptr);
auto ret = kernel_->Init();
EXPECT_EQ(0, ret);
}
TEST_F(TestReduceFp16, Mean) {
float in[96] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
@ -128,7 +128,10 @@ TEST_F(TestActivationFp32, HSwishFp32) {
|
|||
auto *kernel = creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), &ctx, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
auto output_tensor_shape = output0_tensor.shape();
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
std::vector<float> expect_output = {-0, -0.33333334, -0.33333334, 0, 0.6666667, 5, 6, 7};
|
||||
ASSERT_EQ(0, CompareOutputData(output.data(), expect_output.data(), 8, 0.00001));
|
||||
|
@ -171,7 +174,10 @@ TEST_F(TestActivationFp32, HardTanh1) {
|
|||
auto *kernel = creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), &ctx, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
auto output_tensor_shape = output0_tensor.shape();
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
std::vector<float> expect_output = {-1.0, -1.0, -0.5, 0.0, 0.5, 1.0, 1.0, 1.0};
|
||||
ASSERT_EQ(0, CompareOutputData(output.data(), expect_output.data(), 8, 0.00001));
|
||||
|
@ -214,7 +220,10 @@ TEST_F(TestActivationFp32, HardTanh2) {
|
|||
auto *kernel = creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), &ctx, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
auto output_tensor_shape = output0_tensor.shape();
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
std::vector<float> expect_output = {-2.0, -2.0, -1.0, 0.0, 1.0, 2.0, 2.0, 2.0};
|
||||
ASSERT_EQ(0, CompareOutputData(output.data(), expect_output.data(), 8, 0.00001));
|
||||
|
@ -255,8 +264,10 @@ TEST_F(TestActivationFp32, Softplus) {
|
|||
auto *kernel = creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), &ctx, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
auto output_tensor_shape = output0_tensor.shape();
|
||||
auto ret = kernel->Run();
|
||||
ASSERT_EQ(0, ret);
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
std::vector<float> expect_output = {1.3132616, 2.1269281, 3.0485871, 4.0181499, 5.0067153,
|
||||
0.31326169, 6.0024757, 7.0009117, 0.0000453989, 0.0000000002,
|
||||
20.00000000, 30.00000000, 14.0000000, 0.69314718};
|
||||
|
|
|
@ -61,7 +61,10 @@ TEST_F(TestBatchnormFp32, BNTest) {
|
|||
auto *kernel = creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), &ctx, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
auto output_tensor_shape = output0_tensor.shape();
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
printf("==================output data=================\n");
|
||||
for (int i = 0; i < output0_tensor.ElementsNum(); i++) {
|
||||
|
@ -116,7 +119,10 @@ TEST_F(TestBatchnormFp32, FusedBNTest) {
|
|||
ASSERT_EQ(lite::RET_OK, ctx.Init());
|
||||
auto *kernel = creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), &ctx, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
printf("==================output data=================\n");
|
||||
for (int i = 0; i < output0.ElementsNum(); i++) {
|
||||
|
@ -166,7 +172,10 @@ TEST_F(TestBatchnormFp32, easyTest) {
|
|||
ASSERT_EQ(lite::RET_OK, ctx.Init());
|
||||
auto *kernel = creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), &ctx, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
printf("==================output data=================\n");
|
||||
for (int i = 0; i < output0.ElementsNum(); i++) {
|
||||
|
|
|
@ -119,7 +119,10 @@ TEST_F(TestConvolutionDwFp32, ConvDwFp32Accuracy) {
|
|||
auto *kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), ctx, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
// op run
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
std::cout << "==================output data=================" << std::endl;
|
||||
auto output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData());
|
||||
|
@ -169,6 +172,8 @@ TEST_F(TestConvolutionDwFp32, ConvDwFp32Performance) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto *kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), ctx, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
/* running warm up */
|
||||
for (int i = 0; i < 3; i++) {
|
||||
|
|
|
@ -54,7 +54,9 @@ TEST_F(TestCumsum, TestThread1) {
|
|||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(parameter), ctx.get(), desc);
|
||||
EXPECT_NE(kernel, nullptr);
|
||||
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
EXPECT_NEAR(1.0f, output_data0[0], 0.000001);
|
||||
EXPECT_NEAR(1.0f, output_data0[1], 0.000001);
|
||||
|
@ -106,7 +108,9 @@ TEST_F(TestCumsum, TestExclusive) {
|
|||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(parameter), ctx.get(), desc);
|
||||
EXPECT_NE(kernel, nullptr);
|
||||
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
EXPECT_NEAR(0.0f, output_data0[0], 0.000001);
|
||||
EXPECT_NEAR(0.0f, output_data0[1], 0.000001);
|
||||
|
@ -158,7 +162,9 @@ TEST_F(TestCumsum, TestReverse) {
|
|||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(parameter), ctx.get(), desc);
|
||||
EXPECT_NE(kernel, nullptr);
|
||||
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
EXPECT_NEAR(6.0f, output_data0[0], 0.000001);
|
||||
EXPECT_NEAR(6.0f, output_data0[1], 0.000001);
|
||||
|
@ -210,7 +216,9 @@ TEST_F(TestCumsum, TestReverseExclusive) {
|
|||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(parameter), ctx.get(), desc);
|
||||
EXPECT_NE(kernel, nullptr);
|
||||
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
EXPECT_NEAR(5.0f, output_data0[0], 0.000001);
|
||||
EXPECT_NEAR(5.0f, output_data0[1], 0.000001);
|
||||
|
@ -263,7 +271,9 @@ TEST_F(TestCumsum, TestIntRank2) {
|
|||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(parameter), ctx.get(), desc);
|
||||
EXPECT_NE(kernel, nullptr);
|
||||
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
EXPECT_EQ(1, output_data0[0]);
|
||||
EXPECT_EQ(3, output_data0[1]);
|
||||
|
@ -309,7 +319,9 @@ TEST_F(TestCumsum, TestIntRank2Thread2) {
|
|||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(parameter), ctx.get(), desc);
|
||||
EXPECT_NE(kernel, nullptr);
|
||||
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
EXPECT_EQ(1, output_data0[0]);
|
||||
EXPECT_EQ(3, output_data0[1]);
|
||||
|
@ -356,7 +368,9 @@ TEST_F(TestCumsum, TestIntRank2Thread4) {
|
|||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(parameter), ctx.get(), desc);
|
||||
EXPECT_NE(kernel, nullptr);
|
||||
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
EXPECT_EQ(1, output_data0[0]);
|
||||
EXPECT_EQ(3, output_data0[1]);
|
||||
|
|
|
@ -71,6 +71,8 @@ void TestL2NormFp32::Init(const std::vector<int> &input_shape, const std::vector
|
|||
ASSERT_NE(creator_, nullptr);
|
||||
kernel_ = creator_(inputs_, outputs_, reinterpret_cast<OpParameter *>(¶m_), &ctx_, desc);
|
||||
ASSERT_NE(kernel_, nullptr);
|
||||
auto ret = kernel_->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
}
|
||||
|
||||
// 2thread all axis no_activation
|
||||
|
|
|
@ -66,7 +66,9 @@ TEST_F(TestLshProjectionFp32, Dense1DInputs) {
|
|||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(¶meter), ctx.get(), desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
std::vector<int32_t> except_result = {0, 0, 0, 1, 0, 0};
|
||||
|
@ -106,7 +108,9 @@ TEST_F(TestLshProjectionFp32, Sparse1DInputs) {
|
|||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(¶meter), ctx.get(), desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
std::vector<int32_t> except_result = {0, 5, 8};
|
||||
|
@ -150,7 +154,9 @@ TEST_F(TestLshProjectionFp32, Sparse3DInputs) {
|
|||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(¶meter), ctx.get(), desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
std::vector<int32_t> except_result = {2, 5, 9};
|
||||
|
|
|
@ -156,7 +156,10 @@ TEST_F(LstmFp32, LstmForwardFp32Accuracy) {
|
|||
auto *kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(lstm_param), ctx, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
// op run
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
std::cout << "==================output data=================" << std::endl;
|
||||
std::vector<float> output0_data = {-0.0702, 0.1225, 0.0876, -0.0357, -0.0227, -0.2294,
|
||||
|
@ -304,7 +307,10 @@ TEST_F(LstmFp32, LstmBackwardFp32Accuracy) {
|
|||
auto *kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(lstm_param), ctx, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
// op run
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
std::cout << "==================output data=================" << std::endl;
|
||||
std::vector<float> output0_data = {-0.2922, -0.1416, 0.0077, -0.0422, -0.0585, 0.2061, -0.2385, -0.0146,
|
||||
|
|
|
@ -88,6 +88,8 @@ void TestNMSFp32::Init(const std::vector<int> &box_tensor_shape, float *box_data
|
|||
ASSERT_NE(creator_, nullptr);
|
||||
kernel_ = creator_(inputs_, outputs_, reinterpret_cast<OpParameter *>(¶m_), &ctx_, desc_);
|
||||
ASSERT_NE(kernel_, nullptr);
|
||||
auto ret = kernel_->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
}
|
||||
|
||||
TEST_F(TestNMSFp32, TestCase1) {
|
||||
|
|
|
@ -75,6 +75,8 @@ void TestOneHotFp32::Prepare(const std::vector<int> &indices_shape, int *indices
|
|||
ctx_.Init();
|
||||
creator_ = lite::KernelRegistry::GetInstance()->GetCreator(desc);
|
||||
kernel_ = creator_(inputs_, outputs_, reinterpret_cast<OpParameter *>(param_), &ctx_, desc);
|
||||
auto ret = kernel_->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
}
|
||||
|
||||
// 3 3 axis -1 -> 3 3 4
|
||||
|
|
|
@ -90,6 +90,8 @@ void TestPadFp32::Prepare(const std::vector<int> &input_shape, const std::vector
|
|||
ASSERT_NE(creator_, nullptr);
|
||||
kernel_ = creator_(inputs_, outputs_, reinterpret_cast<OpParameter *>(¶m_), &ctx_, desc);
|
||||
ASSERT_NE(kernel_, nullptr);
|
||||
auto ret = kernel_->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
}
|
||||
|
||||
TEST_F(TestPadFp32, TestPad1) {
|
||||
|
|
|
@ -89,6 +89,8 @@ void TestReduceFp32::Prepare(const std::vector<int> &in_shape, const std::vector
|
|||
}
|
||||
ctx_->thread_num_ = thread_num_;
|
||||
kernel_ = creator_(inputs, outputs, reinterpret_cast<OpParameter *>(¶m_), ctx_, desc_);
|
||||
auto ret = kernel_->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
}
|
||||
|
||||
TEST_F(TestReduceFp32, Mean1) {
|
||||
|
|
|
@ -71,6 +71,8 @@ void TestResizeBilinearFp32::Prepare(const std::vector<int> &input_shape, const
|
|||
ASSERT_NE(creator_, nullptr);
|
||||
kernel_ = creator_(inputs_, outputs_, reinterpret_cast<OpParameter *>(¶m_), &ctx_, desc);
|
||||
ASSERT_NE(kernel_, nullptr);
|
||||
auto ret = kernel_->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
}
|
||||
|
||||
// 1*1 -> 1*1
|
||||
|
|
|
@ -66,6 +66,8 @@ void TestResizeNearestNeighborFp32::Prepare(const std::vector<int> &input_shape,
|
|||
ASSERT_NE(creator_, nullptr);
|
||||
kernel_ = creator_(inputs_, outputs_, reinterpret_cast<OpParameter *>(¶m_), &ctx_, desc);
|
||||
ASSERT_NE(kernel_, nullptr);
|
||||
auto ret = kernel_->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
}
|
||||
// 1*1 -> 1*1
|
||||
TEST_F(TestResizeNearestNeighborFp32, ResizeNearestNeighborTest1) {
|
||||
|
|
|
@ -54,7 +54,9 @@ TEST_F(TestReverseSequenceFp32, BatchLessSeq) {
|
|||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(¶meter), ctx.get(), desc);
|
||||
EXPECT_NE(kernel, nullptr);
|
||||
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
float expect[] = {2, 3, 0, 1, 4, 5, 6, 7, 12, 13, 10, 11, 8, 9, 14, 15, 22, 23, 20, 21, 18, 19, 16, 17,
|
||||
|
@ -98,7 +100,9 @@ TEST_F(TestReverseSequenceFp32, BatchGreaterSeq) {
|
|||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(¶meter), ctx.get(), desc);
|
||||
EXPECT_NE(kernel, nullptr);
|
||||
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
float expect[] = {8, 9, 18, 19, 20, 21, 14, 15, 0, 1, 10, 11, 12, 13, 6, 7, 16, 17, 2, 3, 4, 5, 22, 23,
|
||||
|
@ -142,7 +146,9 @@ TEST_F(TestReverseSequenceFp32, BatchSeqNotAdjacent) {
|
|||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(¶meter), ctx.get(), desc);
|
||||
EXPECT_NE(kernel, nullptr);
|
||||
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
float expect[] = {2, 3, 0, 1, 4, 5, 6, 7, 10, 11, 8, 9, 12, 13, 14, 15, 18, 19, 16, 17, 20, 21, 22, 23,
|
||||
|
|
|
@ -91,6 +91,8 @@ void TestScaleFp32::Prepare(const std::vector<int> &input_shape, const std::vect
|
|||
ASSERT_NE(creator_, nullptr);
|
||||
kernel_ = creator_(inputs_, outputs_, reinterpret_cast<OpParameter *>(¶m_), &ctx_, desc_);
|
||||
ASSERT_NE(kernel_, nullptr);
|
||||
auto ret = kernel_->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
}
|
||||
|
||||
TEST_F(TestScaleFp32, ScaleNoAct) {
|
||||
|
|
|
@ -46,7 +46,9 @@ TEST_F(TestSoftmaxFp32, 001) {
|
|||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(¶meter), ctx.get(), desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
float expect[] = {0.2f, 0.2f, 0.2f, 0.2f, 0.2f, 0.2f, 0.2f, 0.2f, 0.2f, 0.2f};
|
||||
|
|
|
@ -82,8 +82,10 @@ TEST_F(SpaceToDepthTestFp32, SpaceToDepthTest2) {
|
|||
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
|
||||
ASSERT_NE(creator, nullptr);
|
||||
auto *kernel = creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), &ctx, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
for (int i = 0; i < out_size; ++i) {
|
||||
std::cout << output[i] << " ";
|
||||
|
|
|
@ -91,7 +91,10 @@ TEST_F(TestSparseToDenseFp32, SparseToDense_test1) {
|
|||
ASSERT_NE(kernel, nullptr);
|
||||
auto output_tensor_shape = output0_tensor->shape();
|
||||
ASSERT_EQ(output_tensor_shape, output_shape);
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
std::vector<float> except_result = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
||||
|
@ -174,7 +177,10 @@ TEST_F(TestSparseToDenseFp32, SparseToDense_test2) {
|
|||
ASSERT_NE(kernel, nullptr);
|
||||
auto output_tensor_shape = output0_tensor->shape();
|
||||
ASSERT_EQ(output_tensor_shape, output_shape);
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
std::vector<float> except_result = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0,
|
||||
|
@ -257,7 +263,10 @@ TEST_F(TestSparseToDenseFp32, SparseToDense_test3) {
|
|||
ASSERT_NE(kernel, nullptr);
|
||||
auto output_tensor_shape = output0_tensor->shape();
|
||||
ASSERT_EQ(output_tensor_shape, output_shape);
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
std::vector<float> except_result = {0, 1, 0, 1, 1, 0, 0, 0, 0, 0};
|
||||
PrintData("output data", output, output_size);
|
||||
|
@ -338,7 +347,10 @@ TEST_F(TestSparseToDenseFp32, SparseToDense_test4) {
|
|||
ASSERT_NE(kernel, nullptr);
|
||||
auto output_tensor_shape = output0_tensor->shape();
|
||||
ASSERT_EQ(output_tensor_shape, output_shape);
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
std::vector<float> except_result = {0, 0, 0, 0, 0, 1, 0, 0, 0, 0};
|
||||
PrintData("output data", output, output_size);
|
||||
|
@ -419,7 +431,10 @@ TEST_F(TestSparseToDenseFp32, SparseToDense_test5) {
|
|||
ASSERT_NE(kernel, nullptr);
|
||||
auto output_tensor_shape = output0_tensor->shape();
|
||||
ASSERT_EQ(output_tensor_shape, output_shape);
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
std::vector<float> except_result = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0,
|
||||
|
|
|
@ -159,7 +159,10 @@ TEST_F(TestStridedSliceFp32, StridedSlice3) {
|
|||
auto *kernel =
|
||||
creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(strided_slice_param), ctx, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
delete ctx;
|
||||
|
||||
ASSERT_EQ(0, CompareOutputData(output_data, correct, 2, 0.000001));
|
||||
|
@ -209,7 +212,10 @@ TEST_F(TestStridedSliceFp32, StridedSlice4) {
|
|||
auto *kernel =
|
||||
creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(strided_slice_param), ctx, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
delete ctx;
|
||||
|
||||
ASSERT_EQ(0, CompareOutputData(output_data, correct, 4, 0.000001));
|
||||
|
@ -266,7 +272,10 @@ TEST_F(TestStridedSliceFp32, StridedSlice5) {
|
|||
auto *kernel =
|
||||
creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(strided_slice_param), ctx, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
delete ctx;
|
||||
|
||||
ASSERT_EQ(0, CompareOutputData(output_data, correct, 12, 0.000001));
|
||||
|
@ -323,7 +332,10 @@ TEST_F(TestStridedSliceFp32, StridedSlice6) {
|
|||
auto *kernel =
|
||||
creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(strided_slice_param), ctx, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
delete ctx;
|
||||
|
||||
ASSERT_EQ(0, CompareOutputData(output_data, correct, 8, 0.000001));
|
||||
|
@ -372,7 +384,10 @@ TEST_F(TestStridedSliceFp32, StridedSlice7) {
|
|||
auto *kernel =
|
||||
creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(strided_slice_param), ctx, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
delete ctx;
|
||||
|
||||
ASSERT_EQ(0, CompareOutputData(output_data, correct, 1, 0.000001));
|
||||
|
@ -429,7 +444,10 @@ TEST_F(TestStridedSliceFp32, StridedSlice8) {
|
|||
auto *kernel =
|
||||
creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(strided_slice_param), ctx, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
delete ctx;
|
||||
|
||||
ASSERT_EQ(0, CompareOutputData(output_data, correct, 5, 0.000001));
|
||||
|
@ -579,7 +597,10 @@ TEST_F(TestStridedSliceFp32, StridedSlice9) {
|
|||
auto *kernel =
|
||||
creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(strided_slice_param), ctx, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
delete ctx;
|
||||
|
||||
ASSERT_EQ(0, CompareOutputData(output_data, correct, 490, 0.000001));
|
||||
|
|
|
@ -56,7 +56,9 @@ TEST_F(TestTileFp32, Tile) {
|
|||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(¶meter), ctx.get(), desc);
|
||||
EXPECT_NE(kernel, nullptr);
|
||||
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
float expect[] = {1, 2, 1, 2, 1, 2, 3, 4, 3, 4, 3, 4, 1, 2, 1, 2, 1, 2, 3, 4, 3, 4, 3, 4};
|
||||
|
@ -101,7 +103,9 @@ TEST_F(TestTileFp32, SimpleTile1) {
|
|||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(¶meter), context, desc);
|
||||
EXPECT_NE(kernel, nullptr);
|
||||
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
float expect[] = {1, 2, 3, 4, 1, 2, 3, 4};
|
||||
|
@ -146,7 +150,9 @@ TEST_F(TestTileFp32, SimpleTile2) {
|
|||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(¶meter), context, desc);
|
||||
EXPECT_NE(kernel, nullptr);
|
||||
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
float expect[] = {1, 2, 1, 2, 3, 4, 3, 4};
|
||||
|
|
|
@ -51,7 +51,9 @@ TEST_F(TestTopKFp32, TopK) {
|
|||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(¶meter), ctx.get(), desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
float expect0[] = {3, 2, 6, 5, 9, 8, 12, 11};
|
||||
|
|
|
@ -56,8 +56,11 @@ TEST_F(TestTransposeFp32, 10D) {
|
|||
ASSERT_EQ(lite::RET_OK, ctx->Init());
|
||||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), ctx.get(), desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
float expect[24] = {1, 5, 9, 13, 17, 21, 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23, 4, 8, 12, 16, 20, 24};
|
||||
for (int i = 0; i < 24; ++i) {
|
||||
ASSERT_NEAR(out[i], expect[i], 0.001);
|
||||
|
@ -94,8 +97,11 @@ TEST_F(TestTransposeFp32, 10DSingleThread) {
|
|||
ASSERT_EQ(lite::RET_OK, ctx->Init());
|
||||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), ctx.get(), desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
float expect[24] = {1, 5, 9, 13, 17, 21, 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23, 4, 8, 12, 16, 20, 24};
|
||||
for (int i = 0; i < 24; ++i) {
|
||||
ASSERT_NEAR(out[i], expect[i], 0.001);
|
||||
|
@ -239,8 +245,11 @@ TEST_F(TestTransposeFp32, TransposeFp32_test5) { /* 1x2x3x2x2 */
|
|||
auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
|
||||
ASSERT_NE(creator, nullptr);
|
||||
auto *kernel = creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(param), &ctx, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
for (int i = 0; i < 24; ++i) {
|
||||
std::cout << output[i] << " ";
|
||||
}
|
||||
|
|
|
@ -46,7 +46,9 @@ TEST_F(TestUniformRealFp32, UniformReal) {
|
|||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(¶meter), ctx.get(), desc);
|
||||
EXPECT_NE(kernel, nullptr);
|
||||
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
EXPECT_NEAR(0.138693, output_data0[0], 0.000001);
|
||||
EXPECT_NEAR(0.511552, output_data0[1], 0.000001);
|
||||
|
|
|
@ -50,7 +50,9 @@ TEST_F(TestUniqueFp32, Unique) {
|
|||
auto kernel = creator(inputs, outputs, ¶meter, ctx.get(), desc);
|
||||
EXPECT_NE(kernel, nullptr);
|
||||
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
float expect0[] = {1, 2, 4, 7, 8};
|
||||
|
|
|
@ -56,7 +56,9 @@ TEST_F(TestUnstackFp32, Unstack) {
|
|||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(¶meter), ctx.get(), desc);
|
||||
EXPECT_NE(kernel, nullptr);
|
||||
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
float expect0[] = {1, 2, 9, 10, 17, 18};
|
||||
|
@ -104,7 +106,9 @@ TEST_F(TestUnstackFp32, Unstack2) {
|
|||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(¶meter), ctx.get(), desc);
|
||||
EXPECT_NE(kernel, nullptr);
|
||||
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
float expect0[] = {1, 2, 3, 4, 5, 6, 7, 8};
|
||||
|
|
|
@ -212,7 +212,10 @@ TEST_F(TestArithmeticGradFp32, TestAddGradFp32) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc);
|
||||
ASSERT_NE(kernel_obj, nullptr);
|
||||
kernel_obj->Run();
|
||||
auto ret = kernel_obj->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel_obj->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData());
|
||||
printf("==================output data=================\n");
|
||||
|
@ -254,7 +257,10 @@ TEST_F(TestArithmeticGradFp32, TestAddGrad2Fp32) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc);
|
||||
ASSERT_NE(kernel_obj, nullptr);
|
||||
kernel_obj->Run();
|
||||
auto ret = kernel_obj->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel_obj->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData());
|
||||
printf("==================output data=================\n");
|
||||
|
@ -298,7 +304,10 @@ TEST_F(TestArithmeticGradFp32, TestAddGrad3Fp32) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc);
|
||||
ASSERT_NE(kernel_obj, nullptr);
|
||||
kernel_obj->Run();
|
||||
auto ret = kernel_obj->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel_obj->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData());
|
||||
printf("==================output data=================\n");
|
||||
|
@ -343,7 +352,10 @@ TEST_F(TestArithmeticGradFp32, TestSubGradFp32) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc);
|
||||
ASSERT_NE(kernel_obj, nullptr);
|
||||
kernel_obj->Run();
|
||||
auto ret = kernel_obj->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel_obj->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData());
|
||||
printf("==================output data=================\n");
|
||||
|
@ -388,7 +400,10 @@ TEST_F(TestArithmeticGradFp32, TestSubGrad2Fp32) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc);
|
||||
ASSERT_NE(kernel_obj, nullptr);
|
||||
kernel_obj->Run();
|
||||
auto ret = kernel_obj->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel_obj->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData());
|
||||
printf("==================output data=================\n");
|
||||
|
@ -431,6 +446,9 @@ TEST_F(TestArithmeticGradFp32, TestMulGradFp32) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc);
|
||||
ASSERT_NE(kernel_obj, nullptr);
|
||||
auto ret = kernel_obj->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
int loop_count = 1000;
|
||||
auto time_start = mindspore::lite::GetTimeUs();
|
||||
for (int i = 0; i < loop_count; i++) {
|
||||
|
@ -483,7 +501,10 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad2Fp32) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc);
|
||||
ASSERT_NE(kernel_obj, nullptr);
|
||||
kernel_obj->Run();
|
||||
auto ret = kernel_obj->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel_obj->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData());
|
||||
printf("==================output data=================\n");
|
||||
|
@ -527,7 +548,10 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad3Fp32) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc);
|
||||
ASSERT_NE(kernel_obj, nullptr);
|
||||
kernel_obj->Run();
|
||||
auto ret = kernel_obj->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel_obj->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData());
|
||||
printf("==================output data=================\n");
|
||||
|
@ -571,7 +595,10 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad4Fp32) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc);
|
||||
ASSERT_NE(kernel_obj, nullptr);
|
||||
kernel_obj->Run();
|
||||
auto ret = kernel_obj->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel_obj->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData());
|
||||
printf("==================output data=================\n");
|
||||
|
@ -615,7 +642,10 @@ TEST_F(TestArithmeticGradFp32, TestDivGradFp32) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc);
|
||||
ASSERT_NE(kernel_obj, nullptr);
|
||||
kernel_obj->Run();
|
||||
auto ret = kernel_obj->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel_obj->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData());
|
||||
printf("==================output data=================\n");
|
||||
|
@ -659,7 +689,10 @@ TEST_F(TestArithmeticGradFp32, TestDivGrad2Fp32) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc);
|
||||
ASSERT_NE(kernel_obj, nullptr);
|
||||
kernel_obj->Run();
|
||||
auto ret = kernel_obj->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel_obj->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
float *output_ptr = reinterpret_cast<float *>(outputs[0]->MutableData());
|
||||
printf("==================output data=================\n");
|
||||
|
@ -704,7 +737,10 @@ TEST_F(TestArithmeticGradFp32, TestDivGrad3Fp32) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc);
|
||||
ASSERT_NE(kernel_obj, nullptr);
|
||||
kernel_obj->Run();
|
||||
auto ret = kernel_obj->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel_obj->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData());
|
||||
printf("==================output data=================\n");
|
||||
|
@ -748,7 +784,10 @@ TEST_F(TestArithmeticGradFp32, Test3DDivGrad2Fp32) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc);
|
||||
ASSERT_NE(kernel_obj, nullptr);
|
||||
kernel_obj->Run();
|
||||
auto ret = kernel_obj->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel_obj->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData());
|
||||
printf("==================output data=================\n");
|
||||
|
@ -830,7 +869,10 @@ TEST_F(TestArithmeticGradFp32, TestMaximumGradBroadcastFp32) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(param), &ctx, desc);
|
||||
ASSERT_NE(kernel_obj, nullptr);
|
||||
kernel_obj->Run();
|
||||
auto ret = kernel_obj->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel_obj->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
float *output_ptr = reinterpret_cast<float *>(outputs[1]->MutableData());
|
||||
printf("==================output data=================\n");
|
||||
|
|
|
@ -59,7 +59,10 @@ TEST_F(TestBiasGradFp32, BiasGradFp32) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(bias_param), &ctx, desc);
|
||||
ASSERT_NE(kernel_obj, nullptr);
|
||||
kernel_obj->Run();
|
||||
auto ret = kernel_obj->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel_obj->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
printf("==================output data=================\n");
|
||||
for (int i = 0; i < 7; i++) {
|
||||
|
@ -108,7 +111,10 @@ TEST_F(TestBiasGradFp32, BiasGrad2DFp32) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(bias_param), &ctx, desc);
|
||||
ASSERT_NE(kernel_obj, nullptr);
|
||||
kernel_obj->Run();
|
||||
auto ret = kernel_obj->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel_obj->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
printf("==================output data=================\n");
|
||||
for (int i = 0; i < 20; i++) {
|
||||
|
|
|
@ -88,7 +88,10 @@ TEST_F(TestBNGradFp32, BNGradFp32) {
|
|||
ASSERT_NE(kernel_obj, nullptr);
|
||||
mindspore::kernel::InnerKernel::AllocWorkspace(kernel_obj->workspace_size());
|
||||
|
||||
kernel_obj->Run();
|
||||
auto ret = kernel_obj->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel_obj->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
std::cout << "==========dx==========\n";
|
||||
auto dx = reinterpret_cast<float *>(outputs[0]->MutableData());
|
||||
for (int i = 0; i < 7; i++) std::cout << dx[i] << " ";
|
||||
|
@ -189,6 +192,9 @@ TEST_F(TestBNGradFp32, BNTtrainFp32) {
|
|||
float *curr_mean = reinterpret_cast<float *>(mean_tensor.MutableData());
|
||||
float *curr_var = reinterpret_cast<float *>(var_tensor.MutableData());
|
||||
|
||||
auto ret = kernel_obj->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
kernel_obj->Train();
|
||||
kernel_obj->set_trainable(true);
|
||||
kernel_obj->Run();
|
||||
|
|
|
@ -119,6 +119,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32FilterGrad) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
mindspore::kernel::InnerKernel::AllocWorkspace(kernel->workspace_size());
|
||||
// warm up loop
|
||||
for (int i = 0; i < 3; i++) {
|
||||
|
@ -196,6 +199,8 @@ TEST_F(TestConvolutionGradFp32, ConvFp32InputGrad) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
mindspore::kernel::InnerKernel::AllocWorkspace(kernel->workspace_size());
|
||||
|
||||
// warm up loop
|
||||
|
@ -272,6 +277,8 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupFilterGrad) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
mindspore::kernel::InnerKernel::AllocWorkspace(kernel->workspace_size());
|
||||
kernel->Run();
|
||||
|
||||
|
@ -345,6 +352,8 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupInputGrad) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
mindspore::kernel::InnerKernel::AllocWorkspace(kernel->workspace_size());
|
||||
// warm up loop
|
||||
for (int i = 0; i < 3; i++) {
|
||||
|
@ -420,6 +429,8 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationFilterGrad) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
mindspore::kernel::InnerKernel::AllocWorkspace(kernel->workspace_size());
|
||||
|
||||
// warm up loop
|
||||
|
@ -496,6 +507,8 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationInputGrad) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
mindspore::kernel::InnerKernel::AllocWorkspace(kernel->workspace_size());
|
||||
|
||||
int loop_count = 100;
|
||||
|
@ -673,6 +686,8 @@ TEST_F(TestConvolutionGradFp32, ConvFp32Dilation2Group2Stride2FilterGrad) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
mindspore::kernel::InnerKernel::AllocWorkspace(kernel->workspace_size());
|
||||
|
||||
// warm up loop
|
||||
|
@ -780,6 +795,8 @@ TEST_F(TestConvolutionGradFp32, ConvGroup2Dilation2Stride2) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
mindspore::kernel::InnerKernel::AllocWorkspace(kernel->workspace_size());
|
||||
|
||||
// warm up loop
|
||||
|
|
|
@ -98,6 +98,8 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32FilterGrad) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
mindspore::kernel::InnerKernel::AllocWorkspace(kernel->workspace_size());
|
||||
|
||||
// warm up loop
|
||||
|
@ -204,6 +206,8 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2FilterGrad) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
mindspore::kernel::InnerKernel::AllocWorkspace(kernel->workspace_size());
|
||||
for (int i = 0; i < 3; i++) {
|
||||
}
|
||||
|
@ -310,6 +314,8 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3FilterGrad) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
mindspore::kernel::InnerKernel::AllocWorkspace(kernel->workspace_size());
|
||||
|
||||
// warm up loop
|
||||
|
@ -413,6 +419,8 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3Stride1FilterGrad) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
mindspore::kernel::InnerKernel::AllocWorkspace(kernel->workspace_size());
|
||||
|
||||
// warm up loop
|
||||
|
@ -519,6 +527,8 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group2Stride2FilterGrad) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
mindspore::kernel::InnerKernel::AllocWorkspace(kernel->workspace_size());
|
||||
|
||||
// warm up loop
|
||||
|
@ -628,7 +638,8 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group12Stride2FilterGrad) {
|
|||
ASSERT_NE(creator, nullptr);
|
||||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(conv_param), &context, desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
mindspore::kernel::InnerKernel::AllocWorkspace(kernel->workspace_size());
|
||||
|
||||
// warm up loop
|
||||
|
|
|
@ -160,7 +160,10 @@ TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) {
|
|||
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(pooling_param), &context, desc);
|
||||
ASSERT_NE(kernel_obj, nullptr);
|
||||
|
||||
kernel_obj->Run();
|
||||
auto ret = kernel_obj->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel_obj->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
printf("==================output data=================\n");
|
||||
for (int i = 0; i < 20; i++) {
|
||||
|
@ -229,7 +232,10 @@ TEST_F(TestPoolingGradFp32, AvgPoolingBatchGradFp32) {
|
|||
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(pooling_param), &context, desc);
|
||||
ASSERT_NE(kernel_obj, nullptr);
|
||||
|
||||
kernel_obj->Run();
|
||||
auto ret = kernel_obj->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel_obj->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
printf("==================output data=================\n");
|
||||
for (int i = 0; i < 20; i++) {
|
||||
|
@ -297,9 +303,10 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) {
|
|||
auto kernel = pool_creator(inputs, outputs, reinterpret_cast<OpParameter *>(pool), &context, pool_desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
|
||||
kernel->Init();
|
||||
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
std::string output_path = "./test_data/pooling/avgpoolgradfp32_s2_dx_3_28_28_3.bin";
|
||||
auto res = CompareRelativeOutput(out_data, output_path);
|
||||
|
@ -364,9 +371,10 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) {
|
|||
auto kernel = pool_creator(inputs, outputs, reinterpret_cast<OpParameter *>(pool), &context, pool_desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
|
||||
kernel->Init();
|
||||
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
std::string output_path = "./test_data/pooling/avgpoolgradfp32_s3_dx_3_28_28_3.bin";
|
||||
auto res = CompareRelativeOutput(out_data, output_path);
|
||||
|
@ -498,9 +506,10 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) {
|
|||
maxpool_creator(maxpool_inputs, maxpool_outputs, reinterpret_cast<OpParameter *>(maxpool), &context, maxpool_desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
|
||||
kernel->Init();
|
||||
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
std::string output_path = "./test_data/pooling/maxpoolgradfp32_1_xgrad_3_28_28_3.bin";
|
||||
auto res = CompareRelativeOutput(out_data, output_path);
|
||||
|
@ -576,9 +585,10 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) {
|
|||
maxpool_creator(maxpool_inputs, maxpool_outputs, reinterpret_cast<OpParameter *>(maxpool), &context, maxpool_desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
|
||||
kernel->Init();
|
||||
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
std::string output_path = "./test_data/pooling/maxpoolgradfp32_s2_xgrad_3_28_28_3.bin";
|
||||
auto res = CompareRelativeOutput(out_data, output_path);
|
||||
|
@ -654,8 +664,10 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride3Fp32) {
|
|||
maxpool_creator(maxpool_inputs, maxpool_outputs, reinterpret_cast<OpParameter *>(maxpool), &context, maxpool_desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
|
||||
kernel->Init();
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
std::string output_path = "./test_data/pooling/maxpoolgradfp32_s3_xgrad_3_28_28_3.bin";
|
||||
auto res = CompareRelativeOutput(out_data, output_path);
|
||||
|
|
|
@ -77,7 +77,10 @@ TEST_F(TestSoftmaxCrossEntropyFp32, SoftmaxCrossEntropyFp32) {
|
|||
auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(sce_param), &context, desc);
|
||||
ASSERT_NE(kernel_obj, nullptr);
|
||||
mindspore::kernel::InnerKernel::AllocWorkspace(kernel_obj->workspace_size());
|
||||
kernel_obj->Run();
|
||||
auto ret = kernel_obj->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel_obj->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
printf("==================total loss=================\n");
|
||||
std::cout << loss[0] << " ," << std::endl;
|
||||
|
|
|
@ -60,7 +60,9 @@ TEST_F(TestQuantizedAdd, Add) {
|
|||
auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(¶meter), ctx.get(), desc);
|
||||
ASSERT_NE(kernel, nullptr);
|
||||
|
||||
auto ret = kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
int8_t expect0[10] = {-64, 76, 13, -13, -64, 76, 13, -13, -64, 76}; // -0.5 0.6 0.1 -0.1
|
||||
|
|
|
@ -74,7 +74,10 @@ TEST_F(TestArithmeticSelfInt8, floor_quant0_thread2) {
|
|||
ASSERT_NE(kernel, nullptr);
|
||||
auto output_tensor_shape = output0_tensor->shape();
|
||||
ASSERT_EQ(output_tensor_shape, output_shape);
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
std::vector<int8_t> except_result = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
|
||||
PrintData("output data", output, output_size);
|
||||
|
@ -133,7 +136,10 @@ TEST_F(TestArithmeticSelfInt8, floor_quant1_thread2) {
|
|||
ASSERT_NE(kernel, nullptr);
|
||||
auto output_tensor_shape = output0_tensor->shape();
|
||||
ASSERT_EQ(output_tensor_shape, output_shape);
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
std::vector<int8_t> except_result = {0, 1, 1, 2, 3, 3, 3, 4, 5, 5, 5, 6};
|
||||
PrintData("output data", output, output_size);
|
||||
|
@ -192,7 +198,10 @@ TEST_F(TestArithmeticSelfInt8, round_quant0_thread2) {
|
|||
ASSERT_NE(kernel, nullptr);
|
||||
auto output_tensor_shape = output0_tensor->shape();
|
||||
ASSERT_EQ(output_tensor_shape, output_shape);
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
std::vector<int8_t> except_result = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
|
||||
PrintData("output data", output, output_size);
|
||||
|
@ -251,7 +260,10 @@ TEST_F(TestArithmeticSelfInt8, round_quant1_thread2) {
|
|||
ASSERT_NE(kernel, nullptr);
|
||||
auto output_tensor_shape = output0_tensor->shape();
|
||||
ASSERT_EQ(output_tensor_shape, output_shape);
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
std::vector<int8_t> except_result = {1, 1, 1, 2, 3, 3, 4, 4, 5, 5, 6, 7};
|
||||
PrintData("output data", output, output_size);
|
||||
|
@ -310,7 +322,10 @@ TEST_F(TestArithmeticSelfInt8, ceil_quant0_thread2) {
|
|||
ASSERT_NE(kernel, nullptr);
|
||||
auto output_tensor_shape = output0_tensor->shape();
|
||||
ASSERT_EQ(output_tensor_shape, output_shape);
|
||||
kernel->Run();
|
||||
auto ret = kernel->Init();
|
||||
EXPECT_EQ(0, ret);
|
||||
ret = kernel->Run();
|
||||
EXPECT_EQ(0, ret);
|
||||
|
||||
std::vector<int8_t> except_result = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
|
||||
PrintData("output data", output, output_size);
|
||||
|
@@ -369,7 +384,10 @@ TEST_F(TestArithmeticSelfInt8, ceil_quant1_thread2) {
ASSERT_NE(kernel, nullptr);
auto output_tensor_shape = output0_tensor->shape();
ASSERT_EQ(output_tensor_shape, output_shape);
kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);

std::vector<int8_t> except_result = {1, 1, 2, 3, 3, 3, 4, 5, 5, 5, 6, 7};
PrintData("output data", output, output_size);

@@ -428,7 +446,10 @@ TEST_F(TestArithmeticSelfInt8, abs_quant0_thread0) {
ASSERT_NE(kernel, nullptr);
auto output_tensor_shape = output0_tensor->shape();
ASSERT_EQ(output_tensor_shape, output_shape);
kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);

std::vector<int8_t> except_result = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
PrintData("output data", output, output_size);

@@ -487,7 +508,10 @@ TEST_F(TestArithmeticSelfInt8, abs_quant1_thread2) {
ASSERT_NE(kernel, nullptr);
auto output_tensor_shape = output0_tensor->shape();
ASSERT_EQ(output_tensor_shape, output_shape);
kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);

std::vector<int8_t> except_result = {1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6};
PrintData("output data", output, output_size);

@@ -546,7 +570,10 @@ TEST_F(TestArithmeticSelfInt8, sin_quant0_thread2) {
ASSERT_NE(kernel, nullptr);
auto output_tensor_shape = output0_tensor->shape();
ASSERT_EQ(output_tensor_shape, output_shape);
kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);

std::vector<int8_t> except_result = {1, 1, 0, -1};
PrintData("output data", output, output_size);

@@ -605,7 +632,10 @@ TEST_F(TestArithmeticSelfInt8, cos_quant0_thread2) {
ASSERT_NE(kernel, nullptr);
auto output_tensor_shape = output0_tensor->shape();
ASSERT_EQ(output_tensor_shape, output_shape);
kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);

std::vector<int8_t> except_result = {1, 0, -1, -1};
PrintData("output data", output, output_size);

@@ -664,7 +694,10 @@ TEST_F(TestArithmeticSelfInt8, log_quant0_thread2) {
ASSERT_NE(kernel, nullptr);
auto output_tensor_shape = output0_tensor->shape();
ASSERT_EQ(output_tensor_shape, output_shape);
kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);

std::vector<int8_t> except_result = {0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2};
PrintData("output data", output, output_size);

@@ -723,7 +756,10 @@ TEST_F(TestArithmeticSelfInt8, sqrt_quant0_thread2) {
ASSERT_NE(kernel, nullptr);
auto output_tensor_shape = output0_tensor->shape();
ASSERT_EQ(output_tensor_shape, output_shape);
kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);

std::vector<int8_t> except_result = {1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3};
PrintData("output data", output, output_size);

@@ -782,7 +818,10 @@ TEST_F(TestArithmeticSelfInt8, rsqrt_quant0_thread2) {
ASSERT_NE(kernel, nullptr);
auto output_tensor_shape = output0_tensor->shape();
ASSERT_EQ(output_tensor_shape, output_shape);
kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);

std::vector<int8_t> except_result = {1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0};
PrintData("output data", output, output_size);

@@ -841,7 +880,10 @@ TEST_F(TestArithmeticSelfInt8, square_quant0_thread2) {
ASSERT_NE(kernel, nullptr);
auto output_tensor_shape = output0_tensor->shape();
ASSERT_EQ(output_tensor_shape, output_shape);
kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);

std::vector<int8_t> except_result = {1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 127};
PrintData("output data", output, output_size);

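The last expected value in square_quant0_thread2 is 127 rather than 144: an int8 output saturates at the type's maximum. A small sketch of that clamping, assuming (as the other expected vectors suggest) inputs running 1 through 12 under an identity quantization (scale 1, zero point 0):

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  // Assumed inputs 1..12; 12 * 12 = 144 exceeds the int8 range [-128, 127] and clamps to 127.
  for (int x = 1; x <= 12; ++x) {
    int squared = x * x;
    int8_t out = static_cast<int8_t>(std::min(127, std::max(-128, squared)));
    printf("%d^2 -> %d\n", x, static_cast<int>(out));
  }
  return 0;
}
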
@@ -900,7 +942,10 @@ TEST_F(TestArithmeticSelfInt8, square_quant1_thread2) {
ASSERT_NE(kernel, nullptr);
auto output_tensor_shape = output0_tensor->shape();
ASSERT_EQ(output_tensor_shape, output_shape);
kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);

std::vector<int8_t> except_result = {1, 2, 4, 7, 11, 16, 21, 28, 35, 43, 52, 62};
PrintData("output data", output, output_size);

@@ -959,7 +1004,10 @@ TEST_F(TestArithmeticSelfInt8, logical_not_quant0_thread2) {
ASSERT_NE(kernel, nullptr);
auto output_tensor_shape = output0_tensor->shape();
ASSERT_EQ(output_tensor_shape, output_shape);
kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);

std::vector<int8_t> except_result = {0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1};
PrintData("output data", output, output_size);

@@ -108,7 +108,10 @@ TEST_F(TestBatchnormInt8, FusedTest) {
ASSERT_NE(kernel, nullptr);

auto output_tensor_shape = output0_tensor.shape();
kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);

printf("==================output data=================\n");
for (int i = 0; i < output0_tensor.ElementsNum(); i++) {

@@ -188,7 +191,10 @@ TEST_F(TestBatchnormInt8, BNTest) {
ASSERT_NE(kernel, nullptr);

auto output_tensor_shape = output0_tensor.shape();
kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);

printf("==================output data=================\n");
for (int i = 0; i < output0_tensor.ElementsNum(); i++) {

@@ -87,7 +87,10 @@ TEST_F(TestConcatInt8, Concat1_axis0) {
ASSERT_NE(kernel, nullptr);
auto output_tensor_shape = output0_tensor->shape();
ASSERT_EQ(output_tensor_shape, output_shape);
kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);

std::vector<int8_t> except_result = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
PrintData("output data", output, input1.size() + input2.size());

@@ -158,7 +161,10 @@ TEST_F(TestConcatInt8, Concat1_axis1_thread2) {
ASSERT_NE(kernel, nullptr);
auto output_tensor_shape = output0_tensor->shape();
ASSERT_EQ(output_tensor_shape, output_shape);
kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);

std::vector<int8_t> except_result = {10, 11, 12, 13, 14, 15, 30, 31, 20, 21, 22, 23, 24, 25, 32, 33};
PrintData("output data", output, input1.size() + input2.size());

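The expected vector for Concat1_axis1_thread2 reads as two rows stitched from both inputs, consistent with concatenating a 2x6 tensor and a 2x2 tensor along axis 1. A sketch of that layout; the input values below are inferred from the expected output and are assumptions, not copied from the test fixture:

#include <cstdint>
#include <cstdio>
#include <vector>

// Concatenate two row-major 2-D int8 matrices along axis 1 (columns).
std::vector<int8_t> ConcatAxis1(const std::vector<int8_t> &a, int a_cols,
                                const std::vector<int8_t> &b, int b_cols, int rows) {
  std::vector<int8_t> out;
  for (int r = 0; r < rows; ++r) {
    out.insert(out.end(), a.begin() + r * a_cols, a.begin() + (r + 1) * a_cols);
    out.insert(out.end(), b.begin() + r * b_cols, b.begin() + (r + 1) * b_cols);
  }
  return out;
}

int main() {
  // Assumed 2 x 6 and 2 x 2 inputs, inferred from the expected output above.
  std::vector<int8_t> input1 = {10, 11, 12, 13, 14, 15, 20, 21, 22, 23, 24, 25};
  std::vector<int8_t> input2 = {30, 31, 32, 33};
  auto out = ConcatAxis1(input1, 6, input2, 2, 2);
  for (int8_t v : out) printf("%d ", static_cast<int>(v));  // 10..15 30 31 20..25 32 33
  printf("\n");
  return 0;
}
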
@@ -230,7 +236,10 @@ TEST_F(TestConcatInt8, Concat1_axis1_thread2_quant1) {
ASSERT_NE(kernel, nullptr);
auto output_tensor_shape = output0_tensor->shape();
ASSERT_EQ(output_tensor_shape, output_shape);
kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);

std::vector<int8_t> except_result = {5, 6, 6, 7, 7, 8, 15, 16, 10, 11, 11, 12, 12, 13, 16, 17};
PrintData("output data", output, input1.size() + input2.size());

@@ -79,7 +79,10 @@ TEST_F(TestCropInt8, crop_1d_axis0_offset0_quant0_thread2) {
ASSERT_NE(kernel, nullptr);
auto output_tensor_shape = output0_tensor->shape();
ASSERT_EQ(output_tensor_shape, output_shape);
kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);

std::vector<int8_t> except_result = {2, 3, 4, 5, 6, 7, 8};
PrintData("output data", output, output_size);

@@ -142,7 +145,10 @@ TEST_F(TestCropInt8, crop_2d_axis1_offset0_quant0_thread2) {
ASSERT_NE(kernel, nullptr);
auto output_tensor_shape = output0_tensor->shape();
ASSERT_EQ(output_tensor_shape, output_shape);
kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);

std::vector<int8_t> except_result = {2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16};
PrintData("output data", output, output_size);

@@ -205,7 +211,10 @@ TEST_F(TestCropInt8, crop_3d_axis1_offset0_quant0_thread0) {
ASSERT_NE(kernel, nullptr);
auto output_tensor_shape = output0_tensor->shape();
ASSERT_EQ(output_tensor_shape, output_shape);
kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);

std::vector<int8_t> except_result = {4, 8};
PrintData("output data", output, output_size);

@@ -269,7 +278,10 @@ TEST_F(TestCropInt8, crop_3d_axis1_offset0_quant0_thread2) {
ASSERT_NE(kernel, nullptr);
auto output_tensor_shape = output0_tensor->shape();
ASSERT_EQ(output_tensor_shape, output_shape);
kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);

std::vector<int8_t> except_result = {4, 6, 8, 10, 12, 14, 16, 20, 22, 24, 26, 28, 30, 32};
PrintData("output data", output, output_size);

@@ -332,7 +344,10 @@ TEST_F(TestCropInt8, crop_4d_axis0_offset0_quant0_thread0) {
ASSERT_NE(kernel, nullptr);
auto output_tensor_shape = output0_tensor->shape();
ASSERT_EQ(output_tensor_shape, output_shape);
kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);

std::vector<int8_t> except_result = {16};
PrintData("output data", output, output_size);

@@ -395,7 +410,10 @@ TEST_F(TestCropInt8, crop_4d_axis1_offset0_quant0_thread0) {
ASSERT_NE(kernel, nullptr);
auto output_tensor_shape = output0_tensor->shape();
ASSERT_EQ(output_tensor_shape, output_shape);
kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);

std::vector<int8_t> except_result = {8, 16};
PrintData("output data", output, output_size);

@@ -461,7 +479,10 @@ TEST_F(TestCropInt8, crop_4d_axis1_offset1_quant0_thread0) {
ASSERT_NE(kernel, nullptr);
auto output_tensor_shape = output0_tensor->shape();
ASSERT_EQ(output_tensor_shape, output_shape);
kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);

std::vector<int8_t> except_result = {13, 14, 15, 16};
PrintData("output data", output, output_size);

@@ -527,7 +548,10 @@ TEST_F(TestCropInt8, crop_4d_axis1_offset1_quant1_thread0) {
ASSERT_NE(kernel, nullptr);
auto output_tensor_shape = output0_tensor->shape();
ASSERT_EQ(output_tensor_shape, output_shape);
kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);

std::vector<int8_t> except_result = {7, 7, 8, 8};
PrintData("output data", output, output_size);

@@ -592,7 +616,10 @@ TEST_F(TestCropInt8, crop_4d_axis0_offset0_quant0_thread2) {
ASSERT_NE(kernel, nullptr);
auto output_tensor_shape = output0_tensor->shape();
ASSERT_EQ(output_tensor_shape, output_shape);
kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);

std::vector<int8_t> except_result = {40, 44, 48, 52, 56, 60, 64};
PrintData("output data", output, output_size);

@@ -657,7 +684,10 @@ TEST_F(TestCropInt8, crop_4d_axis0_offset0_quant0_thread3) {
ASSERT_NE(kernel, nullptr);
auto output_tensor_shape = output0_tensor->shape();
ASSERT_EQ(output_tensor_shape, output_shape);
kernel->Run();
auto ret = kernel->Init();
EXPECT_EQ(0, ret);
ret = kernel->Run();
EXPECT_EQ(0, ret);

std::vector<int8_t> except_result = {40, 44, 48, 52, 56, 60, 64};
PrintData("output data", output, output_size);

Some files were not shown because too many files have changed in this diff.