fix_decoder_3-master
This commit is contained in:
parent
b6bf797ae7
commit
13dc35973a
|
@ -15,7 +15,7 @@
|
||||||
*/
|
*/
|
||||||
#include "include/errorcode.h"
|
#include "include/errorcode.h"
|
||||||
#include "src/kernel_registry.h"
|
#include "src/kernel_registry.h"
|
||||||
#include "src/runtime/kernel/arm/fp32/tensorlist_fromtensor_fp32.h"
|
#include "src/runtime/kernel/arm/base/tensorlist_fromtensor.h"
|
||||||
#include "src/runtime/runtime_api.h"
|
#include "src/runtime/runtime_api.h"
|
||||||
|
|
||||||
using mindspore::kernel::KERNEL_ARCH::kCPU;
|
using mindspore::kernel::KERNEL_ARCH::kCPU;
|
||||||
|
@ -52,14 +52,7 @@ int TensorListFromTensorCPUKernel::IsCompatibleShape() {
|
||||||
return RET_OK;
|
return RET_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
int TensorListFromTensorCPUKernel::Init() {
|
int TensorListFromTensorCPUKernel::Init() { return RET_OK; }
|
||||||
#ifdef ENABLE_FP16
|
|
||||||
if (lite::IsSupportFloat16() && context_->IsCpuFloat16Enabled() && dtype_ == kNumberTypeFloat32) {
|
|
||||||
dtype_ = kNumberTypeFloat16;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
return RET_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
int TensorListFromTensorCPUKernel::ReSize() { return RET_OK; }
|
int TensorListFromTensorCPUKernel::ReSize() { return RET_OK; }
|
||||||
|
|
||||||
|
@ -71,6 +64,7 @@ int TensorListFromTensorCPUKernel::Run() {
|
||||||
MS_LOG(ERROR) << "IsNotCompatibleShape!";
|
MS_LOG(ERROR) << "IsNotCompatibleShape!";
|
||||||
return RET_ERROR;
|
return RET_ERROR;
|
||||||
}
|
}
|
||||||
|
dtype_ = in_tensors_[0]->data_type();
|
||||||
if (input0_->shape().size() == 0) {
|
if (input0_->shape().size() == 0) {
|
||||||
MS_LOG(ERROR) << "input0_->shape().size():" << input0_->shape().size() << " must be greater than 0";
|
MS_LOG(ERROR) << "input0_->shape().size():" << input0_->shape().size() << " must be greater than 0";
|
||||||
}
|
}
|
||||||
|
@ -97,9 +91,10 @@ int TensorListFromTensorCPUKernel::Run() {
|
||||||
<< " must be euqal to devision_dim0:" << devision_dim0;
|
<< " must be euqal to devision_dim0:" << devision_dim0;
|
||||||
return RET_ERROR;
|
return RET_ERROR;
|
||||||
}
|
}
|
||||||
auto out_data = out_ptr->MutableData();
|
auto out_data = out_ptr->data_c();
|
||||||
MS_ASSERT(out_data != nullptr);
|
MS_ASSERT(out_data != nullptr);
|
||||||
memcpy(out_data, in_data, data_offset);
|
memcpy(out_data, in_data, data_offset);
|
||||||
|
out_ptr->set_data_type(dtype_);
|
||||||
in_data += data_offset;
|
in_data += data_offset;
|
||||||
}
|
}
|
||||||
output0->set_tensors_data_type(dtype_);
|
output0->set_tensors_data_type(dtype_);
|
|
@ -16,7 +16,7 @@
|
||||||
#include "include/errorcode.h"
|
#include "include/errorcode.h"
|
||||||
#include "include/ms_tensor.h"
|
#include "include/ms_tensor.h"
|
||||||
#include "src/kernel_registry.h"
|
#include "src/kernel_registry.h"
|
||||||
#include "src/runtime/kernel/arm/fp32/tensorlist_getitem_fp32.h"
|
#include "src/runtime/kernel/arm/base/tensorlist_getitem.h"
|
||||||
#include "src/runtime/runtime_api.h"
|
#include "src/runtime/runtime_api.h"
|
||||||
|
|
||||||
using mindspore::kernel::KERNEL_ARCH::kCPU;
|
using mindspore::kernel::KERNEL_ARCH::kCPU;
|
||||||
|
@ -28,16 +28,7 @@ using mindspore::schema::PrimitiveType_TensorListGetItem;
|
||||||
|
|
||||||
namespace mindspore::kernel {
|
namespace mindspore::kernel {
|
||||||
|
|
||||||
int TensorListGetItemCPUKernel::Init() {
|
int TensorListGetItemCPUKernel::Init() { return RET_OK; }
|
||||||
MS_ASSERT(in_tensors_.size() >= 2);
|
|
||||||
MS_ASSERT(in_tensors_.at(0) != nullptr);
|
|
||||||
#ifdef ENABLE_FP16
|
|
||||||
if (lite::IsSupportFloat16() && context_->IsCpuFloat16Enabled() && dtype_ == kNumberTypeFloat32) {
|
|
||||||
dtype_ = kNumberTypeFloat16;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
return RET_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
int TensorListGetItemCPUKernel::Run() {
|
int TensorListGetItemCPUKernel::Run() {
|
||||||
MS_ASSERT(in_tensors_.size() >= 2);
|
MS_ASSERT(in_tensors_.size() >= 2);
|
||||||
|
@ -48,10 +39,7 @@ int TensorListGetItemCPUKernel::Run() {
|
||||||
if (input0->root_tensor() != nullptr) {
|
if (input0->root_tensor() != nullptr) {
|
||||||
input0 = reinterpret_cast<lite::TensorList *>(input0->root_tensor());
|
input0 = reinterpret_cast<lite::TensorList *>(input0->root_tensor());
|
||||||
}
|
}
|
||||||
if (dtype_ != input0->tensors_data_type()) {
|
dtype_ = input0->tensors_data_type();
|
||||||
MS_LOG(ERROR) << "op dtype: " << dtype_ << " is not equal in_tensor[0] dtype: " << input0->tensors_data_type();
|
|
||||||
return RET_ERROR;
|
|
||||||
}
|
|
||||||
MS_ASSERT(in_tensors_.at(1)->data_c() != nullptr);
|
MS_ASSERT(in_tensors_.at(1)->data_c() != nullptr);
|
||||||
index_ = reinterpret_cast<int *>(in_tensors_.at(1)->data_c())[0];
|
index_ = reinterpret_cast<int *>(in_tensors_.at(1)->data_c())[0];
|
||||||
int dim0 = input0->ElementsNum() - 1;
|
int dim0 = input0->ElementsNum() - 1;
|
|
@ -16,7 +16,7 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "include/errorcode.h"
|
#include "include/errorcode.h"
|
||||||
#include "src/kernel_registry.h"
|
#include "src/kernel_registry.h"
|
||||||
#include "src/runtime/kernel/arm/fp32/tensorlist_reserve_fp32.h"
|
#include "src/runtime/kernel/arm/base/tensorlist_reserve.h"
|
||||||
|
|
||||||
using mindspore::kernel::KERNEL_ARCH::kCPU;
|
using mindspore::kernel::KERNEL_ARCH::kCPU;
|
||||||
using mindspore::lite::KernelRegistrar;
|
using mindspore::lite::KernelRegistrar;
|
||||||
|
@ -27,14 +27,7 @@ using mindspore::schema::PrimitiveType_TensorListReserve;
|
||||||
|
|
||||||
namespace mindspore::kernel {
|
namespace mindspore::kernel {
|
||||||
|
|
||||||
int TensorListReserveCPUKernel::Init() {
|
int TensorListReserveCPUKernel::Init() { return RET_OK; }
|
||||||
#ifdef ENABLE_FP16
|
|
||||||
if (lite::IsSupportFloat16() && context_->IsCpuFloat16Enabled() && element_dtype_ == kNumberTypeFloat32) {
|
|
||||||
element_dtype_ = kNumberTypeFloat16;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
return RET_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
int TensorListReserveCPUKernel::Run() {
|
int TensorListReserveCPUKernel::Run() {
|
||||||
auto input0 = in_tensors_.at(0);
|
auto input0 = in_tensors_.at(0);
|
|
@ -16,7 +16,7 @@
|
||||||
#include "include/errorcode.h"
|
#include "include/errorcode.h"
|
||||||
#include "include/ms_tensor.h"
|
#include "include/ms_tensor.h"
|
||||||
#include "src/kernel_registry.h"
|
#include "src/kernel_registry.h"
|
||||||
#include "src/runtime/kernel/arm/fp32/tensorlist_setitem_fp32.h"
|
#include "src/runtime/kernel/arm/base/tensorlist_setitem.h"
|
||||||
#include "src/runtime/runtime_api.h"
|
#include "src/runtime/runtime_api.h"
|
||||||
|
|
||||||
using mindspore::kernel::KERNEL_ARCH::kCPU;
|
using mindspore::kernel::KERNEL_ARCH::kCPU;
|
||||||
|
@ -28,21 +28,9 @@ using mindspore::schema::PrimitiveType_TensorListSetItem;
|
||||||
|
|
||||||
namespace mindspore::kernel {
|
namespace mindspore::kernel {
|
||||||
|
|
||||||
int TensorListSetItemCPUKernel::Init() {
|
int TensorListSetItemCPUKernel::Init() { return RET_OK; }
|
||||||
#ifdef ENABLE_FP16
|
|
||||||
if (lite::IsSupportFloat16() && context_->IsCpuFloat16Enabled() && dtype_ == kNumberTypeFloat32) {
|
|
||||||
dtype_ = kNumberTypeFloat16;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
return RET_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
int TensorListSetItemCPUKernel::CheckParam() {
|
int TensorListSetItemCPUKernel::CheckParam() {
|
||||||
if (dtype_ != kTypeUnknown && input0_->tensors_data_type() != kTypeUnknown &&
|
|
||||||
dtype_ != input0_->tensors_data_type()) {
|
|
||||||
MS_LOG(ERROR) << "op dtype:" << dtype_ << " is not equal in_tensors[0] dtype:" << input0_->tensors_data_type();
|
|
||||||
return RET_ERROR;
|
|
||||||
}
|
|
||||||
if (in_tensors_[1]->data_type() != kNumberTypeInt && in_tensors_[1]->data_type() != kNumberTypeInt32) {
|
if (in_tensors_[1]->data_type() != kNumberTypeInt && in_tensors_[1]->data_type() != kNumberTypeInt32) {
|
||||||
MS_LOG(ERROR) << "in_tensors_[1]->data_type():" << in_tensors_[1]->data_type() << " must be int";
|
MS_LOG(ERROR) << "in_tensors_[1]->data_type():" << in_tensors_[1]->data_type() << " must be int";
|
||||||
return RET_ERROR;
|
return RET_ERROR;
|
||||||
|
@ -70,7 +58,6 @@ int TensorListSetItemCPUKernel::IncrementOutputSize(int origin_size) {
|
||||||
|
|
||||||
int TensorListSetItemCPUKernel::Run() {
|
int TensorListSetItemCPUKernel::Run() {
|
||||||
input0_ = reinterpret_cast<lite::TensorList *>(in_tensors_[0]);
|
input0_ = reinterpret_cast<lite::TensorList *>(in_tensors_[0]);
|
||||||
|
|
||||||
if (CheckParam() != RET_OK) {
|
if (CheckParam() != RET_OK) {
|
||||||
MS_LOG(ERROR) << "check param failed.";
|
MS_LOG(ERROR) << "check param failed.";
|
||||||
return RET_ERROR;
|
return RET_ERROR;
|
|
@ -28,8 +28,7 @@ class TensorListSetItemCPUKernel : public LiteKernel {
|
||||||
public:
|
public:
|
||||||
TensorListSetItemCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
|
TensorListSetItemCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
|
||||||
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
|
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
|
||||||
: LiteKernel(parameter, inputs, outputs, ctx),
|
: LiteKernel(parameter, inputs, outputs, ctx) {}
|
||||||
dtype_(static_cast<TypeId>(reinterpret_cast<TensorListParameter *>(parameter)->element_dtype_)) {}
|
|
||||||
~TensorListSetItemCPUKernel() = default;
|
~TensorListSetItemCPUKernel() = default;
|
||||||
|
|
||||||
int Init() override;
|
int Init() override;
|
||||||
|
@ -43,7 +42,6 @@ class TensorListSetItemCPUKernel : public LiteKernel {
|
||||||
lite::Tensor *input2_ = nullptr;
|
lite::Tensor *input2_ = nullptr;
|
||||||
lite::TensorList *output0_ = nullptr;
|
lite::TensorList *output0_ = nullptr;
|
||||||
int index_ = 0;
|
int index_ = 0;
|
||||||
TypeId dtype_ = kTypeUnknown;
|
|
||||||
};
|
};
|
||||||
} // namespace mindspore::kernel
|
} // namespace mindspore::kernel
|
||||||
|
|
|
@ -19,7 +19,7 @@
|
||||||
#include "include/errorcode.h"
|
#include "include/errorcode.h"
|
||||||
#include "ir/dtype/type_id.h"
|
#include "ir/dtype/type_id.h"
|
||||||
#include "src/kernel_registry.h"
|
#include "src/kernel_registry.h"
|
||||||
#include "src/runtime/kernel/arm/fp32/tensorlist_stack_fp32.h"
|
#include "src/runtime/kernel/arm/base/tensorlist_stack.h"
|
||||||
|
|
||||||
using mindspore::kernel::KERNEL_ARCH::kCPU;
|
using mindspore::kernel::KERNEL_ARCH::kCPU;
|
||||||
using mindspore::lite::KernelRegistrar;
|
using mindspore::lite::KernelRegistrar;
|
||||||
|
@ -31,11 +31,6 @@ using mindspore::schema::PrimitiveType_TensorListStack;
|
||||||
namespace mindspore::kernel {
|
namespace mindspore::kernel {
|
||||||
|
|
||||||
int TensorListStackCPUKernel::CheckParam() {
|
int TensorListStackCPUKernel::CheckParam() {
|
||||||
if (dtype_ != kTypeUnknown && input0_->tensors_data_type() != dtype_) {
|
|
||||||
MS_LOG(ERROR) << "in_tensors_[0].tensors_data_type:[" << input0_->tensors_data_type() << "] must be equal "
|
|
||||||
<< "param.data_type:[" << dtype_ << "]";
|
|
||||||
return RET_ERROR;
|
|
||||||
}
|
|
||||||
if (num_element_ != -1 && input0_->ElementsNum() != num_element_) {
|
if (num_element_ != -1 && input0_->ElementsNum() != num_element_) {
|
||||||
MS_LOG(ERROR) << "in_tensors_[0].ElementsNum():[" << input0_->ElementsNum() << "] must be equal "
|
MS_LOG(ERROR) << "in_tensors_[0].ElementsNum():[" << input0_->ElementsNum() << "] must be equal "
|
||||||
<< "param.elements_num:[" << num_element_ << "]";
|
<< "param.elements_num:[" << num_element_ << "]";
|
||||||
|
@ -60,11 +55,6 @@ int TensorListStackCPUKernel::Init() {
|
||||||
MS_ASSERT(input0_ != nullptr);
|
MS_ASSERT(input0_ != nullptr);
|
||||||
output0_ = out_tensors_[0];
|
output0_ = out_tensors_[0];
|
||||||
MS_ASSERT(output0_ != nullptr);
|
MS_ASSERT(output0_ != nullptr);
|
||||||
#ifdef ENABLE_FP16
|
|
||||||
if (lite::IsSupportFloat16() && context_->IsCpuFloat16Enabled() && dtype_ == kNumberTypeFloat32) {
|
|
||||||
dtype_ = kNumberTypeFloat16;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
return RET_OK;
|
return RET_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -146,18 +136,11 @@ int TensorListStackCPUKernel::MergeSubShape(const std::vector<int> &shape) {
|
||||||
}
|
}
|
||||||
|
|
||||||
int TensorListStackCPUKernel::Run() {
|
int TensorListStackCPUKernel::Run() {
|
||||||
if (dtype_ == kTypeUnknown) {
|
|
||||||
dtype_ = input0_->tensors_data_type();
|
|
||||||
#ifdef ENABLE_FP16
|
|
||||||
if (lite::IsSupportFloat16() && context_->IsCpuFloat16Enabled() && dtype_ == kNumberTypeFloat32) {
|
|
||||||
dtype_ = kNumberTypeFloat16;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
if (CheckParam() != RET_OK) {
|
if (CheckParam() != RET_OK) {
|
||||||
MS_LOG(ERROR) << "CheckParam failed!";
|
MS_LOG(ERROR) << "CheckParam failed!";
|
||||||
return RET_ERROR;
|
return RET_ERROR;
|
||||||
}
|
}
|
||||||
|
dtype_ = input0_->tensors_data_type();
|
||||||
if (output0_->ElementsNum() == 0) {
|
if (output0_->ElementsNum() == 0) {
|
||||||
return RET_OK;
|
return RET_OK;
|
||||||
}
|
}
|
|
@ -76,7 +76,11 @@ int ArithmeticSelfFp16CPUKernel::Run() {
|
||||||
auto input_tensor = in_tensors_.at(0);
|
auto input_tensor = in_tensors_.at(0);
|
||||||
auto output_tensor = out_tensors_.at(0);
|
auto output_tensor = out_tensors_.at(0);
|
||||||
|
|
||||||
input_fp16_ptr_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
|
if (input_tensor->data_type() == kNumberTypeFloat32) {
|
||||||
|
input_fp16_ptr_ = ConvertInputFp32toFp16(input_tensor, context_);
|
||||||
|
} else {
|
||||||
|
input_fp16_ptr_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
|
||||||
|
}
|
||||||
output_fp16_ptr_ = reinterpret_cast<float16_t *>(output_tensor->data_c());
|
output_fp16_ptr_ = reinterpret_cast<float16_t *>(output_tensor->data_c());
|
||||||
|
|
||||||
auto ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticSelfRun, this, op_parameter_->thread_num_);
|
auto ret = ParallelLaunch(this->context_->thread_pool_, ArithmeticSelfRun, this, op_parameter_->thread_num_);
|
||||||
|
|
|
@ -199,7 +199,8 @@ int Scheduler::InferSubGraphShape(size_t subgraph_index, bool *infer_shape_inter
|
||||||
}
|
}
|
||||||
|
|
||||||
kernel::LiteKernel *Scheduler::FindBackendKernel(const std::vector<Tensor *> &in_tensors,
|
kernel::LiteKernel *Scheduler::FindBackendKernel(const std::vector<Tensor *> &in_tensors,
|
||||||
const std::vector<Tensor *> &out_tensors, const Model::Node *node) {
|
const std::vector<Tensor *> &out_tensors, const Model::Node *node,
|
||||||
|
TypeId prefer_data_type) {
|
||||||
kernel::LiteKernel *kernel = nullptr;
|
kernel::LiteKernel *kernel = nullptr;
|
||||||
TypeId data_type = GetFirstFp32Fp16OrInt8Type(in_tensors);
|
TypeId data_type = GetFirstFp32Fp16OrInt8Type(in_tensors);
|
||||||
OpParameter *op_parameter = op_parameters_[node->output_indices_.at(0)];
|
OpParameter *op_parameter = op_parameters_[node->output_indices_.at(0)];
|
||||||
|
@ -272,7 +273,8 @@ kernel::LiteKernel *Scheduler::FindBackendKernel(const std::vector<Tensor *> &in
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if (mindspore::lite::IsSupportFloat16() &&
|
if ((prefer_data_type == kNumberTypeFloat16 || prefer_data_type == kTypeUnknown) &&
|
||||||
|
mindspore::lite::IsSupportFloat16() &&
|
||||||
((context_->IsCpuFloat16Enabled() && data_type == kNumberTypeFloat32) || data_type == kNumberTypeFloat16)) {
|
((context_->IsCpuFloat16Enabled() && data_type == kNumberTypeFloat32) || data_type == kNumberTypeFloat16)) {
|
||||||
kernel::KernelKey fp16_cpu_desc{desc.arch, kNumberTypeFloat16, desc.type};
|
kernel::KernelKey fp16_cpu_desc{desc.arch, kNumberTypeFloat16, desc.type};
|
||||||
auto tensor_origin_data_map =
|
auto tensor_origin_data_map =
|
||||||
|
@ -301,15 +303,17 @@ kernel::LiteKernel *Scheduler::FindBackendKernel(const std::vector<Tensor *> &in
|
||||||
MS_LOG(DEBUG) << "Get fp16 op failed, back to fp32 op.";
|
MS_LOG(DEBUG) << "Get fp16 op failed, back to fp32 op.";
|
||||||
desc.data_type = kNumberTypeFloat32;
|
desc.data_type = kNumberTypeFloat32;
|
||||||
}
|
}
|
||||||
auto tensor_origin_data_map = DequantUtil::DequantTensor(op_parameter, in_tensors, desc.data_type, need_restore);
|
if (prefer_data_type == kNumberTypeFloat32 || prefer_data_type == kTypeUnknown) {
|
||||||
auto ret = KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, context_, desc, op_parameter, &kernel);
|
auto tensor_origin_data_map = DequantUtil::DequantTensor(op_parameter, in_tensors, desc.data_type, need_restore);
|
||||||
DequantUtil::RestoreTensorData(tensor_origin_data_map);
|
auto ret = KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, context_, desc, op_parameter, &kernel);
|
||||||
if (ret == RET_OK) {
|
DequantUtil::RestoreTensorData(tensor_origin_data_map);
|
||||||
return kernel;
|
if (ret == RET_OK) {
|
||||||
} else if (ret == RET_ERROR) {
|
return kernel;
|
||||||
ret = InferNodeShape(node, &infer_shape_interrupt);
|
} else if (ret == RET_ERROR) {
|
||||||
if (!(ret == RET_INFER_INVALID || ret == RET_OK)) {
|
ret = InferNodeShape(node, &infer_shape_interrupt);
|
||||||
MS_LOG(ERROR) << "Try repeat infer fail: " << node->name_;
|
if (!(ret == RET_INFER_INVALID || ret == RET_OK)) {
|
||||||
|
MS_LOG(ERROR) << "Try repeat infer fail: " << node->name_;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
@ -327,7 +331,7 @@ kernel::LiteKernel *Scheduler::SchedulePartialToKernel(const lite::Model::Node *
|
||||||
std::vector<kernel::LiteKernel *> sub_kernels;
|
std::vector<kernel::LiteKernel *> sub_kernels;
|
||||||
std::vector<lite::Tensor *> in_tensors;
|
std::vector<lite::Tensor *> in_tensors;
|
||||||
std::vector<lite::Tensor *> out_tensors;
|
std::vector<lite::Tensor *> out_tensors;
|
||||||
auto ret = ScheduleSubGraphToKernels(sub_graph_index, &sub_kernels, &in_tensors, &out_tensors);
|
auto ret = ScheduleSubGraphToKernels(sub_graph_index, &sub_kernels, &in_tensors, &out_tensors, kNumberTypeFloat32);
|
||||||
if (ret != RET_OK) {
|
if (ret != RET_OK) {
|
||||||
MS_LOG(ERROR) << "Schedule partial failed, name: " << src_node->name_;
|
MS_LOG(ERROR) << "Schedule partial failed, name: " << src_node->name_;
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
@ -338,11 +342,11 @@ kernel::LiteKernel *Scheduler::SchedulePartialToKernel(const lite::Model::Node *
|
||||||
return subgraph;
|
return subgraph;
|
||||||
}
|
}
|
||||||
|
|
||||||
kernel::LiteKernel *Scheduler::ScheduleNodeToKernel(const lite::Model::Node *src_node) {
|
kernel::LiteKernel *Scheduler::ScheduleNodeToKernel(const lite::Model::Node *src_node, TypeId prefer_data_type) {
|
||||||
std::vector<Tensor *> inputs;
|
std::vector<Tensor *> inputs;
|
||||||
std::vector<Tensor *> outputs;
|
std::vector<Tensor *> outputs;
|
||||||
FindNodeInoutTensors(*src_node, &inputs, &outputs);
|
FindNodeInoutTensors(*src_node, &inputs, &outputs);
|
||||||
auto *kernel = this->FindBackendKernel(inputs, outputs, src_node);
|
auto *kernel = this->FindBackendKernel(inputs, outputs, src_node, prefer_data_type);
|
||||||
if (kernel == nullptr) {
|
if (kernel == nullptr) {
|
||||||
MS_LOG(ERROR) << "FindBackendKernel return nullptr, name: " << src_node->name_
|
MS_LOG(ERROR) << "FindBackendKernel return nullptr, name: " << src_node->name_
|
||||||
<< ", type: " << PrimitiveTypeName(GetPrimitiveType(src_node->primitive_));
|
<< ", type: " << PrimitiveTypeName(GetPrimitiveType(src_node->primitive_));
|
||||||
|
@ -355,7 +359,7 @@ kernel::LiteKernel *Scheduler::ScheduleNodeToKernel(const lite::Model::Node *src
|
||||||
|
|
||||||
int Scheduler::ScheduleSubGraphToKernels(size_t subgraph_index, std::vector<kernel::LiteKernel *> *dst_kernels,
|
int Scheduler::ScheduleSubGraphToKernels(size_t subgraph_index, std::vector<kernel::LiteKernel *> *dst_kernels,
|
||||||
std::vector<lite::Tensor *> *in_tensors,
|
std::vector<lite::Tensor *> *in_tensors,
|
||||||
std::vector<lite::Tensor *> *out_tensors) {
|
std::vector<lite::Tensor *> *out_tensors, TypeId prefer_data_type) {
|
||||||
MS_ASSERT(src_model_ != nullptr);
|
MS_ASSERT(src_model_ != nullptr);
|
||||||
MS_ASSERT(!src_model_->sub_graphs_.empty());
|
MS_ASSERT(!src_model_->sub_graphs_.empty());
|
||||||
MS_ASSERT(src_model_->sub_graphs_.size() > subgraph_index);
|
MS_ASSERT(src_model_->sub_graphs_.size() > subgraph_index);
|
||||||
|
@ -372,7 +376,7 @@ int Scheduler::ScheduleSubGraphToKernels(size_t subgraph_index, std::vector<kern
|
||||||
if (IsPartialNode(primitive)) { // sub_graph
|
if (IsPartialNode(primitive)) { // sub_graph
|
||||||
kernel = SchedulePartialToKernel(node);
|
kernel = SchedulePartialToKernel(node);
|
||||||
} else { // kernel
|
} else { // kernel
|
||||||
kernel = ScheduleNodeToKernel(node);
|
kernel = ScheduleNodeToKernel(node, prefer_data_type);
|
||||||
}
|
}
|
||||||
if (kernel == nullptr) {
|
if (kernel == nullptr) {
|
||||||
MS_LOG(ERROR) << "FindBackendKernel return nullptr, name: " << node->name_
|
MS_LOG(ERROR) << "FindBackendKernel return nullptr, name: " << node->name_
|
||||||
|
|
|
@ -57,14 +57,16 @@ class Scheduler {
|
||||||
|
|
||||||
// schedule a node to kernel according to context and kernels registered
|
// schedule a node to kernel according to context and kernels registered
|
||||||
kernel::LiteKernel *FindBackendKernel(const std::vector<Tensor *> &in_tensors,
|
kernel::LiteKernel *FindBackendKernel(const std::vector<Tensor *> &in_tensors,
|
||||||
const std::vector<Tensor *> &out_tensors, const Model::Node *node);
|
const std::vector<Tensor *> &out_tensors, const Model::Node *node,
|
||||||
|
TypeId prefer_data_type = kTypeUnknown);
|
||||||
// schedule a partial node to a subgraph_kernel
|
// schedule a partial node to a subgraph_kernel
|
||||||
kernel::LiteKernel *SchedulePartialToKernel(const lite::Model::Node *src_node);
|
kernel::LiteKernel *SchedulePartialToKernel(const lite::Model::Node *src_node);
|
||||||
// schedule a node to a kernel
|
// schedule a node to a kernel
|
||||||
kernel::LiteKernel *ScheduleNodeToKernel(const lite::Model::Node *src_node);
|
kernel::LiteKernel *ScheduleNodeToKernel(const lite::Model::Node *src_node, TypeId prefer_data_type = kTypeUnknown);
|
||||||
// schedule a Model::SubGraph into a vector of kernel and subgraph_kernel
|
// schedule a Model::SubGraph into a vector of kernel and subgraph_kernel
|
||||||
int ScheduleSubGraphToKernels(size_t subgraph_index, std::vector<kernel::LiteKernel *> *dst_kernels,
|
int ScheduleSubGraphToKernels(size_t subgraph_index, std::vector<kernel::LiteKernel *> *dst_kernels,
|
||||||
std::vector<lite::Tensor *> *in_tensors, std::vector<lite::Tensor *> *out_tensors);
|
std::vector<lite::Tensor *> *in_tensors, std::vector<lite::Tensor *> *out_tensors,
|
||||||
|
TypeId prefer_data_type = kTypeUnknown);
|
||||||
|
|
||||||
// find in_kernels_ and out_kernels of kernel, sub_graph and nodes_ in sub_graph
|
// find in_kernels_ and out_kernels of kernel, sub_graph and nodes_ in sub_graph
|
||||||
static void FindAllInoutKernels(const std::vector<kernel::LiteKernel *> &kernels);
|
static void FindAllInoutKernels(const std::vector<kernel::LiteKernel *> &kernels);
|
||||||
|
|
|
@ -366,7 +366,8 @@ int CpuFp16SubGraph::PostProcess() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (real_tensor->data_type() == kNumberTypeFloat16 && origin_input_data_.at(real_tensor) != nullptr) {
|
if (real_tensor->data_type() == kNumberTypeFloat16 &&
|
||||||
|
origin_input_data_.find(real_tensor) != origin_input_data_.end()) {
|
||||||
auto origin_tensor_data = origin_input_data_.at(real_tensor);
|
auto origin_tensor_data = origin_input_data_.at(real_tensor);
|
||||||
real_tensor->FreeData();
|
real_tensor->FreeData();
|
||||||
MS_ASSERT(origin_tensor_data->data_ != nullptr);
|
MS_ASSERT(origin_tensor_data->data_ != nullptr);
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
decoder_step_201217_modified.pb 5
|
|
@ -214,6 +214,21 @@ function Run_Converter() {
|
||||||
fi
|
fi
|
||||||
done < ${models_tflite_fp16_config}
|
done < ${models_tflite_fp16_config}
|
||||||
|
|
||||||
|
while read line; do
|
||||||
|
fp16_line_info=${line}
|
||||||
|
if [[ $fp16_line_info == \#* ]]; then
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
model_name=`echo ${fp16_line_info}|awk -F ' ' '{print $1}'`
|
||||||
|
echo 'cp '${ms_models_path}'/'${model_name}'.ms' ${ms_models_path}'/'${model_name}'.fp16.ms'
|
||||||
|
cp ${ms_models_path}/${model_name}.ms ${ms_models_path}/${model_name}.fp16.ms
|
||||||
|
if [ $? = 0 ]; then
|
||||||
|
converter_result='converter fp16 '${model_name}' pass';echo ${converter_result} >> ${run_converter_result_file}
|
||||||
|
else
|
||||||
|
converter_result='converter fp16 '${model_name}' failed';echo ${converter_result} >> ${run_converter_result_file};return 1
|
||||||
|
fi
|
||||||
|
done < ${models_tf_fp16_config}
|
||||||
|
|
||||||
# Convert tflite weightquant models:
|
# Convert tflite weightquant models:
|
||||||
while read line; do
|
while read line; do
|
||||||
weight_quant_line_info=${line}
|
weight_quant_line_info=${line}
|
||||||
|
@ -1832,6 +1847,34 @@ function Run_arm64_fp16() {
|
||||||
run_result='arm64_fp16: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
|
run_result='arm64_fp16: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
|
||||||
fi
|
fi
|
||||||
done < ${models_multiple_inputs_fp16_config}
|
done < ${models_multiple_inputs_fp16_config}
|
||||||
|
|
||||||
|
# Run tf fp16 models
|
||||||
|
while read line; do
|
||||||
|
model_name_and_input_num=${line%;*}
|
||||||
|
length=${#model_name_and_input_num}
|
||||||
|
input_shapes=${line:length+1}
|
||||||
|
tf_line_info=${model_name_and_input_num}
|
||||||
|
if [[ $model_name == \#* ]]; then
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
model_name=`echo ${tf_line_info}|awk -F ' ' '{print $1}'`
|
||||||
|
input_num=`echo ${tf_line_info}|awk -F ' ' '{print $2}'`
|
||||||
|
input_files=''
|
||||||
|
for i in $(seq 1 $input_num)
|
||||||
|
do
|
||||||
|
input_files=$input_files'/data/local/tmp/input_output/input/'$model_name'.ms_'$i'.bin,'
|
||||||
|
done
|
||||||
|
echo ${model_name} >> "${run_arm64_fp16_log_file}"
|
||||||
|
echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt
|
||||||
|
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --inputShapes='${input_shapes}' --modelFile='${model_name}'.ms --inDataFile='${input_files}' --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --enableFp16=true' >> "${run_arm64_fp16_log_file}"
|
||||||
|
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --inputShapes='${input_shapes}' --modelFile='${model_name}'.ms --inDataFile='${input_files}' --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --enableFp16=true' >> adb_run_cmd.txt
|
||||||
|
adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_fp16_log_file}"
|
||||||
|
if [ $? = 0 ]; then
|
||||||
|
run_result='arm64_fp16: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
|
||||||
|
else
|
||||||
|
run_result='arm64_fp16: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
|
||||||
|
fi
|
||||||
|
done < ${models_tf_fp16_config}
|
||||||
}
|
}
|
||||||
# Run on gpu platform:
|
# Run on gpu platform:
|
||||||
function Run_gpu() {
|
function Run_gpu() {
|
||||||
|
@ -2069,6 +2112,7 @@ models_npu_config=${basepath}/models_npu.cfg
|
||||||
models_compatibility_config=${basepath}/models_compatibility.cfg
|
models_compatibility_config=${basepath}/models_compatibility.cfg
|
||||||
models_with_multiple_inputs_config=${basepath}/models_with_multiple_inputs.cfg
|
models_with_multiple_inputs_config=${basepath}/models_with_multiple_inputs.cfg
|
||||||
models_for_process_only_config=${basepath}/models_for_process_only.cfg
|
models_for_process_only_config=${basepath}/models_for_process_only.cfg
|
||||||
|
models_tf_fp16_config=${basepath}/models_tf_fp16.cfg
|
||||||
|
|
||||||
ms_models_path=${basepath}/ms_models
|
ms_models_path=${basepath}/ms_models
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue