support partial fp16

This commit is contained in:
mengyuanli 2021-07-08 11:10:37 +08:00
parent e637285abb
commit aac9c77228
6 changed files with 146 additions and 360 deletions

View File

@ -381,8 +381,7 @@ int LiteOpActor::SetInputData() {
if (src_tensor->data_type() != dst_tensor->data_type()) {
CastInputData(dst_tensor, src_tensor);
} else if (src_tensor->allocator() == nullptr && !(src_tensor->IsConst()) && !(src_tensor->IsGraphInput()) &&
src_tensor->own_data()) {
} else if (src_tensor->allocator() == nullptr && !(src_tensor->IsConst()) && !(src_tensor->IsGraphInput())) {
// delegate graph kernel output tensor
CopyInputData(dst_tensor, src_tensor);
} else {

View File

@ -137,13 +137,22 @@ int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) {
}
}
FindAllInoutKernels(*dst_kernels);
auto src_kernel = *dst_kernels;
dst_kernels->clear();
std::map<const kernel::LiteKernel *, bool> is_kernel_finish;
ret = ConstructSubGraphs(src_kernel, dst_kernels, &is_kernel_finish);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConstructSubGraphs failed.";
return ret;
if (IsControlFlowParttern(*dst_kernels)) {
ret = ConstructControlFlowMainGraph(dst_kernels);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConstructControlFlowMainGraph failed.";
return ret;
}
} else {
auto src_kernel = *dst_kernels;
dst_kernels->clear();
std::map<const kernel::LiteKernel *, bool> is_kernel_finish;
ret = ConstructSubGraphs(src_kernel, dst_kernels, &is_kernel_finish);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConstructSubGraphs failed.";
return ret;
}
}
ret = InitKernels(*dst_kernels);
@ -832,47 +841,94 @@ kernel::LiteKernel *Scheduler::SchedulePartialToKernel(const lite::Model::Node *
if (!IsPartialNode(primitive)) {
return nullptr;
}
auto sub_graph_index = GetPartialGraphIndex(src_node->primitive_);
std::vector<kernel::LiteKernel *> sub_kernels;
std::vector<lite::Tensor *> in_tensors;
std::vector<lite::Tensor *> out_tensors;
auto ret = ScheduleSubGraphToKernels(sub_graph_index, &sub_kernels, &in_tensors, &out_tensors, kNumberTypeFloat32);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Schedule partial failed, name: " << src_node->name_;
return nullptr;
}
FindAllInoutKernels(sub_kernels);
auto cur_sub_graph_type = mindspore::lite::Scheduler::GetKernelSubGraphType(sub_kernels.front());
auto subgraph = CreateSubGraphKernel(sub_kernels, &in_tensors, &out_tensors, cur_sub_graph_type);
subgraph->set_name("subgraph_" + src_node->name_);
return subgraph;
auto subgraph_index = GetPartialGraphIndex(src_node->primitive_);
auto subgraph_kernel = SchedulePartialToSubGraphKernel(subgraph_index);
subgraph_kernel->set_name("subgraph_" + std::to_string(subgraph_index));
return subgraph_kernel;
}
std::vector<kernel::LiteKernel *> Scheduler::ScheduleSubGraphToSubGraphKernels(const int &subgraph_index) {
// Decide the preferred compute data type (fp32 vs fp16) for one subgraph.
// Writes the decision into *prefer_data_type and returns RET_OK, or RET_ERROR
// when a node's OpParameter is missing. fp16 is chosen only when the context
// enables CPU fp16 AND every node in the subgraph both has a registered fp16
// CPU kernel and carries fp32/fp16 (non-weight-quant) activations.
int Scheduler::SubGraphPreferDataType(const int &subgraph_index, TypeId *prefer_data_type) {
  // fp16 is opt-in via the context; default to fp32 otherwise.
  if (!context_->IsCpuFloat16Enabled()) {
    *prefer_data_type = kNumberTypeFloat32;
    return RET_OK;
  }
  auto subgraph = src_model_->sub_graphs_.at(subgraph_index);
  for (auto node_index : subgraph->node_indices_) {
    auto node = src_model_->all_nodes_[node_index];
    MS_ASSERT(node != nullptr);
    MS_ASSERT(!node->output_indices_.empty());
    OpParameter *op_parameter = op_parameters_[node->output_indices_.at(0)];
    if (op_parameter == nullptr) {
      MS_LOG(ERROR) << "Can not find OpParameter!type: " << PrimitiveTypeName(GetPrimitiveType(node->primitive_));
      return RET_ERROR;
    }
    // Any node lacking an fp16 CPU kernel forces the whole subgraph to fp32.
    kernel::KernelKey desc{kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat16,
                           static_cast<schema::PrimitiveType>(op_parameter->type_)};
    if (!KernelRegistry::GetInstance()->SupportKernel(desc)) {
      *prefer_data_type = kNumberTypeFloat32;
      return RET_OK;
    }
    std::vector<Tensor *> inputs;
    std::vector<Tensor *> outputs;
    FindNodeInoutTensors(*node, &inputs, &outputs);
    // Weight-quantized nodes run in fp32; otherwise take the node's leading
    // fp32/fp16/int8 input type.
    TypeId data_type =
      (node->quant_type_ == schema::QuantType_QUANT_WEIGHT) ? kNumberTypeFloat32 : GetFirstFp32Fp16OrInt8Type(inputs);
    // BUGFIX: the original used '||' here, which is true for every possible
    // data_type and therefore unconditionally fell back to fp32, defeating
    // partial-fp16 support. A node only disqualifies fp16 when its data type
    // is neither fp32 nor fp16.
    if (data_type != kNumberTypeFloat32 && data_type != kNumberTypeFloat16) {
      *prefer_data_type = kNumberTypeFloat32;
      return RET_OK;
    }
  }
  // Every node passed the checks: the subgraph can run in fp16.
  *prefer_data_type = kNumberTypeFloat16;
  return RET_OK;
}
// Schedule the model's main subgraph (kMainSubGraphIndex) into a flat list of
// kernels. Returns an empty vector on failure.
std::vector<kernel::LiteKernel *> Scheduler::ScheduleMainSubGraphToKernels() {
  std::vector<kernel::LiteKernel *> kernels;
  std::vector<lite::Tensor *> in_tensors;
  std::vector<lite::Tensor *> out_tensors;
  // BUGFIX: removed a stale duplicate of this call that referenced the
  // undeclared name `subgraph_index` (diff-merge residue in the original
  // block); the main graph is always scheduled at kMainSubGraphIndex.
  auto ret = ScheduleSubGraphToKernels(kMainSubGraphIndex, &kernels, &in_tensors, &out_tensors);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Schedule subgraph failed, index: " << kMainSubGraphIndex;
    return {};
  }
  return kernels;
}
// Schedule the subgraph referenced by a Partial node into ONE subgraph kernel.
// Picks the subgraph's preferred data type (fp32/fp16), schedules its nodes,
// then wraps them in a subgraph kernel of the matching type.
// Returns nullptr on any failure.
kernel::LiteKernel *Scheduler::SchedulePartialToSubGraphKernel(const int &subgraph_index) {
  // BUGFIX: the original block interleaved removed and added diff lines
  // (duplicate declarations of cur_sub_graph_type / subgraph_kernel and
  // conflicting `return {};` / `return kernels;` statements in a function
  // returning LiteKernel*); this is the coherent reconstruction.
  TypeId prefer_data_type = kTypeUnknown;
  if (SubGraphPreferDataType(subgraph_index, &prefer_data_type) != RET_OK) {
    MS_LOG(ERROR) << "SubGraphPreferDataType failed, subgraph index: " << subgraph_index;
    return nullptr;
  }
  std::vector<kernel::LiteKernel *> kernels;
  std::vector<lite::Tensor *> in_tensors;
  std::vector<lite::Tensor *> out_tensors;
  auto ret = ScheduleSubGraphToKernels(subgraph_index, &kernels, &in_tensors, &out_tensors, prefer_data_type);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Schedule subgraph failed, index: " << subgraph_index;
    return nullptr;
  }
  // Guard before kernels.front(): an empty schedule result would be UB below.
  if (kernels.empty()) {
    MS_LOG(ERROR) << "Schedule subgraph produced no kernels, index: " << subgraph_index;
    return nullptr;
  }
  FindAllInoutKernels(kernels);
  auto cur_sub_graph_type = mindspore::lite::Scheduler::GetKernelSubGraphType(kernels.front());
  MS_LOG(INFO) << "cur_sub_graph_type: " << cur_sub_graph_type;
  auto subgraph_kernel = CreateSubGraphKernel(kernels, &in_tensors, &out_tensors, cur_sub_graph_type);
  if (subgraph_kernel == nullptr) {
    MS_LOG(ERROR) << "CreateSubGraphKernel failed, cur_sub_graph_type: " << cur_sub_graph_type;
    return nullptr;
  }
  return subgraph_kernel;
}
std::vector<kernel::LiteKernel *> Scheduler::ScheduleSubGraphToSubGraphKernels(const int &subgraph_index) {
if (subgraph_index == kMainSubGraphIndex) {
return ScheduleMainSubGraphToKernels();
}
auto subgraph_kernel = SchedulePartialToSubGraphKernel(subgraph_index);
subgraph_kernel->set_name("subgraph_" + std::to_string(subgraph_index));
subgraph_index_subgraph_kernel_map_[subgraph_index] = subgraph_kernel;
return {subgraph_kernel};
}
kernel::LiteKernel *Scheduler::ScheduleNodeToKernel(const lite::Model::Node *src_node, TypeId prefer_data_type) {
@ -1064,13 +1120,8 @@ int Scheduler::ConstructSubGraphs(std::vector<kernel::LiteKernel *> src_kernel,
if (std::find(head_kernels.begin(), head_kernels.end(), kernel) != head_kernels.end()) {
return false;
}
// when merge is removed, this if is removed automatically
if (kernel->type() == schema::PrimitiveType_Merge) {
return MergeOpIsReady(kernel, (*is_kernel_finish));
} else {
return std::all_of(kernel_inputs.begin(), kernel_inputs.end(),
[&](kernel::LiteKernel *kernel) { return (*is_kernel_finish)[kernel]; });
}
return std::all_of(kernel_inputs.begin(), kernel_inputs.end(),
[&](kernel::LiteKernel *kernel) { return (*is_kernel_finish)[kernel]; });
});
if (head_kernel_iter == src_kernel.end()) {
break;
@ -1118,32 +1169,6 @@ int Scheduler::ConstructSubGraphs(std::vector<kernel::LiteKernel *> src_kernel,
return RET_OK;
}
// Checks whether a Merge kernel is ready to run: a Merge's inputs are laid out
// as two halves (true-branch inputs then false-branch inputs), and it is ready
// as soon as EITHER half is fully available.
// NOTE: this code is deleted by the enclosing commit (merge-op scheduling was
// replaced by the control-flow main-graph path).
bool Scheduler::MergeOpIsReady(const kernel::LiteKernel *kernel,
std::map<const kernel::LiteKernel *, bool> is_kernel_finish) {
MS_ASSERT(kernel != nullptr);
// Per input tensor: true when its producer has finished or it needs no producer.
std::map<const lite::Tensor *, bool> merge_in_tensors_map;
for (auto merge_in_tensor : kernel->in_tensors()) {
merge_in_tensors_map[merge_in_tensor] = false;
// Constants and graph inputs are available by definition.
if (merge_in_tensor->category() == Tensor::CONST_TENSOR || merge_in_tensor->category() == Tensor::CONST_SCALAR ||
merge_in_tensor->category() == Tensor::GRAPH_INPUT) {
merge_in_tensors_map[merge_in_tensor] = true;
}
// Otherwise the tensor is ready only if the producing kernel has finished.
for (auto merge_in_kernel : kernel->in_kernels()) {
for (auto tensor : merge_in_kernel->out_tensors()) {
if (tensor == merge_in_tensor && is_kernel_finish[merge_in_kernel]) {
merge_in_tensors_map[merge_in_tensor] = true;
}
}
}
}
auto kernel_in_tensors_num = kernel->in_tensors().size();
auto &in_tensors = kernel->in_tensors();
// Ready when the first half (one branch) OR the second half (the other
// branch) of the inputs is entirely available.
return std::all_of(in_tensors.begin(), in_tensors.begin() + kernel_in_tensors_num / 2,
[&](lite::Tensor *in_tensor) { return merge_in_tensors_map[in_tensor]; }) ||
std::all_of(in_tensors.begin() + kernel_in_tensors_num / 2, in_tensors.end(),
[&](lite::Tensor *in_tensor) { return merge_in_tensors_map[in_tensor]; });
}
kernel::SubGraphKernel *Scheduler::CreateSubGraphKernel(const std::vector<kernel::LiteKernel *> &kernels,
const std::vector<lite::Tensor *> *in_tensors,
const std::vector<lite::Tensor *> *out_tensors,
@ -1303,4 +1328,45 @@ void Scheduler::FindAllInoutKernels(const std::vector<kernel::LiteKernel *> &ker
kernel->FindInoutKernels(kernels);
}
}
// Pick the subgraph type for a partial graph's kernel list: fp16 when at
// least one kernel was selected with an fp16 desc, fp32 otherwise.
kernel::SubGraphType Scheduler::PartialSubGraphType(const std::vector<kernel::LiteKernel *> &kernels) {
  const bool has_fp16_kernel = std::any_of(kernels.begin(), kernels.end(), [](kernel::LiteKernel *item) {
    return item->desc().data_type == kNumberTypeFloat16;
  });
  return has_fp16_kernel ? kernel::kCpuFP16SubGraph : kernel::kCpuFP32SubGraph;
}
// A kernel list follows the control-flow pattern when any kernel is a
// PartialFusion op (i.e. it calls into another subgraph).
bool Scheduler::IsControlFlowParttern(const std::vector<kernel::LiteKernel *> &kernels) {
  return std::any_of(kernels.begin(), kernels.end(), [](kernel::LiteKernel *item) {
    auto *param = item->op_parameter();
    // Kernels without an OpParameter cannot be partial calls.
    return param != nullptr && param->type_ == schema::PrimitiveType_PartialFusion;
  });
}
// Build the main graph for a control-flow model: every kernel not yet wrapped
// in a subgraph is collected into one new subgraph kernel, which is placed at
// the head of *kernels; already-wrapped subgraph kernels keep their relative
// order after it. Returns RET_ERROR if the main subgraph kernel cannot be
// created (in that case *kernels holds only the already-wrapped kernels).
int Scheduler::ConstructControlFlowMainGraph(std::vector<kernel::LiteKernel *> *kernels) {
  std::vector<kernel::LiteKernel *> main_graph_kernels;
  std::vector<kernel::LiteKernel *> wrapped_kernels;
  // Partition: plain kernels belong to the main graph, subgraph kernels stay.
  for (auto *kernel : *kernels) {
    if (kernel->subgraph_type() == kernel::kNotSubGraph) {
      main_graph_kernels.push_back(kernel);
    } else {
      wrapped_kernels.push_back(kernel);
    }
  }
  *kernels = wrapped_kernels;
  auto main_graph_type = PartialSubGraphType(main_graph_kernels);
  auto *main_graph_kernel = CreateSubGraphKernel(main_graph_kernels, nullptr, nullptr, main_graph_type);
  if (main_graph_kernel == nullptr) {
    MS_LOG(ERROR) << "create main graph for control flow model failed.";
    return RET_ERROR;
  }
  // The main graph runs first, so it goes to the front of the list.
  kernels->insert(kernels->begin(), main_graph_kernel);
  return RET_OK;
}
} // namespace mindspore::lite

View File

@ -94,10 +94,14 @@ class Scheduler {
const std::vector<lite::Tensor *> *in_tensors,
const std::vector<lite::Tensor *> *out_tensors,
kernel::SubGraphType type);
bool MergeOpIsReady(const kernel::LiteKernel *kernel, std::map<const kernel::LiteKernel *, bool> is_kernel_finish);
bool KernelFitCurrentSubGraph(const kernel::SubGraphType subgraph_type, const kernel::LiteKernel &kernel);
std::vector<kernel::LiteKernel *> FindAllSubGraphKernels(
std::vector<kernel::LiteKernel *> head_kernels, std::map<const kernel::LiteKernel *, bool> *sinked_kernel_map);
std::vector<kernel::LiteKernel *> ScheduleMainSubGraphToKernels();
kernel::LiteKernel *SchedulePartialToSubGraphKernel(const int &subgraph_index);
kernel::SubGraphType PartialSubGraphType(const std::vector<kernel::LiteKernel *> &kernels);
bool IsControlFlowParttern(const std::vector<kernel::LiteKernel *> &kernels);
int ConstructControlFlowMainGraph(std::vector<kernel::LiteKernel *> *kernels);
// other methods
static TypeId GetFirstFp32Fp16OrInt8Type(const std::vector<Tensor *> &in_tensors);
@ -109,6 +113,7 @@ class Scheduler {
void SubGraphMarkScheduled(const int &index);
void SetSubgraphForPartialNode();
bool IsControlFlowPattern(const lite::Model::Node &partial_node);
int SubGraphPreferDataType(const int &subgraph_index, TypeId *prefer_data_type);
protected:
const InnerContext *context_ = nullptr;

View File

@ -239,228 +239,4 @@ int CpuSubGraph::Execute(const KernelCallBack &before, const KernelCallBack &aft
}
return RET_OK;
}
#if defined(ENABLE_ARM) && defined(ENABLE_FP16)
// Releases every stashed original (fp32) input buffer recorded in
// origin_input_data_, then the DataStore wrappers themselves, and clears the
// map. Safe to call when entries were already consumed by PostProcess
// (data_ set to nullptr there).
void CpuFp16SubGraph::FreeOriginInputData() {
for (auto &iter : this->origin_input_data_) {
auto *data_store = iter.second;
if (data_store == nullptr) {
continue;
}
// free data in data_store
// Use the allocator that originally owned the buffer, falling back to free().
if (data_store->data_ != nullptr) {
if (data_store->allocator_ == nullptr) {
free(data_store->data_);
} else {
data_store->allocator_->Free(data_store->data_);
}
}
// free data_store
// The DataStore node itself was allocated from the context allocator when
// one was available (see DataStore::CreateDataStore call sites).
if (this->Context()->allocator != nullptr) {
this->Context()->allocator->Free(data_store);
} else {
free(data_store);
}
data_store = nullptr;
}
this->origin_input_data_.clear();
}
// Converts a tensor's fp32 payload to fp16 in place: allocates a new fp16
// buffer, copies/converts the data, and stashes the original fp32 buffer in
// origin_input_data_ so PostProcess can restore it. Returns RET_OK/RET_ERROR.
int CpuFp16SubGraph::Float32TensorToFloat16Tensor(lite::Tensor *tensor) {
MS_ASSERT(tensor != nullptr);
// Keep the old buffer pointer and ownership flag before retyping the tensor.
auto float32_data = tensor->data_c();
auto own_data = tensor->own_data();
tensor->set_data_type(TypeId::kNumberTypeFloat16);
if (float32_data == nullptr) {
// the input data may be nullptr of merge.
MS_LOG(INFO) << "tensor data is null.";
return lite::RET_OK;
}
// Detach the fp32 buffer so MallocData allocates a fresh fp16-sized one.
tensor->set_data(nullptr);
auto ret = tensor->MallocData();
if (ret != RET_OK) {
MS_LOG(ERROR) << "malloc data failed";
return RET_ERROR;
}
MS_ASSERT(tensor->data_c() != nullptr);
Float32ToFloat16_fp16_handler(float32_data, tensor->data_c(), tensor->ElementsNum(), support_fp16_);
// Carry the old buffer's refcount over to the new fp16 buffer.
if (tensor->allocator() != nullptr) {
tensor->allocator()->SetRefCount(tensor->data_c(), tensor->allocator()->RefCount(float32_data));
}
// Stash the original fp32 buffer (and its ownership) for restoration later.
auto *data_store =
DataStore::CreateDataStore(float32_data, own_data, tensor->allocator().get(), this->Context()->allocator.get());
if (data_store == nullptr) {
MS_LOG(ERROR) << "Create DataStore failed";
return RET_ERROR;
}
origin_input_data_[tensor] = data_store;
return RET_OK;
}
// Converts a tensor's fp16 payload back to fp32 in place: allocates a new
// fp32 buffer, converts the data into it, and frees the fp16 buffer.
// Unlike the fp32->fp16 direction, nothing is stashed — the fp16 data is
// discarded. Returns RET_OK, RET_NULL_PTR (no data), or RET_ERROR.
int CpuFp16SubGraph::Float16TensorToFloat32Tensor(lite::Tensor *tensor) {
auto float16_data = tensor->data_c();
if (float16_data == nullptr) {
MS_LOG(ERROR) << "tensor data is null.";
return lite::RET_NULL_PTR;
}
// Detach and retype before MallocData so an fp32-sized buffer is allocated.
tensor->set_data(nullptr);
tensor->set_data_type(TypeId::kNumberTypeFloat32);
auto ret = tensor->MallocData();
if (ret != RET_OK) {
MS_LOG(ERROR) << "malloc data failed";
// On failure the detached fp16 buffer must still be released here.
if (this->Context() != nullptr && this->Context()->allocator != nullptr) {
this->Context()->allocator->Free(float16_data);
} else {
free(float16_data);
}
return RET_ERROR;
}
MS_ASSERT(tensor->data_c() != nullptr);
Float16ToFloat32_fp16_handler(float16_data, tensor->data_c(), tensor->ElementsNum(), support_fp16_);
// Transfer the refcount to the new fp32 buffer, then release the fp16 one.
if (tensor->allocator() != nullptr) {
tensor->allocator()->SetRefCount(tensor->data_c(), tensor->allocator()->RefCount(float16_data));
tensor->allocator()->Free(float16_data);
} else {
free(float16_data);
}
return RET_OK;
}
// Prepares the fp16 subgraph for execution: converts all fp32 inputs (and
// tensor-list elements) to fp16, and retypes every node output tensor so the
// fp16 kernels see consistent types. Original fp32 input buffers are stashed
// by Float32TensorToFloat16Tensor for restoration in PostProcess.
int CpuFp16SubGraph::PreProcess() {
#ifdef ENABLE_FP16
int ret;
for (auto tensor : this->in_tensors()) {
MS_ASSERT(tensor != nullptr);
auto real_tensor = tensor;
// When a root tensor exists, the data lives there: convert the root and
// only retype the view tensor.
if (tensor->root_tensor() != nullptr) {
real_tensor = tensor->root_tensor();
if (tensor->data_type() == kNumberTypeFloat32) {
tensor->set_data_type(kNumberTypeFloat16);
} else if (tensor->data_type() == kObjectTypeTensorType) {
auto tensorlist = reinterpret_cast<lite::TensorList *>(tensor);
if (tensorlist->tensors_data_type() == kNumberTypeFloat32) {
tensorlist->set_tensors_data_type(kNumberTypeFloat16);
}
}
}
if (real_tensor->data_type() == kNumberTypeFloat32) {
ret = Float32TensorToFloat16Tensor(real_tensor);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Float32TensorToFloat16Tensor failed.";
// Roll back any buffers already stashed before bailing out.
this->FreeOriginInputData();
return ret;
}
} else if (real_tensor->data_type() == kObjectTypeTensorType) {
// Tensor lists: retype the list, then convert each element tensor.
auto tensorlist = reinterpret_cast<lite::TensorList *>(real_tensor);
if (tensorlist->tensors_data_type() == kNumberTypeFloat32) {
tensorlist->set_tensors_data_type(kNumberTypeFloat16);
for (auto inner_tensor : tensorlist->tensors()) {
ret = Float32TensorToFloat16Tensor(inner_tensor);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Float32TensorToFloat16Tensor failed.";
this->FreeOriginInputData();
return ret;
}
}
}
}
}
// Retype node outputs so downstream kernels in this subgraph see fp16.
for (auto kernel : this->nodes_) {
for (auto tensor : kernel->out_tensors()) {
// Cast nodes deliberately produce a different type; leave them alone.
if (kernel->type() == schema::PrimitiveType_Cast) {
continue;
}
if (tensor->data_type() == kNumberTypeFloat32) {
tensor->set_data_type(kNumberTypeFloat16);
} else if (tensor->data_type() == kObjectTypeTensorType) {
auto tensorlist = reinterpret_cast<lite::TensorList *>(tensor);
if (tensorlist->tensors_data_type() == kNumberTypeFloat32) {
tensorlist->set_tensors_data_type(kNumberTypeFloat16);
}
}
}
}
return RET_OK;
#else
// Without fp16 support compiled in, this is a no-op.
return RET_OK;
#endif
}
// Undoes PreProcess after execution: converts fp16 outputs back to fp32 and
// restores the stashed original fp32 input buffers, then releases the stash.
int CpuFp16SubGraph::PostProcess() {
#ifdef ENABLE_FP16
int ret;
// Convert subgraph outputs fp16 -> fp32 (including tensor-list elements).
for (auto tensor : this->out_tensors()) {
MS_ASSERT(tensor != nullptr);
if (tensor->data_type() == kNumberTypeFloat16) {
ret = Float16TensorToFloat32Tensor(tensor);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Float16TensorToFloat32Tensor failed.";
return ret;
}
} else if (tensor->data_type() == kObjectTypeTensorType) {
auto tensorlist = reinterpret_cast<lite::TensorList *>(tensor);
if (tensorlist->tensors_data_type() == kNumberTypeFloat16) {
tensorlist->set_tensors_data_type(kNumberTypeFloat32);
for (auto inner_tensor : tensorlist->tensors()) {
ret = Float16TensorToFloat32Tensor(inner_tensor);
if (ret != RET_OK) {
// NOTE(review): this message names the wrong direction — it should
// read "Float16TensorToFloat32Tensor failed." (copy-paste slip).
MS_LOG(ERROR) << "Float32TensorToFloat16Tensor failed.";
return ret;
}
}
}
}
}
// Restore original fp32 input buffers that PreProcess stashed.
int tensor_count = 0;
auto in_tensors = this->in_tensors();
for (size_t i = 0; i < in_tensors.size(); i++) {
auto tensor = in_tensors.at(i);
MS_ASSERT(tensor != nullptr);
auto real_tensor = tensor;
// Mirror PreProcess: the data lives in the root tensor when present.
if (tensor->root_tensor() != nullptr) {
real_tensor = tensor->root_tensor();
if (tensor->data_type() == kNumberTypeFloat16) {
tensor->set_data_type(kNumberTypeFloat32);
} else if (tensor->data_type() == kObjectTypeTensorType) {
auto tensorlist = reinterpret_cast<lite::TensorList *>(tensor);
if (tensorlist->tensors_data_type() == kNumberTypeFloat16) {
tensorlist->set_tensors_data_type(kNumberTypeFloat32);
}
}
}
if (real_tensor->data_type() == kNumberTypeFloat16 &&
origin_input_data_.find(real_tensor) != origin_input_data_.end()) {
auto origin_tensor_data = origin_input_data_.at(real_tensor);
// Drop the fp16 buffer and re-attach the stashed fp32 one, transferring
// ownership back; data_ is nulled so FreeOriginInputData won't double-free.
real_tensor->FreeData();
MS_ASSERT(origin_tensor_data->data_ != nullptr);
real_tensor->set_data(origin_tensor_data->data_);
real_tensor->set_own_data(origin_tensor_data->own_data_);
real_tensor->set_data_type(kNumberTypeFloat32);
origin_tensor_data->data_ = nullptr;
tensor_count++;
} else if (real_tensor->data_type() == kObjectTypeTensorType) {
auto tensorlist = reinterpret_cast<lite::TensorList *>(real_tensor);
if (tensorlist->tensors_data_type() == kNumberTypeFloat16) {
tensorlist->set_tensors_data_type(kNumberTypeFloat32);
for (auto inner_tensor : tensorlist->tensors()) {
MS_ASSERT(inner_tensor != nullptr);
// NOTE(review): unlike the branch above, this uses .at() without a
// find() guard — it throws if the element was never stashed; confirm
// every tensor-list element is guaranteed to be in origin_input_data_.
auto origin_tensor_data = origin_input_data_.at(inner_tensor);
inner_tensor->FreeData();
MS_ASSERT(origin_tensor_data->data_ != nullptr);
inner_tensor->set_data(origin_tensor_data->data_);
inner_tensor->set_own_data(origin_tensor_data->own_data_);
inner_tensor->set_data_type(kNumberTypeFloat32);
origin_tensor_data->data_ = nullptr;
tensor_count++;
}
}
}
}
// Release the now-empty DataStore wrappers (restored buffers were nulled).
this->FreeOriginInputData();
return RET_OK;
#else
// Without fp16 support compiled in, this is a no-op.
return RET_OK;
#endif
}
#endif
} // namespace mindspore::kernel

View File

@ -169,68 +169,8 @@ class CpuFp16SubGraph : public CpuSubGraph {
}
~CpuFp16SubGraph() override = default;
int Init() override { return CpuSubGraph::Init(); }
int PreProcess();
// Runs the fp16 subgraph: fp32->fp16 conversion (PreProcess), the wrapped
// CpuSubGraph execution, then fp16->fp32 restoration (PostProcess).
// NOTE: this code is deleted by the enclosing commit.
int Execute() override {
auto ret = PreProcess();
if (lite::RET_OK != ret) {
MS_LOG(ERROR) << "run kernel PreProcess failed, name: " << this->name();
return ret;
}
ret = CpuSubGraph::Execute();
if (lite::RET_OK != ret) {
MS_LOG(ERROR) << "run kernel failed, name: " << this->name();
return ret;
}
ret = PostProcess();
if (lite::RET_OK != ret) {
// NOTE(review): this message says "PreProcess" but reports a PostProcess
// failure (copy-paste slip).
MS_LOG(ERROR) << "run kernel PreProcess failed, name: " << this->name();
return ret;
}
return lite::RET_OK;
}
// Callback-enabled variant of Execute(): same Pre/PostProcess sandwich, with
// an extra debug-only check that no fp32 tensor reaches an fp16 kernel.
// NOTE: this code is deleted by the enclosing commit.
int Execute(const KernelCallBack &before, const KernelCallBack &after) override {
auto ret = PreProcess();
if (lite::RET_OK != ret) {
MS_LOG(ERROR) << "run kernel PreProcess failed, name: " << this->name();
return ret;
}
#ifdef Debug
// Sanity check: after PreProcess every non-partial node input must be fp16.
for (const auto *node : nodes_) {
if (node->type() == schema::PrimitiveType_PartialFusion) {
continue;
}
for (const auto *in_tensor : node->in_tensors()) {
if (in_tensor->data_type() == kNumberTypeFloat32) {
MS_LOG(ERROR) << "FP16 kernel can not accept float32 input";
return lite::RET_ERROR;
}
}
}
#endif
ret = CpuSubGraph::Execute(before, after);
if (lite::RET_OK != ret) {
MS_LOG(ERROR) << "run kernel failed, name: " << this->name();
return ret;
}
ret = PostProcess();
if (lite::RET_OK != ret) {
// NOTE(review): this message says "PreProcess" but reports a PostProcess
// failure (copy-paste slip).
MS_LOG(ERROR) << "run kernel PreProcess failed, name: " << this->name();
return ret;
}
return lite::RET_OK;
};
int PostProcess();
private:
void FreeOriginInputData();
int Float32TensorToFloat16Tensor(lite::Tensor *tensor);
int Float16TensorToFloat32Tensor(lite::Tensor *tensor);
private:
std::map<lite::Tensor *, DataStore *> origin_input_data_;
bool support_fp16_ = false;
};
#endif

View File

@ -80,8 +80,8 @@ ml_video_edit_oneclick_adaptis.pb;3 6
#encoder_0111.pb;4;1:1,44:1:1
ml_female_model_step6_noiseout.pb;66 2
ml_male_model_step6_noiseout.pb;66 2.5
#ml_tts_encoder_control_flow.pb;4;1:1,22:1:1 1.5 to open
#ml_tts_decoder_control_flow.pb;5 1 need update
ml_tts_encoder_control_flow.pb;4;1:1,22:1:1 1.5
#ml_tts_decoder_control_flow.pb;5 1 to open
#ml_tts_decoder.pb;5 2.5 to open
ml_tts_vocoder.pb;66 53
hiai_transformer_encoder.pb;15 4