support partial fp16
This commit is contained in:
parent
e637285abb
commit
aac9c77228
|
@ -381,8 +381,7 @@ int LiteOpActor::SetInputData() {
|
|||
|
||||
if (src_tensor->data_type() != dst_tensor->data_type()) {
|
||||
CastInputData(dst_tensor, src_tensor);
|
||||
} else if (src_tensor->allocator() == nullptr && !(src_tensor->IsConst()) && !(src_tensor->IsGraphInput()) &&
|
||||
src_tensor->own_data()) {
|
||||
} else if (src_tensor->allocator() == nullptr && !(src_tensor->IsConst()) && !(src_tensor->IsGraphInput())) {
|
||||
// delegate graph kernel output tensor
|
||||
CopyInputData(dst_tensor, src_tensor);
|
||||
} else {
|
||||
|
|
|
@ -137,13 +137,22 @@ int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) {
|
|||
}
|
||||
}
|
||||
FindAllInoutKernels(*dst_kernels);
|
||||
auto src_kernel = *dst_kernels;
|
||||
dst_kernels->clear();
|
||||
std::map<const kernel::LiteKernel *, bool> is_kernel_finish;
|
||||
ret = ConstructSubGraphs(src_kernel, dst_kernels, &is_kernel_finish);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "ConstructSubGraphs failed.";
|
||||
return ret;
|
||||
|
||||
if (IsControlFlowParttern(*dst_kernels)) {
|
||||
ret = ConstructControlFlowMainGraph(dst_kernels);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "ConstructControlFlowMainGraph failed.";
|
||||
return ret;
|
||||
}
|
||||
} else {
|
||||
auto src_kernel = *dst_kernels;
|
||||
dst_kernels->clear();
|
||||
std::map<const kernel::LiteKernel *, bool> is_kernel_finish;
|
||||
ret = ConstructSubGraphs(src_kernel, dst_kernels, &is_kernel_finish);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "ConstructSubGraphs failed.";
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
ret = InitKernels(*dst_kernels);
|
||||
|
@ -832,47 +841,94 @@ kernel::LiteKernel *Scheduler::SchedulePartialToKernel(const lite::Model::Node *
|
|||
if (!IsPartialNode(primitive)) {
|
||||
return nullptr;
|
||||
}
|
||||
auto sub_graph_index = GetPartialGraphIndex(src_node->primitive_);
|
||||
std::vector<kernel::LiteKernel *> sub_kernels;
|
||||
std::vector<lite::Tensor *> in_tensors;
|
||||
std::vector<lite::Tensor *> out_tensors;
|
||||
auto ret = ScheduleSubGraphToKernels(sub_graph_index, &sub_kernels, &in_tensors, &out_tensors, kNumberTypeFloat32);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Schedule partial failed, name: " << src_node->name_;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
FindAllInoutKernels(sub_kernels);
|
||||
|
||||
auto cur_sub_graph_type = mindspore::lite::Scheduler::GetKernelSubGraphType(sub_kernels.front());
|
||||
auto subgraph = CreateSubGraphKernel(sub_kernels, &in_tensors, &out_tensors, cur_sub_graph_type);
|
||||
subgraph->set_name("subgraph_" + src_node->name_);
|
||||
return subgraph;
|
||||
auto subgraph_index = GetPartialGraphIndex(src_node->primitive_);
|
||||
auto subgraph_kernel = SchedulePartialToSubGraphKernel(subgraph_index);
|
||||
subgraph_kernel->set_name("subgraph_" + std::to_string(subgraph_index));
|
||||
return subgraph_kernel;
|
||||
}
|
||||
|
||||
std::vector<kernel::LiteKernel *> Scheduler::ScheduleSubGraphToSubGraphKernels(const int &subgraph_index) {
|
||||
int Scheduler::SubGraphPreferDataType(const int &subgraph_index, TypeId *prefer_data_type) {
|
||||
if (!context_->IsCpuFloat16Enabled()) {
|
||||
*prefer_data_type = kNumberTypeFloat32;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
auto subgraph = src_model_->sub_graphs_.at(subgraph_index);
|
||||
for (auto node_index : subgraph->node_indices_) {
|
||||
auto node = src_model_->all_nodes_[node_index];
|
||||
MS_ASSERT(node != nullptr);
|
||||
MS_ASSERT(!node->output_indices_.empty());
|
||||
OpParameter *op_parameter = op_parameters_[node->output_indices_.at(0)];
|
||||
if (op_parameter == nullptr) {
|
||||
MS_LOG(ERROR) << "Can not find OpParameter!type: " << PrimitiveTypeName(GetPrimitiveType(node->primitive_));
|
||||
return RET_ERROR;
|
||||
}
|
||||
kernel::KernelKey desc{kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat16,
|
||||
static_cast<schema::PrimitiveType>(op_parameter->type_)};
|
||||
if (!KernelRegistry::GetInstance()->SupportKernel(desc)) {
|
||||
*prefer_data_type = kNumberTypeFloat32;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
std::vector<Tensor *> inputs;
|
||||
std::vector<Tensor *> outputs;
|
||||
FindNodeInoutTensors(*node, &inputs, &outputs);
|
||||
TypeId data_type =
|
||||
(node->quant_type_ == schema::QuantType_QUANT_WEIGHT) ? kNumberTypeFloat32 : GetFirstFp32Fp16OrInt8Type(inputs);
|
||||
if (data_type != kNumberTypeFloat32 || data_type != kNumberTypeFloat16) {
|
||||
*prefer_data_type = kNumberTypeFloat32;
|
||||
return RET_OK;
|
||||
}
|
||||
}
|
||||
*prefer_data_type = kNumberTypeFloat16;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
std::vector<kernel::LiteKernel *> Scheduler::ScheduleMainSubGraphToKernels() {
|
||||
std::vector<kernel::LiteKernel *> kernels;
|
||||
std::vector<lite::Tensor *> in_tensors;
|
||||
std::vector<lite::Tensor *> out_tensors;
|
||||
auto ret = ScheduleSubGraphToKernels(subgraph_index, &kernels, &in_tensors, &out_tensors);
|
||||
auto ret = ScheduleSubGraphToKernels(kMainSubGraphIndex, &kernels, &in_tensors, &out_tensors);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Schedule subgraph failed, index: " << kMainSubGraphIndex;
|
||||
return {};
|
||||
}
|
||||
return kernels;
|
||||
}
|
||||
|
||||
kernel::LiteKernel *Scheduler::SchedulePartialToSubGraphKernel(const int &subgraph_index) {
|
||||
TypeId prefer_data_type = kTypeUnknown;
|
||||
if (SubGraphPreferDataType(subgraph_index, &prefer_data_type) != RET_OK) {
|
||||
MS_LOG(ERROR) << "SubGraphPreferDataType failed, subgraph index: " << subgraph_index;
|
||||
return nullptr;
|
||||
}
|
||||
std::vector<kernel::LiteKernel *> kernels;
|
||||
std::vector<lite::Tensor *> in_tensors;
|
||||
std::vector<lite::Tensor *> out_tensors;
|
||||
auto ret = ScheduleSubGraphToKernels(subgraph_index, &kernels, &in_tensors, &out_tensors, prefer_data_type);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Schedule subgraph failed, index: " << subgraph_index;
|
||||
return {};
|
||||
}
|
||||
|
||||
if (subgraph_index != kMainSubGraphIndex) {
|
||||
FindAllInoutKernels(kernels);
|
||||
auto cur_sub_graph_type = mindspore::lite::Scheduler::GetKernelSubGraphType(kernels.front());
|
||||
MS_LOG(INFO) << "cur_sub_graph_type: " << cur_sub_graph_type;
|
||||
auto subgraph_kernel = CreateSubGraphKernel(kernels, &in_tensors, &out_tensors, cur_sub_graph_type);
|
||||
if (subgraph_kernel == nullptr) {
|
||||
MS_LOG(ERROR) << "CreateSubGraphKernel failed, cur_sub_graph_type: " << cur_sub_graph_type;
|
||||
return {};
|
||||
}
|
||||
subgraph_index_subgraph_kernel_map_[subgraph_index] = subgraph_kernel;
|
||||
kernels = {subgraph_kernel};
|
||||
FindAllInoutKernels(kernels);
|
||||
auto cur_sub_graph_type = mindspore::lite::Scheduler::GetKernelSubGraphType(kernels.front());
|
||||
MS_LOG(INFO) << "cur_sub_graph_type: " << cur_sub_graph_type;
|
||||
auto subgraph_kernel = CreateSubGraphKernel(kernels, &in_tensors, &out_tensors, cur_sub_graph_type);
|
||||
if (subgraph_kernel == nullptr) {
|
||||
MS_LOG(ERROR) << "CreateSubGraphKernel failed, cur_sub_graph_type: " << cur_sub_graph_type;
|
||||
return nullptr;
|
||||
}
|
||||
return kernels;
|
||||
return subgraph_kernel;
|
||||
}
|
||||
|
||||
std::vector<kernel::LiteKernel *> Scheduler::ScheduleSubGraphToSubGraphKernels(const int &subgraph_index) {
|
||||
if (subgraph_index == kMainSubGraphIndex) {
|
||||
return ScheduleMainSubGraphToKernels();
|
||||
}
|
||||
auto subgraph_kernel = SchedulePartialToSubGraphKernel(subgraph_index);
|
||||
subgraph_kernel->set_name("subgraph_" + std::to_string(subgraph_index));
|
||||
subgraph_index_subgraph_kernel_map_[subgraph_index] = subgraph_kernel;
|
||||
return {subgraph_kernel};
|
||||
}
|
||||
|
||||
kernel::LiteKernel *Scheduler::ScheduleNodeToKernel(const lite::Model::Node *src_node, TypeId prefer_data_type) {
|
||||
|
@ -1064,13 +1120,8 @@ int Scheduler::ConstructSubGraphs(std::vector<kernel::LiteKernel *> src_kernel,
|
|||
if (std::find(head_kernels.begin(), head_kernels.end(), kernel) != head_kernels.end()) {
|
||||
return false;
|
||||
}
|
||||
// when merge is removed, this if is removed automatically
|
||||
if (kernel->type() == schema::PrimitiveType_Merge) {
|
||||
return MergeOpIsReady(kernel, (*is_kernel_finish));
|
||||
} else {
|
||||
return std::all_of(kernel_inputs.begin(), kernel_inputs.end(),
|
||||
[&](kernel::LiteKernel *kernel) { return (*is_kernel_finish)[kernel]; });
|
||||
}
|
||||
return std::all_of(kernel_inputs.begin(), kernel_inputs.end(),
|
||||
[&](kernel::LiteKernel *kernel) { return (*is_kernel_finish)[kernel]; });
|
||||
});
|
||||
if (head_kernel_iter == src_kernel.end()) {
|
||||
break;
|
||||
|
@ -1118,32 +1169,6 @@ int Scheduler::ConstructSubGraphs(std::vector<kernel::LiteKernel *> src_kernel,
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
// Tells whether a Merge kernel can be scheduled.
//
// An input tensor counts as ready when its category is CONST_TENSOR, CONST_SCALAR or GRAPH_INPUT, or
// when it is an output of an input kernel that is marked finished in is_kernel_finish.
// The kernel is ready when every tensor in the first half of its inputs is ready, or every tensor in
// the second half is.
bool Scheduler::MergeOpIsReady(const kernel::LiteKernel *kernel,
                               std::map<const kernel::LiteKernel *, bool> is_kernel_finish) {
  MS_ASSERT(kernel != nullptr);
  std::map<const lite::Tensor *, bool> tensor_ready;
  for (auto input : kernel->in_tensors()) {
    const auto category = input->category();
    bool ready = category == Tensor::CONST_TENSOR || category == Tensor::CONST_SCALAR ||
                 category == Tensor::GRAPH_INPUT;
    for (auto producer : kernel->in_kernels()) {
      for (auto produced : producer->out_tensors()) {
        if (produced == input && is_kernel_finish[producer]) {
          ready = true;
        }
      }
    }
    tensor_ready[input] = ready;
  }

  auto &inputs = kernel->in_tensors();
  const auto middle = inputs.begin() + inputs.size() / 2;
  auto is_ready = [&](lite::Tensor *in_tensor) { return tensor_ready[in_tensor]; };
  return std::all_of(inputs.begin(), middle, is_ready) || std::all_of(middle, inputs.end(), is_ready);
}
|
||||
|
||||
kernel::SubGraphKernel *Scheduler::CreateSubGraphKernel(const std::vector<kernel::LiteKernel *> &kernels,
|
||||
const std::vector<lite::Tensor *> *in_tensors,
|
||||
const std::vector<lite::Tensor *> *out_tensors,
|
||||
|
@ -1303,4 +1328,45 @@ void Scheduler::FindAllInoutKernels(const std::vector<kernel::LiteKernel *> &ker
|
|||
kernel->FindInoutKernels(kernels);
|
||||
}
|
||||
}
|
||||
|
||||
// Picks the subgraph type for a group of kernels: kCpuFP16SubGraph as soon as one kernel's
// descriptor has data type float16, kCpuFP32SubGraph otherwise (including for an empty group).
kernel::SubGraphType Scheduler::PartialSubGraphType(const std::vector<kernel::LiteKernel *> &kernels) {
  for (auto *item : kernels) {
    if (item->desc().data_type == kNumberTypeFloat16) {
      return kernel::kCpuFP16SubGraph;
    }
  }
  return kernel::kCpuFP32SubGraph;
}
|
||||
|
||||
// Returns true when at least one kernel has an op parameter whose type is PartialFusion.
// Kernels without an op parameter are ignored.
bool Scheduler::IsControlFlowParttern(const std::vector<kernel::LiteKernel *> &kernels) {
  for (auto *item : kernels) {
    auto param = item->op_parameter();
    if (param && param->type_ == schema::PrimitiveType_PartialFusion) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
// Wraps all kernels that are not subgraphs into one new subgraph kernel.
//
// On success *kernels holds the new subgraph kernel first, followed by the kernels that already were
// subgraphs, in their original order. The type of the new subgraph comes from PartialSubGraphType.
//
// Returns RET_OK, or RET_ERROR when the subgraph kernel cannot be created. In the error case *kernels
// keeps only the kernels that already were subgraphs.
int Scheduler::ConstructControlFlowMainGraph(std::vector<kernel::LiteKernel *> *kernels) {
  const auto scheduled = *kernels;
  kernels->clear();

  std::vector<kernel::LiteKernel *> loose_kernels{};
  for (auto *item : scheduled) {
    if (item->subgraph_type() == kernel::kNotSubGraph) {
      loose_kernels.push_back(item);
    } else {
      kernels->push_back(item);
    }
  }

  auto main_graph_type = PartialSubGraphType(loose_kernels);
  auto main_graph = CreateSubGraphKernel(loose_kernels, nullptr, nullptr, main_graph_type);
  if (main_graph == nullptr) {
    MS_LOG(ERROR) << "create main graph for control flow model failed.";
    return RET_ERROR;
  }
  kernels->insert(kernels->begin(), main_graph);
  return RET_OK;
}
|
||||
} // namespace mindspore::lite
|
||||
|
|
|
@ -94,10 +94,14 @@ class Scheduler {
|
|||
const std::vector<lite::Tensor *> *in_tensors,
|
||||
const std::vector<lite::Tensor *> *out_tensors,
|
||||
kernel::SubGraphType type);
|
||||
bool MergeOpIsReady(const kernel::LiteKernel *kernel, std::map<const kernel::LiteKernel *, bool> is_kernel_finish);
|
||||
bool KernelFitCurrentSubGraph(const kernel::SubGraphType subgraph_type, const kernel::LiteKernel &kernel);
|
||||
std::vector<kernel::LiteKernel *> FindAllSubGraphKernels(
|
||||
std::vector<kernel::LiteKernel *> head_kernels, std::map<const kernel::LiteKernel *, bool> *sinked_kernel_map);
|
||||
std::vector<kernel::LiteKernel *> ScheduleMainSubGraphToKernels();
|
||||
kernel::LiteKernel *SchedulePartialToSubGraphKernel(const int &subgraph_index);
|
||||
kernel::SubGraphType PartialSubGraphType(const std::vector<kernel::LiteKernel *> &kernels);
|
||||
bool IsControlFlowParttern(const std::vector<kernel::LiteKernel *> &kernels);
|
||||
int ConstructControlFlowMainGraph(std::vector<kernel::LiteKernel *> *kernels);
|
||||
|
||||
// other methods
|
||||
static TypeId GetFirstFp32Fp16OrInt8Type(const std::vector<Tensor *> &in_tensors);
|
||||
|
@ -109,6 +113,7 @@ class Scheduler {
|
|||
void SubGraphMarkScheduled(const int &index);
|
||||
void SetSubgraphForPartialNode();
|
||||
bool IsControlFlowPattern(const lite::Model::Node &partial_node);
|
||||
int SubGraphPreferDataType(const int &subgraph_index, TypeId *prefer_data_type);
|
||||
|
||||
protected:
|
||||
const InnerContext *context_ = nullptr;
|
||||
|
|
|
@ -239,228 +239,4 @@ int CpuSubGraph::Execute(const KernelCallBack &before, const KernelCallBack &aft
|
|||
}
|
||||
return RET_OK;
|
||||
}
|
||||
#if defined(ENABLE_ARM) && defined(ENABLE_FP16)
|
||||
// Releases every DataStore kept in origin_input_data_ and empties the map.
//
// For each non-null store, the buffer it still holds (if any) is released through the store's own
// allocator, or with free() when the store has none. The store object itself is released through
// the context allocator, or with free() when the context has none.
void CpuFp16SubGraph::FreeOriginInputData() {
  for (auto &entry : this->origin_input_data_) {
    auto *store = entry.second;
    if (store == nullptr) {
      continue;
    }
    // release the buffer held by the store
    if (store->data_ != nullptr) {
      if (store->allocator_ != nullptr) {
        store->allocator_->Free(store->data_);
      } else {
        free(store->data_);
      }
    }
    // release the store object
    if (this->Context()->allocator == nullptr) {
      free(store);
    } else {
      this->Context()->allocator->Free(store);
    }
  }
  this->origin_input_data_.clear();
}
|
||||
|
||||
// Converts a tensor to float16 in place and stashes its original buffer.
//
// The data type is switched to float16 first. A tensor without data is only relabelled and RET_OK is
// returned. Otherwise a new buffer is allocated and filled from the original data, and the original
// buffer plus its own_data flag are saved in origin_input_data_ under the tensor.
//
// Returns RET_OK on success, RET_ERROR when allocating the new buffer or the DataStore fails.
int CpuFp16SubGraph::Float32TensorToFloat16Tensor(lite::Tensor *tensor) {
  MS_ASSERT(tensor != nullptr);
  auto fp32_buffer = tensor->data_c();
  auto owned = tensor->own_data();
  tensor->set_data_type(TypeId::kNumberTypeFloat16);
  if (fp32_buffer == nullptr) {
    // the input data may be nullptr of merge.
    MS_LOG(INFO) << "tensor data is null.";
    return lite::RET_OK;
  }

  // Detach the original buffer so that MallocData allocates a fresh one for the converted values.
  tensor->set_data(nullptr);
  if (tensor->MallocData() != RET_OK) {
    MS_LOG(ERROR) << "malloc data failed";
    return RET_ERROR;
  }
  MS_ASSERT(tensor->data_c() != nullptr);
  Float32ToFloat16_fp16_handler(fp32_buffer, tensor->data_c(), tensor->ElementsNum(), support_fp16_);
  if (tensor->allocator() != nullptr) {
    // The new buffer takes over the reference count of the buffer it replaces.
    tensor->allocator()->SetRefCount(tensor->data_c(), tensor->allocator()->RefCount(fp32_buffer));
  }

  auto *stash =
    DataStore::CreateDataStore(fp32_buffer, owned, tensor->allocator().get(), this->Context()->allocator.get());
  if (stash == nullptr) {
    MS_LOG(ERROR) << "Create DataStore failed";
    return RET_ERROR;
  }
  origin_input_data_[tensor] = stash;
  return RET_OK;
}
|
||||
|
||||
// Converts a tensor to float32 in place.
//
// A new buffer is allocated, filled from the current data, and the old buffer is released (through
// the tensor's allocator when it has one, otherwise with free()).
//
// Returns RET_OK on success, RET_NULL_PTR when the tensor has no data, and RET_ERROR when the new
// buffer cannot be allocated (the old buffer is released in that case too).
int CpuFp16SubGraph::Float16TensorToFloat32Tensor(lite::Tensor *tensor) {
  auto fp16_buffer = tensor->data_c();
  if (fp16_buffer == nullptr) {
    MS_LOG(ERROR) << "tensor data is null.";
    return lite::RET_NULL_PTR;
  }

  // Detach the old buffer so that MallocData allocates a fresh one for the converted values.
  tensor->set_data(nullptr);
  tensor->set_data_type(TypeId::kNumberTypeFloat32);
  if (tensor->MallocData() != RET_OK) {
    MS_LOG(ERROR) << "malloc data failed";
    const bool has_context_allocator = this->Context() != nullptr && this->Context()->allocator != nullptr;
    if (has_context_allocator) {
      this->Context()->allocator->Free(fp16_buffer);
    } else {
      free(fp16_buffer);
    }
    return RET_ERROR;
  }
  MS_ASSERT(tensor->data_c() != nullptr);
  Float16ToFloat32_fp16_handler(fp16_buffer, tensor->data_c(), tensor->ElementsNum(), support_fp16_);

  if (tensor->allocator() == nullptr) {
    free(fp16_buffer);
    return RET_OK;
  }
  // The new buffer takes over the reference count of the buffer it replaces.
  tensor->allocator()->SetRefCount(tensor->data_c(), tensor->allocator()->RefCount(fp16_buffer));
  tensor->allocator()->Free(fp16_buffer);
  return RET_OK;
}
|
||||
|
||||
int CpuFp16SubGraph::PreProcess() {
|
||||
#ifdef ENABLE_FP16
|
||||
int ret;
|
||||
for (auto tensor : this->in_tensors()) {
|
||||
MS_ASSERT(tensor != nullptr);
|
||||
auto real_tensor = tensor;
|
||||
if (tensor->root_tensor() != nullptr) {
|
||||
real_tensor = tensor->root_tensor();
|
||||
if (tensor->data_type() == kNumberTypeFloat32) {
|
||||
tensor->set_data_type(kNumberTypeFloat16);
|
||||
} else if (tensor->data_type() == kObjectTypeTensorType) {
|
||||
auto tensorlist = reinterpret_cast<lite::TensorList *>(tensor);
|
||||
if (tensorlist->tensors_data_type() == kNumberTypeFloat32) {
|
||||
tensorlist->set_tensors_data_type(kNumberTypeFloat16);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (real_tensor->data_type() == kNumberTypeFloat32) {
|
||||
ret = Float32TensorToFloat16Tensor(real_tensor);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Float32TensorToFloat16Tensor failed.";
|
||||
this->FreeOriginInputData();
|
||||
return ret;
|
||||
}
|
||||
} else if (real_tensor->data_type() == kObjectTypeTensorType) {
|
||||
auto tensorlist = reinterpret_cast<lite::TensorList *>(real_tensor);
|
||||
if (tensorlist->tensors_data_type() == kNumberTypeFloat32) {
|
||||
tensorlist->set_tensors_data_type(kNumberTypeFloat16);
|
||||
for (auto inner_tensor : tensorlist->tensors()) {
|
||||
ret = Float32TensorToFloat16Tensor(inner_tensor);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Float32TensorToFloat16Tensor failed.";
|
||||
this->FreeOriginInputData();
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for (auto kernel : this->nodes_) {
|
||||
for (auto tensor : kernel->out_tensors()) {
|
||||
if (kernel->type() == schema::PrimitiveType_Cast) {
|
||||
continue;
|
||||
}
|
||||
if (tensor->data_type() == kNumberTypeFloat32) {
|
||||
tensor->set_data_type(kNumberTypeFloat16);
|
||||
} else if (tensor->data_type() == kObjectTypeTensorType) {
|
||||
auto tensorlist = reinterpret_cast<lite::TensorList *>(tensor);
|
||||
if (tensorlist->tensors_data_type() == kNumberTypeFloat32) {
|
||||
tensorlist->set_tensors_data_type(kNumberTypeFloat16);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
#else
|
||||
return RET_OK;
|
||||
#endif
|
||||
}
|
||||
|
||||
int CpuFp16SubGraph::PostProcess() {
|
||||
#ifdef ENABLE_FP16
|
||||
int ret;
|
||||
for (auto tensor : this->out_tensors()) {
|
||||
MS_ASSERT(tensor != nullptr);
|
||||
if (tensor->data_type() == kNumberTypeFloat16) {
|
||||
ret = Float16TensorToFloat32Tensor(tensor);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Float16TensorToFloat32Tensor failed.";
|
||||
return ret;
|
||||
}
|
||||
} else if (tensor->data_type() == kObjectTypeTensorType) {
|
||||
auto tensorlist = reinterpret_cast<lite::TensorList *>(tensor);
|
||||
if (tensorlist->tensors_data_type() == kNumberTypeFloat16) {
|
||||
tensorlist->set_tensors_data_type(kNumberTypeFloat32);
|
||||
for (auto inner_tensor : tensorlist->tensors()) {
|
||||
ret = Float16TensorToFloat32Tensor(inner_tensor);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Float32TensorToFloat16Tensor failed.";
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int tensor_count = 0;
|
||||
auto in_tensors = this->in_tensors();
|
||||
for (size_t i = 0; i < in_tensors.size(); i++) {
|
||||
auto tensor = in_tensors.at(i);
|
||||
MS_ASSERT(tensor != nullptr);
|
||||
auto real_tensor = tensor;
|
||||
if (tensor->root_tensor() != nullptr) {
|
||||
real_tensor = tensor->root_tensor();
|
||||
if (tensor->data_type() == kNumberTypeFloat16) {
|
||||
tensor->set_data_type(kNumberTypeFloat32);
|
||||
} else if (tensor->data_type() == kObjectTypeTensorType) {
|
||||
auto tensorlist = reinterpret_cast<lite::TensorList *>(tensor);
|
||||
if (tensorlist->tensors_data_type() == kNumberTypeFloat16) {
|
||||
tensorlist->set_tensors_data_type(kNumberTypeFloat32);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (real_tensor->data_type() == kNumberTypeFloat16 &&
|
||||
origin_input_data_.find(real_tensor) != origin_input_data_.end()) {
|
||||
auto origin_tensor_data = origin_input_data_.at(real_tensor);
|
||||
real_tensor->FreeData();
|
||||
MS_ASSERT(origin_tensor_data->data_ != nullptr);
|
||||
real_tensor->set_data(origin_tensor_data->data_);
|
||||
real_tensor->set_own_data(origin_tensor_data->own_data_);
|
||||
real_tensor->set_data_type(kNumberTypeFloat32);
|
||||
origin_tensor_data->data_ = nullptr;
|
||||
tensor_count++;
|
||||
} else if (real_tensor->data_type() == kObjectTypeTensorType) {
|
||||
auto tensorlist = reinterpret_cast<lite::TensorList *>(real_tensor);
|
||||
if (tensorlist->tensors_data_type() == kNumberTypeFloat16) {
|
||||
tensorlist->set_tensors_data_type(kNumberTypeFloat32);
|
||||
for (auto inner_tensor : tensorlist->tensors()) {
|
||||
MS_ASSERT(inner_tensor != nullptr);
|
||||
auto origin_tensor_data = origin_input_data_.at(inner_tensor);
|
||||
inner_tensor->FreeData();
|
||||
MS_ASSERT(origin_tensor_data->data_ != nullptr);
|
||||
inner_tensor->set_data(origin_tensor_data->data_);
|
||||
inner_tensor->set_own_data(origin_tensor_data->own_data_);
|
||||
inner_tensor->set_data_type(kNumberTypeFloat32);
|
||||
origin_tensor_data->data_ = nullptr;
|
||||
tensor_count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
this->FreeOriginInputData();
|
||||
return RET_OK;
|
||||
#else
|
||||
return RET_OK;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
} // namespace mindspore::kernel
|
||||
|
|
|
@ -169,68 +169,8 @@ class CpuFp16SubGraph : public CpuSubGraph {
|
|||
}
|
||||
|
||||
~CpuFp16SubGraph() override = default;
|
||||
int Init() override { return CpuSubGraph::Init(); }
|
||||
int PreProcess();
|
||||
int Execute() override {
|
||||
auto ret = PreProcess();
|
||||
if (lite::RET_OK != ret) {
|
||||
MS_LOG(ERROR) << "run kernel PreProcess failed, name: " << this->name();
|
||||
return ret;
|
||||
}
|
||||
ret = CpuSubGraph::Execute();
|
||||
if (lite::RET_OK != ret) {
|
||||
MS_LOG(ERROR) << "run kernel failed, name: " << this->name();
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = PostProcess();
|
||||
if (lite::RET_OK != ret) {
|
||||
MS_LOG(ERROR) << "run kernel PreProcess failed, name: " << this->name();
|
||||
return ret;
|
||||
}
|
||||
return lite::RET_OK;
|
||||
}
|
||||
int Execute(const KernelCallBack &before, const KernelCallBack &after) override {
|
||||
auto ret = PreProcess();
|
||||
if (lite::RET_OK != ret) {
|
||||
MS_LOG(ERROR) << "run kernel PreProcess failed, name: " << this->name();
|
||||
return ret;
|
||||
}
|
||||
#ifdef Debug
|
||||
for (const auto *node : nodes_) {
|
||||
if (node->type() == schema::PrimitiveType_PartialFusion) {
|
||||
continue;
|
||||
}
|
||||
for (const auto *in_tensor : node->in_tensors()) {
|
||||
if (in_tensor->data_type() == kNumberTypeFloat32) {
|
||||
MS_LOG(ERROR) << "FP16 kernel can not accept float32 input";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
ret = CpuSubGraph::Execute(before, after);
|
||||
if (lite::RET_OK != ret) {
|
||||
MS_LOG(ERROR) << "run kernel failed, name: " << this->name();
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = PostProcess();
|
||||
if (lite::RET_OK != ret) {
|
||||
MS_LOG(ERROR) << "run kernel PreProcess failed, name: " << this->name();
|
||||
return ret;
|
||||
}
|
||||
return lite::RET_OK;
|
||||
};
|
||||
int PostProcess();
|
||||
|
||||
private:
|
||||
void FreeOriginInputData();
|
||||
int Float32TensorToFloat16Tensor(lite::Tensor *tensor);
|
||||
int Float16TensorToFloat32Tensor(lite::Tensor *tensor);
|
||||
|
||||
private:
|
||||
std::map<lite::Tensor *, DataStore *> origin_input_data_;
|
||||
bool support_fp16_ = false;
|
||||
};
|
||||
#endif
|
||||
|
|
|
@ -80,8 +80,8 @@ ml_video_edit_oneclick_adaptis.pb;3 6
|
|||
#encoder_0111.pb;4;1:1,44:1:1
|
||||
ml_female_model_step6_noiseout.pb;66 2
|
||||
ml_male_model_step6_noiseout.pb;66 2.5
|
||||
#ml_tts_encoder_control_flow.pb;4;1:1,22:1:1 1.5 to open
|
||||
#ml_tts_decoder_control_flow.pb;5 1 need update
|
||||
ml_tts_encoder_control_flow.pb;4;1:1,22:1:1 1.5
|
||||
#ml_tts_decoder_control_flow.pb;5 1 to open
|
||||
#ml_tts_decoder.pb;5 2.5 to open
|
||||
ml_tts_vocoder.pb;66 53
|
||||
hiai_transformer_encoder.pb;15 4
|
||||
|
|
Loading…
Reference in New Issue