diff --git a/mindspore/lite/src/lite_mindrt.cc b/mindspore/lite/src/lite_mindrt.cc
index eb03a680c41..c8c3450430c 100644
--- a/mindspore/lite/src/lite_mindrt.cc
+++ b/mindspore/lite/src/lite_mindrt.cc
@@ -83,6 +83,16 @@ void LiteOpActor::IsolateInputData(std::vector<std::shared_ptr<LiteOpActor>> *ac
     Tensor *old_tensor = kernel_->in_tensors()[i];
 
     if (OfflineIsolated(kernels, *kernel_, *old_tensor)) {
+      if (old_tensor->data_type() == kNumberTypeFloat16 || old_tensor->data_type() == kNumberTypeFloat32) {
+        old_tensor->set_data_type(kernel_->desc().data_type);
+      }
+      if (old_tensor->data_type() == kObjectTypeTensorType) {
+        auto old_tensorlist = reinterpret_cast<TensorList *>(old_tensor);
+        if (old_tensorlist->tensors_data_type() == kNumberTypeFloat16 ||
+            old_tensorlist->tensors_data_type() == kNumberTypeFloat32) {
+          old_tensorlist->set_tensors_data_type(kernel_->desc().data_type);
+        }
+      }
       continue;
     }
 
@@ -310,7 +320,31 @@ void LiteOpActor::SetInputData(Tensor *dst_tensor, Tensor *src_tensor) {
 }
 
 int LiteOpActor::CastInputData(Tensor *dst, Tensor *src) {
+  int ret = RET_OK;
   dst->ResetRefCount();
+  if (src->data_type() != kObjectTypeTensorType) {
+    ret = CastTensorInputData(dst, src);
+  } else {
+    ret = CastTensorListInputData(reinterpret_cast<TensorList *>(dst), reinterpret_cast<TensorList *>(src));
+  }
+  src->DecRefCount();
+  return ret;
+}
+
+bool LiteOpActor::NeedCastData(Tensor *dst_tensor, Tensor *src_tensor) {
+  if (dst_tensor->data_type() != kObjectTypeTensorType && src_tensor->data_type() != kObjectTypeTensorType &&
+      dst_tensor->data_type() != src_tensor->data_type()) {
+    return true;
+  }
+  if (dst_tensor->data_type() == kObjectTypeTensorType && src_tensor->data_type() == kObjectTypeTensorType &&
+      reinterpret_cast<TensorList *>(dst_tensor)->tensors_data_type() !=
+        reinterpret_cast<TensorList *>(src_tensor)->tensors_data_type()) {
+    return true;
+  }
+  return false;
+}
+
+int LiteOpActor::CastTensorInputData(Tensor *dst, Tensor *src) {
   dst->MallocData();
 #if defined(ENABLE_ARM) && defined(ENABLE_FP16)
   if (dst->shape() != src->shape()) {
@@ -332,13 +366,37 @@ int LiteOpActor::CastInputData(Tensor *dst, Tensor *src) {
     MS_LOG(ERROR) << "not support dst_data_type: " << dst_data_type << " src_data_type: " << src_data_type;
     return RET_NOT_SUPPORT;
   }
-  src->DecRefCount();
   return RET_OK;
 #endif
-  src->DecRefCount();
   return RET_ERROR;
 }
 
+int LiteOpActor::CastTensorListInputData(TensorList *dst_tensorlist, TensorList *src_tensorlist) {
+  MS_ASSERT(src_tensorlist != nullptr);
+  MS_ASSERT(dst_tensorlist != nullptr);
+  dst_tensorlist->set_shape(src_tensorlist->shape());
+  std::vector<std::vector<int>> tensors_shapes{};
+  tensors_shapes.resize(src_tensorlist->tensors().size());
+  for (size_t i = 0; i < tensors_shapes.size(); ++i) {
+    tensors_shapes[i] = src_tensorlist->tensors()[i]->shape();
+  }
+  if (src_tensorlist->tensors_data_type() == kNumberTypeFloat16) {
+    dst_tensorlist->MallocTensorListData(kNumberTypeFloat32, tensors_shapes);
+  }
+  if (src_tensorlist->tensors_data_type() == kNumberTypeFloat32) {
+    dst_tensorlist->MallocTensorListData(kNumberTypeFloat16, tensors_shapes);
+  }
+  dst_tensorlist->ResetRefCount();
+  dst_tensorlist->set_allocator(src_tensorlist->allocator());
+
+  for (size_t i = 0; i < src_tensorlist->tensors().size(); ++i) {
+    auto &src_tensor = src_tensorlist->tensors()[i];
+    auto &dst_tensor = dst_tensorlist->tensors()[i];
+    CastTensorInputData(dst_tensor, src_tensor);
+  }
+  return RET_OK;
+}
+
 void LiteOpActor::SetInputShape() {
   for (size_t i = 0; i < inputs_data_.size(); ++i) {
     auto &input_tensor = kernel_->in_tensors()[i];
@@ -377,8 +435,7 @@ int LiteOpActor::InitInputData() {
       continue;
     }
 
-    if (src_tensor->data_type() != dst_tensor->data_type()) {
-      /* fp16 & fp32 transfer */
+    if (NeedCastData(dst_tensor, src_tensor)) {
       CastInputData(dst_tensor, src_tensor);
       continue;
     }
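Note: the key behavioral change above is the NeedCastData predicate. For a TensorList, data_type() is always kObjectTypeTensorType, so the old dst/src data_type() comparison could never detect an fp16/fp32 mismatch between list elements; the element dtype lives in tensors_data_type(). A minimal standalone sketch of the predicate, with mocked stand-ins for the real Tensor/TensorList classes (the TypeId values and class layout here are illustrative, not MindSpore's):

```cpp
#include <cassert>

enum TypeId { kNumberTypeFloat16, kNumberTypeFloat32, kObjectTypeTensorType };

struct Tensor {
  TypeId data_type;
};

struct TensorList : Tensor {
  TypeId tensors_data_type;  // element dtype shared by all tensors in the list
  TensorList() { data_type = kObjectTypeTensorType; }
};

// Mirrors NeedCastData: plain tensors compare their own dtype; tensor lists
// compare the dtype of their elements. A tensor/tensor-list pairing never casts.
bool NeedCastData(Tensor *dst, Tensor *src) {
  if (dst->data_type != kObjectTypeTensorType && src->data_type != kObjectTypeTensorType &&
      dst->data_type != src->data_type) {
    return true;
  }
  if (dst->data_type == kObjectTypeTensorType && src->data_type == kObjectTypeTensorType &&
      static_cast<TensorList *>(dst)->tensors_data_type != static_cast<TensorList *>(src)->tensors_data_type) {
    return true;
  }
  return false;
}

int main() {
  Tensor fp16{kNumberTypeFloat16}, fp32{kNumberTypeFloat32};
  assert(NeedCastData(&fp16, &fp32));  // plain-tensor dtype mismatch: cast
  TensorList l16, l32;
  l16.tensors_data_type = kNumberTypeFloat16;
  l32.tensors_data_type = kNumberTypeFloat32;
  assert(NeedCastData(&l16, &l32));    // list-element dtype mismatch: cast
  l32.tensors_data_type = kNumberTypeFloat16;
  assert(!NeedCastData(&l16, &l32));   // same element dtype: move/set instead
  return 0;
}
```

Running it exercises the three cases: plain-tensor mismatch, list-element mismatch, and list-element match (no cast, so MoveInputData/SetInputData handles the handoff).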
diff --git a/mindspore/lite/src/lite_mindrt.h b/mindspore/lite/src/lite_mindrt.h
index 5a23febfde6..cb362ab4955 100644
--- a/mindspore/lite/src/lite_mindrt.h
+++ b/mindspore/lite/src/lite_mindrt.h
@@ -99,6 +99,9 @@ class LiteOpActor : public OpActor<lite::Tensor> {
   void MoveInputData(Tensor *dst_tensor, Tensor *src_tensor);
   void SetInputData(Tensor *dst_tensor, Tensor *src_tensor);
   int CastInputData(Tensor *dst_tensor, Tensor *src_tensor);
+  bool NeedCastData(Tensor *dst_tensor, Tensor *src_tensor);
+  int CastTensorInputData(Tensor *dst_tensor, Tensor *src_tensor);
+  int CastTensorListInputData(TensorList *dst_tensor, TensorList *src_tensor);
 
  private:
   kernel::LiteKernel *partial_node_ = nullptr;
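The header now exposes the split cast path: CastTensorInputData converts a plain tensor (the fp16/fp32 element conversion guarded by ENABLE_ARM && ENABLE_FP16), and CastTensorListInputData re-mallocs the destination list and converts element by element. The actual conversion is delegated to NEON-backed handlers such as Float32ToFloat16_fp16_handler; as a reference for what those compute, here is a portable bit-level sketch. It is deliberately simplified: round-toward-zero, denormals flushed to zero, NaN payloads not preserved.

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

// fp32 -> fp16 bits: re-bias the exponent (127 -> 15) and keep the top 10
// mantissa bits. Overflow saturates to infinity; tiny values flush to zero.
uint16_t Fp32ToFp16Bits(float f) {
  uint32_t x;
  std::memcpy(&x, &f, sizeof(x));
  uint16_t sign = static_cast<uint16_t>((x >> 16) & 0x8000u);
  int32_t exp = static_cast<int32_t>((x >> 23) & 0xFFu) - 127 + 15;
  uint32_t mant = x & 0x007FFFFFu;
  if (exp >= 31) return sign | 0x7C00u;  // inf/NaN/overflow -> inf
  if (exp <= 0) return sign;             // underflow/denormal -> signed zero
  return sign | static_cast<uint16_t>(exp << 10) | static_cast<uint16_t>(mant >> 13);
}

// fp16 bits -> fp32: the inverse widening, exact for all normal fp16 values.
float Fp16BitsToFp32(uint16_t h) {
  uint32_t sign = static_cast<uint32_t>(h & 0x8000u) << 16;
  uint32_t exp = (h >> 10) & 0x1Fu;
  uint32_t mant = h & 0x03FFu;
  uint32_t x;
  if (exp == 0) {
    x = sign;  // zero (fp16 denormals dropped in this sketch)
  } else if (exp == 31) {
    x = sign | 0x7F800000u | (mant << 13);  // inf/NaN
  } else {
    x = sign | ((exp - 15 + 127) << 23) | (mant << 13);
  }
  float f;
  std::memcpy(&f, &x, sizeof(f));
  return f;
}

int main() {
  uint16_t h = Fp32ToFp16Bits(1.5f);
  std::printf("1.5f -> 0x%04x -> %g\n", h, Fp16BitsToFp32(h));  // 0x3e00 -> 1.5
}
```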
diff --git a/mindspore/lite/src/sub_graph_kernel.cc b/mindspore/lite/src/sub_graph_kernel.cc
index c3465e56035..c75b955fea9 100644
--- a/mindspore/lite/src/sub_graph_kernel.cc
+++ b/mindspore/lite/src/sub_graph_kernel.cc
@@ -240,228 +240,4 @@ int CpuSubGraph::Execute(const KernelCallBack &before, const KernelCallBack &aft
   }
   return RET_OK;
 }
-#if defined(ENABLE_ARM) && defined(ENABLE_FP16)
-void CpuFp16SubGraph::FreeOriginInputData() {
-  for (auto &iter : this->origin_input_data_) {
-    auto *data_store = iter.second;
-    if (data_store == nullptr) {
-      continue;
-    }
-    // free data in data_store
-    if (data_store->data_ != nullptr) {
-      if (data_store->allocator_ == nullptr) {
-        free(data_store->data_);
-      } else {
-        data_store->allocator_->Free(data_store->data_);
-      }
-    }
-    // free data_store
-    if (this->Context()->allocator != nullptr) {
-      this->Context()->allocator->Free(data_store);
-    } else {
-      free(data_store);
-    }
-    data_store = nullptr;
-  }
-  this->origin_input_data_.clear();
-}
-
-int CpuFp16SubGraph::Float32TensorToFloat16Tensor(lite::Tensor *tensor) {
-  MS_ASSERT(tensor != nullptr);
-  auto float32_data = tensor->data_c();
-  auto own_data = tensor->own_data();
-  tensor->set_data_type(TypeId::kNumberTypeFloat16);
-  if (float32_data == nullptr) {
-    // the input data may be nullptr of merge.
-    MS_LOG(INFO) << "tensor data is null.";
-    return lite::RET_OK;
-  }
-  tensor->set_data(nullptr);
-  auto ret = tensor->MallocData();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "malloc data failed";
-    return RET_ERROR;
-  }
-  MS_ASSERT(tensor->data_c() != nullptr);
-  Float32ToFloat16_fp16_handler(float32_data, tensor->data_c(), tensor->ElementsNum(), support_fp16_);
-  if (tensor->allocator() != nullptr) {
-    tensor->allocator()->SetRefCount(tensor->data_c(), tensor->allocator()->RefCount(float32_data));
-  }
-  auto *data_store =
-    DataStore::CreateDataStore(float32_data, own_data, tensor->allocator().get(), this->Context()->allocator.get());
-  if (data_store == nullptr) {
-    MS_LOG(ERROR) << "Create DataStore failed";
-    return RET_ERROR;
-  }
-  origin_input_data_[tensor] = data_store;
-  return RET_OK;
-}
-
-int CpuFp16SubGraph::Float16TensorToFloat32Tensor(lite::Tensor *tensor) {
-  auto float16_data = tensor->data_c();
-  if (float16_data == nullptr) {
-    MS_LOG(ERROR) << "tensor data is null.";
-    return lite::RET_NULL_PTR;
-  }
-  tensor->set_data(nullptr);
-  tensor->set_data_type(TypeId::kNumberTypeFloat32);
-  auto ret = tensor->MallocData();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "malloc data failed";
-    if (this->Context() != nullptr && this->Context()->allocator != nullptr) {
-      this->Context()->allocator->Free(float16_data);
-    } else {
-      free(float16_data);
-    }
-    return RET_ERROR;
-  }
-  MS_ASSERT(tensor->data_c() != nullptr);
-  Float16ToFloat32_fp16_handler(float16_data, tensor->data_c(), tensor->ElementsNum(), support_fp16_);
-  if (tensor->allocator() != nullptr) {
-    tensor->allocator()->SetRefCount(tensor->data_c(), tensor->allocator()->RefCount(float16_data));
-    tensor->allocator()->Free(float16_data);
-  } else {
-    free(float16_data);
-  }
-  return RET_OK;
-}
-
-int CpuFp16SubGraph::PreProcess() {
-#ifdef ENABLE_FP16
-  int ret;
-  for (auto tensor : this->in_tensors()) {
-    MS_ASSERT(tensor != nullptr);
-    auto real_tensor = tensor;
-    if (tensor->root_tensor() != nullptr) {
-      real_tensor = tensor->root_tensor();
-      if (tensor->data_type() == kNumberTypeFloat32) {
-        tensor->set_data_type(kNumberTypeFloat16);
-      } else if (tensor->data_type() == kObjectTypeTensorType) {
-        auto tensorlist = reinterpret_cast<TensorList *>(tensor);
-        if (tensorlist->tensors_data_type() == kNumberTypeFloat32) {
-          tensorlist->set_tensors_data_type(kNumberTypeFloat16);
-        }
-      }
-    }
-    if (real_tensor->data_type() == kNumberTypeFloat32) {
-      ret = Float32TensorToFloat16Tensor(real_tensor);
-      if (ret != RET_OK) {
-        MS_LOG(ERROR) << "Float32TensorToFloat16Tensor failed.";
-        this->FreeOriginInputData();
-        return ret;
-      }
-    } else if (real_tensor->data_type() == kObjectTypeTensorType) {
-      auto tensorlist = reinterpret_cast<TensorList *>(real_tensor);
-      if (tensorlist->tensors_data_type() == kNumberTypeFloat32) {
-        tensorlist->set_tensors_data_type(kNumberTypeFloat16);
-        for (auto inner_tensor : tensorlist->tensors()) {
-          ret = Float32TensorToFloat16Tensor(inner_tensor);
-          if (ret != RET_OK) {
-            MS_LOG(ERROR) << "Float32TensorToFloat16Tensor failed.";
-            this->FreeOriginInputData();
-            return ret;
-          }
-        }
-      }
-    }
-  }
-  for (auto kernel : this->nodes_) {
-    for (auto tensor : kernel->out_tensors()) {
-      if (kernel->type() == schema::PrimitiveType_Cast) {
-        continue;
-      }
-      if (tensor->data_type() == kNumberTypeFloat32) {
-        tensor->set_data_type(kNumberTypeFloat16);
-      } else if (tensor->data_type() == kObjectTypeTensorType) {
-        auto tensorlist = reinterpret_cast<TensorList *>(tensor);
-        if (tensorlist->tensors_data_type() == kNumberTypeFloat32) {
-          tensorlist->set_tensors_data_type(kNumberTypeFloat16);
-        }
-      }
-    }
-  }
-  return RET_OK;
-#else
-  return RET_OK;
-#endif
-}
-
-int CpuFp16SubGraph::PostProcess() {
-#ifdef ENABLE_FP16
-  int ret;
-  for (auto tensor : this->out_tensors()) {
-    MS_ASSERT(tensor != nullptr);
-    if (tensor->data_type() == kNumberTypeFloat16) {
-      ret = Float16TensorToFloat32Tensor(tensor);
-      if (ret != RET_OK) {
-        MS_LOG(ERROR) << "Float16TensorToFloat32Tensor failed.";
-        return ret;
-      }
-    } else if (tensor->data_type() == kObjectTypeTensorType) {
-      auto tensorlist = reinterpret_cast<TensorList *>(tensor);
-      if (tensorlist->tensors_data_type() == kNumberTypeFloat16) {
-        tensorlist->set_tensors_data_type(kNumberTypeFloat32);
-        for (auto inner_tensor : tensorlist->tensors()) {
-          ret = Float16TensorToFloat32Tensor(inner_tensor);
-          if (ret != RET_OK) {
-            MS_LOG(ERROR) << "Float32TensorToFloat16Tensor failed.";
-            return ret;
-          }
-        }
-      }
-    }
-  }
-
-  int tensor_count = 0;
-  auto in_tensors = this->in_tensors();
-  for (size_t i = 0; i < in_tensors.size(); i++) {
-    auto tensor = in_tensors.at(i);
-    MS_ASSERT(tensor != nullptr);
-    auto real_tensor = tensor;
-    if (tensor->root_tensor() != nullptr) {
-      real_tensor = tensor->root_tensor();
-      if (tensor->data_type() == kNumberTypeFloat16) {
-        tensor->set_data_type(kNumberTypeFloat32);
-      } else if (tensor->data_type() == kObjectTypeTensorType) {
-        auto tensorlist = reinterpret_cast<TensorList *>(tensor);
-        if (tensorlist->tensors_data_type() == kNumberTypeFloat16) {
-          tensorlist->set_tensors_data_type(kNumberTypeFloat32);
-        }
-      }
-    }
-    if (real_tensor->data_type() == kNumberTypeFloat16 &&
-        origin_input_data_.find(real_tensor) != origin_input_data_.end()) {
-      auto origin_tensor_data = origin_input_data_.at(real_tensor);
-      real_tensor->FreeData();
-      MS_ASSERT(origin_tensor_data->data_ != nullptr);
-      real_tensor->set_data(origin_tensor_data->data_);
-      real_tensor->set_own_data(origin_tensor_data->own_data_);
-      real_tensor->set_data_type(kNumberTypeFloat32);
-      origin_tensor_data->data_ = nullptr;
-      tensor_count++;
-    } else if (real_tensor->data_type() == kObjectTypeTensorType) {
-      auto tensorlist = reinterpret_cast<TensorList *>(real_tensor);
-      if (tensorlist->tensors_data_type() == kNumberTypeFloat16) {
-        tensorlist->set_tensors_data_type(kNumberTypeFloat32);
-        for (auto inner_tensor : tensorlist->tensors()) {
-          MS_ASSERT(inner_tensor != nullptr);
-          auto origin_tensor_data = origin_input_data_.at(inner_tensor);
-          inner_tensor->FreeData();
-          MS_ASSERT(origin_tensor_data->data_ != nullptr);
-          inner_tensor->set_data(origin_tensor_data->data_);
-          inner_tensor->set_own_data(origin_tensor_data->own_data_);
-          inner_tensor->set_data_type(kNumberTypeFloat32);
-          origin_tensor_data->data_ = nullptr;
-          tensor_count++;
-        }
-      }
-    }
-  }
-  this->FreeOriginInputData();
-  return RET_OK;
-#else
-  return RET_OK;
-#endif
-}
-#endif
 }  // namespace mindspore::kernel
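The deleted block is the old subgraph-level fp16 pipeline: PreProcess stashed each fp32 input buffer in a DataStore, malloc'd an fp16 buffer and converted into it, and PostProcess converted outputs back and restored the stashed originals. With casting moved to the actor boundary (CastInputData above), that per-Execute conversion and the origin_input_data_ bookkeeping become unnecessary. A condensed sketch of the save-and-restore pattern that was removed, where MiniTensor and the toy converter are illustrative stand-ins, not the real lite::Tensor/DataStore API:

```cpp
#include <cstdint>
#include <cstdlib>
#include <map>

struct MiniTensor {
  void *data = nullptr;
  bool is_fp16 = false;
  size_t elem_count = 0;
};

// Plays the role of origin_input_data_: tensor -> stashed original fp32 buffer.
std::map<MiniTensor *, void *> origin_data;

// PreProcess: stash the fp32 buffer, allocate an fp16 one, convert into it.
void PreProcessTensor(MiniTensor *t, uint16_t (*to_fp16)(float)) {
  float *fp32 = static_cast<float *>(t->data);
  uint16_t *fp16 = static_cast<uint16_t *>(malloc(t->elem_count * sizeof(uint16_t)));
  for (size_t i = 0; i < t->elem_count; ++i) fp16[i] = to_fp16(fp32[i]);
  origin_data[t] = t->data;  // keep the original buffer so it can be restored
  t->data = fp16;
  t->is_fp16 = true;
}

// PostProcess: drop the fp16 copy and hand the original fp32 buffer back.
void PostProcessTensor(MiniTensor *t) {
  auto it = origin_data.find(t);
  if (it == origin_data.end()) return;
  free(t->data);
  t->data = it->second;
  t->is_fp16 = false;
  origin_data.erase(it);
}

int main() {
  float buf[4] = {1.f, 2.f, 3.f, 4.f};
  MiniTensor t{buf, false, 4};
  PreProcessTensor(&t, [](float f) { return static_cast<uint16_t>(f); });  // toy converter
  PostProcessTensor(&t);  // t.data points at buf again
  return 0;
}
```

The cost this pattern paid on every subgraph run, two full conversions plus the allocator juggling visible in the deleted code, is what the actor-level cast avoids when adjacent kernels already agree on dtype.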
diff --git a/mindspore/lite/src/sub_graph_kernel.h b/mindspore/lite/src/sub_graph_kernel.h
index 997c07fd477..22d1e3874ac 100644
--- a/mindspore/lite/src/sub_graph_kernel.h
+++ b/mindspore/lite/src/sub_graph_kernel.h
@@ -175,67 +175,7 @@ class CpuFp16SubGraph : public CpuSubGraph {
     return CpuSubGraph::Init();
   }
 
-  int PreProcess();
-  int Execute() override {
-    auto ret = PreProcess();
-    if (lite::RET_OK != ret) {
-      MS_LOG(ERROR) << "run kernel PreProcess failed, name: " << this->name();
-      return ret;
-    }
-    ret = CpuSubGraph::Execute();
-    if (lite::RET_OK != ret) {
-      MS_LOG(ERROR) << "run kernel failed, name: " << this->name();
-      return ret;
-    }
-
-    ret = PostProcess();
-    if (lite::RET_OK != ret) {
-      MS_LOG(ERROR) << "run kernel PreProcess failed, name: " << this->name();
-      return ret;
-    }
-    return lite::RET_OK;
-  }
-  int Execute(const KernelCallBack &before, const KernelCallBack &after) override {
-    auto ret = PreProcess();
-    if (lite::RET_OK != ret) {
-      MS_LOG(ERROR) << "run kernel PreProcess failed, name: " << this->name();
-      return ret;
-    }
-#ifdef Debug
-    for (const auto *node : nodes_) {
-      if (node->type() == schema::PrimitiveType_PartialFusion) {
-        continue;
-      }
-      for (const auto *in_tensor : node->in_tensors()) {
-        if (in_tensor->data_type() == kNumberTypeFloat32) {
-          MS_LOG(ERROR) << "FP16 kernel can not accept float32 input";
-          return lite::RET_ERROR;
-        }
-      }
-    }
-#endif
-    ret = CpuSubGraph::Execute(before, after);
-    if (lite::RET_OK != ret) {
-      MS_LOG(ERROR) << "run kernel failed, name: " << this->name();
-      return ret;
-    }
-
-    ret = PostProcess();
-    if (lite::RET_OK != ret) {
-      MS_LOG(ERROR) << "run kernel PreProcess failed, name: " << this->name();
-      return ret;
-    }
-    return lite::RET_OK;
-  };
-  int PostProcess();
-
- private:
-  void FreeOriginInputData();
-  int Float32TensorToFloat16Tensor(lite::Tensor *tensor);
-  int Float16TensorToFloat32Tensor(lite::Tensor *tensor);
-
- private:
-  std::map<lite::Tensor *, DataStore *> origin_input_data_;
   bool support_fp16_ = false;
 };
 #endif
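What remains of CpuFp16SubGraph after this change is just Init() and the support_fp16_ flag; the two Execute() overrides that wrapped the base call in PreProcess/PostProcess are gone. For context, the removed overrides followed this template-method wrapper shape (illustrative names only; assumes RET_OK == 0 as in lite's error codes):

```cpp
#include <iostream>

struct SubGraph {
  virtual ~SubGraph() = default;
  virtual int Execute() { return 0; }  // run the kernels
};

struct Fp16SubGraph : SubGraph {
  int PreProcess() { return 0; }   // was: cast fp32 inputs to fp16
  int PostProcess() { return 0; }  // was: cast fp16 outputs back to fp32
  int Execute() override {
    if (int ret = PreProcess()) return ret;        // nonzero means failure
    if (int ret = SubGraph::Execute()) return ret;
    return PostProcess();
  }
};

int main() {
  Fp16SubGraph g;
  std::cout << "Execute -> " << g.Execute() << "\n";  // 0 on success
  return 0;
}
```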
diff --git a/mindspore/lite/test/config/models_onnx_fp16.cfg b/mindspore/lite/test/config/models_onnx_fp16.cfg
index 8e61999bd95..4c23284359c 100644
--- a/mindspore/lite/test/config/models_onnx_fp16.cfg
+++ b/mindspore/lite/test/config/models_onnx_fp16.cfg
@@ -84,7 +84,7 @@ Q_face_recognition.onnx 3.2
 ml_video_edit_enhance_update_tmp.onnx 0.5
 Q888_face_recognition.onnx 3.5
 Q888_iris_detect.onnx 0.5
-#ssd_mobilenet_v1_10.onnx;1;1,383,640,3 0.5 to open
+ssd_mobilenet_v1_10.onnx;1;1,383,640,3 0.5
 # The output from a conv in the later part contains many minus values, the following leakyRelu makes them become very
 # close to 0 (-e^-4). The fp16 precision lost a lot in this case and it affects the following computation.
 Harmony_Voiceprint.onnx;1;1,200,40,1 21.5 # small output causes big bias
diff --git a/mindspore/lite/test/config/models_tf_fp16.cfg b/mindspore/lite/test/config/models_tf_fp16.cfg
index 9ea326c57e7..cc9fd6c97d2 100644
--- a/mindspore/lite/test/config/models_tf_fp16.cfg
+++ b/mindspore/lite/test/config/models_tf_fp16.cfg
@@ -65,7 +65,7 @@ siteAI_trans_nonlinear134g.pb;1;1,137 0.5
 siteAI_trans_nonlinear134g_nrz.pb;1;1,182 0.6
 ml_vision_guide_detection2.pb;1;1,320,320,1 1
 # ml_tts_encoder.pb has a round op, which will cause round-off error when the decimal of input value is near 0.5
-#ml_tts_encoder.pb;4;1:1,44:1:1 9 to open
+ml_tts_encoder.pb;4;1:1,44:1:1 9
 # encoder_0111_control_flow.pb is same as ml_tts_encoder_control_flow.pb
 #encoder_0111_control_flow.pb;4;1:1,44:1:1 10
 ml_video_edit_video_segment_gauss_adaptis_part2.pb;2 11
@@ -81,8 +81,8 @@ ml_video_edit_oneclick_adaptis.pb;3 6
 ml_female_model_step6_noiseout.pb;66 2
 ml_male_model_step6_noiseout.pb;66 2.5
 ml_tts_encoder_control_flow.pb;4;1:1,22:1:1 1.5
-#ml_tts_decoder_control_flow.pb;5 1 to open
-#ml_tts_decoder.pb;5 2.5 to open
+ml_tts_decoder_control_flow.pb;5 1
+ml_tts_decoder.pb;5 2.5
 ml_tts_vocoder.pb;66 53
 hiai_transformer_encoder.pb;15 4
 decoder_step_nocumsum_v5.pb;13;1:1,512:1,1429,2:1,127:1,127:1,127:1,127,320:1,80:1,512:1,512:1,512:1,512:1,512 1.2
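The config changes re-enable four previously commented-out fp16 test models (the "to open" markers), presumably because the tensor-list cast fix lets their accuracy checks pass; the TTS decoder models in particular are control-flow graphs that exchange tensor lists. Each entry appears to follow "model[;input_num[;input_shapes]] <max error>"; a rough parser sketch under that assumed format (inferred from these lines, the real benchmark parser may differ):

```cpp
#include <iostream>
#include <sstream>
#include <string>

// One benchmark entry: the model spec (name plus optional ;input_num;shapes)
// and the accuracy tolerance the fp16 run must stay within versus fp32.
struct Fp16TestCase {
  std::string model_spec;
  float tolerance = 0.0f;
};

Fp16TestCase ParseLine(const std::string &line) {
  Fp16TestCase tc;
  std::istringstream iss(line);
  iss >> tc.model_spec >> tc.tolerance;  // whitespace splits spec from tolerance
  return tc;                             // trailing "# ..." comments are ignored
}

int main() {
  auto tc = ParseLine("ml_tts_decoder.pb;5 2.5");
  std::cout << tc.model_spec << " allowed error: " << tc.tolerance << "\n";
  return 0;
}
```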