forked from mindspore-Ecosystem/mindspore
!20010 [MS][LITE] fix bug of control model
Merge pull request !20010 from mengyuanli/bugfix
Commit 85e20508eb
@@ -83,6 +83,16 @@ void LiteOpActor::IsolateInputData(std::vector<std::shared_ptr<LiteOpActor>> *ac
    Tensor *old_tensor = kernel_->in_tensors()[i];

    if (OfflineIsolated(kernels, *kernel_, *old_tensor)) {
      if (old_tensor->data_type() == kNumberTypeFloat16 || old_tensor->data_type() == kNumberTypeFloat32) {
        old_tensor->set_data_type(kernel_->desc().data_type);
      }
      if (old_tensor->data_type() == kObjectTypeTensorType) {
        auto old_tensorlist = reinterpret_cast<TensorList *>(old_tensor);
        if (old_tensorlist->tensors_data_type() == kNumberTypeFloat16 ||
            old_tensorlist->tensors_data_type() == kNumberTypeFloat32) {
          old_tensorlist->set_tensors_data_type(kernel_->desc().data_type);
        }
      }
      continue;
    }
@@ -310,7 +320,31 @@ void LiteOpActor::SetInputData(Tensor *dst_tensor, Tensor *src_tensor) {
}

int LiteOpActor::CastInputData(Tensor *dst, Tensor *src) {
  int ret = RET_OK;
  dst->ResetRefCount();
  if (src->data_type() != kObjectTypeTensorType) {
    ret = CastTensorInputData(dst, src);
  } else {
    ret = CastTensorListInputData(reinterpret_cast<TensorList *>(dst), reinterpret_cast<TensorList *>(src));
  }
  src->DecRefCount();
  return ret;
}

bool LiteOpActor::NeedCastData(Tensor *dst_tensor, Tensor *src_tensor) {
  if (dst_tensor->data_type() != kObjectTypeTensorType && src_tensor->data_type() != kObjectTypeTensorType &&
      dst_tensor->data_type() != src_tensor->data_type()) {
    return true;
  }
  if (dst_tensor->data_type() == kObjectTypeTensorType && src_tensor->data_type() == kObjectTypeTensorType &&
      reinterpret_cast<TensorList *>(dst_tensor)->tensors_data_type() !=
        reinterpret_cast<TensorList *>(src_tensor)->tensors_data_type()) {
    return true;
  }
  return false;
}

int LiteOpActor::CastTensorInputData(Tensor *dst, Tensor *src) {
  dst->MallocData();
#if defined(ENABLE_ARM) && defined(ENABLE_FP16)
  if (dst->shape() != src->shape()) {
@@ -332,13 +366,37 @@ int LiteOpActor::CastInputData(Tensor *dst, Tensor *src) {
    MS_LOG(ERROR) << "not support dst_data_type: " << dst_data_type << " src_data_type: " << src_data_type;
    return RET_NOT_SUPPORT;
  }
  src->DecRefCount();
  return RET_OK;
#endif
  src->DecRefCount();
  return RET_ERROR;
}

int LiteOpActor::CastTensorListInputData(TensorList *dst_tensorlist, TensorList *src_tensorlist) {
  MS_ASSERT(src_tensorlist != nullptr);
  MS_ASSERT(dst_tensorlist != nullptr);
  dst_tensorlist->set_shape(src_tensorlist->shape());
  std::vector<std::vector<int>> tensors_shapes{};
  tensors_shapes.resize(src_tensorlist->tensors().size());
  for (size_t i = 0; i < tensors_shapes.size(); ++i) {
    tensors_shapes[i] = src_tensorlist->tensors()[i]->shape();
  }
  if (src_tensorlist->tensors_data_type() == kNumberTypeFloat16) {
    dst_tensorlist->MallocTensorListData(kNumberTypeFloat32, tensors_shapes);
  }
  if (src_tensorlist->tensors_data_type() == kNumberTypeFloat32) {
    dst_tensorlist->MallocTensorListData(kNumberTypeFloat16, tensors_shapes);
  }
  dst_tensorlist->ResetRefCount();
  dst_tensorlist->set_allocator(src_tensorlist->allocator());

  for (size_t i = 0; i < src_tensorlist->tensors().size(); ++i) {
    auto &src_tensor = src_tensorlist->tensors()[i];
    auto &dst_tensor = dst_tensorlist->tensors()[i];
    CastTensorInputData(dst_tensor, src_tensor);
  }
  return RET_OK;
}

void LiteOpActor::SetInputShape() {
  for (size_t i = 0; i < inputs_data_.size(); ++i) {
    auto &input_tensor = kernel_->in_tensors()[i];
@@ -377,8 +435,7 @@ int LiteOpActor::InitInputData() {
      continue;
    }

    if (src_tensor->data_type() != dst_tensor->data_type()) {
      /* fp16 & fp32 transfer */
      if (NeedCastData(dst_tensor, src_tensor)) {
        CastInputData(dst_tensor, src_tensor);
        continue;
      }
@@ -99,6 +99,9 @@ class LiteOpActor : public OpActor<lite::Tensor> {
  void MoveInputData(Tensor *dst_tensor, Tensor *src_tensor);
  void SetInputData(Tensor *dst_tensor, Tensor *src_tensor);
  int CastInputData(Tensor *dst_tensor, Tensor *src_tensor);
  bool NeedCastData(Tensor *dst_tensor, Tensor *src_tensor);
  int CastTensorInputData(Tensor *dst_tensor, Tensor *src_tensor);
  int CastTensorListInputData(TensorList *dst_tensor, TensorList *src_tensor);

 private:
  kernel::LiteKernel *partial_node_ = nullptr;
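As context for reviewers, the cast decision introduced above can be exercised on its own. The block below is a minimal, self-contained C++17 sketch that mirrors the NeedCastData rule; the simplified Tensor and TensorList types are illustrative stand-ins, not the MindSpore Lite classes.

#include <iostream>

// Illustrative stand-ins for lite::Tensor / lite::TensorList (not the real classes).
enum TypeId { kNumberTypeFloat32, kNumberTypeFloat16, kObjectTypeTensorType };

struct Tensor {
  TypeId data_type;
};

struct TensorList : Tensor {
  TypeId tensors_data_type;  // element type shared by the tensors stored in the list
};

// Same decision rule as LiteOpActor::NeedCastData in the hunk above.
bool NeedCastData(const Tensor *dst, const Tensor *src) {
  if (dst->data_type != kObjectTypeTensorType && src->data_type != kObjectTypeTensorType &&
      dst->data_type != src->data_type) {
    return true;  // plain tensors with different element types
  }
  if (dst->data_type == kObjectTypeTensorType && src->data_type == kObjectTypeTensorType &&
      static_cast<const TensorList *>(dst)->tensors_data_type !=
        static_cast<const TensorList *>(src)->tensors_data_type) {
    return true;  // tensor lists whose element types differ
  }
  return false;
}

int main() {
  Tensor fp32{kNumberTypeFloat32};
  Tensor fp16{kNumberTypeFloat16};
  TensorList list_fp32{{kObjectTypeTensorType}, kNumberTypeFloat32};
  TensorList list_fp16{{kObjectTypeTensorType}, kNumberTypeFloat16};
  std::cout << std::boolalpha
            << NeedCastData(&fp16, &fp32) << "\n"             // true: fp32 -> fp16 needs a cast
            << NeedCastData(&fp32, &fp32) << "\n"             // false: same element type
            << NeedCastData(&list_fp16, &list_fp32) << "\n";  // true: the lists' element types differ
  return 0;
}

In the commit itself this check gates CastInputData in LiteOpActor::InitInputData, so the fp16/fp32 copy-and-convert path only runs when the element types actually mismatch.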
@@ -240,228 +240,4 @@ int CpuSubGraph::Execute(const KernelCallBack &before, const KernelCallBack &after) {
  }
  return RET_OK;
}
#if defined(ENABLE_ARM) && defined(ENABLE_FP16)
void CpuFp16SubGraph::FreeOriginInputData() {
  for (auto &iter : this->origin_input_data_) {
    auto *data_store = iter.second;
    if (data_store == nullptr) {
      continue;
    }
    // free data in data_store
    if (data_store->data_ != nullptr) {
      if (data_store->allocator_ == nullptr) {
        free(data_store->data_);
      } else {
        data_store->allocator_->Free(data_store->data_);
      }
    }
    // free data_store
    if (this->Context()->allocator != nullptr) {
      this->Context()->allocator->Free(data_store);
    } else {
      free(data_store);
    }
    data_store = nullptr;
  }
  this->origin_input_data_.clear();
}

int CpuFp16SubGraph::Float32TensorToFloat16Tensor(lite::Tensor *tensor) {
  MS_ASSERT(tensor != nullptr);
  auto float32_data = tensor->data_c();
  auto own_data = tensor->own_data();
  tensor->set_data_type(TypeId::kNumberTypeFloat16);
  if (float32_data == nullptr) {
    // the input data may be nullptr of merge.
    MS_LOG(INFO) << "tensor data is null.";
    return lite::RET_OK;
  }
  tensor->set_data(nullptr);
  auto ret = tensor->MallocData();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "malloc data failed";
    return RET_ERROR;
  }
  MS_ASSERT(tensor->data_c() != nullptr);
  Float32ToFloat16_fp16_handler(float32_data, tensor->data_c(), tensor->ElementsNum(), support_fp16_);
  if (tensor->allocator() != nullptr) {
    tensor->allocator()->SetRefCount(tensor->data_c(), tensor->allocator()->RefCount(float32_data));
  }
  auto *data_store =
    DataStore::CreateDataStore(float32_data, own_data, tensor->allocator().get(), this->Context()->allocator.get());
  if (data_store == nullptr) {
    MS_LOG(ERROR) << "Create DataStore failed";
    return RET_ERROR;
  }
  origin_input_data_[tensor] = data_store;
  return RET_OK;
}

int CpuFp16SubGraph::Float16TensorToFloat32Tensor(lite::Tensor *tensor) {
  auto float16_data = tensor->data_c();
  if (float16_data == nullptr) {
    MS_LOG(ERROR) << "tensor data is null.";
    return lite::RET_NULL_PTR;
  }
  tensor->set_data(nullptr);
  tensor->set_data_type(TypeId::kNumberTypeFloat32);
  auto ret = tensor->MallocData();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "malloc data failed";
    if (this->Context() != nullptr && this->Context()->allocator != nullptr) {
      this->Context()->allocator->Free(float16_data);
    } else {
      free(float16_data);
    }
    return RET_ERROR;
  }
  MS_ASSERT(tensor->data_c() != nullptr);
  Float16ToFloat32_fp16_handler(float16_data, tensor->data_c(), tensor->ElementsNum(), support_fp16_);
  if (tensor->allocator() != nullptr) {
    tensor->allocator()->SetRefCount(tensor->data_c(), tensor->allocator()->RefCount(float16_data));
    tensor->allocator()->Free(float16_data);
  } else {
    free(float16_data);
  }
  return RET_OK;
}

int CpuFp16SubGraph::PreProcess() {
#ifdef ENABLE_FP16
  int ret;
  for (auto tensor : this->in_tensors()) {
    MS_ASSERT(tensor != nullptr);
    auto real_tensor = tensor;
    if (tensor->root_tensor() != nullptr) {
      real_tensor = tensor->root_tensor();
      if (tensor->data_type() == kNumberTypeFloat32) {
        tensor->set_data_type(kNumberTypeFloat16);
      } else if (tensor->data_type() == kObjectTypeTensorType) {
        auto tensorlist = reinterpret_cast<lite::TensorList *>(tensor);
        if (tensorlist->tensors_data_type() == kNumberTypeFloat32) {
          tensorlist->set_tensors_data_type(kNumberTypeFloat16);
        }
      }
    }
    if (real_tensor->data_type() == kNumberTypeFloat32) {
      ret = Float32TensorToFloat16Tensor(real_tensor);
      if (ret != RET_OK) {
        MS_LOG(ERROR) << "Float32TensorToFloat16Tensor failed.";
        this->FreeOriginInputData();
        return ret;
      }
    } else if (real_tensor->data_type() == kObjectTypeTensorType) {
      auto tensorlist = reinterpret_cast<lite::TensorList *>(real_tensor);
      if (tensorlist->tensors_data_type() == kNumberTypeFloat32) {
        tensorlist->set_tensors_data_type(kNumberTypeFloat16);
        for (auto inner_tensor : tensorlist->tensors()) {
          ret = Float32TensorToFloat16Tensor(inner_tensor);
          if (ret != RET_OK) {
            MS_LOG(ERROR) << "Float32TensorToFloat16Tensor failed.";
            this->FreeOriginInputData();
            return ret;
          }
        }
      }
    }
  }
  for (auto kernel : this->nodes_) {
    for (auto tensor : kernel->out_tensors()) {
      if (kernel->type() == schema::PrimitiveType_Cast) {
        continue;
      }
      if (tensor->data_type() == kNumberTypeFloat32) {
        tensor->set_data_type(kNumberTypeFloat16);
      } else if (tensor->data_type() == kObjectTypeTensorType) {
        auto tensorlist = reinterpret_cast<lite::TensorList *>(tensor);
        if (tensorlist->tensors_data_type() == kNumberTypeFloat32) {
          tensorlist->set_tensors_data_type(kNumberTypeFloat16);
        }
      }
    }
  }
  return RET_OK;
#else
  return RET_OK;
#endif
}

int CpuFp16SubGraph::PostProcess() {
#ifdef ENABLE_FP16
  int ret;
  for (auto tensor : this->out_tensors()) {
    MS_ASSERT(tensor != nullptr);
    if (tensor->data_type() == kNumberTypeFloat16) {
      ret = Float16TensorToFloat32Tensor(tensor);
      if (ret != RET_OK) {
        MS_LOG(ERROR) << "Float16TensorToFloat32Tensor failed.";
        return ret;
      }
    } else if (tensor->data_type() == kObjectTypeTensorType) {
      auto tensorlist = reinterpret_cast<lite::TensorList *>(tensor);
      if (tensorlist->tensors_data_type() == kNumberTypeFloat16) {
        tensorlist->set_tensors_data_type(kNumberTypeFloat32);
        for (auto inner_tensor : tensorlist->tensors()) {
          ret = Float16TensorToFloat32Tensor(inner_tensor);
          if (ret != RET_OK) {
            MS_LOG(ERROR) << "Float32TensorToFloat16Tensor failed.";
            return ret;
          }
        }
      }
    }
  }

  int tensor_count = 0;
  auto in_tensors = this->in_tensors();
  for (size_t i = 0; i < in_tensors.size(); i++) {
    auto tensor = in_tensors.at(i);
    MS_ASSERT(tensor != nullptr);
    auto real_tensor = tensor;
    if (tensor->root_tensor() != nullptr) {
      real_tensor = tensor->root_tensor();
      if (tensor->data_type() == kNumberTypeFloat16) {
        tensor->set_data_type(kNumberTypeFloat32);
      } else if (tensor->data_type() == kObjectTypeTensorType) {
        auto tensorlist = reinterpret_cast<lite::TensorList *>(tensor);
        if (tensorlist->tensors_data_type() == kNumberTypeFloat16) {
          tensorlist->set_tensors_data_type(kNumberTypeFloat32);
        }
      }
    }
    if (real_tensor->data_type() == kNumberTypeFloat16 &&
        origin_input_data_.find(real_tensor) != origin_input_data_.end()) {
      auto origin_tensor_data = origin_input_data_.at(real_tensor);
      real_tensor->FreeData();
      MS_ASSERT(origin_tensor_data->data_ != nullptr);
      real_tensor->set_data(origin_tensor_data->data_);
      real_tensor->set_own_data(origin_tensor_data->own_data_);
      real_tensor->set_data_type(kNumberTypeFloat32);
      origin_tensor_data->data_ = nullptr;
      tensor_count++;
    } else if (real_tensor->data_type() == kObjectTypeTensorType) {
      auto tensorlist = reinterpret_cast<lite::TensorList *>(real_tensor);
      if (tensorlist->tensors_data_type() == kNumberTypeFloat16) {
        tensorlist->set_tensors_data_type(kNumberTypeFloat32);
        for (auto inner_tensor : tensorlist->tensors()) {
          MS_ASSERT(inner_tensor != nullptr);
          auto origin_tensor_data = origin_input_data_.at(inner_tensor);
          inner_tensor->FreeData();
          MS_ASSERT(origin_tensor_data->data_ != nullptr);
          inner_tensor->set_data(origin_tensor_data->data_);
          inner_tensor->set_own_data(origin_tensor_data->own_data_);
          inner_tensor->set_data_type(kNumberTypeFloat32);
          origin_tensor_data->data_ = nullptr;
          tensor_count++;
        }
      }
    }
  }
  this->FreeOriginInputData();
  return RET_OK;
#else
  return RET_OK;
#endif
}
#endif
}  // namespace mindspore::kernel
@@ -175,67 +175,7 @@ class CpuFp16SubGraph : public CpuSubGraph {
    return CpuSubGraph::Init();
  }

  int PreProcess();
  int Execute() override {
    auto ret = PreProcess();
    if (lite::RET_OK != ret) {
      MS_LOG(ERROR) << "run kernel PreProcess failed, name: " << this->name();
      return ret;
    }
    ret = CpuSubGraph::Execute();
    if (lite::RET_OK != ret) {
      MS_LOG(ERROR) << "run kernel failed, name: " << this->name();
      return ret;
    }

    ret = PostProcess();
    if (lite::RET_OK != ret) {
      MS_LOG(ERROR) << "run kernel PreProcess failed, name: " << this->name();
      return ret;
    }
    return lite::RET_OK;
  }
  int Execute(const KernelCallBack &before, const KernelCallBack &after) override {
    auto ret = PreProcess();
    if (lite::RET_OK != ret) {
      MS_LOG(ERROR) << "run kernel PreProcess failed, name: " << this->name();
      return ret;
    }
#ifdef Debug
    for (const auto *node : nodes_) {
      if (node->type() == schema::PrimitiveType_PartialFusion) {
        continue;
      }
      for (const auto *in_tensor : node->in_tensors()) {
        if (in_tensor->data_type() == kNumberTypeFloat32) {
          MS_LOG(ERROR) << "FP16 kernel can not accept float32 input";
          return lite::RET_ERROR;
        }
      }
    }
#endif
    ret = CpuSubGraph::Execute(before, after);
    if (lite::RET_OK != ret) {
      MS_LOG(ERROR) << "run kernel failed, name: " << this->name();
      return ret;
    }

    ret = PostProcess();
    if (lite::RET_OK != ret) {
      MS_LOG(ERROR) << "run kernel PreProcess failed, name: " << this->name();
      return ret;
    }
    return lite::RET_OK;
  };
  int PostProcess();

 private:
  void FreeOriginInputData();
  int Float32TensorToFloat16Tensor(lite::Tensor *tensor);
  int Float16TensorToFloat32Tensor(lite::Tensor *tensor);

 private:
  std::map<lite::Tensor *, DataStore *> origin_input_data_;
  bool support_fp16_ = false;
};
#endif
@@ -84,7 +84,7 @@ Q_face_recognition.onnx 3.2
ml_video_edit_enhance_update_tmp.onnx 0.5
Q888_face_recognition.onnx 3.5
Q888_iris_detect.onnx 0.5
#ssd_mobilenet_v1_10.onnx;1;1,383,640,3 0.5 to open
ssd_mobilenet_v1_10.onnx;1;1,383,640,3 0.5
# The output from a conv in the later part of the model contains many negative values; the following leakyRelu makes
# them very close to 0 (about -e^-4). fp16 loses a lot of precision in this range, and it affects the following computation.
Harmony_Voiceprint.onnx;1;1,200,40,1 21.5 # small output causes big bias
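To make the comment above concrete, here is a small stand-alone C++ sketch. It is an illustration only, not part of the commit, and it assumes a compiler with the _Float16 extension (e.g. recent GCC or Clang); it round-trips a value of roughly that magnitude through half precision and prints the error.

#include <cmath>
#include <cstdio>

int main() {
  // A value near -e^-4, the range the comment says the leakyRelu outputs fall into.
  float x = -std::exp(-4.0f);                                  // about -0.0183
  float back = static_cast<float>(static_cast<_Float16>(x));   // fp32 -> fp16 -> fp32
  std::printf("fp32 value:      %.8f\n", x);
  std::printf("fp16 round-trip: %.8f\n", back);
  std::printf("absolute error:  %.2e\n", std::fabs(back - x));
  return 0;
}

With only 10 mantissa bits, fp16 values around 1.8e-2 are spaced roughly 1.5e-5 apart, so small activations in this range lose several of the significant digits fp32 would keep, which is why the Harmony_Voiceprint entry carries a large tolerance.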
@@ -65,7 +65,7 @@ siteAI_trans_nonlinear134g.pb;1;1,137 0.5
siteAI_trans_nonlinear134g_nrz.pb;1;1,182 0.6
ml_vision_guide_detection2.pb;1;1,320,320,1 1
# ml_tts_encoder.pb has a round op, which causes round-off error when the decimal part of an input value is near 0.5
#ml_tts_encoder.pb;4;1:1,44:1:1 9 to open
ml_tts_encoder.pb;4;1:1,44:1:1 9
# encoder_0111_control_flow.pb is the same as ml_tts_encoder_control_flow.pb
#encoder_0111_control_flow.pb;4;1:1,44:1:1 10
ml_video_edit_video_segment_gauss_adaptis_part2.pb;2 11
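The round-op remark above can also be demonstrated in a few lines. This sketch is illustrative only and is not taken from the test suite (again assuming _Float16 support): a value whose fractional part is just below 0.5 in fp32 can land exactly on .5 after fp16 quantization, so a subsequent Round op flips to the next integer.

#include <cmath>
#include <cstdio>

int main() {
  float x = 2.49995f;                                            // fractional part just below 0.5
  float rounded_fp32 = std::round(x);                            // 2
  float as_fp16 = static_cast<float>(static_cast<_Float16>(x));  // quantizes to 2.5
  float rounded_fp16 = std::round(as_fp16);                      // 3
  std::printf("round(fp32) = %g, round(fp16) = %g\n", rounded_fp32, rounded_fp16);
  return 0;
}

A sub-ulp difference before rounding thus becomes a full off-by-one after it, which is why the ml_tts_encoder.pb entry is given a wide tolerance.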
@@ -81,8 +81,8 @@ ml_video_edit_oneclick_adaptis.pb;3 6
ml_female_model_step6_noiseout.pb;66 2
ml_male_model_step6_noiseout.pb;66 2.5
ml_tts_encoder_control_flow.pb;4;1:1,22:1:1 1.5
#ml_tts_decoder_control_flow.pb;5 1 to open
#ml_tts_decoder.pb;5 2.5 to open
ml_tts_decoder_control_flow.pb;5 1
ml_tts_decoder.pb;5 2.5
ml_tts_vocoder.pb;66 53
hiai_transformer_encoder.pb;15 4
decoder_step_nocumsum_v5.pb;13;1:1,512:1,1429,2:1,127:1,127:1,127:1,127,320:1,80:1,512:1,512:1,512:1,512:1,512 1.2