post training: delete unused quantization ops

This commit is contained in:
yeyunpeng2020 2021-12-16 17:13:45 +08:00 committed by vpoul
parent 44b958c92d
commit 546b448441
2 changed files with 15 additions and 27 deletions

View File

@@ -1,10 +1,10 @@
-ml_face_mnet 64.6 816040
-ml_face_landmark_2 0.6 466408
-mobilenet.tflite 0.4 25592
-transformer_20200831_encoder_fp32.tflite;36 73.5 54250848
-transformer_20200831_decoder_fp32.tflite;11 15.8 12947024
-ml_face_mnet_image 54.1 816048
-resnet.tflite 0.4 63864
-0916_ct_ddd_culane_dlav0_withSigmoid_noMerge.onnx 48 21963056
-v3plus512_512_op11.onnx 47 6026648
-resnet_image.mindir 9.8 38580896
+ml_face_mnet 58.2 827064
+ml_face_landmark_2 0.8 472136
+mobilenet.tflite 0.4 26040
+transformer_20200831_encoder_fp32.tflite;36 13.4 54319144
+transformer_20200831_decoder_fp32.tflite;11 10.0 12970744
+ml_face_mnet_image 47.9 827072
+resnet.tflite 0.4 69272
+0916_ct_ddd_culane_dlav0_withSigmoid_noMerge.onnx 46.6 22077872
+v3plus512_512_op11.onnx 43.1 6027648
+resnet_image.mindir 7.0 38911216

View File

@@ -538,10 +538,6 @@ void FullQuantQuantizer::InitCpuConfig() {
     prim::kPrimConv2DFusion,
     prim::kPrimFullConnection,
     prim::kPrimMatMul,
-    prim::kPrimMaxPoolFusion,
-    prim::kPrimAvgPoolFusion,
-    prim::kPrimLayerNormFusion,
-    // prim::kPrimConv2dTransposeFusion, // Precision needs to be optimized.
     // Memory
     prim::kPrimReshape,
     prim::kPrimTranspose,
@@ -554,14 +550,6 @@ void FullQuantQuantizer::InitCpuConfig() {
     prim::kPrimGather,
     prim::kPrimReduceFusion,
     prim::kPrimAffine,
-    // Arithmetic
-    prim::kPrimAddFusion,
-    prim::kPrimActivation,
-    prim::kPrimMulFusion,
-    prim::kPrimDivFusion,
-    prim::kPrimSqrt,
-    prim::kPrimPowFusion,
-    prim::kPrimEltwise,
   };
skip_check_dtype_ops_ = {prim::kPrimTupleGetItem, prim::kPrimShape};
per_channel_ops_ = {prim::kPrimConv2DFusion, prim::kPrimConv2dTransposeFusion, prim::kPrimMatMul,
@@ -587,19 +575,19 @@ void FullQuantQuantizer::InitKirinConfig() {
 void FullQuantQuantizer::InitQMinMax() {
   MS_ASSERT(activation_quant_data_type_ == kNumberTypeInt8 || activation_quant_data_type_ == kNumberTypeUInt8);
   if (activation_quant_data_type_ == kNumberTypeInt8) {
-    activation_q_min_ = QuantMin(this->bit_num_, false, true);  // -127
-    activation_q_max_ = QuantMax(this->bit_num_, false);  // 127
+    activation_q_min_ = QuantMin(this->bit_num_, false, activation_symmetry_);  // -128
+    activation_q_max_ = QuantMax(this->bit_num_, false);  // 127
   } else if (activation_quant_data_type_ == kNumberTypeUInt8) {
     activation_q_min_ = QuantMin(this->bit_num_, true, false);  // 0
     activation_q_max_ = QuantMax(this->bit_num_, true);  // 255
   }
   MS_ASSERT(weight_data_type_ == kNumberTypeInt8 || weight_data_type_ == kNumberTypeUInt8);
   if (weight_data_type_ == kNumberTypeInt8) {
-    weight_q_max_ = QuantMax(this->bit_num_, false);  // 127
-    weight_q_min_ = QuantMin(this->bit_num_, false, true);  // -127
+    weight_q_min_ = QuantMin(this->bit_num_, false, weight_symmetry_);  // -127
+    weight_q_max_ = QuantMax(this->bit_num_, false);  // 127
   } else if (activation_quant_data_type_ == kNumberTypeUInt8) {
-    weight_q_max_ = QuantMax(this->bit_num_, true);  // 255
     weight_q_min_ = QuantMin(this->bit_num_, true, false);  // 0
+    weight_q_max_ = QuantMax(this->bit_num_, true);  // 255
   }
 }