diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/matmul_base_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/matmul_base_int8.cc
index 36474a0218b..2a65a81c458 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/matmul_base_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/matmul_base_int8.cc
@@ -116,7 +116,7 @@ int MatmulBaseInt8CPUKernel::Arm64SdotImpl(int task_id) {
   int32_t *cur_zp = filter_per_channel_ ? quant_param_->filter_zp_ + cur_stride : quant_param_->filter_zp_;
 
   MatmulInt8DpOpt(pack_a_ptr_, batch_b_ptr_ + cur_stride * param_->deep_align_, batch_c_ptr_ + cur_stride, param_->row_,
-                  cur_oc, param_->deep_align_, input_sums_, weight_bias_sums_ + cur_stride, quant_param_->out_act_min_,
+                  cur_oc, param_->deep_align_, input_sums_, batch_sums_ + cur_stride, quant_param_->out_act_min_,
                   quant_param_->out_act_max_, quant_param_->output_.zp_, cur_mul, cur_left, cur_right, param_->col_,
                   filter_per_channel_, cur_zp);
 
@@ -140,7 +140,7 @@ int MatmulBaseInt8CPUKernel::RunImpl(int task_id) {
   int32_t *cur_zp = filter_per_channel_ ? quant_param_->filter_zp_ + cur_stride : quant_param_->filter_zp_;
 
   MatmulInt8Opt(pack_a_ptr_, batch_b_ptr_ + cur_stride * param_->deep_align_, batch_c_ptr_ + cur_stride, param_->row_,
-                cur_oc, param_->deep_align_, input_sums_, weight_bias_sums_ + cur_stride, quant_param_->out_act_min_,
+                cur_oc, param_->deep_align_, input_sums_, batch_sums_ + cur_stride, quant_param_->out_act_min_,
                 quant_param_->out_act_max_, quant_param_->output_.zp_, cur_mul, cur_left, cur_right, param_->col_,
                 filter_per_channel_, cur_zp);
 
diff --git a/mindspore/lite/test/config/models_posttraining.cfg b/mindspore/lite/test/config/models_posttraining.cfg
index d95eda099ac..98dc7357b24 100644
--- a/mindspore/lite/test/config/models_posttraining.cfg
+++ b/mindspore/lite/test/config/models_posttraining.cfg
@@ -1,6 +1,6 @@
-ml_face_mnet 105
-ml_face_landmark_2 2
-mobilenet.tflite 0.5
-transformer_20200831_encoder_fp32.tflite;36 82.7
-transformer_20200831_decoder_fp32.tflite;11 18.3
-ml_face_mnet_image 105
+ml_face_mnet 64.6
+ml_face_landmark_2 0.6
+mobilenet.tflite 0.4
+transformer_20200831_encoder_fp32.tflite;36 73.5
+transformer_20200831_decoder_fp32.tflite;11 15.8
+ml_face_mnet_image 54.1
diff --git a/mindspore/lite/test/config/ut_arm64.cfg b/mindspore/lite/test/config/ut_arm64.cfg
index e9868a888f5..f14608cb8b1 100644
--- a/mindspore/lite/test/config/ut_arm64.cfg
+++ b/mindspore/lite/test/config/ut_arm64.cfg
@@ -147,4 +147,4 @@ MindrtRuntimeTest.RuntimeFp16
 MixDataTypeTest.mix1
 SchedulerTest.TestScheduleInt32OpToFp16Subgraph
 TestGPURegistryCustomOp.TestGPUCustomAdd
-
+QuantCastInt8Test.*
diff --git a/mindspore/lite/tools/converter/quantizer/data_distribution.cc b/mindspore/lite/tools/converter/quantizer/data_distribution.cc
index 01614cbd0f6..81bde577606 100644
--- a/mindspore/lite/tools/converter/quantizer/data_distribution.cc
+++ b/mindspore/lite/tools/converter/quantizer/data_distribution.cc
@@ -176,14 +176,29 @@ double DataDistribution::CalculateScale(float min_value, float max_value) {
     min_value = -abs_max;
     max_value = abs_max;
   }
-  this->encode_min_ = min_value;
-  this->encode_max_ = max_value;
-  // Optimize Handle 0.
+
+  encode_min_ = min_value;
+  encode_max_ = max_value;
+  // Handling 0:
+  // Inputs are strictly positive, so set the real min to 0. e.g. input range = [1.0, 5.0] -> [0.0, 5.0]
+  if (encode_min_ > 0.0f) {
+    MS_LOG(DEBUG) << "real_min " << encode_min_ << " is bigger than 0, set to 0; this may cause low precision";
+    encode_min_ = 0.0f;
+  }
+  // Inputs are strictly negative, so set the real max to 0. e.g. input range = [-5.0, -1.0] -> [-5.0, 0.0]
+  if (encode_max_ < 0.0f) {
+    MS_LOG(DEBUG) << "real_max " << encode_max_ << " is smaller than 0, set to 0; this may cause low precision";
+    encode_max_ = 0.0f;
+  }
+  // Inputs are both negative and positive: real_min and real_max are slightly shifted to make the floating-point
+  // zero exactly representable. e.g. input range = [-5.1, 5.1] -> [-5.12, 5.08]
   MS_ASSERT(quant_max_ - quant_min_ > 0);
   return (encode_max_ - encode_min_) / (quant_max_ - quant_min_);
 }
 
-double DataDistribution::CalculateKLScale() { return CalculateScale(this->best_T_, this->real_max_); }
+double DataDistribution::CalculateKLScale() {
+  return CalculateScale(-std::abs(this->best_T_), std::abs(this->best_T_));
+}
 
 double DataDistribution::GetScale() {
   switch (this->activation_quant_method_) {
diff --git a/mindspore/lite/tools/converter/quantizer/data_distribution.h b/mindspore/lite/tools/converter/quantizer/data_distribution.h
index f7b67115eac..0fd685b7b3d 100644
--- a/mindspore/lite/tools/converter/quantizer/data_distribution.h
+++ b/mindspore/lite/tools/converter/quantizer/data_distribution.h
@@ -36,7 +36,11 @@ class DataDistribution {
     this->quant_max_ = quant_max;
     this->quant_min_ = quant_min;
     std::fill(histogram_.begin(), histogram_.end(), 1.0e-7);
-    symmetry_ = symmetry;
+    if (this->activation_quant_method_ == KL) {
+      symmetry_ = true;
+    } else {
+      symmetry_ = symmetry;
+    }
   }
 
   int RecordMaxMinValueArray(const std::vector<float> &data);
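The rewritten CalculateScale guarantees that floating-point 0.0 is exactly representable on the integer grid: strictly one-sided ranges are extended to include 0, and mixed ranges shift slightly once the zero point is rounded to an integer. Below is a minimal standalone sketch of that nudging, assuming the usual asymmetric scheme x = scale * (q - zp); the names NudgeRange and QuantRange are illustrative, not the converter's API.

// A minimal sketch of the range nudging; NudgeRange/QuantRange are
// illustrative names, not the converter's API.
#include <algorithm>
#include <cmath>
#include <cstdio>

struct QuantRange {
  double scale;
  int zero_point;
  float encode_min;  // value that quant_min decodes to after nudging
  float encode_max;  // value that quant_max decodes to after nudging
};

QuantRange NudgeRange(float real_min, float real_max, int quant_min = -128, int quant_max = 127) {
  // Extend one-sided ranges so that 0.0 lies inside the encoding range.
  float encode_min = std::min(real_min, 0.0f);
  float encode_max = std::max(real_max, 0.0f);
  double scale = (encode_max - encode_min) / static_cast<double>(quant_max - quant_min);
  if (scale == 0.0) {
    return {1.0, 0, 0.0f, 0.0f};  // degenerate all-zero range; handled separately in real code
  }
  // Round the ideal zero point onto the integer grid (round half up).
  int zp = static_cast<int>(std::floor(quant_min - encode_min / scale + 0.5));
  zp = std::min(std::max(zp, quant_min), quant_max);
  // With zp fixed to an integer, the representable range shifts slightly
  // around the real one, but 0.0 now decodes exactly from q == zp.
  QuantRange r;
  r.scale = scale;
  r.zero_point = zp;
  r.encode_min = static_cast<float>((quant_min - zp) * scale);
  r.encode_max = static_cast<float>((quant_max - zp) * scale);
  return r;
}

int main() {
  const QuantRange r = NudgeRange(-5.1f, 5.1f);
  std::printf("scale=%g zp=%d range=[%g, %g]\n", r.scale, r.zero_point, r.encode_min, r.encode_max);
  return 0;
}

Worked through the comment's example [-5.1, 5.1] on a [-128, 127] grid: scale = 10.2 / 255 = 0.04, the ideal zero point lands on the tie at -0.5, and rounding it up to 0 decodes the grid to [-5.12, 5.08], the shifted range quoted in the comment (rounding down to -1 would give [-5.08, 5.12] instead).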
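CalculateKLScale previously paired the KL threshold best_T_ with the unrelated real_max_, producing an asymmetric range from what is inherently a symmetric criterion; it now spans [-|T|, |T|], matching the header change above that forces symmetry_ = true for KL. A toy sketch of the resulting scale under that symmetric range; KlSymmetricScale is a hypothetical name, not the converter's API.

// Toy illustration of the symmetric KL scale; KlSymmetricScale is a
// hypothetical name, not the converter's API.
#include <cmath>
#include <cstdio>

double KlSymmetricScale(double best_t, int quant_min = -128, int quant_max = 127) {
  const double t = std::fabs(best_t);
  // Encoding range is [-|T|, |T|], i.e. 2|T| spread over the integer grid.
  return (t - (-t)) / static_cast<double>(quant_max - quant_min);
}

int main() {
  // e.g. a KL-optimal threshold of 6.375 gives 12.75 / 255 = 0.05.
  std::printf("scale=%g\n", KlSymmetricScale(6.375));
  return 0;
}

Because the range is symmetric about zero, 0.0 is representable by construction, so the one-sided fix-ups in CalculateScale never fire for KL calibration.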
diff --git a/mindspore/lite/tools/converter/quantizer/debug_info_manager.cc b/mindspore/lite/tools/converter/quantizer/debug_info_manager.cc
index 709919e732c..717118c373c 100644
--- a/mindspore/lite/tools/converter/quantizer/debug_info_manager.cc
+++ b/mindspore/lite/tools/converter/quantizer/debug_info_manager.cc
@@ -471,6 +471,16 @@ KernelCallBack DebugInfoManager::GetAfterCallBack(const std::map<std::string, OpParameter *> &op_parameters,
       // all outputs are same dtype.
       for (size_t i = 0; i < outputs.size(); ++i) {
         auto tensor = outputs.at(i);
+        if (!tensor->quant_params().empty()) {
+          QuantParamExtend quant_param;
+          quant_param.node_name = call_param.node_name;
+          quant_param.node_type = call_param.node_type;
+          quant_param.quant_params = tensor->quant_params();
+          quant_param.tensor_name = tensor->tensor_name();
+          quant_param.element_num = tensor->ElementsNum();
+          quant_param.dims = tensor->shape();
+          quant_params_.push_back(quant_param);
+        }
         AddOriginInfo(call_param, op_parameters.at(call_param.node_name), false, i,
                       static_cast<mindspore::lite::Tensor *>(tensor));
       }
diff --git a/mindspore/lite/tools/converter/quantizer/full_quant_quantizer.cc b/mindspore/lite/tools/converter/quantizer/full_quant_quantizer.cc
index ed827440032..d8230016434 100644
--- a/mindspore/lite/tools/converter/quantizer/full_quant_quantizer.cc
+++ b/mindspore/lite/tools/converter/quantizer/full_quant_quantizer.cc
@@ -495,8 +495,8 @@ int FullQuantQuantizer::UpdateDivergeInterval() {
 
 void FullQuantQuantizer::InitCpuConfig() {
   this->target_data_type_ = kNumberTypeInt8;
-  activation_symmetry_ = true;
-  weight_symmetry_ = false;
+  activation_symmetry_ = false;
+  weight_symmetry_ = true;
 }
 
 void FullQuantQuantizer::InitQMinMax() {
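The InitCpuConfig swap flips the full-quantization defaults on CPU: activations move to asymmetric quantization (calibrated activation ranges are often one-sided, e.g. after ReLU, where a symmetric range would waste half the codes), while weights move to symmetric (a zero point of 0 spares the int8 matmul kernels any weight-side zero-point correction). A self-contained sketch of what the two parameterizations do to the same one-sided range; this illustrates the general scheme, not the converter's code.

// Illustrative comparison of the two parameterizations on the same
// one-sided range; plain int8 math, not the converter's code.
#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  const double real_min = 0.0, real_max = 6.0;  // e.g. a post-ReLU activation
  // Symmetric (now the weight default): zero point fixed at 0, range
  // widened to [-max|x|, max|x|], so half the codes go unused here.
  const double abs_max = std::max(std::fabs(real_min), std::fabs(real_max));
  const double sym_scale = abs_max / 127.0;
  // Asymmetric (now the activation default): all 256 codes cover
  // [real_min, real_max] and the zero point shifts to compensate.
  const double asym_scale = (real_max - real_min) / 255.0;
  const int zp = static_cast<int>(std::lround(-128 - real_min / asym_scale));
  std::printf("symmetric:  scale=%g zp=0\n", sym_scale);
  std::printf("asymmetric: scale=%g zp=%d\n", asym_scale, zp);
  return 0;
}

For the post-ReLU range [0, 6] this prints a symmetric scale of about 0.0472 against an asymmetric one of about 0.0235: the asymmetric default roughly buys one extra bit of activation resolution.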
diff --git a/mindspore/lite/tools/converter/quantizer/quant_strategy.cc b/mindspore/lite/tools/converter/quantizer/quant_strategy.cc
index 245293d1a7b..c1f3c58cea2 100644
--- a/mindspore/lite/tools/converter/quantizer/quant_strategy.cc
+++ b/mindspore/lite/tools/converter/quantizer/quant_strategy.cc
@@ -123,9 +123,6 @@ bool QuantStrategy::CanOpFullQuantized(const AnfNodePtr &node) {
 }
 
 bool QuantStrategy::IsSkipOp(const AnfNodePtr &input_node) {
-  if (skip_node_.find(input_node->fullname_with_scope()) == skip_node_.end()) {
-    return false;
-  }
-  return true;
+  return skip_node_.find(input_node->fullname_with_scope()) != skip_node_.end();
 }
 }  // namespace mindspore::lite::quant