!32624 add zp align

Merge pull request !32624 from yeyunpeng2020/zp_align
i-robot 2022-04-08 08:02:40 +00:00 committed by Gitee
commit 11fee2698d
14 changed files with 99 additions and 123 deletions

View File

@@ -33,67 +33,64 @@ int CalQuantizationParams(schema::QuantParamT *quant_param, double real_min, dou
                              narrow_range);
 }
-int CalQuantizationParams(schema::QuantParamT *quant_param, double real_min, double real_max, int num_bits,
-                          int quant_min, int quant_max, bool symmetric, bool narrow_range) {
-  CHECK_NULL_RETURN(quant_param);
+void EncodeMinMax(float min_value, float max_value, int quant_min, int quant_max, bool symmetric, float *encode_min,
+                  float *encode_max) {
+  // handle case where encode_min_ == encode_max_
+  float epsilon = 1e-10;
+  if (max_value - min_value < epsilon) {
+    MS_LOG(INFO) << min_value << " - " << max_value;
+  }
+  max_value = std::max(max_value, min_value + epsilon);
   if (symmetric) {
-    auto abs_max = std::max(std::abs(real_min), std::abs(real_max));
-    real_min = -abs_max;
-    real_max = abs_max;
-    narrow_range = true;
+    auto abs_max = std::max(std::fabs(min_value), std::fabs(max_value));
+    *encode_min = -abs_max;
+    *encode_max = abs_max;
+  } else {
+    *encode_min = min_value;
+    *encode_max = max_value;
   }
   // Handling 0
   // Inputs are strictly positive, set the real min to 0. e.g. input range = [1.0, 5.0] -> [0.0, 5.0]
-  if (real_min > 0.0f) {
-    MS_LOG(DEBUG) << "min " << real_min << " is bigger then 0, set to 0, this may course low precision";
-    real_min = 0.0f;
+  if (*encode_min > 0.0f) {
+    MS_LOG(DEBUG) << "min " << *encode_min << " is bigger then 0, set to 0, this may course low precision";
+    *encode_min = 0.0f;
   }
   // Inputs are strictly negative, set the real max to 0. e.g. input range = [-5.0, -1.0] -> [-5.0, 0.0]
-  if (real_max < 0.0f) {
-    MS_LOG(DEBUG) << "real_max " << real_max << " is smaller than 0, set to 0, this may course low precision";
-    real_max = 0.0f;
+  if (*encode_max < 0.0f) {
+    MS_LOG(DEBUG) << "real_max " << *encode_max << " is smaller than 0, set to 0, this may course low precision";
+    *encode_max = 0.0f;
   }
+  auto q_range = quant_max - quant_min;
+  MS_ASSERT(quant_max - quant_min > 0);
   // Inputs are both negative and positive, real_min and real_max are slightly shifted to make the floating point zero
   // exactly representable. e.g. input range = [-5.1, 5.1] -> [-5.12, 5.08]
-  if (real_min > real_max) {
-    MS_LOG(ERROR) << "cal error while min" << real_min << ">" << real_max;
-    return RET_PARAM_INVALID;
-  }
-  if (real_max - real_min <= 0.0f) {
-    if (real_min != 0.0f) {
-      MS_LOG(ERROR) << "min and max should both be zero if they are equal to each other";
-      return RET_ERROR;
-    }
-    MS_LOG(INFO) << "The maximum and minimum values are equal to 0.";
-    quant_param->inited = true;
-    quant_param->min = real_min;
-    quant_param->max = real_max;
-    quant_param->scale = 1;
-    quant_param->zeroPoint = 0;
-    quant_param->narrowRange = narrow_range;
-    quant_param->numBits = num_bits;
-    return RET_OK;
-  }
+  double step_size = static_cast<double>(*encode_max - *encode_min) / q_range;
+  auto close_0 = std::round(-(*encode_min) / step_size);
+  *encode_min = (0 - close_0) * step_size;
+  *encode_max = (q_range - close_0) * step_size;
 }
-  if (quant_max - quant_min == 0) {
-    MS_LOG(ERROR) << "divisor cannot be 0";
-    return RET_ERROR;
-  }
-  double scale = (real_max - real_min) / (quant_max - quant_min);
+int CalQuantizationParams(schema::QuantParamT *quant_param, double real_min, double real_max, int num_bits,
+                          int quant_min, int quant_max, bool symmetric, bool narrow_range) {
+  CHECK_NULL_RETURN(quant_param);
+  float encode_min = real_min;
+  float encode_max = real_max;
+  EncodeMinMax(real_min, real_max, quant_min, quant_max, symmetric, &encode_min, &encode_max);
+  auto q_range = quant_max - quant_min;
+  double scale = (encode_max - encode_min) / q_range;
   if (fabs(scale) <= 0.0f) {
     MS_LOG(ERROR) << "divisor 'scale' cannot be 0";
     return RET_ERROR;
   }
-  int zero_point = static_cast<int32_t>(std::round(quant_min - real_min / scale));
+  int zero_point = static_cast<int32_t>(std::round(quant_min - encode_min / scale));
   // The zero point should always be in the range of quantized value,
   // [qmin, qmax].
   MS_ASSERT(zero_point >= quant_min);
   MS_ASSERT(zero_point <= quant_max);
   quant_param->inited = true;
-  quant_param->min = real_min;
-  quant_param->max = real_max;
+  quant_param->min = encode_min;
+  quant_param->max = encode_max;
   quant_param->scale = scale;
   quant_param->zeroPoint = zero_point;
   quant_param->narrowRange = narrow_range;
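
The last four added lines of EncodeMinMax are the "zp align" this PR is named for: after clamping, encode_min and encode_max are snapped onto the quantization grid so that floating-point 0.0 maps exactly to an integer quantized value. A minimal standalone C++ sketch of that step, using a hypothetical AlignZero helper that is not part of the patch (the patch does this inline in EncodeMinMax):

#include <cmath>
#include <cstdio>

// Sketch only: reproduces the zero-point alignment that EncodeMinMax performs
// inline. Shifts [*encode_min, *encode_max] so that 0.0f lands on a grid point.
void AlignZero(float *encode_min, float *encode_max, int quant_min, int quant_max) {
  int q_range = quant_max - quant_min;  // 255 for int8 [-128, 127]
  double step_size = static_cast<double>(*encode_max - *encode_min) / q_range;
  double close_0 = std::round(-(*encode_min) / step_size);  // grid index nearest 0.0
  *encode_min = (0 - close_0) * step_size;
  *encode_max = (q_range - close_0) * step_size;
}

int main() {
  float encode_min = -5.1f;
  float encode_max = 5.1f;
  AlignZero(&encode_min, &encode_max, -128, 127);
  // Prints roughly -5.12 5.08, matching the example in the comment above;
  // the exact split at the 127.5 midpoint depends on float rounding.
  std::printf("%f %f\n", encode_min, encode_max);
  return 0;
}

After the shift, -encode_min / step_size is an integer, so the zero point computed later in CalQuantizationParams is exact and dequantizing it returns precisely 0.0.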

View File

@@ -68,6 +68,8 @@ int CalQuantizationParams(schema::QuantParamT *quant_param, double real_min, dou
 int CalQuantizationParams(schema::QuantParamT *quant_param, double real_min, double real_max, int num_bits,
                           bool symmetric, bool narrow_range = false);
+void EncodeMinMax(float min_value, float max_value, int quant_min, int quant_max, bool symmetric, float *encode_min,
+                  float *encode_max);
 template <typename T>
 T QuantizeData(float origin_data, const schema::QuantParamT *quant_param, int quant_max, int quant_min) {
   MS_ASSERT(quant_param != nullptr);
@@ -102,7 +104,7 @@ T QuantizeData(const float origin_data, const schema::QuantParamT *quant_param)
 template <typename T>
 int DoPerLayerQuant(const float *raw_datas, size_t elem_count, std::vector<schema::QuantParamT> *quant_params,
                     const int &quant_max, const int &quant_min, const size_t &bit_num, std::vector<T> *quant_datas,
-                    bool symmetry = false, bool narrow_range = false, bool k_means = false) {
+                    bool symmetric = false, bool narrow_range = false, bool k_means = false) {
   if (k_means) {
     MS_LOG(ERROR) << "Unsupported K-means.";
     return RET_ERROR;
@@ -115,7 +117,7 @@ int DoPerLayerQuant(const float *raw_datas, size_t elem_count, std::vector<schem
   }
   schema::QuantParamT quant_param;
-  int status = CalQuantizationParams(&quant_param, min, max, bit_num, quant_min, quant_max, symmetry, narrow_range);
+  int status = CalQuantizationParams(&quant_param, min, max, bit_num, quant_min, quant_max, symmetric, narrow_range);
   if (status != RET_OK) {
     MS_LOG(ERROR) << "CalQuantizationParams failed" << status;
     return status;
@@ -148,7 +150,7 @@ template <typename T>
 int DoPerChannelQuant(const float *raw_datas, size_t elem_count, const schema::QuantType &quant_type,
                       std::vector<schema::QuantParamT> *quant_params, const int &quant_max, const int &quant_min,
                       const size_t &bit_num, std::vector<T> *quant_datas, const std::vector<int> &dims,
-                      int preferred_dim, bool symmetry = false, bool narrow_range = false, bool k_means = false) {
+                      int preferred_dim, bool symmetric = false, bool narrow_range = false, bool k_means = false) {
   if (raw_datas == nullptr || quant_params == nullptr || quant_datas == nullptr) {
     MS_LOG(ERROR) << "raw_data, quant_params or quant_data is nullptr.";
     return RET_ERROR;
@@ -186,7 +188,7 @@ int DoPerChannelQuant(const float *raw_datas, size_t elem_count, const schema::Q
     float min = min_max_map.second.min;
     float max = min_max_map.second.max;
     schema::QuantParamT quant_param;
-    ret = CalQuantizationParams(&quant_param, min, max, bit_num, quant_min, quant_max, symmetry, narrow_range);
+    ret = CalQuantizationParams(&quant_param, min, max, bit_num, quant_min, quant_max, symmetric, narrow_range);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "Cal quantization params failed.";
       return ret;
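
The QuantizeData template whose body is elided by the hunks above consumes the parameters produced by CalQuantizationParams. As a hedged sketch of that use, here is the standard affine mapping with the same parameter names; the diff does not show the verbatim template body:

#include <algorithm>
#include <cmath>
#include <cstdint>

// Sketch of how scale/zero_point from CalQuantizationParams are applied:
// q = clamp(round(x / scale) + zero_point, quant_min, quant_max).
int8_t QuantizeValue(float x, double scale, int zero_point, int quant_min, int quant_max) {
  int q = static_cast<int>(std::round(x / scale)) + zero_point;
  q = std::min(std::max(q, quant_min), quant_max);
  return static_cast<int8_t>(q);
}

Because EncodeMinMax aligns the range first, QuantizeValue(0.0f, ...) yields exactly zero_point, so zero survives a quantize/dequantize round trip without error.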

View File

@@ -1,6 +1,6 @@
 ml_asr_decoder_202103.onnx;2:encodermemoryinput:0,decoderinput:0;1,64,512:1,64 1.5 33689560
 hiai_asr_ctc.pb;2:x,state 3.5 8194952
 hdc_text.mindir;1;1,32 53 50877752
-model_split_decoder_dynamic.onnx;6;1,2,312:1,55,312:1,55,312:1,55,312:1,55,312:1,2,57 35 12074896
+model_split_decoder_dynamic.onnx;6;1,2,312:1,55,312:1,55,312:1,55,312:1,55,312:1,2,57 36 12074896
 model_split_encoder_dynamic.onnx;2;1,5,312:1,50,2048 51 5514536
 model_split_embedding_dynamic.onnx;3;1,5:1,5:1,5 15 7189704

View File

@@ -1,10 +1,10 @@
 ml_face_mnet 85 832368
-ml_face_landmark_2 0.8 472136
+ml_face_landmark_2 0.8 472112
 mobilenet.tflite 0.4 26040
-transformer_20200831_encoder_fp32.tflite;36 21 54319144
+transformer_20200831_encoder_fp32.tflite;36 21 54314232
 transformer_20200831_decoder_fp32.tflite;11 17 15425680
 ml_face_mnet_image 61 832360
 resnet.tflite 0.4 69272
-0916_ct_ddd_culane_dlav0_withSigmoid_noMerge.onnx 47 22487224
-v3plus512_512_op11.onnx 45 6028728
-resnet_image.mindir 7.0 38911216
+0916_ct_ddd_culane_dlav0_withSigmoid_noMerge.onnx 12 22487224
+v3plus512_512_op11.onnx 43 6028728
+resnet_image.mindir 7.0 39092152

View File

@@ -1,2 +1,2 @@
-mobilenetv2.r1.1.mindir 13 3758904
+mobilenetv2.r1.1.mindir 12 3758904
 ml_segmentation_matting 88 158840

View File

@@ -1,6 +1,6 @@
-ssd.r1.1.mindir 1.3 5401040
-ml_segmentation_matting 130 160224
-ml_video_edit_enhance.pb 22 546552
-hiai_ghostnet.tflite 4.7 5745336
+ssd.r1.1.mindir 1.3 5401000
+ml_segmentation_matting 130 149176
+ml_video_edit_enhance.pb 22 545552
+hiai_ghostnet.tflite 4.7 5589488
 siteAI_digcom_AI_ECN.tflite 22 113152
-hiai_nlu_model.pb;3:input_ids,input_mask,segment_ids;1,16:1,16:1,16 0.5 12971000
+hiai_nlu_model.pb;3:input_ids,input_mask,segment_ids;1,16:1,16:1,16 0.5 12965928

View File

@@ -123,13 +123,13 @@ int Calibrator::AddQuantizedOp(const CNodePtr &cnode) {
       auto make_tuple_size = input_cnode->size() - 1;
       for (size_t j = 0; j < make_tuple_size; j++) {
         std::unique_ptr<DataDistribution> input_diverg = std::make_unique<DataDistribution>(
-          cnode, kDefaultBinNumber, bit_num_, quant_max_, quant_min_, activation_quant_method_, symmetry_);
+          cnode, kDefaultBinNumber, bit_num_, quant_max_, quant_min_, activation_quant_method_, symmetric_);
         MS_CHECK_TRUE_MSG(input_diverg != nullptr, RET_NULL_PTR, "input_diverg is nullptr.");
         inputs_diverg_info_[node_name].insert({index++, std::move(input_diverg)});
       }
     } else {
       std::unique_ptr<DataDistribution> input_diverg = std::make_unique<DataDistribution>(
-        cnode, kDefaultBinNumber, bit_num_, quant_max_, quant_min_, activation_quant_method_, symmetry_);
+        cnode, kDefaultBinNumber, bit_num_, quant_max_, quant_min_, activation_quant_method_, symmetric_);
       MS_CHECK_TRUE_MSG(input_diverg != nullptr, RET_NULL_PTR, "input_diverg is nullptr.");
       inputs_diverg_info_[node_name].insert({index++, std::move(input_diverg)});
     }
@@ -142,13 +142,13 @@ int Calibrator::AddQuantizedOp(const CNodePtr &cnode) {
     MS_ASSERT(elements.size() > 1);
     for (size_t i = 0; i < elements.size(); i++) {
       std::unique_ptr<DataDistribution> output_diverg = std::make_unique<DataDistribution>(
-        cnode, kDefaultBinNumber, bit_num_, quant_max_, quant_min_, activation_quant_method_, symmetry_);
+        cnode, kDefaultBinNumber, bit_num_, quant_max_, quant_min_, activation_quant_method_, symmetric_);
       MS_CHECK_TRUE_MSG(output_diverg != nullptr, RET_NULL_PTR, "output_diverg is nullptr.");
       outputs_diverg_info_[node_name].insert({i, std::move(output_diverg)});
     }
   } else {
     std::unique_ptr<DataDistribution> output_diverg = std::make_unique<DataDistribution>(
-      cnode, kDefaultBinNumber, bit_num_, quant_max_, quant_min_, activation_quant_method_, symmetry_);
+      cnode, kDefaultBinNumber, bit_num_, quant_max_, quant_min_, activation_quant_method_, symmetric_);
     MS_CHECK_TRUE_MSG(output_diverg != nullptr, RET_NULL_PTR, "output_diverg is nullptr.");
     outputs_diverg_info_[node_name].insert({0, std::move(output_diverg)});
   }

View File

@@ -33,11 +33,11 @@ enum CollectType {
 class Calibrator {
  public:
   Calibrator(size_t bit_num, int quant_max, int quant_min, ActivationQuantizedMethod activation_quant_method,
-             const preprocess::DataPreProcessParam &data_pre_process_param, bool symmetry)
+             const preprocess::DataPreProcessParam &data_pre_process_param, bool symmetric)
       : bit_num_(bit_num),
         quant_max_(quant_max),
         quant_min_(quant_min),
-        symmetry_(symmetry),
+        symmetric_(symmetric),
         activation_quant_method_(activation_quant_method),
         data_pre_process_param_(data_pre_process_param) {}
@@ -80,7 +80,7 @@ class Calibrator {
   size_t bit_num_;
   int quant_max_;
   int quant_min_;
-  bool symmetry_;
+  bool symmetric_;
   ActivationQuantizedMethod activation_quant_method_;
   preprocess::DataPreProcessParam data_pre_process_param_;
 };

View File

@@ -179,11 +179,11 @@ int DataDistribution::ComputeThreshold() {
   return RET_OK;
 }
-double DataDistribution::CalculateMinMaxScale() { return CalculateScaleAndZp(this->real_min_, this->real_max_); }
+double DataDistribution::CalculateMinMaxScale() { return CalculateScale(this->real_min_, this->real_max_); }
 double DataDistribution::CalculateRemovalOutlierScale() {
   this->percent_result_ = CalQuantileMinMax(min_datas_, max_datas_);
-  return CalculateScaleAndZp(percent_result_.first, percent_result_.second);
+  return CalculateScale(percent_result_.first, percent_result_.second);
 }
 std::pair<float, float> DataDistribution::CalQuantileMinMax(const std::vector<float> &min_datas,
@@ -195,40 +195,16 @@ std::pair<float, float> DataDistribution::CalQuantileMinMax(const std::vector<fl
   return {avg_min, avg_max};
 }
-double DataDistribution::CalculateScaleAndZp(float min_value, float max_value) {
-  if (symmetry_) {
-    auto abs_max = std::max(fabs(min_value), fabs(max_value));
-    encode_min_ = -abs_max;
-    encode_max_ = abs_max;
-  } else {
-    encode_min_ = min_value;
-    encode_max_ = max_value;
-  }
-  // Handling 0
-  // Inputs are strictly positive, set the real min to 0. e.g. input range = [1.0, 5.0] -> [0.0, 5.0]
-  if (encode_min_ > 0.0f) {
-    MS_LOG(DEBUG) << "min " << encode_min_ << " is bigger then 0, set to 0, this may course low precision";
-    encode_min_ = 0.0f;
-  }
-  // Inputs are strictly negative, set the real max to 0. e.g. input range = [-5.0, -1.0] -> [-5.0, 0.0]
-  if (encode_max_ < 0.0f) {
-    MS_LOG(DEBUG) << "real_max " << encode_max_ << " is smaller than 0, set to 0, this may course low precision";
-    encode_max_ = 0.0f;
-  }
-  // Inputs are both negative and positive, real_min and real_max are slightly shifted to make the floating point zero
-  // exactly representable. e.g. input range = [-5.1, 5.1] -> [-5.12, 5.08]
-  // handle case where encode_min_ == encode_max_
-  float epsilon = 1e-5;
-  encode_max_ = std::max(encode_max_, encode_min_ + epsilon);
-  auto range = encode_max_ - encode_min_;
+double DataDistribution::CalculateScale(float min_value, float max_value) {
+  EncodeMinMax(min_value, max_value, quant_min_, quant_max_, symmetric_, &encode_min_, &encode_max_);
+  auto q_range = quant_max_ - quant_min_;
   MS_ASSERT(quant_max_ - quant_min_ > 0);
-  return range / (quant_max_ - quant_min_);
+  auto range = encode_max_ - encode_min_;
+  return range / q_range;
 }
 double DataDistribution::CalculateKLScale() {
-  return CalculateScaleAndZp(-std::abs(this->best_T_), std::abs(this->best_T_));
+  return CalculateScale(-std::abs(this->best_T_), std::abs(this->best_T_));
 }
@@ -250,7 +226,7 @@ double DataDistribution::GetScale() {
 }
 int32_t DataDistribution::GetZeroPoint() {
-  if (symmetry_) {
+  if (symmetric_) {
     zero_point_ = 0;
   } else {
     MS_ASSERT(scale_ > 0);

View File

@@ -25,7 +25,7 @@ class DataDistribution {
  public:
   DataDistribution() = default;
   DataDistribution(CNodePtr cnode, int bins, size_t bits, int quant_max, int quant_min,
-                   ActivationQuantizedMethod activation_quant_method, bool symmetry) {
+                   ActivationQuantizedMethod activation_quant_method, bool symmetric) {
     this->activation_quant_method_ = activation_quant_method;
     this->cnode_ = std::move(cnode);
     this->bin_num_ = bins;
@@ -37,9 +37,9 @@ class DataDistribution {
     this->quant_min_ = quant_min;
     std::fill(histogram_.begin(), histogram_.end(), 1.0e-7);
     if (this->activation_quant_method_ == KL) {
-      symmetry_ = true;
+      symmetric_ = true;
     } else {
-      symmetry_ = symmetry;
+      symmetric_ = symmetric;
     }
   }
@@ -74,7 +74,7 @@ class DataDistribution {
   double CalculateMinMaxScale();
   double CalculateRemovalOutlierScale();
   double CalculateKLScale();
-  double CalculateScaleAndZp(float min_value, float max_value);
+  double CalculateScale(float min_value, float max_value);
   std::pair<float, float> CalQuantileMinMax(const std::vector<float> &min_datas, const std::vector<float> &max_datas);
@@ -97,7 +97,7 @@ class DataDistribution {
   std::pair<float, float> percent_result_{0.0, 0.0};
   double scale_ = 0;
   int zero_point_ = 0;
-  bool symmetry_ = true;
+  bool symmetric_ = true;
 };
 }  // namespace mindspore::lite::quant
 #endif  // MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_DATA_DISTRIBUTION_H

View File

@@ -97,7 +97,7 @@ int FullQuantQuantizer::DoParameterWeightQuant(const CNodePtr &cnode, const Para
   auto weight_quant_type = per_channel ? WeightQuantType::FIXED_BIT_PER_CHANNEL : WeightQuantType::FIXED_BIT_PER_LAYER;
   auto status = FixedBitQuantFilter<int8_t>(weight, tensor_info, primitive, QuantType_QUANT_ALL, weight_q_max_,
                                             weight_q_min_, bit_num_, weight_quant_type, kNumberTypeInt8,
-                                            input_index - 1, preferred_dim, weight_symmetry_, true);
+                                            input_index - 1, preferred_dim, weight_symmetric_, true);
   if (status != RET_OK) {
     MS_LOG(ERROR) << "QuantFilter failed: " << status;
     return status;
@@ -117,7 +117,7 @@ int FullQuantQuantizer::DoValueNodeWeightQuant(const ValueNodePtr &weight, const
   auto weight_quant_type = per_channel ? WeightQuantType::FIXED_BIT_PER_CHANNEL : WeightQuantType::FIXED_BIT_PER_LAYER;
   auto status =
     FixedBitQuantFilter<int8_t>(weight, tensor_info, primitive, QuantType_QUANT_ALL, weight_q_max_, weight_q_min_,
-                                bit_num_, weight_quant_type, kNumberTypeInt8, input_index - 1, weight_symmetry_, true);
+                                bit_num_, weight_quant_type, kNumberTypeInt8, input_index - 1, weight_symmetric_, true);
   if (status != RET_OK) {
     MS_LOG(ERROR) << "QuantFilter failed: " << status;
     return status;
@@ -384,8 +384,8 @@ void FullQuantQuantizer::InitCpuConfig() {
   activation_quant_data_type_ = kNumberTypeInt8;
   activation_target_data_type_ = kNumberTypeInt8;
   weight_data_type_ = kNumberTypeInt8;
-  activation_symmetry_ = false;
-  weight_symmetry_ = true;
+  activation_symmetric_ = false;
+  weight_symmetric_ = true;
   support_int8_ops_ = {
     // Compute
     prim::kPrimConv2DFusion,
@@ -411,8 +411,8 @@ void FullQuantQuantizer::InitKirinConfig() {
   activation_quant_data_type_ = kNumberTypeUInt8;
   activation_target_data_type_ = kTypeUnknown;
   weight_data_type_ = kNumberTypeInt8;
-  activation_symmetry_ = false;
-  weight_symmetry_ = true;
+  activation_symmetric_ = false;
+  weight_symmetric_ = true;
   support_int8_ops_ = {prim::kPrimConv2DFusion, prim::kPrimFullConnection};
   flags_.fullQuantParam.bias_correction = false;
   per_channel_ops_ = {prim::kPrimConv2DFusion};
@@ -421,9 +421,9 @@ void FullQuantQuantizer::InitKirinConfig() {
 void FullQuantQuantizer::InitNvGpuConfig() {
   // `kTypeUnknown` represents the original data type
   activation_target_data_type_ = kTypeUnknown;
-  activation_symmetry_ = true;
+  activation_symmetric_ = true;
   weight_data_type_ = kTypeUnknown;
-  weight_symmetry_ = true;
+  weight_symmetric_ = true;
   support_int8_ops_ = {prim::kPrimConv2DFusion, prim::kPrimMatMul, prim::kPrimActivation,
                        prim::kPrimConv2dTransposeFusion};
   per_channel_ops_ = {};
@@ -433,7 +433,7 @@ void FullQuantQuantizer::InitNvGpuConfig() {
 void FullQuantQuantizer::InitQMinMax() {
   MS_ASSERT(activation_quant_data_type_ == kNumberTypeInt8 || activation_quant_data_type_ == kNumberTypeUInt8);
   if (activation_quant_data_type_ == kNumberTypeInt8) {
-    activation_q_min_ = QuantMin(this->bit_num_, false, activation_symmetry_);  // -128
+    activation_q_min_ = QuantMin(this->bit_num_, false, activation_symmetric_);  // -128
     activation_q_max_ = QuantMax(this->bit_num_, false);  // 127
   } else if (activation_quant_data_type_ == kNumberTypeUInt8) {
     activation_q_min_ = QuantMin(this->bit_num_, true, false);  // 0
@@ -441,7 +441,7 @@ void FullQuantQuantizer::InitQMinMax() {
   }
   MS_ASSERT(weight_data_type_ == kNumberTypeInt8 || weight_data_type_ == kNumberTypeUInt8);
   if (weight_data_type_ == kNumberTypeInt8) {
-    weight_q_min_ = QuantMin(this->bit_num_, false, weight_symmetry_);  // -127
+    weight_q_min_ = QuantMin(this->bit_num_, false, weight_symmetric_);  // -127
     weight_q_max_ = QuantMax(this->bit_num_, false);  // 127
   } else if (activation_quant_data_type_ == kNumberTypeUInt8) {
     weight_q_min_ = QuantMin(this->bit_num_, true, false);  // 0
@@ -495,7 +495,7 @@ int FullQuantQuantizer::PreProcess(const FuncGraphPtr &func_graph) {
   InitQMinMax();
   calibrator_ = std::make_shared<Calibrator>(this->bit_num_, activation_q_max_, activation_q_min_,
                                              this->flags_.fullQuantParam.activation_quant_method,
-                                             this->flags_.dataPreProcessParam, activation_symmetry_);
+                                             this->flags_.dataPreProcessParam, activation_symmetric_);
   MSLITE_CHECK_PTR(calibrator_);
   quant_strategy_ = std::make_unique<QuantStrategy>(flags_.commonQuantParam.min_quant_weight_size,
                                                     flags_.commonQuantParam.min_quant_weight_channel,
@@ -519,6 +519,7 @@ int FullQuantQuantizer::DoInference(CollectType collect_type) {
   }
   for (size_t calib_index = 0; calib_index < calibrator_->GetBatchNum(); calib_index++) {
+    MS_LOG(INFO) << "Do inference round:" << calib_index;
     // set multi-input data
     for (size_t input_index = 0; input_index < inputs.size(); input_index++) {
       int status = calibrator_->GenerateInputData(inputs[input_index]->tensor_name(), calib_index, inputs[input_index]);
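
The inline comments in InitQMinMax() above (-128, 127, -127, 0) spell out the range convention: asymmetric int8 activations use the full range [-128, 127], symmetric int8 weights use the narrow range [-127, 127], and uint8 uses [0, 255]. QuantMin/QuantMax are defined elsewhere in the converter; the sketch below shows their presumed behaviour, inferred only from those comments:

#include <cassert>

// Assumed semantics of QuantMin/QuantMax, inferred from the inline comments
// in InitQMinMax(); the real definitions are not part of this diff.
int QuantMinSketch(int bits, bool is_unsigned, bool symmetric) {
  if (is_unsigned) return 0;             // uint8 -> 0
  int q_min = -(1 << (bits - 1));        // int8 asymmetric -> -128
  return symmetric ? q_min + 1 : q_min;  // int8 symmetric -> -127
}

int QuantMaxSketch(int bits, bool is_unsigned) {
  return is_unsigned ? (1 << bits) - 1         // uint8 -> 255
                     : (1 << (bits - 1)) - 1;  // int8 -> 127
}

int main() {
  assert(QuantMinSketch(8, false, false) == -128);  // activations on CPU/Kirin
  assert(QuantMinSketch(8, false, true) == -127);   // symmetric weights
  assert(QuantMaxSketch(8, false) == 127);
  assert(QuantMinSketch(8, true, false) == 0);      // uint8 activations
  return 0;
}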

View File

@@ -81,8 +81,8 @@ class FullQuantQuantizer : public Quantizer {
   int activation_q_max_{INT8_MAX};
   int weight_q_min_{INT8_MIN};
   int weight_q_max_{INT8_MAX};
-  bool activation_symmetry_{false};
-  bool weight_symmetry_{true};
+  bool activation_symmetric_{false};
+  bool weight_symmetric_{true};
   std::set<PrimitivePtr> support_int8_ops_;
   std::set<PrimitivePtr> skip_check_dtype_ops_;
   std::set<PrimitivePtr> per_channel_ops_;

View File

@@ -524,7 +524,7 @@ void CalQuantAssitInfo(const schema::PrimitiveT &primitive, const std::vector<in
 int MixedBitQuantFilter(const AnfNodePtr &parameter_node, const tensor::TensorPtr &weight,
                         const PrimitivePtr &primitive, QuantType quant_type, WeightQuantType weight_quant_type,
-                        TypeId quant_data_type, double init_scale, int index, int preferred_dim, bool symmetry) {
+                        TypeId quant_data_type, double init_scale, int index, int preferred_dim, bool symmetric) {
   MS_CHECK_TRUE_RET(primitive != nullptr, RET_NULL_PTR);
   MS_CHECK_TRUE_RET(weight != nullptr, RET_NULL_PTR);
   auto dims = weight->shape();
@@ -557,7 +557,7 @@ int MixedBitQuantFilter(const AnfNodePtr &parameter_node, const tensor::TensorPt
                   << parameter_node->fullname_with_scope()
                   << " mixed bit quantization search failed, the current layer rolls back to 8 bit fixed quantization.";
     return FixedBitQuantFilter<int8_t>(parameter_node, weight, primitive, QuantType_QUANT_WEIGHT, quant_max, quant_min,
-                                       k8Bit, FIXED_BIT_PER_CHANNEL, kNumberTypeInt8, index, preferred_dim, symmetry);
+                                       k8Bit, FIXED_BIT_PER_CHANNEL, kNumberTypeInt8, index, preferred_dim, symmetric);
   }
   if (ret != RET_OK) {
     return ret;

View File

@@ -88,7 +88,7 @@ bool TensorQuantParamsInited(const schema::TensorT &tensor);
 int MixedBitQuantFilter(const AnfNodePtr &parameter_node, const tensor::TensorPtr &weight,
                         const PrimitivePtr &primitive, QuantType quant_type, WeightQuantType weight_quant_type,
-                        TypeId quant_data_type, double init_scale, int index, int preferred_dim, bool symmetry);
+                        TypeId quant_data_type, double init_scale, int index, int preferred_dim, bool symmetric);
 int CalChannels(const std::vector<int> &dims, int channel_cnt, bool *channel_at_first);
@@ -125,7 +125,7 @@ template <typename T>
 int FixedBitQuantFilter(const AnfNodePtr &parameter_node, const tensor::TensorPtr &weight,
                         const PrimitivePtr &primitive, QuantType quant_type, int quant_max, int quant_min,
                         size_t bit_num, WeightQuantType weight_quant_type, TypeId quant_data_type, int index,
-                        int preferred_dim, bool symmetry = false, bool narrow_range = false, bool k_means = false) {
+                        int preferred_dim, bool symmetric = false, bool narrow_range = false, bool k_means = false) {
   MS_ASSERT(weight != nullptr);
   MS_ASSERT(primitive != nullptr);
   auto dims = weight->shape();
@@ -150,7 +150,7 @@ int FixedBitQuantFilter(const AnfNodePtr &parameter_node, const tensor::TensorPt
       ret = DoPerChannelQuant<T>(static_cast<float *>(weight->data_c()), weight->DataSize(),
                                  static_cast<mindspore::schema::QuantType>(quant_type), &quant_params, quant_max,
                                  quant_min, bit_num, &quant_data, ConvertShapeVectorToInt32(dims), preferred_dim,
-                                 symmetry, narrow_range, k_means);
+                                 symmetric, narrow_range, k_means);
       if (ret == RET_NO_CHANGE) {
         return ret;
       } else if (ret != RET_OK) {
@@ -159,7 +159,7 @@ int FixedBitQuantFilter(const AnfNodePtr &parameter_node, const tensor::TensorPt
     }
   } else if (weight_quant_type == FIXED_BIT_PER_LAYER) {
     ret = DoPerLayerQuant<T>(static_cast<float *>(weight->data_c()), weight->DataSize(), &quant_params, quant_max,
-                             quant_min, bit_num, &quant_data, symmetry, narrow_range, k_means);
+                             quant_min, bit_num, &quant_data, symmetric, narrow_range, k_means);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "Do per layer quant failed.";
       return ret;