diff --git a/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc b/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc index 719f7094898..50658b09e48 100644 --- a/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc +++ b/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc @@ -54,6 +54,21 @@ STATUS DivergInfo::RecordMaxValue(const std::vector &datas) { return RET_OK; } +STATUS DivergInfo::RecordMaxValueArray(const std::vector &datas) { + if (datas.size() == 0) { + return RET_ERROR; + } + float max_num = datas.at(0); + float min_num = datas.at(0); + for (float data : datas) { + max_num = std::max(data, max_num); + min_num = std::min(data, min_num); + } + this->max_datas.emplace_back(max_num); + this->min_datas.emplace_back(min_num); + return RET_OK; +} + void DivergInfo::UpdateInterval() { auto max_value = std::max(fabs(this->max), fabs(this->min)); this->interval = max_value / static_cast(bin_num); @@ -85,6 +100,12 @@ STATUS DivergInfo::ComputeThreshold() { return RET_OK; } + if (method_x == kMethodOutlier) { + this->percent_result = PercentMethod(min_datas, max_datas); + this->best_T = std::max(std::fabs(percent_result.first), std::fabs(percent_result.second)); + return RET_OK; + } + constexpr int quant_bint_nums = 128; int threshold = quant_bint_nums; float min_kl = FLT_MAX; @@ -195,8 +216,14 @@ std::pair DivergInfo::GetScale() { float max_value = this->best_T; float min_value = -max_value; + if (this->method_x == kMethodOutlier) { + min_value = percent_result.first; + max_value = percent_result.second; + } + MS_ASSERT(quant_max - quant_min != 0); float scale = (max_value - min_value) / (quant_max - quant_min); + this->scale_tmp = scale; MS_ASSERT(scale != 0); return std::make_pair(this->cnode, scale); } @@ -210,6 +237,10 @@ std::pair DivergInfo::GetZeropoint() { } else { MS_LOG(WARNING) << "unexpectd quant range, quant_min: " << quant_min << " quant_max: " << quant_max; } + + if (this->method_x == kMethodOutlier) { + zero_point = std::round(quant_max - percent_result.second / scale_tmp); + } return std::make_pair(this->cnode, zero_point); } @@ -267,6 +298,7 @@ STATUS Calibrator::RecordMaxValue(const std::string &op_name, const vectorRecordMaxValue(data); + ((*got).second)->RecordMaxValueArray(data); } return RET_OK; } @@ -445,7 +477,7 @@ STATUS Calibrator::ReadConfig() { } else if (key == "thread_num") { config_param_.thread_num = std::stoul(value); } else if (key == "method_x") { - if (value != kMethodKL && value != kMethodMaxMin) { + if (value != kMethodKL && value != kMethodMaxMin && value != kMethodOutlier) { MS_LOG(WARNING) << "unsupported method_x: " << value << ". Use default value."; } else { config_param_.method_x = value; diff --git a/mindspore/lite/tools/converter/quantizer/post_training_quantizer.h b/mindspore/lite/tools/converter/quantizer/post_training_quantizer.h index ecef07fa285..ec49a4e97b8 100644 --- a/mindspore/lite/tools/converter/quantizer/post_training_quantizer.h +++ b/mindspore/lite/tools/converter/quantizer/post_training_quantizer.h @@ -42,6 +42,7 @@ struct MaxMin { const char kMethodMaxMin[] = "MAX_MIN"; const char kMethodKL[] = "KL"; +const char kMethodOutlier[] = "RemovalOutlier"; constexpr int kDefaultBinNumber = 2048; struct ConfigParam { @@ -127,6 +128,10 @@ struct DivergInfo { int quant_max = 255; int quant_min = 0; std::string method_x = kMethodKL; + std::vector min_datas; + std::vector max_datas; + std::pair percent_result{0.0, 0.0}; + float scale_tmp = 0; DivergInfo(CNodePtr cnode, int bins, size_t bits, int quant_max, int quant_min, const std::string &method_x) { this->method_x = method_x; @@ -143,6 +148,8 @@ struct DivergInfo { STATUS RecordMaxValue(const std::vector &datas); + STATUS RecordMaxValueArray(const std::vector &datas); + void UpdateInterval(); STATUS UpdateHistogram(const std::vector &data); diff --git a/mindspore/lite/tools/converter/quantizer/quantize_util.cc b/mindspore/lite/tools/converter/quantizer/quantize_util.cc index a79c0452d4a..b54ddf0c684 100644 --- a/mindspore/lite/tools/converter/quantizer/quantize_util.cc +++ b/mindspore/lite/tools/converter/quantizer/quantize_util.cc @@ -304,6 +304,74 @@ STATUS PostBitPack(float *weight, size_t shapeSize, size_t bitNum) { return RET_OK; } + +bool SearchLowerBound(const std::vector &data, const size_t &index, const float &max_tmp, float *min_tmp, + size_t *min_idx) { + size_t length = data.size(); + if (max_tmp - data.at(index) < delta) { + return false; + } + float range_ratio = (data.at(index) - *min_tmp) / (max_tmp - *min_tmp); + float index_ratio = static_cast(index - *min_idx) / (length - *min_idx); + if (index_ratio > 0 && range_ratio / index_ratio > ratio) { + *min_idx = index; + *min_tmp = data.at(index); + } + return true; +} + +bool SearchUpperBound(const std::vector &data, const size_t &index, float *max_tmp, const float &min_tmp, + size_t *max_idx) { + size_t length = data.size(); + if (data.at(index) - min_tmp < delta) { + return false; + } + float range_ratio = (*max_tmp - data.at(index)) / (*max_tmp - min_tmp); + float index_ratio = static_cast(index - *max_idx) / (length - *max_idx); + if (index_ratio > 0 && range_ratio / index_ratio > ratio) { + *max_idx = index; + *max_tmp = data.at(index); + } + return true; +} + +float CalPercentile(const std::vector &datas, const int &outlier_percent) { + const int size = datas.size(); + float val = outlier_percent / 100.0 * size; + int index = std::ceil(val); + float result = 0.0; + if (index - val > 0) { + result = datas.at(index - 1); + } else { + result = (datas.at(index - 1) + datas.at(index)) / 2; + } + return result; +} + +std::pair PercentMethod(std::vector min_datas, std::vector max_datas) { + std::sort(max_datas.begin(), max_datas.end()); + std::sort(min_datas.begin(), min_datas.end()); + float min_val = CalPercentile(min_datas, percent); + float max_val = CalPercentile(max_datas, 100 - percent); + std::reverse(max_datas.begin(), max_datas.end()); + MS_ASSERT(min_val < max_val); + MS_ASSERT(min_datas.size() == max_datas.size()); + float min_tmp = min_val; + float max_tmp = max_val; + size_t min_idx = 0; + size_t max_idx = 0; + size_t length = min_datas.size(); + for (size_t i = 0; i < length; i++) { + if (!SearchLowerBound(min_datas, i, max_tmp, &min_tmp, &min_idx)) { + break; + } + if (!SearchUpperBound(min_datas, i, &max_tmp, min_tmp, &max_idx)) { + break; + } + } + std::pair result{min_tmp, max_tmp}; + return result; +} } // namespace quant } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/tools/converter/quantizer/quantize_util.h b/mindspore/lite/tools/converter/quantizer/quantize_util.h index 927789cac93..405316c1f2c 100644 --- a/mindspore/lite/tools/converter/quantizer/quantize_util.h +++ b/mindspore/lite/tools/converter/quantizer/quantize_util.h @@ -24,6 +24,7 @@ #include #include #include +#include #include "tools/converter/quantizer/quantizer.h" #include "src/ops/primitive_c.h" #include "include/errorcode.h" @@ -61,12 +62,26 @@ class QuantStrategy { static const std::vector mul_types; }; +constexpr float delta = 0.1; +constexpr float ratio = 10.0; +constexpr int percent = 10; + STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int quant_max, int quant_min, int num_bits); STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange = false, int numBits = UINT8_QUANTIZATION); +bool SearchLowerBound(const std::vector &data, const size_t &index, const float &max_tmp, float *min_tmp, + size_t *min_idx); + +bool SearchUpperBound(const std::vector &data, const size_t &index, float *max_tmp, const float &min_tmp, + size_t *max_idx); + +float CalPercentile(const std::vector &datas, const int &percent); + +std::pair PercentMethod(std::vector min_datas, std::vector max_datas); + template T QuantizeData(const float originData, const schema::QuantParamT *quantParam) { MS_ASSERT(quantParam != nullptr);