optimize mixed bit quantization code

2022-08-08 11:25:14 +08:00 · 2022-08-08 11:25:14 +08:00 · e58ede7d5a
parent a397693ca6
commit e58ede7d5a
6 changed files with 79 additions and 91 deletions
--- a/mindspore/lite/tools/converter/quantizer/fse_encoder.h
+++ b/mindspore/lite/tools/converter/quantizer/fse_encoder.h
@ -21,7 +21,9 @@
 #include "ir/anf.h"
 #include "ir/tensor.h"
 #include "tools/converter/quantizer/fse_bit_stream.h"
-#include "tools/converter/quantizer/mixed_bit_weight_quantizer.h"
+#include "schema/inner/model_generated.h"
+#include "src/common/log_adapter.h"
+#include "src/common/quant_utils.h"

 namespace mindspore::lite::quant {
 constexpr int MAX_SYMS = 65534;
--- a/mindspore/lite/tools/converter/quantizer/mixed_bit_weight_quantization.cc
+++ b/mindspore/lite/tools/converter/quantizer/mixed_bit_weight_quantization.cc
@ -14,17 +14,18 @@
 * limitations under the License.
 */

-#include "tools/converter/quantizer/mixed_bit_weight_quantizer.h"
+#include "tools/converter/quantizer/mixed_bit_weight_quantization.h"
 #include <cmath>
 #include <cfloat>
 #include <map>
 #include "tools/common/statistic_utils.h"
+#include "tools/converter/quantizer/quantize_util.h"

 namespace mindspore::lite::quant {
 constexpr float kTwentyFour = 24.0f;

-void MixedBitWeightQuantizer::GetBiasCorrection(float *weights, int element_num, float scale,
-                                                float *origin_dequant_datas) {
+void MixedBitWeightQuantization::CalculateBiasCorrection(float *weights, int element_num, float scale,
+                                                         float *origin_dequant_datas) {
  MS_ASSERT(weights != nullptr);
  MS_ASSERT(origin_dequant_datas != nullptr);
  MS_ASSERT(element_num > 0);
@ -60,7 +61,7 @@ void MixedBitWeightQuantizer::GetBiasCorrection(float *weights, int element_num,
 }

 // the error is currently measured per channel.
-float MixedBitWeightQuantizer::CalculateMeanError(std::vector<float> norms2, std::vector<float> dnorms2) {
+float MixedBitWeightQuantization::CalculateMeanError(std::vector<float> norms2, std::vector<float> dnorms2) {
  int error_count = 0;
  float mse_error = 1e-10f;
  const float soft = 1e-7f;
@ -77,8 +78,8 @@ float MixedBitWeightQuantizer::CalculateMeanError(std::vector<float> norms2, std
 }

 // the `preferred` dim should point to the output channels dimension.
-float MixedBitWeightQuantizer::MeasureQuantizationError(float *weights, const int *shape, int dims, int preferred_dim,
-                                                        float scale) {
+float MixedBitWeightQuantization::MeasureQuantizationError(float *weights, const int *shape, int dims,
+                                                           int preferred_dim, float scale) {
  MS_ASSERT(weights != nullptr);
  MS_ASSERT(shape != nullptr);
  // Init
@ -109,7 +110,7 @@ float MixedBitWeightQuantizer::MeasureQuantizationError(float *weights, const in
  MS_ASSERT(bucket_volume != 0);
  const float upround_offset = 0.5;
  // Bias Correction
-  GetBiasCorrection(weights, element_num, scale, origin_dequant_datas.data());
+  CalculateBiasCorrection(weights, element_num, scale, origin_dequant_datas.data());
  for (int i = 0; i < element_num; i++) {
    int bucket = (i / bucket_volume) % bucket_count;
    norms2[bucket] += weights[i] * weights[i];
@ -122,7 +123,7 @@ float MixedBitWeightQuantizer::MeasureQuantizationError(float *weights, const in
  return mean_error;
 }

-LayerParam MixedBitWeightQuantizer::CalculateLayerParams(const float *weights, int element_num) {
+LayerParam MixedBitWeightQuantization::CalculateLayerParams(const float *weights, int element_num) {
  MS_ASSERT(weights != nullptr);
  float temp_norm_tot = 0.0;
  for (int i = 0; i < element_num; i++) {
@ -133,7 +134,7 @@ LayerParam MixedBitWeightQuantizer::CalculateLayerParams(const float *weights, i
  return ret;
 }

-MinMax MixedBitWeightQuantizer::GetMinMax(const float *arr, int arrc) {
+MinMax MixedBitWeightQuantization::GetMinMax(const float *arr, int arrc) {
  MS_ASSERT(arr != nullptr);
  MinMax min_max = {INFINITY, -INFINITY};
  for (int i = 0; i < arrc; i++)
@ -144,9 +145,9 @@ MinMax MixedBitWeightQuantizer::GetMinMax(const float *arr, int arrc) {
  return min_max;
 }

-BinarySearchResult MixedBitWeightQuantizer::BinarySearchForQuantizationScale(float *weights, int *shape, int dims,
-                                                                             int preferred_dim, int max_iters,
-                                                                             float target_err, float rel_tol) {
+BinarySearchResult MixedBitWeightQuantization::BinarySearchForQuantizationScale(float *weights, int *shape, int dims,
+                                                                                int preferred_dim, int max_iters,
+                                                                                float target_err, float rel_tol) {
  MS_ASSERT(weights != nullptr);
  MS_ASSERT(shape != nullptr);
  int element_num = 1;
@ -191,8 +192,8 @@ BinarySearchResult MixedBitWeightQuantizer::BinarySearchForQuantizationScale(flo
  }
 }

-float MixedBitWeightQuantizer::GetDx(float *weights, int *shape, int dims, int preferred_dim,
-                                     const std::string &description) {
+float MixedBitWeightQuantization::GetDx(float *weights, int *shape, int dims, int preferred_dim,
+                                        const std::string &description) {
  MS_ASSERT(weights != nullptr);
  MS_ASSERT(shape != nullptr);
  static std::map<std::string, LayerParam> param_map;
@ -213,10 +214,10 @@ float MixedBitWeightQuantizer::GetDx(float *weights, int *shape, int dims, int p
  return (target_relative_err_ + target_search_tolerance_ * std::sqrt(kTwentyFour / element_num)) / params.inv_norm;
 }

-int MixedBitWeightQuantizer::DoQuantization(float *weights, std::vector<int64_t> shape, int preferred_dim,
-                                            std::vector<schema::QuantParamT> *quant_params,
-                                            std::vector<int16_t> *quant_datas, const std::string &description,
-                                            bool use_auto_tune_alg) {
+int MixedBitWeightQuantization::DoQuantization(float *weights, std::vector<int64_t> shape, int preferred_dim,
+                                               std::vector<schema::QuantParamT> *quant_params,
+                                               std::vector<int16_t> *quant_datas, const std::string &description,
+                                               bool use_auto_tune_alg) {
  MS_ASSERT(weights != nullptr);
  MS_ASSERT(quant_params != nullptr);
  MS_ASSERT(quant_datas != nullptr);
@ -252,8 +253,8 @@ int MixedBitWeightQuantizer::DoQuantization(float *weights, std::vector<int64_t>
  return RET_OK;
 }

-int MixedBitWeightQuantizer::QuantizeByScale(const float *weights, int weightsc, float scale,
-                                             schema::QuantParamT *quant_params, std::vector<int16_t> *quant_datas) {
+int MixedBitWeightQuantization::QuantizeByScale(const float *weights, int weightsc, float scale,
+                                                schema::QuantParamT *quant_params, std::vector<int16_t> *quant_datas) {
  MS_ASSERT(weights != nullptr);
  MS_ASSERT(weightsc <= quant_datas->size());
  const float upround_offset = 0.5;
@ -269,4 +270,40 @@ int MixedBitWeightQuantizer::QuantizeByScale(const float *weights, int weightsc,
  quant_params->inited = true;
  return RET_OK;
 }
+
+// Quantizes one weight tensor with the mixed-bit algorithm and records the result on the owning primitive.
+//
+// Flow: run DoQuantization over the tensor's buffer (preferred dim fixed to 0), replace the tensor content with the
+// resulting int16 data through UpdateTensorDataAndSize, then store the quant params and the quant type on the
+// primitive's quant-param holder.
+//
+//   primitive          primitive whose quant-param holder receives the result.
+//   parameter_node     node backing `weight`; its fullname_with_scope() is passed to DoQuantization as description.
+//   weight             tensor to quantize; its buffer is read as float (assumes float32 data; not verified here).
+//   index              input index handed to set_input_quant_param.
+//   quant_type         quant type stored on the holder.
+//   use_auto_tune_alg  forwarded unchanged to DoQuantization.
+//
+// Returns RET_OK on success. A DoQuantization failure is returned as-is; a null data buffer, empty quant params or
+// a failed tensor update yield RET_ERROR; null pointers are rejected through CHECK_NULL_RETURN.
+int MixedBitWeightQuantization::QuantFilter(const PrimitivePtr &primitive, const AnfNodePtr &parameter_node,
+                                            const tensor::TensorPtr &weight, int index, schema::QuantType quant_type,
+                                            bool use_auto_tune_alg) {
+  CHECK_NULL_RETURN(primitive);
+  // parameter_node is dereferenced below, so it gets the same guard as the other pointer arguments.
+  CHECK_NULL_RETURN(parameter_node);
+  CHECK_NULL_RETURN(weight);
+  auto *raw_data = static_cast<float *>(weight->data_c());
+  if (raw_data == nullptr) {
+    MS_LOG(ERROR) << "rawDatas is nullptr";
+    return RET_ERROR;
+  }
+
+  std::vector<schema::QuantParamT> quant_params;
+  // Size the output straight from DataSize() to avoid narrowing the element count to int.
+  std::vector<int16_t> quant_data(weight->DataSize());
+  auto ret = DoQuantization(raw_data, weight->shape_c(), 0, &quant_params, &quant_data,
+                            parameter_node->fullname_with_scope(), use_auto_tune_alg);
+  if (ret != RET_OK) {
+    return ret;
+  }
+  // Validate the quantization output before touching the tensor, so a failure here cannot leave the tensor holding
+  // int16 data without matching quant params.
+  if (quant_params.empty()) {
+    MS_LOG(ERROR) << "quant_params empty";
+    return RET_ERROR;
+  }
+
+  ret = UpdateTensorDataAndSize(parameter_node, weight, quant_data.data(), quant_data.size() * sizeof(int16_t),
+                                kNumberTypeInt16);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "UpdateTensorDataAndSize error";
+    return RET_ERROR;
+  }
+
+  auto quant_param_holder = GetCNodeQuantHolder(primitive);
+  CHECK_NULL_RETURN(quant_param_holder);
+  quant_param_holder->set_input_quant_param(index, quant_params);
+  quant_param_holder->set_quant_type(quant_type);
+  return RET_OK;
+}
 }  // namespace mindspore::lite::quant
--- a/mindspore/lite/tools/converter/quantizer/mixed_bit_weight_quantization.h
+++ b/mindspore/lite/tools/converter/quantizer/mixed_bit_weight_quantization.h
@ -14,8 +14,8 @@
 * limitations under the License.
 */

-#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_MIXED_BIT_WEIGHT_QUANTIZER_H_
-#define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_MIXED_BIT_WEIGHT_QUANTIZER_H_
+#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_MIXED_BIT_WEIGHT_QUANTIZATION_H_
+#define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_MIXED_BIT_WEIGHT_QUANTIZATION_H_
 #include <cstdint>
 #include <vector>
 #include <cmath>
@ -23,6 +23,7 @@
 #include "schema/inner/model_generated.h"
 #include "src/common/log_adapter.h"
 #include "src/common/quant_utils.h"
+#include "ir/tensor.h"

 namespace mindspore::lite::quant {
 constexpr float kBinarySearchStep = 2.0;
@ -36,20 +37,22 @@ typedef struct {
  MinMax mm;
 } LayerParam;

-class MixedBitWeightQuantizer {
+class MixedBitWeightQuantization {
 public:
-  explicit MixedBitWeightQuantizer(float target_relative_err = 0.01, float target_search_tolerance = 0.01,
-                                   int max_search_iters = 100)
+  explicit MixedBitWeightQuantization(float target_relative_err, float target_search_tolerance = 0.01,
+                                      int max_search_iters = 100)
      : target_relative_err_(target_relative_err),
        target_search_tolerance_(target_search_tolerance),
        max_search_iters_(max_search_iters) {}
-  ~MixedBitWeightQuantizer() = default;
+  ~MixedBitWeightQuantization() = default;

-  int DoQuantization(float *weights, std::vector<int64_t> shape, int preferred_dim,
-                     std::vector<schema::QuantParamT> *quant_params, std::vector<int16_t> *quant_datas,
-                     const std::string &description, bool use_auto_tune_alg);
+  int QuantFilter(const PrimitivePtr &primitive, const AnfNodePtr &parameter_node, const tensor::TensorPtr &weight,
+                  int index, schema::QuantType quant_type, bool use_auto_tune_alg = false);

 private:
+  int DoQuantization(float *weights, std::vector<int64_t> shape, int preferred_dim,
+                     std::vector<schema::QuantParamT> *quant_params, std::vector<int16_t> *quant_datas,
+                     const std::string &description, bool use_auto_tune_alg = false);
  float MeasureQuantizationError(float *weights, const int *shape, int dims, int preferred_dim, float scale);

  static MinMax GetMinMax(const float *arr, int arrc);
@ -63,7 +66,7 @@ class MixedBitWeightQuantizer {

  float GetDx(float *weights, int *shape, int dims, int preferred_dim, const std::string &description);

-  void GetBiasCorrection(float *weights, int element_num, float scale, float *origin_dequant_datas);
+  void CalculateBiasCorrection(float *weights, int element_num, float scale, float *origin_dequant_datas);

  float CalculateMeanError(std::vector<float> norms2, std::vector<float> dnorms2);

@ -75,4 +78,4 @@ class MixedBitWeightQuantizer {
  int max_search_iters_;
 };
 }  // namespace mindspore::lite::quant
-#endif  // MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_MIXED_BIT_WEIGHT_QUANTIZER_H_
+#endif  // MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_MIXED_BIT_WEIGHT_QUANTIZATION_H_
--- a/mindspore/lite/tools/converter/quantizer/quantize_util.cc
+++ b/mindspore/lite/tools/converter/quantizer/quantize_util.cc
@ -490,56 +490,6 @@ void CalQuantAssitInfo(const schema::PrimitiveT &primitive, const std::vector<in
  }
 }

-int MixedBitQuantFilter(const AnfNodePtr &parameter_node, const tensor::TensorPtr &weight,
-                        const PrimitivePtr &primitive, schema::QuantType quant_type, WeightQuantType weight_quant_type,
-                        TypeId quant_data_type, double init_scale, int index, int preferred_dim, bool symmetric,
-                        bool use_auto_tune_alg) {
-  MS_CHECK_TRUE_RET(primitive != nullptr, RET_NULL_PTR);
-  MS_CHECK_TRUE_RET(weight != nullptr, RET_NULL_PTR);
-  auto dims = weight->shape();
-  if (weight_quant_type == FIXED_BIT_PER_CHANNEL) {
-    if (dims.size() <= 1) {
-      MS_LOG(WARNING) << "dims is " << dims.size() << " can not per_channel";
-      weight_quant_type = FIXED_BIT_PER_LAYER;
-    }
-  }
-  std::vector<schema::QuantParamT> quant_params;
-  int elem_count = weight->DataSize();
-  auto *raw_data = static_cast<float *>(weight->data_c());
-  if (raw_data == nullptr) {
-    MS_LOG(ERROR) << "rawDatas is nullptr";
-    return RET_ERROR;
-  }
-
-  std::vector<int16_t> quant_data(elem_count);
-  if (weight_quant_type != MIXED_BIT_PER_LAYER) {
-    MS_LOG(ERROR) << "Unsupported weight quant type:" << weight_quant_type;
-    return RET_ERROR;
-  }
-  MixedBitWeightQuantizer quantizer(init_scale);
-  auto ret = quantizer.DoQuantization(static_cast<float *>(weight->data_c()), weight->shape_c(), 0, &quant_params,
-                                      &quant_data, parameter_node->fullname_with_scope(), use_auto_tune_alg);
-  if (ret != RET_OK) {
-    return ret;
-  }
-
-  auto status = UpdateTensorDataAndSize(parameter_node, weight, quant_data.data(), quant_data.size() * sizeof(int16_t),
-                                        quant_data_type);
-  if (status != RET_OK) {
-    MS_LOG(ERROR) << "UpdateTensorDataAndSize error";
-    return RET_ERROR;
-  }
-
-  if (quant_params.empty()) {
-    MS_LOG(ERROR) << "quant_params empty";
-    return RET_ERROR;
-  }
-  auto quant_param_holder = GetCNodeQuantHolder(primitive);
-  quant_param_holder->set_input_quant_param(index, quant_params);
-  quant_param_holder->set_quant_type(quant_type);
-  return ret;
-}
-
 bool CheckNodeInSet(const CNodePtr &cnode, const std::set<PrimitivePtr> &support_primitive_types) {
  for (const auto &type : support_primitive_types) {
    if (opt::CheckPrimitiveType(cnode, type)) {
--- a/mindspore/lite/tools/converter/quantizer/quantize_util.h
+++ b/mindspore/lite/tools/converter/quantizer/quantize_util.h
@ -46,7 +46,7 @@
 #include "abstract/dshape.h"
 #include "tools/converter/quantizer/huffman_encode.h"
 #include "tools/converter/quantizer/quant_params.h"
-#include "tools/converter/quantizer/mixed_bit_weight_quantizer.h"
+#include "tools/converter/quantizer/mixed_bit_weight_quantization.h"
 #include "src/litert/lite_session.h"
 #include "src/common/file_utils.h"
 #include "src/common/quant_utils.h"
@ -67,11 +67,6 @@ void CalQuantAssitInfo(const schema::PrimitiveT &primitive, const std::vector<in

 bool TensorQuantParamsInited(const schema::TensorT &tensor);

-int MixedBitQuantFilter(const AnfNodePtr &parameter_node, const tensor::TensorPtr &weight,
-                        const PrimitivePtr &primitive, schema::QuantType quant_type, WeightQuantType weight_quant_type,
-                        TypeId quant_data_type, double init_scale, int index, int preferred_dim, bool symmetric,
-                        bool use_auto_tune_alg);
-
 int CalChannels(const std::vector<int> &dims, int channel_cnt, bool *channel_at_first);

 int GetPreferredDim(const CNodePtr &cnode, int input_index, const std::vector<int> &dims);
--- a/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc
+++ b/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc
@ -25,6 +25,7 @@
 #include "tools/converter/quantizer/fse_encoder.h"
 #include "tools/converter/quantizer/tensor_compressor.h"
 #include "tools/converter/quantizer/cluster_quantization.h"
+#include "tools/converter/quantizer/mixed_bit_weight_quantization.h"

 namespace mindspore::lite::quant {
 static const float kScaleFactor = (0.01 * 0.01 * 0.01 * 24.0);
@ -147,9 +148,9 @@ int WeightQuantizer::DoMixBitQuant(const CNodePtr &cnode, const ParameterPtr &pa
                                   WeightQuantType weight_quant_type, bool symmetric) {
  auto primitive = GetValueNode<PrimitivePtr>(cnode->input(0));
  CHECK_NULL_RETURN(primitive);
-  auto status = MixedBitQuantFilter(parameter, tensor_info, primitive, param_->commonQuantParam.quant_type,
-                                    WeightQuantType::MIXED_BIT_PER_LAYER, type_id_, mixed_bit_init_scale_, idx - 1,
-                                    preferred_dim, symmetric, is_auto_tune_);
+  auto mixed_bit_quantization = MixedBitWeightQuantization(mixed_bit_init_scale_);
+  auto status = mixed_bit_quantization.QuantFilter(primitive, parameter, tensor_info, idx - 1,
+                                                   param_->commonQuantParam.quant_type, is_auto_tune_);
  if (status == RET_OK) {
    FSEEncoder fse_encoder;
    auto quant_param_holder = GetCNodeQuantHolder(primitive);