!11186 mixed quant support Reshape && fix memory leak

From: @xutianchun
Reviewed-by: 
Signed-off-by:
mindspore-ci-bot 2021-01-18 19:09:28 +08:00 committed by Gitee
commit fa3638ad6b
10 changed files with 261 additions and 115 deletions

View File

@@ -8,20 +8,21 @@ file(GLOB KERNEL_SRC
)
list(REMOVE_ITEM KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/int8/opt_op_handler.cc)
if (SUPPORT_TRAIN)
file (GLOB TRAIN_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp32_grad/*.cc)
if(SUPPORT_TRAIN)
file(GLOB TRAIN_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp32_grad/*.cc)
set(KERNEL_SRC ${KERNEL_SRC} ${TRAIN_KERNEL_SRC})
endif()
add_library(cpu_kernel_mid OBJECT ${KERNEL_SRC})
add_dependencies(cpu_kernel_mid fbs_src)
if (PLATFORM_ARM64)
if (ENABLE_FP16)
if(PLATFORM_ARM64)
if(ENABLE_FP16)
file(GLOB FP16_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp16/*.cc)
add_library(cpu_fp16_kernel_mid OBJECT ${FP16_KERNEL_SRC})
endif ()
add_dependencies(cpu_fp16_kernel_mid fbs_src)
endif()
file(GLOB OPT_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/int8/opt_op_handler.cc)
add_library(cpu_opt_kernel_mid OBJECT ${OPT_KERNEL_SRC})
endif ()
add_dependencies(cpu_kernel_mid fbs_src)
endif()

View File

@@ -1,3 +1,3 @@
retinaface_732_1280_iod.mindir
mobilefacenet_iod.mindir
effnet_iod.mindir
#effnet_iod.mindir

View File

@@ -540,9 +540,9 @@ function Run_x86() {
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_log_file}"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.weightquant.ms.out >> "${run_x86_log_file}"
if [ $? = 0 ]; then
run_result='x86: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
run_result='x86: '${model_name}'[weight quant] pass'; echo ${run_result} >> ${run_benchmark_result_file}
else
run_result='x86: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
run_result='x86: '${model_name}'[weight quant] failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
fi
done < ${models_mindspore_weightquant_config}
@@ -806,9 +806,9 @@ function Run_x86_sse() {
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_sse_log_file}"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.weightquant.ms.out >> "${run_x86_sse_log_file}"
if [ $? = 0 ]; then
run_result='x86_sse: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
run_result='x86_sse: '${model_name}'[weight quant] pass'; echo ${run_result} >> ${run_benchmark_result_file}
else
run_result='x86_sse: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
run_result='x86_sse: '${model_name}'[weight quant] failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
fi
done < ${models_mindspore_weightquant_config}
@@ -1072,9 +1072,9 @@ function Run_x86_avx() {
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_avx_log_file}"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.weightquant.ms.out >> "${run_x86_avx_log_file}"
if [ $? = 0 ]; then
run_result='x86_avx: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
run_result='x86_avx: '${model_name}'[weight quant] pass'; echo ${run_result} >> ${run_benchmark_result_file}
else
run_result='x86_avx: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
run_result='x86_avx: '${model_name}'[weight quant] failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
fi
done < ${models_mindspore_weightquant_config}
@@ -1624,9 +1624,9 @@ function Run_arm64() {
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'_weightquant.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.weightquant.ms.out --loopCount=1' >> adb_run_cmd.txt
adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_log_file}"
if [ $? = 0 ]; then
run_result='arm64: '${model_name}'_train pass'; echo ${run_result} >> ${run_benchmark_result_file}
run_result='arm64: '${model_name}'[weightQuant] pass'; echo ${run_result} >> ${run_benchmark_result_file}
else
run_result='arm64: '${model_name}'_train failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
run_result='arm64: '${model_name}'[weightQuant] failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
fi
done < ${models_mindspore_weightquant_config}

View File

@@ -141,11 +141,6 @@ int GraphDefTransform::Transform(const converter::Flags &ctx) {
// init old node indices
auto old_nodes = GetGraphNodes();
Optimizer formatTransOptimizer;
auto formatTransPass = new (std::nothrow) FormatTransPass();
if (formatTransPass == nullptr) {
MS_LOG(ERROR) << "new formatTransPass failed";
return RET_MEMORY_FAILED;
}
formatTransOptimizer.AddPass(new (std::nothrow) FormatTransFusionPass());
formatTransOptimizer.AddPass(new (std::nothrow) IsolatedNodeRemovePass());
formatTransOptimizer.AddPass(new (std::nothrow) TransOpRemovePass());
@@ -164,11 +159,6 @@ int GraphDefTransform::Transform(const converter::Flags &ctx) {
// init old node indices
auto old_nodes = GetGraphNodes();
Optimizer formatTransOptimizer;
auto formatTransPass = new (std::nothrow) FormatTransPass();
if (formatTransPass == nullptr) {
MS_LOG(ERROR) << "new formatTransPass failed";
return RET_MEMORY_FAILED;
}
if (!ctx.trainModel && ctx.fmk != converter::FmkType_ONNX) {
formatTransOptimizer.AddPass(new (std::nothrow) GlobalFormatTransformPass());
formatTransOptimizer.AddPass(new (std::nothrow) IsolatedNodeRemovePass());

View File

@@ -418,6 +418,13 @@ PostTrainingQuantizer::PostTrainingQuantizer(FuncGraphPtr graph, string path, in
}
}
PostTrainingQuantizer::~PostTrainingQuantizer() {
delete fp32_session_;
delete fp32_model_;
delete int8_session_;
delete int8_model_;
}
STATUS PostTrainingQuantizer::DoQuantInput(double scale, int32_t zeropoint, struct MaxMin *max_min,
const std::shared_ptr<PrimitiveC> &lite_primitive) const {
MS_ASSERT(max_min != nullptr);
@@ -1435,8 +1442,10 @@ STATUS PostTrainingQuantizer::DoQuantize(FuncGraphPtr func_graph) {
// anf -- fb
flags.quantType = schema::QuantType_QUANT_NONE;
MS_LOG(INFO) << "start create session";
fp32_session_ = CreateSessionByFuncGraph(func_graph, flags, calibrator_->GetThreadNum());
if (fp32_session_ == nullptr) {
auto sm = CreateSessionByFuncGraph(func_graph, flags, calibrator_->GetThreadNum());
fp32_session_ = sm.session;
fp32_model_ = sm.model;
if (fp32_session_ == nullptr || fp32_model_ == nullptr) {
MS_LOG(ERROR) << "create session failed!";
return RET_ERROR;
}
@@ -1481,8 +1490,10 @@ STATUS PostTrainingQuantizer::DoQuantize(FuncGraphPtr func_graph) {
// init int8 session
MS_LOG(INFO) << "create quant session";
flags.quantType = schema::QuantType_PostTraining;
int8_session_ = CreateSessionByFuncGraph(func_graph, flags, calibrator_->GetThreadNum());
if (int8_session_ == nullptr) {
auto int8_sm = CreateSessionByFuncGraph(func_graph, flags, calibrator_->GetThreadNum());
int8_session_ = int8_sm.session;
int8_model_ = int8_sm.model;
if (int8_session_ == nullptr || int8_model_ == nullptr) {
MS_LOG(ERROR) << "create session failed!";
return RET_ERROR;
}

View File

@@ -46,7 +46,7 @@ class PostTrainingQuantizer : public Quantizer {
public:
PostTrainingQuantizer(FuncGraphPtr graph, std::string path, int bit_num, TypeId target_type = kNumberTypeInt8,
bool per_channel = true);
~PostTrainingQuantizer() = default;
~PostTrainingQuantizer();
STATUS DoQuantize(FuncGraphPtr func_graph) override;
@@ -64,7 +64,9 @@ class PostTrainingQuantizer : public Quantizer {
std::unique_ptr<Calibrator> calibrator_;
session::LiteSession *fp32_session_{nullptr};
Model *fp32_model_{nullptr};
session::LiteSession *int8_session_{nullptr};
Model *int8_model_{nullptr};
std::map<std::string, std::vector<float>> fp32_op_input_map; // concurrency
std::map<std::string, std::vector<float>> fp32_op_output_ch_mean_map; // concurrency

View File

@@ -134,14 +134,14 @@ bool QuantStrategy::CanMulOpQuantized(const CNodePtr &node) const {
}
if (node->size() < 3) {
MS_LOG(INFO) << "input size less!";
MS_LOG(INFO) << node->fullname_with_scope() << " input size less!";
return false;
}
auto inputNode1 = node->input(1);
auto inputNode2 = node->input(2);
if (inputNode1 == nullptr || inputNode2 == nullptr) {
MS_LOG(INFO) << "mul input is nullptr!";
MS_LOG(INFO) << node->fullname_with_scope() << " mul input is nullptr!";
return false;
}
@@ -153,7 +153,7 @@ bool QuantStrategy::CanMulOpQuantized(const CNodePtr &node) const {
}
if (paramNode == nullptr) {
MS_LOG(INFO) << "invalid paramNode!";
MS_LOG(INFO) << node->fullname_with_scope() << " invalid paramNode!";
return false;
}
@@ -480,6 +480,48 @@ schema::PrimitiveType NodePrimitiveType(const CNodePtr &cnode) {
return (schema::PrimitiveType)primitive_c->Type();
}
std::vector<int> DataToVector(const string &str) {
std::vector<int> result;
auto raw_datas = str;
auto ind = raw_datas.find(',');
while (ind != std::string::npos) {
auto data = raw_datas.substr(0, ind);
Trim(&data);
result.push_back(std::stoul(data));
raw_datas = raw_datas.substr(ind + 1);
Trim(&raw_datas);
ind = raw_datas.find(',');
}
if (!raw_datas.empty()) {
result.push_back(std::stoul(raw_datas));
}
if (result.empty()) {
MS_LOG(ERROR) << "result is empty";
}
return result;
}
std::vector<std::vector<int>> DataToVectors(const string &str) {
std::vector<std::vector<int>> result;
auto raw_datas = str;
auto ind = raw_datas.find(';');
while (ind != std::string::npos) {
auto data = raw_datas.substr(0, ind);
Trim(&data);
result.push_back(DataToVector(data));
raw_datas = raw_datas.substr(ind + 1);
Trim(&raw_datas);
ind = raw_datas.find(';');
}
if (!raw_datas.empty()) {
result.push_back(DataToVector(raw_datas));
}
if (result.empty()) {
MS_LOG(ERROR) << "result is empty";
}
return result;
}
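For illustration (not part of the diff), a hypothetical use of the two helpers above, assuming Trim strips surrounding whitespace: commas separate dimensions, semicolons separate tensors.
// example values only
auto shape = DataToVector("1, 224, 224, 3");       // -> {1, 224, 224, 3}
auto shapes = DataToVectors("1,224,224,3; 1,16");  // -> {{1, 224, 224, 3}, {1, 16}}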
STATUS ParseConfigFile(std::string config_file, PostQuantConfig *post_quant_config) {
if (post_quant_config == nullptr) {
MS_LOG(ERROR) << "post_quant_config is null.";
@@ -559,6 +601,20 @@ STATUS ParseConfigFile(std::string config_file, PostQuantConfig *post_quant_conf
}
} else if (key == "mean_error_threshold") {
post_quant_config->mean_error_threshold = std::stof(value);
} else if (key == "input_shapes") {
auto &raw_shape = value;
auto ind = raw_shape.find('/');
while (ind != std::string::npos) {
auto shape = raw_shape.substr(0, ind);
Trim(&shape);
post_quant_config->input_shapes.push_back(DataToVectors(shape));
raw_shape = raw_shape.substr(ind + 1);
Trim(&raw_shape);
ind = raw_shape.find('/');
}
if (!raw_shape.empty()) {
post_quant_config->input_shapes.push_back(DataToVectors(raw_shape));
}
} else {
MS_LOG(WARNING) << "unsupported parameter: " << key;
}
@@ -578,12 +634,12 @@ STATUS ParseConfigFile(std::string config_file, PostQuantConfig *post_quant_conf
return RET_OK;
}
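Combining the three separators the parser above recognizes, a hypothetical input_shapes entry for two calibration rounds of a two-input model could read:
input_shapes=1,224,224,3;1,16/1,112,112,3;1,16
Each '/'-separated group is one calibration round, each ';'-separated item one input tensor, and each ','-separated number one dimension.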
session::LiteSession *CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, const converter::Flags &flags,
int thread_num) {
SessionModel CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, const converter::Flags &flags, int thread_num) {
SessionModel sm;
auto meta_graph = Export(func_graph, true, true);
if (meta_graph == nullptr) {
MS_LOG(ERROR) << "Export to meta_graph failed";
return nullptr;
return sm;
}
// transform
@@ -592,7 +648,7 @@ session::LiteSession *CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, c
auto status = fb_transform.Transform(flags);
if (status != RET_OK) {
MS_LOG(ERROR) << "FBTransform model failed";
return nullptr;
return sm;
}
meta_graph->version = Version();
@@ -604,12 +660,12 @@ session::LiteSession *CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, c
auto *content = reinterpret_cast<const char *>(builder.GetBufferPointer());
if (content == nullptr) {
MS_LOG(ERROR) << "GetBufferPointer return null";
return nullptr;
return sm;
}
auto model = lite::Model::Import(content, size);
if (model == nullptr) {
MS_LOG(ERROR) << "Import model failed";
return nullptr;
return sm;
}
Context ctx;
@@ -618,16 +674,19 @@ session::LiteSession *CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, c
auto session = session::LiteSession::CreateSession(&ctx);
if (session == nullptr) {
MS_LOG(ERROR) << "create session failed.";
return nullptr;
return sm;
}
status = session->CompileGraph(model);
if (status != RET_OK) {
MS_LOG(ERROR) << "CompileGraph error";
return nullptr;
return sm;
}
model->Free();
return session;
delete meta_graph;
sm.session = session;
sm.model = model;
return sm;
}
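A caller-side sketch of the new contract (assumed from this diff, not part of it): on any failure an empty SessionModel comes back with both fields nullptr; on success the caller owns both objects, because deleting the session does not free the model it compiled.
auto sm = CreateSessionByFuncGraph(func_graph, flags, thread_num);
if (sm.session == nullptr || sm.model == nullptr) {
  return RET_ERROR;  // creation or compilation failed
}
// ... run inference ...
delete sm.session;  // the session does not delete the model,
delete sm.model;    // so both must be released, as ~PostTrainingQuantizer now does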
STATUS CollectCalibInputs(const std::vector<std::string> &input_dirs, size_t count_limited,
@@ -805,4 +864,21 @@ void GetLiteParameter(const AnfNodePtr &node, ParameterPtr *param_node, ParamVal
return;
}
}
STATUS UpdateTensorDataAndSize(ParamValueLitePtr weight, void *quant_datas, int new_size) {
MS_ASSERT(weight != nullptr);
MS_ASSERT(new_size > 0);
delete[] reinterpret_cast<char *>(weight->tensor_addr());
char *new_tensor_data = new (std::nothrow) char[new_size];
if (new_tensor_data == nullptr) {
MS_LOG(ERROR) << "new data error";
return RET_ERROR;
}
memcpy(new_tensor_data, quant_datas, new_size);
weight->set_tensor_size(new_size);
weight->set_tensor_addr(new_tensor_data);
return RET_OK;
}
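A design note, offered as a reading of this diff rather than a statement of intent: because UpdateTensorDataAndSize always swaps in a freshly allocated buffer of the requested size, one helper covers both directions, shrinking a weight to its quantized bytes and restoring the saved fp32 bytes when a mixed-bit attempt is rejected. A sketch of the restore call, mirroring the usage later in this diff:
// origin_data holds a saved fp32 copy of the weight (elem_count floats)
auto status = UpdateTensorDataAndSize(param_value, origin_data, sizeof(float) * elem_count);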
} // namespace mindspore::lite::quant

View File

@@ -57,9 +57,15 @@ struct PostQuantConfig {
bool bias_correction{false};
bool mixed{false};
float mean_error_threshold{0.04};
std::vector<std::vector<std::vector<int>>> input_shapes; // different input
bool inited{false};
};
struct SessionModel {
session::LiteSession *session{nullptr};
Model *model{nullptr};
};
/**
* 1. when op's weight size > mWeightSize just skip
* 2. only do conv/deconv/convdepthwise/deconvdepthwise/mul/matmul/batchmatmul quantization
@@ -97,6 +103,8 @@ std::pair<float, float> OutlierMethod(std::vector<float> min_datas, std::vector<
std::vector<int8_t> KMeans(float *data, size_t elem_count, size_t k, size_t epochs, schema::QuantParamT *quantParam);
STATUS UpdateTensorDataAndSize(ParamValueLitePtr weight, void *quant_datas, int new_size);
template <typename T>
T QuantizeData(const float originData, const schema::QuantParamT *quantParam) {
MS_ASSERT(quantParam != nullptr);
@@ -148,27 +156,17 @@ T QuantizeData(float originData, const schema::QuantParamT &quantParam, int quan
return static_cast<T>(quant_data);
}();
}
template <typename T>
STATUS QuantFilter(const ParamValueLitePtr &weight, const std::shared_ptr<PrimitiveC> &primitive_c, QuantType quantType,
int quant_max, int quant_min, size_t bitNum, bool per_channel, bool k_means = false) {
int quant_max, int quant_min, size_t bitNum, bool per_channel, int index = 1, bool k_means = false) {
MS_ASSERT(weight != nullptr);
MS_ASSERT(primitive_c != nullptr);
auto dims = weight->tensor_shape();
auto op_type = (schema::PrimitiveType)primitive_c->Type();
if (per_channel) {
if (dims.size() != 4 && dims.size() != 2 && op_type != schema::PrimitiveType_MatMul) {
MS_LOG(INFO) << "weight dims size: " << dims.size() << " switch to per-layer quant mode.";
if (dims.size() <= 1) {
MS_LOG(WARNING) << "dims is " << dims.size() << " can not per_channel";
per_channel = false;
} else {
if (dims.size() == 2 && op_type != schema::PrimitiveType_FullConnection) {
MS_LOG(INFO) << "weight dims size is 2 but op_type is not FullConnection, switch to per-layer quant mode.";
per_channel = false;
}
uint32_t channels = dims[0];
if (channels == 0) {
MS_LOG(ERROR) << "channels is 0";
return RET_ERROR;
}
}
}
@@ -261,12 +259,11 @@ STATUS QuantFilter(const ParamValueLitePtr &weight, const std::shared_ptr<Primit
}
quant_params.emplace_back(quant_param);
}
auto ret = memcpy_s(raw_datas, weight->tensor_size(), quant_datas.data(), elem_count * sizeof(T));
if (ret != EOK) {
MS_LOG(ERROR) << "memcpy error: " << ret;
auto status = UpdateTensorDataAndSize(weight, quant_datas.data(), quant_datas.size() * sizeof(T));
if (status != RET_OK) {
MS_LOG(ERROR) << "UpdateTensorDataAndSize error";
return RET_ERROR;
}
weight->set_tensor_size(elem_count * sizeof(T));
} else {
// per layer
float min = FLT_MAX;
@@ -294,12 +291,11 @@ STATUS QuantFilter(const ParamValueLitePtr &weight, const std::shared_ptr<Primit
quant_datas[i] = quant_data;
}
}
auto ret = memcpy_s(raw_datas, weight->tensor_size(), quant_datas.data(), elem_count * sizeof(T));
if (ret != EOK) {
MS_LOG(ERROR) << "memcpy error: " << ret;
auto status = UpdateTensorDataAndSize(weight, quant_datas.data(), quant_datas.size() * sizeof(T));
if (status != RET_OK) {
MS_LOG(ERROR) << "UpdateTensorDataAndSize error";
return RET_ERROR;
}
weight->set_tensor_size(elem_count * sizeof(T));
}
// do bit pack
@@ -311,21 +307,19 @@ STATUS QuantFilter(const ParamValueLitePtr &weight, const std::shared_ptr<Primit
if (bitNum > 0 && bitNum < 8) {
std::vector<uint8_t> pack_data{};
BitPack::BitPacking<T, uint8_t>(bitNum, data, &pack_data);
auto ret = memcpy_s(raw_datas, weight->tensor_size(), pack_data.data(), pack_data.size() * sizeof(uint8_t));
if (ret != EOK) {
MS_LOG(ERROR) << "PostBitPack memcpy_s qDatas_packed failed";
auto status = UpdateTensorDataAndSize(weight, pack_data.data(), pack_data.size() * sizeof(uint8_t));
if (status != RET_OK) {
MS_LOG(ERROR) << "UpdateTensorDataAndSize error";
return RET_ERROR;
}
weight->set_tensor_size(pack_data.size() * sizeof(uint8_t));
} else if (bitNum > 8 && bitNum < 16) {
std::vector<uint16_t> pack_data{};
BitPack::BitPacking<T, uint16_t>(bitNum, data, &pack_data);
auto ret = memcpy_s(raw_datas, weight->tensor_size(), pack_data.data(), pack_data.size() * sizeof(uint16_t));
if (ret != EOK) {
MS_LOG(ERROR) << "PostBitPack memcpy_s qDatas_packed failed";
auto status = UpdateTensorDataAndSize(weight, pack_data.data(), pack_data.size() * sizeof(uint16_t));
if (status != RET_OK) {
MS_LOG(ERROR) << "UpdateTensorDataAndSize error";
return RET_ERROR;
}
weight->set_tensor_size(pack_data.size() * sizeof(uint16_t));
}
}
@@ -336,7 +330,7 @@ STATUS QuantFilter(const ParamValueLitePtr &weight, const std::shared_ptr<Primit
if (quantType == QuantType_PostTraining) {
primitive_c->AddInputQuantParam(quant_params);
} else {
primitive_c->set_input_quant_param(WEIGHT_INDEX, quant_params);
primitive_c->set_input_quant_param(index, quant_params);
}
return RET_OK;
}
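For reference, hypothetical calls showing the new index parameter, which selects the input quant-param slot to write and defaults to 1 (the weight input of conv/matmul):
// conv/matmul: weight is input 1, the default
QuantFilter<int8_t>(weight, primitive_c, QuantType_WeightQuant, quant_max, quant_min, bit_num, true);
// LSTM: the weight sits at input 2, as DoLstmQuntize passes later in this diff
QuantFilter<int8_t>(weight, primitive_c, QuantType_WeightQuant, quant_max, quant_min, bit_num, false, 2);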
@@ -347,8 +341,7 @@ schema::PrimitiveType NodePrimitiveType(const CNodePtr &cnode);
STATUS ParseConfigFile(std::string config_file, PostQuantConfig *post_quant_config);
session::LiteSession *CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, const converter::Flags &flags,
int thread_num);
SessionModel CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, const converter::Flags &flags, int thread_num);
STATUS CollectCalibInputs(const std::vector<std::string> &input_dirs, size_t count_limited,
std::vector<std::vector<std::string>> *inputs);
@@ -359,6 +352,5 @@ STATUS CopyInputDataToTensor(size_t input_index, size_t image_index,
FuncGraphPtr CopyFuncGraph(const FuncGraphPtr &);
void GetLiteParameter(const AnfNodePtr &node, ParameterPtr *param_node, ParamValueLitePtr *param_value);
} // namespace mindspore::lite::quant
#endif

View File

@@ -84,7 +84,13 @@ WeightQuantizer::WeightQuantizer(FuncGraphPtr graph, const std::string &config_f
}
}
WeightQuantizer::~WeightQuantizer() { delete fp32_session_; }
WeightQuantizer::~WeightQuantizer() {
for (const auto &fp32_output_tensor : fp32_output_tensors_) {
for (const auto &kv : fp32_output_tensor) {
delete kv.second;
}
}
}
STATUS WeightQuantizer::SetAbstract(ParamValueLitePtr param_value, ParameterPtr param_node,
std::shared_ptr<PrimitiveC> primitive_c) {
@@ -278,11 +284,11 @@ STATUS WeightQuantizer::DoLstmQuntize(CNodePtr cnode) {
}
auto status = RET_ERROR;
if (type_id_ == kNumberTypeInt8) {
status =
QuantFilter<int8_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, false);
status = QuantFilter<int8_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_,
false, 2);
} else if (type_id_ == kNumberTypeInt16) {
status =
QuantFilter<int16_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, false);
status = QuantFilter<int16_t>(param_value, primitive_c, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_,
false, 2);
}
if (status != RET_OK) {
MS_LOG(ERROR) << "QuantFilter failed : " << status;
@@ -438,15 +444,73 @@ float CompareOutputData(const std::unordered_map<std::string, mindspore::tensor:
return total_mean_error / tensor_cnt;
}
STATUS WeightQuantizer::DoMiexedQuant(FuncGraphPtr func_graph) {
STATUS WeightQuantizer::RunFp32Graph(FuncGraphPtr func_graph) {
auto image_cnt = images_.at(0).size();
if (!config_param_.input_shapes.empty()) {
if (config_param_.input_shapes.size() != image_cnt) {
MS_LOG(ERROR) << "input_shapes size: " << config_param_.input_shapes.size() << " image_cnt: " << image_cnt;
return RET_ERROR;
}
}
// 0.1 Create Fp32 Session
flags.quantType = schema::QuantType_QUANT_NONE;
fp32_session_ = CreateSessionByFuncGraph(func_graph, flags, config_param_.thread_num);
if (fp32_session_ == nullptr) {
auto fp32_sm = CreateSessionByFuncGraph(func_graph, flags, config_param_.thread_num);
auto fp32_session = fp32_sm.session;
auto fp32_model = fp32_sm.model;
if (fp32_session == nullptr || fp32_model == nullptr) {
MS_LOG(ERROR) << "CreateSessoin fail";
delete fp32_model;
return RET_ERROR;
}
auto fp32_inputs = fp32_session_->GetInputs();
auto fp32_inputs = fp32_session->GetInputs();
fp32_output_tensors_.resize(image_cnt);
// 0.3 save fp32 output
for (size_t i = 0; i < image_cnt; i++) {
if (!config_param_.input_shapes.empty()) {
auto status = fp32_session->Resize(fp32_inputs, {config_param_.input_shapes[i]});
if (status != RET_OK) {
MS_LOG(ERROR) << "session Resize fail";
delete fp32_sm.session;
delete fp32_sm.model;
return RET_ERROR;
}
}
for (size_t input_index = 0; input_index < fp32_inputs.size(); input_index++) {
auto status = CopyInputDataToTensor(input_index, i, images_, fp32_inputs[input_index]);
if (status != RET_OK) {
MS_LOG(ERROR) << "generate input data from images failed!";
delete fp32_sm.session;
delete fp32_sm.model;
return RET_ERROR;
}
}
auto status = fp32_session->RunGraph();
if (status != RET_OK) {
MS_LOG(ERROR) << "RunGraph fail";
delete fp32_sm.session;
delete fp32_sm.model;
return RET_ERROR;
}
auto fp32_outputs = fp32_session->GetOutputs();
for (const auto &kv : fp32_outputs) {
auto *tensor = kv.second;
auto *lite_tensor = reinterpret_cast<lite::Tensor *>(tensor);
if (lite_tensor == nullptr) {
MS_LOG(ERROR) << "not lite tensor";
delete fp32_sm.session;
delete fp32_sm.model;
return RET_ERROR;
}
auto *new_tensor = Tensor::CopyTensor(*lite_tensor, true);
fp32_output_tensors_[i][kv.first] = new_tensor;
}
}
delete fp32_sm.session;
delete fp32_sm.model;
return RET_OK;
}
STATUS WeightQuantizer::DoMiexedQuant(FuncGraphPtr func_graph) {
// 0.2 Parse input calib files
auto status = CollectCalibInputs(config_param_.image_paths, config_param_.batch_count, &images_);
if (status != RET_OK) {
@@ -454,6 +518,12 @@ STATUS WeightQuantizer::DoMiexedQuant(FuncGraphPtr func_graph) {
return RET_ERROR;
}
MS_LOG(DEBUG) << "run fp32 model";
status = RunFp32Graph(func_graph);
if (status != RET_OK) {
return RET_ERROR;
}
auto cnodes = func_graph->GetOrderedCnodes();
for (auto &cnode : cnodes) {
auto op_type = NodePrimitiveType(cnode);
@@ -471,6 +541,13 @@ STATUS WeightQuantizer::DoMiexedQuant(FuncGraphPtr func_graph) {
}
}
}
auto image_cnt = images_.at(0).size();
if (!config_param_.input_shapes.empty()) {
if (config_param_.input_shapes.size() != image_cnt) {
MS_LOG(ERROR) << "input_shapes size: " << config_param_.input_shapes.size() << " image_cnt: " << image_cnt;
return RET_ERROR;
}
}
for (auto iter = cnodes.end(); iter != cnodes.begin();) {
auto cnode = *(--iter);
@@ -540,66 +617,58 @@ STATUS WeightQuantizer::DoMiexedQuant(FuncGraphPtr func_graph) {
// 2. evaluate the quant
// 2.1 create quant session, get input, output tensor
flags.quantType = schema::QuantType_WeightQuant;
auto quant_session =
std::unique_ptr<session::LiteSession>(CreateSessionByFuncGraph(func_graph, flags, config_param_.thread_num));
auto quant_sm = CreateSessionByFuncGraph(func_graph, flags, config_param_.thread_num);
auto quant_session = std::unique_ptr<session::LiteSession>(quant_sm.session);
if (quant_session == nullptr) {
MS_LOG(ERROR) << "create session error: " << status;
delete quant_sm.model;
return RET_ERROR;
}
auto quant_inputs = quant_session->GetInputs();
auto mean_error = 0.0f;
if (fp32_inputs.size() != images_.size()) {
MS_LOG(ERROR) << "model's input tensor cnt: " << fp32_inputs.size() << " != " << images_.size();
return RET_ERROR;
}
auto image_cnt = images_.at(0).size();
for (size_t i = 0; i < image_cnt; i++) {
// set multi-input data
for (size_t input_index = 0; input_index < fp32_inputs.size(); input_index++) {
status = CopyInputDataToTensor(input_index, i, images_, fp32_inputs[input_index]);
if (!config_param_.input_shapes.empty()) {
status = quant_session->Resize(quant_inputs, {config_param_.input_shapes[i]});
if (status != RET_OK) {
MS_LOG(ERROR) << "generate input data from images failed!";
MS_LOG(ERROR) << "session Resize fail";
delete quant_sm.model;
return RET_ERROR;
}
}
// set multi-input data
for (size_t input_index = 0; input_index < quant_inputs.size(); input_index++) {
status = CopyInputDataToTensor(input_index, i, images_, quant_inputs[input_index]);
if (status != RET_OK) {
MS_LOG(ERROR) << "generate input data from images failed!";
delete quant_sm.model;
return RET_ERROR;
}
}
std::future<STATUS> fp32_inference = std::async(
std::launch::async, [](session::LiteSession *fp32_session) -> STATUS { return fp32_session->RunGraph(); },
fp32_session_);
status = quant_session->RunGraph();
if (status != RET_OK) {
MS_LOG(ERROR) << "quant session run error";
return RET_ERROR;
}
status = fp32_inference.get();
if (status != RET_OK) {
MS_LOG(ERROR) << "fp32 session run error";
delete quant_sm.model;
return RET_ERROR;
}
// 3. compare between quant and fp32
auto fp32_outputs = fp32_session_->GetOutputs();
auto quant_outputs = quant_session->GetOutputs();
mean_error += CompareOutputData<float>(fp32_outputs, quant_outputs);
mean_error += CompareOutputData<float>(fp32_output_tensors_[i], quant_outputs);
} // end_for: calib data loop
delete quant_sm.model;
mean_error = mean_error / image_cnt;
if (mean_error <= config_param_.mean_error_threshold) {
MS_LOG(DEBUG) << "op: " << op_name << " got mixed bit: " << bit_num_t << " mean_error: " << mean_error;
opname_bit_[op_name] = bit_num_t;
break;
} else if (bit_num_t != 8) {
MS_LOG(DEBUG) << "op: " << op_name << " intermediate bit: " << bit_num_t << " mean_error: " << mean_error
<< " [recover]";
// recover
param_value->set_tensor_size(sizeof(float) * elem_count);
ret = memcpy_s(raw_data, param_value->tensor_size(), origin_data, sizeof(float) * elem_count);
if (ret != EOK) {
MS_LOG(ERROR) << "memcpy fail: "
<< " src size: " << sizeof(float) * elem_count << " dst size: " << param_value->tensor_size();
status = UpdateTensorDataAndSize(param_value, origin_data, sizeof(float) * elem_count);
if (status != RET_OK) {
MS_LOG(ERROR) << "UpdateTensorDataAndSize fail";
return RET_ERROR;
}
} else {
@@ -610,6 +679,9 @@ STATUS WeightQuantizer::DoMiexedQuant(FuncGraphPtr func_graph) {
free(origin_data);
} // if: conv and matmul
} // end loop: all cnode
for (const auto &kv : opname_bit_) {
MS_LOG(INFO) << "op: " << kv.first << " bit:" << kv.second;
}
return RET_OK;
}

View File

@@ -19,6 +19,7 @@
#include <future>
#include <memory>
#include <unordered_map>
#include <map>
#include <list>
#include <string>
@@ -59,11 +60,12 @@ class WeightQuantizer : public Quantizer {
std::string config_file_;
PostQuantConfig config_param_;
std::vector<std::vector<std::string>> images_; // multi_input, [[model_input_0], [model_input_1]...]
session::LiteSession *fp32_session_ = nullptr;
std::vector<std::unordered_map<std::string, mindspore::tensor::MSTensor *>> fp32_output_tensors_;
STATUS DoMiexedQuant(FuncGraphPtr);
STATUS SetAbstract(ParamValueLitePtr param_value, ParameterPtr param_node, std::shared_ptr<PrimitiveC> primitive_c);
STATUS DoFixedQuant(FuncGraphPtr);
STATUS RunFp32Graph(FuncGraphPtr);
};
} // namespace mindspore::lite::quant
#endif