Merge quant util of converter and runtime, and save quantized models

Emir Haleva 2021-05-02 17:25:06 +03:00
parent 9416502e90
commit 5251f59f27
18 changed files with 607 additions and 802 deletions

View File

@ -49,6 +49,7 @@ set(LITE_SRC
${CMAKE_CURRENT_SOURCE_DIR}/common/prim_util.cc
${CMAKE_CURRENT_SOURCE_DIR}/common/tensor_util.cc
${CMAKE_CURRENT_SOURCE_DIR}/common/loader_util.cc
${CMAKE_CURRENT_SOURCE_DIR}/common/quant_utils.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/allocator.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/runtime_api.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/thread_pool.c
@ -124,6 +125,7 @@ if(SUPPORT_TRAIN)
${CMAKE_CURRENT_SOURCE_DIR}/train/accuracy_monitor.cc
${CMAKE_CURRENT_SOURCE_DIR}/train/classification_train_accuracy_monitor.cc
${CMAKE_CURRENT_SOURCE_DIR}/train/train_export.cc
${CMAKE_CURRENT_SOURCE_DIR}/../tools/common/storage.cc
)
if(ENABLE_V0)
set(LITE_SRC
@ -192,7 +194,10 @@ if(BUILD_MINDDATA STREQUAL "lite")
target_link_libraries(mindspore-lite_static minddata_eager_mid)
endif()
if(SUPPORT_TRAIN)
add_dependencies(mindspore-lite fbs_inner_src)
add_dependencies(mindspore-lite_static fbs_inner_src)
target_link_libraries(mindspore-lite minddata-lite)
target_link_libraries(mindspore-lite_static minddata-lite)
endif()

View File

@ -0,0 +1,104 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "schema/inner/model_generated.h"
#include "src/common/quant_utils.h"
#include "src/lite_kernel.h"
namespace mindspore {
namespace lite {
void GetMaxMinPerchannel(int channels, int one_filter_size, int i, int elem_count, const float *raw_datas,
bool channel_at_first, float *desired_max, float *desired_min) {
float min = FLT_MAX;
float max = -FLT_MAX;
// find min and max
for (int j = 0; j < one_filter_size; j++) {
auto index = j + i * one_filter_size;
if (!channel_at_first) {
index = j * channels + i;
}
if (index >= elem_count) {
MS_LOG(ERROR) << "over flow!";
}
min = std::min(min, raw_datas[index]);
max = std::max(max, raw_datas[index]);
}
*desired_max = max;
*desired_min = min;
}
STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int quant_max,
int quant_min, int num_bits) {
MS_ASSERT(quantParam != nullptr);
if (mMin > 0.0f) {
MS_LOG(DEBUG) << "min " << mMin << " is bigger then 0, set to 0, this may course low precision";
mMin = 0.0f;
}
if (mMax < 0.0f) {
MS_LOG(DEBUG) << "mMax " << mMax << " is smaller than 0, set to 0, this may course low precision";
mMax = 0.0f;
}
if (mMin > mMax) {
MS_LOG(ERROR) << "cal error while min" << mMin << ">" << mMax;
return RET_PARAM_INVALID;
}
if (mMin == mMax) {
if (mMin != 0.0f) {
MS_LOG(ERROR) << "min and max should both be zero if they are equal to each other";
return RET_ERROR;
}
quantParam->inited = true;
quantParam->min = mMin;
quantParam->max = mMax;
quantParam->scale = 0.0f;
quantParam->zeroPoint = 0;
quantParam->narrowRange = narrowRange;
quantParam->numBits = num_bits;
return RET_OK;
}
auto quantMinFloat = static_cast<double>(quant_min);
auto quantMaxFloat = static_cast<double>(quant_max);
if (fabs(quantMaxFloat - quantMinFloat) <= 0.0f) {
MS_LOG(ERROR) << "divisor cannot be 0";
return RET_ERROR;
}
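// Affine quantization: q = round(r / scale + zeroPoint) and r = scale * (q - zeroPoint),
// so scale spreads the real range [mMin, mMax] across the integer range [quant_min, quant_max].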
double scale = (mMax - mMin) / (quantMaxFloat - quantMinFloat);
if (fabs(scale) <= 0.0f) {
MS_LOG(ERROR) << "divisor 'scale' cannot be 0";
return RET_ERROR;
}
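// Choose the zero point so that the real value mMin maps exactly onto quant_min.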
const double zeroPointFromMin = quantMinFloat - mMin / scale;
int zeroPoint = static_cast<int32_t>(std::round(zeroPointFromMin));
// The zero point should always be in the range of quantized value,
// [qmin, qmax].
MS_ASSERT(zeroPoint >= quant_min);
MS_ASSERT(zeroPoint <= quant_max);
quantParam->inited = true;
quantParam->min = mMin;
quantParam->max = mMax;
quantParam->scale = scale;
quantParam->zeroPoint = zeroPoint;
quantParam->narrowRange = narrowRange;
quantParam->numBits = num_bits;
return RET_OK;
}
} // namespace lite
} // namespace mindspore

View File

@ -0,0 +1,234 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_COMMON_QUANT_UTILS_H_
#define MINDSPORE_LITE_SRC_COMMON_QUANT_UTILS_H_
#include <float.h>
#include <cmath>
#include <climits>
#include <limits>
#include <algorithm>
#include <vector>
#include "include/errorcode.h"
#include "src/common/log_adapter.h"
#include "ir/dtype/type_id.h"
namespace mindspore {
namespace schema {
struct QuantParamT;
}
namespace lite {
const int RET_QUANT_CONTINUE = 2;
static constexpr double SCALE_THREASHOLD = 1e-38;
static constexpr int kPerTensor = 1;
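// Quantized integer range for a given bit width: signed types use [-2^(bits-1), 2^(bits-1) - 1],
// unsigned types use [0, 2^bits - 1].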
inline int QuantMax(int bits, TypeId type) {
if (type == kNumberTypeInt8) {
return (1 << (bits - 1)) - 1;
} else if (type == kNumberTypeUInt8) {
return (1 << bits) - 1;
}
return 0;
}
inline int QuantMin(int bits, TypeId type) {
if (type == kNumberTypeInt8) {
return -(1 << (bits - 1));
}
return 0;
}
void GetMaxMinPerchannel(int channels, int one_filter_size, int i, int elem_count, const float *raw_datas,
bool channel_at_first, float *desired_max, float *desired_min);
STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int quant_max,
int quant_min, int num_bits);
template <typename T>
T QuantizeData(const float originData, const schema::QuantParamT *quantParam) {
MS_ASSERT(quantParam != nullptr);
MS_ASSERT(quantParam->inited);
const auto scale = quantParam->scale;
const auto zeroPoint = quantParam->zeroPoint;
const auto numBit = quantParam->numBits;
const auto narrowRange = quantParam->narrowRange;
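// Clamp bounds expressed in the real domain: inputs outside [minLimit, maxLimit] would map
// outside the representable quantized range of type T after rounding.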
double maxLimitTemp = static_cast<float>((1 << (unsigned int)numBit) - 1);
const double maxLimit = static_cast<float>(maxLimitTemp - zeroPoint + std::numeric_limits<T>::min()) * scale;
double minLimit;
if (narrowRange) {
minLimit = static_cast<float>(std::numeric_limits<T>::min() + 1 - zeroPoint) * scale;
} else {
minLimit = static_cast<float>(std::numeric_limits<T>::min() - zeroPoint) * scale;
}
return [maxLimit, minLimit, zeroPoint, scale, narrowRange, originData] {
double tmp;
if (originData > maxLimit) {
tmp = maxLimit;
} else if (originData < minLimit) {
tmp = minLimit;
} else {
tmp = originData;
}
auto quantData = static_cast<T>(std::round(zeroPoint + tmp / scale));
return quantData;
}();
}
template <typename T>
T QuantizeData(float originData, const schema::QuantParamT *quantParam, int quant_max, int quant_min) {
MS_ASSERT(quantParam != nullptr);
MS_ASSERT(quantParam->inited);
const auto scale = quantParam->scale;
const int zeroPoint = quantParam->zeroPoint;
const int maxLimit = quant_max;
const int minLimit = quant_min;
if (scale <= SCALE_THREASHOLD) {
return 0;
}
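// Quantize with saturation: q = clamp(round(r / scale + zeroPoint), quant_min, quant_max).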
return [maxLimit, minLimit, zeroPoint, scale, originData] {
auto quant_data = std::round(originData / scale + zeroPoint);
if (quant_data > maxLimit) {
quant_data = maxLimit;
} else if (quant_data < minLimit) {
quant_data = minLimit;
}
return static_cast<T>(quant_data);
}();
}
template <typename T>
STATUS DoPerLayerQuant(const float *raw_datas, size_t elem_count, std::vector<schema::QuantParamT> *quant_params,
const int &quant_max, const int &quant_min, const size_t &bit_num, const bool &k_means,
std::vector<T> *quant_datas) {
float min = FLT_MAX;
float max = -FLT_MAX;
for (uint32_t i = 0; i < elem_count; i++) {
min = std::min(min, raw_datas[i]);
max = std::max(max, raw_datas[i]);
}
schema::QuantParamT quant_param;
if (!k_means) {
STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bit_num);
if (status != RET_OK) {
MS_LOG(ERROR) << "CalQuantizationParams failed" << status;
return status;
}
}
quant_params->emplace_back(quant_param);
// update data and datatype
for (uint32_t i = 0; i < elem_count; i++) {
float raw_data = raw_datas[i];
if (!k_means) {
auto quant_data = QuantizeData<T>(raw_data, &quant_param, quant_max, quant_min);
(*quant_datas)[i] = quant_data;
}
}
return RET_OK;
}
template <typename T>
STATUS DoPerChannelQuant(const float *raw_datas, size_t elem_count, const schema::QuantType &quant_type,
std::vector<schema::QuantParamT> *quant_params, const int &quant_max, const int &quant_min,
const size_t &bit_num, const bool &k_means, std::vector<T> *quant_datas, int channels,
bool channel_at_first = true) {
static const int quant_param_size = 32 * 8;
std::vector<float> dequant_datas(quant_datas->size());
if (channels <= 0) {
MS_LOG(ERROR) << "channels must be greater than 0";
return RET_ERROR;
}
size_t one_filter_size = elem_count / channels;
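// Heuristic: skip weight quantization when a filter is so small that storing its per-channel
// quantization parameters would cost more than the bits saved on its elements.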
bool do_quant = quant_param_size / (sizeof(float) * 8 - bit_num) < one_filter_size;
if (!do_quant && quant_type == schema::QuantType_WeightQuant) {
MS_LOG(INFO) << "too few elements in a filter, no need to quantize. " << one_filter_size;
return RET_QUANT_CONTINUE;
}
for (int i = 0; i < channels; i++) {
float min = FLT_MAX;
float max = -FLT_MAX;
GetMaxMinPerchannel(channels, one_filter_size, i, elem_count, raw_datas, channel_at_first, &max, &min);
schema::QuantParamT quant_param;
STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bit_num);
if (status != RET_OK) {
MS_LOG(ERROR) << "CalQuantizationParams failed" << status;
return status;
}
// do quantization
double average_dequant = 0;
double average_raw = 0;
for (uint32_t j = 0; j < one_filter_size; j++) {
auto index = j + i * one_filter_size;
if (!channel_at_first) {
index = j * channels + i;
}
MS_ASSERT(index < elem_count);
float raw_data = raw_datas[index];
auto quant_data = QuantizeData<T>(raw_data, &quant_param, quant_max, quant_min);
(*quant_datas)[index] = quant_data;
if (quant_type == schema::QuantType_WeightQuant) {
float dequant_data = quant_param.scale * (quant_data - quant_param.zeroPoint);
dequant_datas[index] = dequant_data;
average_dequant += dequant_data;
average_raw += raw_data;
}
}
if (quant_type == schema::QuantType_WeightQuant && !k_means) {
// mean
average_dequant = average_dequant / one_filter_size;
average_raw = average_raw / one_filter_size;
// std
double variance_dequant = 0;
double variance_raw = 0;
for (uint32_t j = 0; j < one_filter_size; j++) {
auto index = j + i * one_filter_size;
if (!channel_at_first) {
index = j * channels + i;
}
MS_ASSERT(index < elem_count);
variance_dequant += std::pow(dequant_datas[index] - average_dequant, 2);
variance_raw += std::pow(raw_datas[index] - average_raw, 2);
}
variance_dequant = std::sqrt(variance_dequant / one_filter_size);
variance_raw = std::sqrt(variance_raw / one_filter_size);
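// varCorr and meanCorr record a per-channel linear correction so that the dequantized weights'
// standard deviation and mean match those of the original float weights.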
quant_param.varCorr = 1;
if (variance_raw != 0 && variance_dequant != 0) {
auto temp_var_corr = variance_raw / variance_dequant;
if (temp_var_corr > 0 && temp_var_corr < 10) {
quant_param.varCorr = temp_var_corr;
} else {
MS_LOG(WARNING) << "unexpected var_corr: " << temp_var_corr;
}
}
quant_param.meanCorr = average_raw - average_dequant * quant_param.varCorr;
}
quant_params->emplace_back(quant_param);
}
return RET_OK;
}
} // namespace lite
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_COMMON_QUANT_UTILS_H_

View File

@ -23,46 +23,93 @@
#include <set>
#include "schema/inner/model_generated.h"
#include "src/train/train_utils.h"
#include "src/common/quant_utils.h"
#include "tools/common/storage.h"
namespace mindspore {
namespace lite {
std::vector<uint8_t> TrainExport::CreateData(const mindspore::lite::Tensor *tensor) {
std::vector<uint8_t> TrainExport::CreateData(const lite::Tensor *tensor) {
uint8_t *tensor_data = reinterpret_cast<uint8_t *>(tensor->data_c());
auto size = tensor->Size();
std::vector<uint8_t> data(tensor_data, tensor_data + size);
return data;
}
bool TrainExport::NeedQuantization(const lite::Tensor *tensor) {
return (tensor->quant_params().size() > 0 && tensor->quant_params().at(0).inited);
}
schema::QuantType TrainExport::GetNodeQuantType(const kernel::LiteKernel *kernel) {
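// A node is treated as weight-quantized when any of its constant input tensors already
// carries an initialized quantization parameter.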
if (std::any_of(kernel->in_tensors().cbegin(), kernel->in_tensors().cend(), [](const lite::Tensor *t) {
return (t->IsConst() && (t->quant_params().size() > 0) && (t->quant_params().at(0).inited));
})) {
return schema::QuantType_QUANT_WEIGHT;
}
return schema::QuantType_QUANT_NONE;
}
int TrainExport::QuantTensorData(schema::TensorT *dest_tensor, const lite::Tensor *src_tensor) {
int channels = src_tensor->quant_params().size();
if (channels < 1) {
MS_LOG(ERROR) << "Quant Params is empty";
return RET_ERROR;
}
int bit_num = src_tensor->quant_params().at(0).bitNum;
int quant_max = QuantMax(bit_num, kNumberTypeInt8);
int quant_min = QuantMin(bit_num, kNumberTypeInt8);
std::vector<int8_t> data(src_tensor->ElementsNum());
std::vector<schema::QuantParamT> quant_params;
STATUS ret = RET_OK;
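// A single quant param means per-tensor (per-layer) quantization; several params mean
// per-channel quantization, one parameter set per channel.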
if (channels == kPerTensor) {
ret = DoPerLayerQuant<int8_t>(reinterpret_cast<float *>(src_tensor->data_c()), src_tensor->ElementsNum(),
&(quant_params), quant_max, quant_min, bit_num, false, &data);
} else {
bool channel_at_first = (src_tensor->shape().at(0) == channels);
ret = DoPerChannelQuant<int8_t>(reinterpret_cast<float *>(src_tensor->data_c()), src_tensor->ElementsNum(),
schema::QuantType_WeightQuant, &(quant_params), quant_max, quant_min, bit_num,
false, &data, channels, channel_at_first);
}
if (ret == RET_QUANT_CONTINUE) {
MS_LOG(DEBUG) << "No Need to quant per channel";
return RET_OK;
}
if (ret == RET_ERROR) {
MS_LOG(ERROR) << "QuantTensorData error, channels = " << channels;
return ret;
}
if (quant_params.empty()) {
MS_LOG(ERROR) << "quant_params empty";
return RET_ERROR;
}
dest_tensor->data = std::vector<uint8_t>(data.data(), data.data() + data.size());
dest_tensor->dataType = kNumberTypeInt8;
dest_tensor->quantParams.clear();
for (auto quant_param : quant_params) {
dest_tensor->quantParams.emplace_back(std::make_unique<schema::QuantParamT>(quant_param));
}
return RET_OK;
}
std::unique_ptr<schema::TensorT> TrainExport::CreateTensor(const mindspore::lite::Tensor *tensor,
schema::Tensor *scTensor) {
auto tensorT = std::make_unique<schema::TensorT>();
tensorT->nodeType = scTensor->nodeType();
tensorT->dataType = tensor->data_type();
tensorT->dims = tensor->shape();
tensorT->format = tensor->format();
tensorT->name = tensor->tensor_name();
tensorT->refCount = 0;
tensorT->offset = 0;
tensorT->dataType = tensor->data_type();
tensorT->enableHuffmanCode = false;
if ((tensorT->nodeType == NodeType_ValueNode) && (scTensor->data() != nullptr) && (scTensor->data()->size() > 0)) {
tensorT->data = CreateData(tensor);
}
for (auto quant_param : tensor->quant_params()) {
auto quantParamT = std::make_unique<schema::QuantParamT>();
quantParamT->scale = quant_param.scale;
quantParamT->zeroPoint = quant_param.zeroPoint;
quantParamT->min = 0;
quantParamT->max = 0;
quantParamT->narrowRange = true;
quantParamT->numBits = quant_param.bitNum;
quantParamT->inited = quant_param.inited;
quantParamT->varCorr = quant_param.var_corr;
quantParamT->meanCorr = quant_param.mean_corr;
quantParamT->dstDtype = quant_param.dstDtype;
quantParamT->roundType = quant_param.roundType;
quantParamT->multiplier = quant_param.multiplier;
tensorT->quantParams.emplace_back(std::move(quantParamT));
if (NeedQuantization(tensor)) {
QuantTensorData(tensorT.get(), tensor);
} else {
tensorT->data = CreateData(tensor);
}
}
tensorT->quantClusters = tensor->quant_clusters();
return tensorT;
@ -85,7 +132,7 @@ std::unique_ptr<schema::CNodeT> TrainExport::CreateCNode(const mindspore::kernel
cnodeT->inputIndex = inputIndex;
cnodeT->outputIndex = outputIndex;
cnodeT->name = kernel->name();
cnodeT->quantType = schema::QuantType_QUANT_NONE;
cnodeT->quantType = GetNodeQuantType(kernel);
// find kernel in model
auto *node = FindNode(kernel);
if (node == nullptr) {
@ -132,7 +179,6 @@ int TrainExport::Export(const std::vector<mindspore::kernel::LiteKernel *> &kern
MS_LOG(ERROR) << "cannot find tensor " + tensor->ToString() + " in model";
return RET_ERROR;
}
out_set.insert(id);
auto it = remap.find(id);
if (it == remap.end()) {
remap[id] = tensor_idx;
@ -153,7 +199,7 @@ int TrainExport::Export(const std::vector<mindspore::kernel::LiteKernel *> &kern
schema::Tensor *scTensor = model_->all_tensors_.at(id);
auto tensorT = CreateTensor(tensor, scTensor);
// find a tensor which is not an output
if (out_set.find(id) == out_set.end()) {
if (out_set.find(remap[id]) == out_set.end()) {
if ((tensorT->nodeType == NodeType_ValueNode) && (tensorT->data.size() == 0)) {
meta_graph->inputIndex.push_back(remap[id]);
}
@ -165,7 +211,7 @@ int TrainExport::Export(const std::vector<mindspore::kernel::LiteKernel *> &kern
meta_graph->allTensors.emplace_back(std::move(tensorT));
}
auto graph = meta_graph.release();
int err = SaveToFile(graph, file_name_);
int err = Storage::Save(*graph, file_name_);
if (err != RET_OK) {
MS_LOG(ERROR) << "failed to save flatbuffer file " << file_name_;
}
@ -173,30 +219,5 @@ int TrainExport::Export(const std::vector<mindspore::kernel::LiteKernel *> &kern
return err;
}
int TrainExport::SaveToFile(const schema::MetaGraphT *graph, const std::string &outputPath) {
flatbuffers::FlatBufferBuilder builder(1024);
auto offset = schema::MetaGraph::Pack(builder, graph);
builder.Finish(offset);
schema::FinishMetaGraphBuffer(builder, offset);
int size = builder.GetSize();
auto content = builder.GetBufferPointer();
if (content == nullptr) {
MS_LOG(ERROR) << "GetBufferPointer nullptr";
return RET_ERROR;
}
if (access((outputPath + ".ms").c_str(), F_OK) == 0) {
chmod((outputPath + ".ms").c_str(), S_IWUSR);
}
std::ofstream output(outputPath + ".ms", std::ofstream::binary);
if (!output.is_open()) {
MS_LOG(ERROR) << "Can not open output file: " << outputPath << ".ms";
return RET_ERROR;
}
output.write((const char *)content, size);
output.close();
chmod((outputPath + ".ms").c_str(), S_IRUSR);
return RET_OK;
}
} // namespace lite
} // namespace mindspore

View File

@ -50,7 +50,10 @@ class TrainExport {
std::unique_ptr<schema::TensorT> CreateTensor(const mindspore::lite::Tensor *tensor, schema::Tensor *scTensor);
std::unique_ptr<schema::CNodeT> CreateCNode(const mindspore::kernel::LiteKernel *kernel,
std::vector<uint32_t> inputIndex, std::vector<uint32_t> outputIndex);
int SaveToFile(const schema::MetaGraphT *graph, const std::string &outputPath);
bool NeedQuantization(const mindspore::lite::Tensor *tensor);
virtual int QuantTensorData(schema::TensorT *dest_tensor, const mindspore::lite::Tensor *src_tensor);
mindspore::schema::QuantType GetNodeQuantType(const mindspore::kernel::LiteKernel *kernel);
};
}; // namespace lite
} // namespace mindspore

View File

@ -32,7 +32,7 @@ std::vector<bool> StringToBitVector(const std::string &str) {
}
STATUS IndexingDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor) {
MS_LOG(ERROR) << "un-index weight";
MS_LOG(DEBUG) << "un-index weight";
auto bit_num = src_tensor.quantParams()->Get(0)->numBits();
std::string str(reinterpret_cast<const char *>(src_tensor.data()->data()), src_tensor.data()->size());

View File

@ -159,6 +159,7 @@ set(TEST_LITE_SRC
${LITE_DIR}/src/common/utils.cc
${LITE_DIR}/src/common/loader_util.cc
${LITE_DIR}/src/common/string_util.cc
${LITE_DIR}/src/common/quant_utils.cc
${LITE_DIR}/tools/common/flag_parser.cc
${LITE_DIR}/tools/benchmark/benchmark.cc
${LITE_DIR}/test/st/benchmark_test.cc
@ -297,6 +298,7 @@ if(SUPPORT_TRAIN)
${LITE_DIR}/src/train/train_utils.cc
${LITE_DIR}/src/train/transfer_session.cc
${LITE_DIR}/src/lite_session.cc
${LITE_DIR}/tools/common/storage.cc
)
else()
set(TEST_LITE_SRC
@ -388,6 +390,10 @@ endif()
add_executable(lite-test ${TEST_SRC})
add_dependencies(lite-test fbs_src)
if(SUPPORT_TRAIN)
add_dependencies(lite-test fbs_inner_src)
endif()
target_link_libraries(lite-test dl mindspore::gtest)
if(PLATFORM_ARM AND ENABLE_FP16)

View File

@ -11,7 +11,7 @@ googlenet
densenet
shufflenetv2
mini_alexnet weight_quant 2
nin weight_quant 7
nin weight_quant 9
lenet weight_quant 5
mobilenetv1 weight_quant 2
mobilenetv2 weight_quant 2

View File

@ -82,22 +82,27 @@ function Run_x86() {
model_prefix=${line_array[0]}
model_name=${line_array[0]}'_train'
accuracy_limit=0.5
export_file=""
inference_file=""
if [[ $model_name == \#* ]]; then
continue
fi
if [[ "${line_array[1]}" == "weight_quant" ]]; then
model_name=${line_array[0]}'_train_quant'
accuracy_limit=${line_array[2]}
else
export_file="${ms_models_path}/${model_name}_tod"
rm -f ${export_file}"*"
fi
if [[ "${save_lite}" == "1" ]]; then
inference_file="${ms_models_path}/${model_name}_infer"
fi
inference_file="${ms_models_path}/${model_name}_infer"
rm -f ${inference_file}"*"
echo ${model_name} >> "${run_x86_log_file}"
${run_valgrind}./tools/benchmark_train/benchmark_train \
--modelFile=${ms_models_path}/${model_name}.ms \
--inDataFile=${train_io_path}/${model_prefix}_input1.bin,${train_io_path}/${model_prefix}_input2.bin \
--inDataFile=${train_io_path}/${model_prefix}_input \
--expectedDataFile=${train_io_path}/${model_prefix}_output --epochs=${epoch_num} --numThreads=${threads} \
--accuracyThreshold=${accuracy_limit} --inferenceFile=${inference_file} >> "${run_x86_log_file}"
--accuracyThreshold=${accuracy_limit} --inferenceFile=${inference_file} \
--exportFile=${export_file} >> "${run_x86_log_file}"
if [ $? = 0 ]; then
run_result='x86: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_train_result_file}
else
@ -168,21 +173,22 @@ function Run_arm() {
model_prefix=${line_array[0]}
model_name=${line_array[0]}'_train'
accuracy_limit=0.5
export_file=""
if [[ $model_name == \#* ]]; then
continue
fi
if [[ "${line_array[1]}" == "weight_quant" ]]; then
model_name=${line_array[0]}'_train_quant'
accuracy_limit=${line_array[2]}
else
export_file="${tmp_dir}/${model_name}_tod"
fi
inference_file="${tmp_dir}/${model_name}_infer"
if [[ "${line_array[1]}" == "noarm32" ]] && [[ "$1" == arm32 ]]; then
run_result=$1': '${model_name}' irrelevant'; echo ${run_result} >> ${run_benchmark_train_result_file}
continue
fi
if [[ "${save_lite}" == "1" ]]; then
inference_file="${ms_models_path}/${model_name}_infer"
fi
# run benchmark_train test without clib data
echo ${model_name} >> "${run_arm_log_file}"
adb -s ${device_id} push ${train_io_path}/${model_prefix}_input*.bin ${train_io_path}/${model_prefix}_output*.bin /data/local/tmp/benchmark_train_test >> ${adb_push_log_file}
@ -193,15 +199,20 @@ function Run_arm() {
elif [ "$1" == arm32 ]; then
echo 'cp /data/local/tmp/arm32/libc++_shared.so ./' >> ${adb_cmd_run_file}
fi
echo "rm -f ${tmp_dir}/${model_name}_exported.ms" >> ${run_arm_log_file}
echo "rm -f ${tmp_dir}/${model_name}_exported.ms" >> ${adb_cmd_run_file}
adb -s ${device_id} shell < ${adb_cmd_run_file} >> ${run_arm_log_file}
echo "rm -f ${export_file} ${inference_file}.ms" >> ${run_arm_log_file}
echo "rm -f ${export_file} ${inference_file}.ms" >> ${adb_cmd_run_file}
adb -s ${device_id} shell < ${adb_cmd_run_file} >> ${run_arm_log_file}
adb_cmd=$(cat <<-ENDM
export LD_LIBRARY_PATH=./:/data/local/tmp/:/data/local/tmp/benchmark_train_test;./benchmark_train \
--epochs=${epoch_num} \
--modelFile=${model_name}.ms \
--inDataFile=${tmp_dir}/${model_prefix}_input1.bin,${tmp_dir}/${model_prefix}_input2.bin \
--inDataFile=${tmp_dir}/${model_prefix}_input \
--expectedDataFile=${tmp_dir}/${model_prefix}_output \
--numThreads=${threads} --accuracyThreshold=${accuracy_limit} --inferenceFile=${inference_file}
--numThreads=${threads} \
--accuracyThreshold=${accuracy_limit} \
--inferenceFile=${inference_file} \
--exportFile=${export_file}
ENDM
)
echo "${adb_cmd}" >> ${run_arm_log_file}
@ -252,7 +263,7 @@ models_mindspore_train_config=${basepath}/models_ms_train.cfg
epoch_num=1
threads=2
train_io_path=""
while getopts "r:M:c:m:d:i:e:vt:q:DF" opt; do
while getopts "r:M:c:m:d:i:e:vt:q:D" opt; do
case ${opt} in
r)
release_path=${OPTARG}
@ -295,8 +306,6 @@ while getopts "r:M:c:m:d:i:e:vt:q:DF" opt; do
epoch_num=${OPTARG}
echo "train epoch num is ${epoch_num}"
;;
F) save_lite=1
;;
?)
echo "unknown para"
exit 1;;

View File

@ -107,7 +107,7 @@ static STATUS CompressTensor(schema::TensorT *tensor_input, const std::unique_pt
int bit_num = tensor_input->quantParams.at(0)->numBits;
// Pack Repetition
auto repetition_packed = false;
MS_LOG(ERROR) << dst_node->name;
MS_LOG(DEBUG) << dst_node->name;
if (dst_node->quantType == schema::QuantType_QUANT_WEIGHT) {
if (bit_num <= 8) {
repetition_packed = PackRepetition<int8_t>(bit_num, tensor_input);

View File

@ -32,8 +32,6 @@
namespace mindspore {
namespace lite {
static const char *DELIM_COLON = ":";
static const char *DELIM_COMMA = ",";
static const char *DELIM_SLASH = "/";
namespace {
@ -81,8 +79,8 @@ int NetTrain::GenerateRandomData(size_t size, void *data) {
return RET_OK;
}
int NetTrain::GenerateInputData() {
for (auto tensor : ms_inputs_) {
int NetTrain::GenerateInputData(std::vector<mindspore::tensor::MSTensor *> *ms_inputs) {
for (auto tensor : *ms_inputs) {
MS_ASSERT(tensor != nullptr);
auto input_data = tensor->MutableData();
if (input_data == nullptr) {
@ -100,16 +98,16 @@ int NetTrain::GenerateInputData() {
return RET_OK;
}
int NetTrain::LoadInput() {
int NetTrain::LoadInput(std::vector<mindspore::tensor::MSTensor *> *ms_inputs) {
if (flags_->in_data_file_.empty()) {
auto status = GenerateInputData();
auto status = GenerateInputData(ms_inputs);
if (status != RET_OK) {
std::cerr << "Generate input data error " << status << std::endl;
MS_LOG(ERROR) << "Generate input data error " << status;
return status;
}
} else {
auto status = ReadInputFile();
auto status = ReadInputFile(ms_inputs);
if (status != RET_OK) {
std::cerr << "ReadInputFile error, " << status << std::endl;
MS_LOG(ERROR) << "ReadInputFile error, " << status;
@ -119,8 +117,8 @@ int NetTrain::LoadInput() {
return RET_OK;
}
int NetTrain::ReadInputFile() {
if (ms_inputs_.empty()) {
int NetTrain::ReadInputFile(std::vector<mindspore::tensor::MSTensor *> *ms_inputs) {
if (ms_inputs->empty()) {
return RET_OK;
}
@ -128,16 +126,12 @@ int NetTrain::ReadInputFile() {
MS_LOG(ERROR) << "Not supported image input";
return RET_ERROR;
} else {
if (ms_inputs_.size() > flags_->input_data_list_.size()) {
MS_LOG(ERROR) << "missing input files expecting " << ms_inputs_.size() << ",got "
<< flags_->input_data_list_.size();
return RET_ERROR;
}
for (size_t i = 0; i < ms_inputs_.size(); i++) {
auto cur_tensor = ms_inputs_.at(i);
for (size_t i = 0; i < ms_inputs->size(); i++) {
auto cur_tensor = ms_inputs->at(i);
MS_ASSERT(cur_tensor != nullptr);
size_t size;
char *bin_buf = ReadFile(flags_->input_data_list_[i].c_str(), &size);
std::string file_name = flags_->in_data_file_ + std::to_string(i + 1) + ".bin";
char *bin_buf = ReadFile(file_name.c_str(), &size);
if (bin_buf == nullptr) {
MS_LOG(ERROR) << "ReadFile return nullptr";
return RET_ERROR;
@ -158,94 +152,12 @@ int NetTrain::ReadInputFile() {
return RET_OK;
}
int NetTrain::CompareOutput() {
std::cout << "================ Comparing Output data ================" << std::endl;
float total_bias = 0;
int total_size = 0;
bool has_error = false;
auto tensors_list = session_->GetOutputs();
if (tensors_list.empty()) {
MS_LOG(ERROR) << "Cannot find output tensors, get model output failed";
return RET_ERROR;
}
mindspore::tensor::MSTensor *tensor = nullptr;
int i = 1;
for (auto it = tensors_list.begin(); it != tensors_list.end(); ++it) {
tensor = session_->GetOutputByTensorName(it->first);
std::cout << "output is tensor " << it->first << "\n";
auto outputs = tensor->MutableData();
size_t size;
std::string output_file = flags_->data_file_ + std::to_string(i) + ".bin";
auto *bin_buf = ReadFileBuf(output_file.c_str(), &size);
if (bin_buf == nullptr) {
MS_LOG(ERROR) << "ReadFile return nullptr";
return RET_ERROR;
}
if (flags_->enable_fp16_ && tensor->data_type() == kNumberTypeFloat16) {
if (static_cast<int>(size / sizeof(float)) != tensor->ElementsNum()) {
MS_LOG(ERROR) << "Output buffer and output file differ by size. Tensor size: " << tensor->Size()
<< ", read size: " << size / sizeof(float);
return RET_ERROR;
}
} else {
if (size != tensor->Size()) {
MS_LOG(ERROR) << "Output buffer and output file differ by size. Tensor size: " << tensor->Size()
<< ", read size: " << size;
return RET_ERROR;
}
}
float bias = 0.f;
if (flags_->enable_fp16_ && tensor->data_type() == kNumberTypeFloat16) {
#ifdef ENABLE_FP16
bias = CompareData<float16_t>(bin_buf, tensor->ElementsNum(), reinterpret_cast<float16_t *>(outputs));
#endif
} else {
bias = CompareData<float>(bin_buf, tensor->ElementsNum(), reinterpret_cast<float *>(outputs));
}
if (bias >= 0) {
total_bias += bias;
total_size++;
} else {
has_error = true;
break;
}
i++;
delete[] bin_buf;
}
if (!has_error) {
float mean_bias;
if (total_size != 0) {
mean_bias = total_bias / total_size * 100;
} else {
mean_bias = 0;
}
std::cout << "Mean bias of all nodes/tensors: " << mean_bias << "%"
<< " threshold is:" << this->flags_->accuracy_threshold_ << std::endl;
std::cout << "=======================================================" << std::endl << std::endl;
if (mean_bias > this->flags_->accuracy_threshold_) {
MS_LOG(ERROR) << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%";
std::cerr << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%" << std::endl;
return RET_ERROR;
} else {
return RET_OK;
}
} else {
MS_LOG(ERROR) << "Error in CompareData";
std::cerr << "Error in CompareData" << std::endl;
std::cout << "=======================================================" << std::endl << std::endl;
return RET_ERROR;
}
}
int NetTrain::CompareOutputLite(const std::unique_ptr<session::LiteSession> &lite_session) {
int NetTrain::CompareOutput(const session::LiteSession &lite_session) {
std::cout << "================ Comparing Forward Output data ================" << std::endl;
float total_bias = 0;
int total_size = 0;
bool has_error = false;
auto tensors_list = lite_session->GetOutputs();
auto tensors_list = lite_session.GetOutputs();
if (tensors_list.empty()) {
MS_LOG(ERROR) << "Cannot find output tensors, get model output failed";
return RET_ERROR;
@ -253,9 +165,9 @@ int NetTrain::CompareOutputLite(const std::unique_ptr<session::LiteSession> &lit
mindspore::tensor::MSTensor *tensor = nullptr;
int i = 1;
for (auto it = tensors_list.begin(); it != tensors_list.end(); ++it) {
tensor = lite_session->GetOutputByTensorName(it->first);
tensor = lite_session.GetOutputByTensorName(it->first);
std::cout << "output is tensor " << it->first << "\n";
auto outputs = tensor->MutableData();
auto outputs = tensor->data();
size_t size;
std::string output_file = flags_->data_file_ + std::to_string(i) + ".bin";
auto *bin_buf = ReadFileBuf(output_file.c_str(), &size);
@ -307,7 +219,7 @@ int NetTrain::CompareOutputLite(const std::unique_ptr<session::LiteSession> &lit
}
}
int NetTrain::MarkPerformance() {
int NetTrain::MarkPerformance(session::TrainSession *session) {
MS_LOG(INFO) << "Running train loops...";
std::cout << "Running train loops..." << std::endl;
uint64_t time_min = 0xFFFFFFFFFFFFFFFF;
@ -315,10 +227,10 @@ int NetTrain::MarkPerformance() {
uint64_t time_avg = 0;
for (int i = 0; i < flags_->epochs_; i++) {
session_->BindThread(true);
session->BindThread(true);
auto start = GetTimeUs();
auto status =
flags_->time_profiling_ ? session_->RunGraph(before_call_back_, after_call_back_) : session_->RunGraph();
flags_->time_profiling_ ? session->RunGraph(before_call_back_, after_call_back_) : session->RunGraph();
if (status != 0) {
MS_LOG(ERROR) << "Inference error " << status;
std::cerr << "Inference error " << status;
@ -330,7 +242,7 @@ int NetTrain::MarkPerformance() {
time_min = std::min(time_min, time);
time_max = std::max(time_max, time);
time_avg += time;
session_->BindThread(false);
session->BindThread(false);
}
if (flags_->time_profiling_) {
@ -352,10 +264,9 @@ int NetTrain::MarkPerformance() {
return RET_OK;
}
int NetTrain::MarkAccuracy() {
int NetTrain::MarkAccuracy(session::LiteSession *session) {
MS_LOG(INFO) << "MarkAccuracy";
std::cout << "MarkAccuracy" << std::endl;
for (auto &msInput : ms_inputs_) {
for (auto &msInput : session->GetInputs()) {
switch (msInput->data_type()) {
case TypeId::kNumberTypeFloat:
PrintInputData<float>(msInput);
@ -371,50 +282,14 @@ int NetTrain::MarkAccuracy() {
return RET_ERROR;
}
}
session_->Eval();
auto status = session_->RunGraph(before_call_back_, after_call_back_);
auto status = session->RunGraph();
if (status != RET_OK) {
MS_LOG(ERROR) << "Inference error " << status;
std::cerr << "Inference error " << status << std::endl;
return status;
}
status = CompareOutput();
if (status != RET_OK) {
MS_LOG(ERROR) << "Compare output error " << status;
std::cerr << "Compare output error " << status << std::endl;
return status;
}
return RET_OK;
}
int NetTrain::MarkAccuracyLite(const std::unique_ptr<session::LiteSession> &lite_session) {
MS_LOG(INFO) << "MarkAccuracy";
std::cout << "MarkAccuracy" << std::endl;
for (auto &msInput : ms_inputs_) {
switch (msInput->data_type()) {
case TypeId::kNumberTypeFloat:
PrintInputData<float>(msInput);
break;
case TypeId::kNumberTypeFloat32:
PrintInputData<float>(msInput);
break;
case TypeId::kNumberTypeInt32:
PrintInputData<int>(msInput);
break;
default:
MS_LOG(ERROR) << "Datatype " << msInput->data_type() << " is not supported.";
return RET_ERROR;
}
}
auto status = lite_session->RunGraph();
if (status != RET_OK) {
MS_LOG(ERROR) << "Inference error " << status;
std::cerr << "Inference error " << status << std::endl;
return status;
}
status = CompareOutputLite(lite_session);
status = CompareOutput(*session);
if (status != RET_OK) {
MS_LOG(ERROR) << "Compare output error " << status;
std::cerr << "Compare output error " << status << std::endl;
@ -423,228 +298,106 @@ int NetTrain::MarkAccuracyLite(const std::unique_ptr<session::LiteSession> &lite
return RET_OK;
}
int NetTrain::RunExportedNet() {
static CpuBindMode FlagToBindMode(int flag) {
if (flag == 2) {
return MID_CPU;
}
if (flag == 1) {
return HIGHER_CPU;
}
return NO_BIND;
}
int NetTrain::CreateAndRunNetwork(const std::string &filename, int train_session, int epochs) {
auto start_prepare_time = GetTimeUs();
// Load graph
std::string model_name = flags_->export_file_.substr(flags_->export_file_.find_last_of(DELIM_SLASH) + 1);
std::string model_name = filename.substr(filename.find_last_of(DELIM_SLASH) + 1);
Context context;
context.device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = FlagToBindMode(flags_->cpu_bind_mode_);
context.device_list_[0].device_info_.cpu_device_info_.enable_float16_ = flags_->enable_fp16_;
context.device_list_[0].device_type_ = mindspore::lite::DT_CPU;
context.thread_num_ = flags_->num_threads_;
MS_LOG(INFO) << "start reading exported model file";
std::cout << "start reading exported model file" << std::endl;
auto context = std::make_shared<Context>();
if (context == nullptr) {
MS_LOG(ERROR) << "New context failed while running " << model_name.c_str();
std::cerr << "New context failed while running " << model_name.c_str() << std::endl;
return RET_ERROR;
}
if (flags_->cpu_bind_mode_ == 2) {
context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = MID_CPU;
} else if (flags_->cpu_bind_mode_ == 1) {
context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = HIGHER_CPU;
} else {
context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = NO_BIND;
}
context->thread_num_ = flags_->num_threads_;
auto *model = mindspore::lite::Model::Import(flags_->export_file_.c_str());
MS_LOG(INFO) << "start reading model file" << filename.c_str();
std::cout << "start reading model file " << filename.c_str() << std::endl;
auto *model = mindspore::lite::Model::Import(filename.c_str());
if (model == nullptr) {
MS_LOG(ERROR) << "create model for train session failed";
return RET_ERROR;
}
session_ = session::TrainSession::CreateSession(model, context.get());
if (session_ == nullptr) {
MS_LOG(ERROR) << "ExportedFile CreateSession failed while running " << model_name.c_str();
std::cout << "CreateSession failed while running " << model_name.c_str() << std::endl;
return RET_ERROR;
}
if (flags_->loss_name_ != "") {
session_->SetLossName(flags_->loss_name_);
}
ms_inputs_ = session_->GetInputs();
auto end_prepare_time = GetTimeUs();
MS_LOG(INFO) << "Exported model PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms";
std::cout << "Exported model PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms" << std::endl;
// Load input
MS_LOG(INFO) << "start generate input data";
auto status = LoadInput();
if (status != RET_OK) {
MS_LOG(ERROR) << "Generate input data error";
return status;
}
if (!flags_->data_file_.empty()) {
MS_LOG(INFO) << "Check accuracy for exported model";
std::cout << "Check accuracy for exported model " << std::endl;
status = MarkAccuracy();
for (auto &data : data_) {
data.second->shape.clear();
data.second->data.clear();
delete data.second;
}
data_.clear();
if (status != RET_OK) {
MS_LOG(ERROR) << "Run MarkAccuracy on exported model error: " << status;
std::cout << "Run MarkAccuracy on exported model error: " << status << std::endl;
return status;
}
}
return RET_OK;
}
int NetTrain::RunExportedNetLite(std::string file_name) {
auto start_prepare_time = GetTimeUs();
// Load graph
std::string model_name = file_name.substr(file_name.find_last_of(DELIM_SLASH) + 1);
MS_LOG(INFO) << "start reading exported model file";
std::cout << "reading " << file_name << std::endl;
auto context = std::make_shared<Context>();
if (context == nullptr) {
MS_LOG(ERROR) << "New context failed while running " << model_name.c_str();
std::cerr << "New context failed while running " << model_name.c_str() << std::endl;
return RET_ERROR;
}
if (flags_->cpu_bind_mode_ == 2) {
context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = MID_CPU;
} else if (flags_->cpu_bind_mode_ == 1) {
context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = HIGHER_CPU;
} else {
context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = NO_BIND;
}
context->thread_num_ = flags_->num_threads_;
auto *model = mindspore::lite::Model::Import(file_name.c_str());
if (model == nullptr) {
MS_LOG(ERROR) << "create model for lite session failed";
return RET_ERROR;
}
auto lite_session = std::unique_ptr<session::LiteSession>(session::LiteSession::CreateSession(context.get()));
if (lite_session == nullptr) {
MS_LOG(ERROR) << "ExportedFile CreateSession failed while running " << model_name.c_str();
std::cout << "CreateSession failed while running " << model_name.c_str() << std::endl;
return RET_ERROR;
}
if (lite_session->CompileGraph(model) != RET_OK) {
MS_LOG(ERROR) << "Cannot compile model";
delete model;
return RET_ERROR;
}
ms_inputs_ = lite_session->GetInputs();
auto end_prepare_time = GetTimeUs();
MS_LOG(INFO) << "Exported model PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms";
std::cout << "Exported model PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms" << std::endl;
// Load input
MS_LOG(INFO) << "start generate input data";
auto status = LoadInput();
if (status != RET_OK) {
MS_LOG(ERROR) << "Generate input data error";
delete model;
return status;
}
if (!flags_->data_file_.empty()) {
MS_LOG(INFO) << "Check accuracy for exported model";
std::cout << "Check accuracy for exported model " << std::endl;
status = MarkAccuracyLite(lite_session);
for (auto &data : data_) {
data.second->shape.clear();
data.second->data.clear();
delete data.second;
}
data_.clear();
if (status != RET_OK) {
MS_LOG(ERROR) << "Run MarkAccuracy on exported model error: " << status;
std::cout << "Run MarkAccuracy on exported model error: " << status << std::endl;
session::LiteSession *session = nullptr;
session::TrainSession *t_session = nullptr;
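// Benchmarking training requires a TrainSession; checking an exported inference model
// only needs a plain LiteSession.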
if (train_session) {
t_session = session::TrainSession::CreateSession(model, &context);
if (t_session == nullptr) {
MS_LOG(ERROR) << "RunNetTrain CreateSession failed while running " << model_name.c_str();
std::cout << "RunNetTrain CreateSession failed while running " << model_name.c_str() << std::endl;
delete model;
return status;
return RET_ERROR;
}
}
delete model;
return RET_OK;
}
int NetTrain::RunNetTrain() {
auto start_prepare_time = GetTimeUs();
// Load graph
std::string model_name = flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1);
MS_LOG(INFO) << "start reading model file";
std::cout << "start reading model file" << std::endl;
auto context = std::make_shared<Context>();
if (context == nullptr) {
MS_LOG(ERROR) << "New context failed while running " << model_name.c_str();
std::cerr << "New context failed while running " << model_name.c_str() << std::endl;
return RET_ERROR;
}
if (flags_->cpu_bind_mode_ == 2) {
context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = MID_CPU;
} else if (flags_->cpu_bind_mode_ == 1) {
context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = HIGHER_CPU;
if (flags_->loss_name_ != "") {
t_session->SetLossName(flags_->loss_name_);
}
if (epochs > 0) {
t_session->Train();
}
session = t_session;
} else {
context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = NO_BIND;
}
context->device_list_[0].device_info_.cpu_device_info_.enable_float16_ = flags_->enable_fp16_;
layer_checksum_ = flags_->layer_checksum_;
context->thread_num_ = flags_->num_threads_;
auto *model = mindspore::lite::Model::Import(flags_->model_file_.c_str());
if (model == nullptr) {
MS_LOG(ERROR) << "create model for train session failed";
return RET_ERROR;
}
session_ = session::TrainSession::CreateSession(model, context.get());
if (session_ == nullptr) {
MS_LOG(ERROR) << "RunNetTrain CreateSession failed while running " << model_name.c_str();
std::cout << "RunNetTrain CreateSession failed while running " << model_name.c_str() << std::endl;
return RET_ERROR;
session = session::LiteSession::CreateSession(&context);
if (session == nullptr) {
MS_LOG(ERROR) << "ExportedFile CreateSession failed while running " << model_name.c_str();
std::cout << "CreateSession failed while running " << model_name.c_str() << std::endl;
delete model;
return RET_ERROR;
}
if (session->CompileGraph(model) != RET_OK) {
MS_LOG(ERROR) << "Cannot compile model";
delete model;
return RET_ERROR;
}
delete model;
}
if (flags_->loss_name_ != "") {
session_->SetLossName(flags_->loss_name_);
}
session_->Train();
ms_inputs_ = session_->GetInputs();
auto end_prepare_time = GetTimeUs();
MS_LOG(INFO) << "PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms";
std::cout << "PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms" << std::endl;
// Load input
MS_LOG(INFO) << "start generate input data";
auto status = LoadInput();
MS_LOG(INFO) << "Load input data";
auto ms_inputs = session->GetInputs();
auto status = LoadInput(&ms_inputs);
if (status != RET_OK) {
MS_LOG(ERROR) << "Generate input data error";
MS_LOG(ERROR) << "Load input data error";
return status;
}
if (flags_->epochs_ > 0) {
status = MarkPerformance();
if ((epochs > 0) && (t_session != nullptr)) {
status = MarkPerformance(t_session);
if (status != RET_OK) {
MS_LOG(ERROR) << "Run MarkPerformance error: " << status;
std::cout << "Run MarkPerformance error: " << status << std::endl;
return status;
}
SaveModels(t_session, model); // save file if flags are on
}
if (!flags_->data_file_.empty()) {
status = MarkAccuracy();
for (auto &data : data_) {
data.second->shape.clear();
data.second->data.clear();
delete data.second;
if (t_session != nullptr) {
t_session->Eval();
}
data_.clear();
status = MarkAccuracy(session);
if (status != RET_OK) {
MS_LOG(ERROR) << "Run MarkAccuracy error: " << status;
std::cout << "Run MarkAccuracy error: " << status << std::endl;
return status;
}
}
status = CheckExecute(model);
return RET_OK;
}
int NetTrain::RunNetTrain() {
CreateAndRunNetwork(flags_->model_file_, true, flags_->epochs_);
auto status = CheckExecutionOfSavedModels(); // re-initialize sessions according to flags
if (status != RET_OK) {
MS_LOG(ERROR) << "Run CheckExecute error: " << status;
std::cout << "Run CheckExecute error: " << status << std::endl;
@ -653,8 +406,7 @@ int NetTrain::RunNetTrain() {
return RET_OK;
}
int NetTrain::CheckExecute(mindspore::lite::Model *model) {
int status;
int NetTrain::SaveModels(session::TrainSession *session, mindspore::lite::Model *model) {
if (!flags_->export_file_.empty()) {
auto ret = Model::Export(model, flags_->export_file_.c_str());
if (ret != RET_OK) {
@ -662,67 +414,39 @@ int NetTrain::CheckExecute(mindspore::lite::Model *model) {
std::cout << "Run SaveToFile error";
return RET_ERROR;
}
delete session_;
session_ = nullptr;
status = RunExportedNet();
}
if (!flags_->inference_file_.empty()) {
auto tick = GetTimeUs();
auto status = session->ExportInference(flags_->inference_file_);
if (status != RET_OK) {
MS_LOG(ERROR) << "Run Exported model error: " << status;
std::cout << "Run Exported model error: " << status << std::endl;
MS_LOG(ERROR) << "Save model error: " << status;
std::cout << "Save model error: " << status << std::endl;
return status;
}
} else {
if (!flags_->inference_file_.empty()) {
auto tick = GetTimeUs();
status = session_->ExportInference(flags_->inference_file_);
if (status != RET_OK) {
MS_LOG(ERROR) << "Save model error: " << status;
std::cout << "Save model error: " << status << std::endl;
return status;
}
std::cout << "ExportInference() execution time is " << GetTimeUs() - tick << "us\n";
delete session_;
session_ = nullptr;
status = RunExportedNetLite(flags_->inference_file_ + ".ms");
if (status != RET_OK) {
MS_LOG(ERROR) << "Running saved model error: " << status;
std::cout << "Running saved model error: " << status << std::endl;
return status;
}
}
std::cout << "ExportInference() execution time is " << GetTimeUs() - tick << "us\n";
}
return RET_OK;
}
void NetTrainFlags::InitInputDataList() {
char *saveptr1 = nullptr;
char *input_list = new char[this->in_data_file_.length() + 1];
snprintf(input_list, this->in_data_file_.length() + 1, "%s", this->in_data_file_.c_str());
const char *split_c = ",";
char *cur_input = strtok_r(input_list, split_c, &saveptr1);
while (cur_input != nullptr) {
input_data_list_.emplace_back(cur_input);
cur_input = strtok_r(nullptr, split_c, &saveptr1);
}
delete[] input_list;
}
void NetTrainFlags::InitResizeDimsList() {
std::string content;
content = this->resize_dims_in_;
std::vector<int64_t> shape;
auto shape_strs = StringSplit(content, std::string(DELIM_COLON));
for (const auto &shape_str : shape_strs) {
shape.clear();
auto dim_strs = StringSplit(shape_str, std::string(DELIM_COMMA));
std::cout << "Resize Dims: ";
for (const auto &dim_str : dim_strs) {
std::cout << dim_str << " ";
shape.emplace_back(static_cast<int64_t>(std::stoi(dim_str)));
int NetTrain::CheckExecutionOfSavedModels() {
int status = RET_OK;
if (!flags_->export_file_.empty()) {
status = NetTrain::CreateAndRunNetwork(flags_->export_file_, true, 0);
if (status != RET_OK) {
MS_LOG(ERROR) << "Run Exported model " << flags_->export_file_ << " error: " << status;
std::cout << "Run Exported model " << flags_->export_file_ << " error: " << status << std::endl;
return status;
}
std::cout << std::endl;
this->resize_dims_.emplace_back(shape);
}
if (!flags_->inference_file_.empty()) {
status = NetTrain::CreateAndRunNetwork(flags_->inference_file_ + ".ms", false, 0);
if (status != RET_OK) {
MS_LOG(ERROR) << "Running saved model " << flags_->inference_file_ << ".ms error: " << status;
std::cout << "Running saved model " << flags_->inference_file_ << ".ms error: " << status << std::endl;
return status;
}
}
return status;
}
int NetTrain::InitCallbackParameter() {
@ -766,7 +490,7 @@ int NetTrain::InitCallbackParameter() {
op_times_by_type_[call_param.node_type].second += cost;
op_times_by_name_[call_param.node_name].first++;
op_times_by_name_[call_param.node_name].second += cost;
if (layer_checksum_) {
if (flags_->layer_checksum_) {
auto out_tensor = after_outputs.at(0);
void *output = out_tensor->MutableData();
int tensor_size = out_tensor->ElementsNum();
@ -841,13 +565,6 @@ int NetTrain::Init() {
std::cerr << "modelPath is required" << std::endl;
return 1;
}
flags_->InitInputDataList();
flags_->InitResizeDimsList();
if (!flags_->resize_dims_.empty() && flags_->resize_dims_.size() != flags_->input_data_list_.size()) {
MS_LOG(ERROR) << "Size of input resizeDims should be equal to size of input inDataPath";
std::cerr << "Size of input resizeDims should be equal to size of input inDataPath" << std::endl;
return RET_ERROR;
}
if (flags_->time_profiling_) {
auto status = InitCallbackParameter();
@ -925,14 +642,6 @@ int NetTrain::PrintResult(const std::vector<std::string> &title,
return RET_OK;
}
NetTrain::~NetTrain() {
for (auto iter : this->data_) {
delete (iter.second);
}
this->data_.clear();
if (session_ != nullptr) delete (session_);
}
int RunNetTrain(int argc, const char **argv) {
NetTrainFlags flags;
Option<std::string> err = flags.ParseFlags(argc, argv);

View File

@ -42,15 +42,6 @@ enum MS_API DataType { kImage = 0, kBinary = 1 };
constexpr float relativeTolerance = 1e-5;
constexpr float absoluteTolerance = 1e-8;
struct MS_API CheckTensor {
CheckTensor(const std::vector<size_t> &shape, const std::vector<float> &data) {
this->shape = shape;
this->data = data;
}
std::vector<size_t> shape;
std::vector<float> data;
};
template <typename T>
float TensorSum(void *data, int size) {
T *typed_data = reinterpret_cast<T *>(data);
@ -84,10 +75,6 @@ class MS_API NetTrainFlags : public virtual FlagParser {
~NetTrainFlags() override = default;
void InitInputDataList();
void InitResizeDimsList();
public:
// common
std::string model_file_;
@ -118,25 +105,22 @@ class MS_API NetTrainFlags : public virtual FlagParser {
class MS_API NetTrain {
public:
explicit NetTrain(NetTrainFlags *flags) : flags_(flags) {}
virtual ~NetTrain();
virtual ~NetTrain() = default;
int Init();
int RunNetTrain();
int RunExportedNet();
private:
// call GenerateInputData or ReadInputFile to init inputTensors
int LoadInput();
int LoadInput(Vector<tensor::MSTensor *> *ms_inputs);
// call GenerateRandomData to fill inputTensors
int GenerateInputData();
int GenerateInputData(std::vector<mindspore::tensor::MSTensor *> *ms_inputs);
int GenerateRandomData(size_t size, void *data);
int ReadInputFile();
int CompareOutput();
int ReadInputFile(std::vector<mindspore::tensor::MSTensor *> *ms_inputs);
int CreateAndRunNetwork(const std::string &filename, int train_session, int epochs);
int InitCallbackParameter();
@ -208,22 +192,13 @@ class MS_API NetTrain {
return meanError;
}
int MarkPerformance();
int MarkPerformance(session::TrainSession *session);
int MarkAccuracy();
private:
int RunExportedNetLite(std::string file_name);
int MarkAccuracyLite(const std::unique_ptr<session::LiteSession> &lite_session);
int CompareOutputLite(const std::unique_ptr<session::LiteSession> &lite_session);
int CheckExecute(mindspore::lite::Model *model);
int MarkAccuracy(session::LiteSession *lite_session);
int CompareOutput(const session::LiteSession &lite_session);
int SaveModels(session::TrainSession *session, mindspore::lite::Model *model);
int CheckExecutionOfSavedModels();
NetTrainFlags *flags_;
session::TrainSession *session_ = nullptr;
std::vector<mindspore::tensor::MSTensor *> ms_inputs_;
std::unordered_map<std::string, std::vector<mindspore::tensor::MSTensor *>> ms_outputs_;
std::unordered_map<std::string, CheckTensor *> data_;
std::unordered_map<std::string, TypeId> data_type_map_{{"FLOAT", TypeId::kNumberTypeFloat},
{"INT32", TypeId::kNumberTypeInt32}};
// callback parameters
uint64_t op_begin_ = 0;
@ -234,7 +209,6 @@ class MS_API NetTrain {
mindspore::KernelCallBack before_call_back_;
mindspore::KernelCallBack after_call_back_;
bool layer_checksum_ = false;
};
int MS_API RunNetTrain(int argc, const char **argv);

View File

@ -143,7 +143,7 @@ bool IndexingCompress(const std::set<T> &quant_data_set, const std::map<T, size_
tensor->data.resize(new_data_str.size());
tensor->weightQunatCompressType = schema::WeightQunatCompressType_INDEXING;
MS_LOG(ERROR) << "set WeightQunatCompressType_INDEXING";
MS_LOG(DEBUG) << "set WeightQunatCompressType_INDEXING";
return true;
}
@ -285,21 +285,21 @@ bool PackRepetition(size_t bit_num, schema::TensorT *tensor) {
auto pack_sparsity_size_in_bit =
1 * 8 + 4 * 8 + bit_num + bit_num * unique_value_cnt + unique_value_bit * nz_cnt + nz_cnt * coor_best_bit;
size_t pack_sparsity_size_in_byte = ceil(pack_sparsity_size_in_bit / 8.0);
MS_LOG(ERROR) << "coor_best_bit: " << coor_best_bit << " ori: " << origin_size_in_byte
MS_LOG(DEBUG) << "coor_best_bit: " << coor_best_bit << " ori: " << origin_size_in_byte
<< " indexing: " << pack_repetition_size_in_byte << " sparse: " << pack_sparsity_size_in_byte;
auto min_byte_need = std::min({origin_size_in_byte, pack_repetition_size_in_byte, pack_sparsity_size_in_byte});
if (min_byte_need == origin_size_in_byte) {
return false;
} else if (min_byte_need == pack_repetition_size_in_byte) {
MS_LOG(ERROR) << "from " << origin_size_in_byte << " to " << pack_repetition_size_in_byte;
MS_LOG(DEBUG) << "from " << origin_size_in_byte << " to " << pack_repetition_size_in_byte;
return IndexingCompress<T>(quant_data_set, unique_value_index_map, unique_value_bit, unique_value_cnt,
pack_repetition_size_in_byte, bit_num, tensor);
} else if (min_byte_need == pack_sparsity_size_in_byte) {
MS_LOG(ERROR) << "from " << origin_size_in_byte << " to " << pack_sparsity_size_in_byte;
MS_LOG(DEBUG) << "from " << origin_size_in_byte << " to " << pack_sparsity_size_in_byte;
return SparsityCompress<T>(quant_data_set, unique_value_index_map, unique_value_bit, unique_value_cnt,
pack_sparsity_size_in_byte, nz_cnt, coor_best_bit, bit_num, tensor);
} else {
MS_LOG(ERROR) << "unexpected: " << min_byte_need << " not in {" << origin_size_in_byte << " "
MS_LOG(DEBUG) << "unexpected: " << min_byte_need << " not in {" << origin_size_in_byte << " "
<< pack_repetition_size_in_byte << " " << pack_sparsity_size_in_byte << "}";
}
return false;

View File

@ -22,6 +22,7 @@ file(GLOB_RECURSE CONVERTER_SRC RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/graphdef_transform.cc
${CMAKE_CURRENT_SOURCE_DIR}/optimizer.cc
${CMAKE_CURRENT_SOURCE_DIR}/../../src/common/file_utils.cc
${CMAKE_CURRENT_SOURCE_DIR}/../../src/common/quant_utils.cc
${CMAKE_CURRENT_SOURCE_DIR}/../common/graph_util.cc
${CMAKE_CURRENT_SOURCE_DIR}/../common/node_util.cc
${CMAKE_CURRENT_SOURCE_DIR}/../common/tensor_util.cc

View File

@ -14,14 +14,16 @@
* limitations under the License.
*/
#include "tools/converter/legacy_optimizer/graph/tensor_quant_pass.h"
#include <vector>
#include <cmath>
#include "tools/converter/legacy_optimizer/graph/tensor_quant_pass.h"
#include <algorithm>
#include "tools/converter/converter_context.h"
#include "tools/converter/quantizer/quantize_util.h"
#include "tools/common/tensor_util.h"
#include "tools/common/graph_util.h"
#include "tools/common/node_util.h"
#include "src/common/quant_utils.h"
namespace mindspore::lite {
namespace {
@ -49,7 +51,7 @@ STATUS ComputeDataToInt8(const std::unique_ptr<TensorT> &tensor, int32_t index)
return RET_OK;
}
for (size_t j = 0; j < wShapeSize; j++) {
qDatas[j] = quant::QuantizeData<int8_t>(weightData[j], weightQauntParam.get());
qDatas[j] = QuantizeData<int8_t>(weightData[j], weightQauntParam.get());
}
} else { // convert uint8 to int8
auto *weightData = static_cast<uint8_t *>(oriWeightData);
@ -141,7 +143,7 @@ STATUS ComputeQuantTensorPerChannel(TensorT *tensor, const int &tensor_index, co
auto *dst_data_int32 = reinterpret_cast<int32_t *>(dst_data.data());
dst_data_int32[index] = quant_data;
} else {
auto quant_data = quant::QuantizeData<int8_t>(raw_data, tensor->quantParams.at(i).get());
auto quant_data = QuantizeData<int8_t>(raw_data, tensor->quantParams.at(i).get());
dst_data[index] = quant_data;
}
}

View File

@ -44,6 +44,7 @@
#include "securec/include/securec.h"
#include "tools/common/tensor_util.h"
#include "src/common/file_utils.h"
#include "src/common/quant_utils.h"
#include "src/common/utils.h"
#include "tools/converter/quantizer/weight_quantizer.h"
@ -1282,8 +1283,7 @@ STATUS PostTrainingQuantizer::DoQuantize(FuncGraphPtr func_graph) {
return status;
}
if (calibrator_->config_param_.mixed) {
// get opname_bit map
if (calibrator_->config_param_.mixed) { // get opname_bit map
auto weight_quant_func_graph = CopyFuncGraph(func_graph);
if (weight_quant_func_graph == nullptr) {
MS_LOG(ERROR) << "CopyFuncGraph error";
@ -1315,7 +1315,6 @@ STATUS PostTrainingQuantizer::DoQuantize(FuncGraphPtr func_graph) {
MS_LOG(ERROR) << "create session failed!";
return RET_ERROR;
}
MS_LOG(INFO) << "start to update divergence's max value";
status = DoInference();
if (status != RET_OK) {
@ -1363,14 +1362,12 @@ STATUS PostTrainingQuantizer::DoQuantize(FuncGraphPtr func_graph) {
MS_LOG(ERROR) << "create session failed!";
return RET_ERROR;
}
MS_LOG(INFO) << "do bias correction";
status = BiasCorrection(func_graph);
if (status != RET_OK) {
MS_LOG(WARNING) << "BiasCorrection failed.";
}
}
return RET_OK;
}
@ -1477,7 +1474,7 @@ KernelCallBack PostTrainingQuantizer::GetBeforeCallBack(bool int8_op) {
quant_param_t.scale = quant_params[0].scale;
quant_param_t.zeroPoint = quant_params[0].zeroPoint;
for (auto float_data : fp32_op_input) {
auto quant_data = QuantizeData<int8_t>(float_data, &quant_param_t, quant_max, quant_min);
quant_datas.push_back(quant_data);
}

View File

@ -100,12 +100,12 @@ bool QuantStrategy::CanConvOpQuantized(const CNodePtr &node) const {
return true;
}
bool QuantStrategy::CanOpPostQuantized(const AnfNodePtr &node) const {
MS_ASSERT(node != nullptr);
if (!node->isa<mindspore::CNode>()) {
return false;
}
const auto cnode = std::dynamic_pointer_cast<mindspore::CNode>(node);
auto type = NodePrimitiveType(cnode);
static const std::vector<std::string> int8OpList = {
ops::kNameAddFusion, ops::kNameActivation, ops::kNameAvgPoolFusion,
@ -268,67 +268,6 @@ bool TensorQuantParamsInited(const schema::TensorT &tensor) {
return true;
}
STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int quant_max,
int quant_min, int num_bits) {
MS_ASSERT(quantParam != nullptr);
if (mMin > 0.0f) {
MS_LOG(DEBUG) << "min " << mMin << " is bigger then 0, set to 0, this may course low precision";
mMin = 0.0f;
}
if (mMax < 0.0f) {
MS_LOG(DEBUG) << "mMax " << mMax << " is smaller than 0, set to 0, this may course low precision";
mMax = 0.0f;
}
if (mMin > mMax) {
MS_LOG(ERROR) << "cal error while min" << mMin << ">" << mMax;
return RET_PARAM_INVALID;
}
if (mMin == mMax) {
if (mMin != 0.0f) {
MS_LOG(ERROR) << "min and max should both be zero if they are equal to each other";
return RET_ERROR;
}
quantParam->inited = true;
quantParam->min = mMin;
quantParam->max = mMax;
quantParam->scale = 0.0f;
quantParam->zeroPoint = 0;
quantParam->narrowRange = narrowRange;
quantParam->numBits = num_bits;
return RET_OK;
}
auto quantMinFloat = static_cast<double>(quant_min);
auto quantMaxFloat = static_cast<double>(quant_max);
if (fabs(quantMaxFloat - quantMinFloat) <= 0.0f) {
MS_LOG(ERROR) << "divisor cannot be 0";
return RET_ERROR;
}
double scale = (mMax - mMin) / (quantMaxFloat - quantMinFloat);
if (fabs(scale) <= 0.0f) {
MS_LOG(ERROR) << "divisor 'scale' cannot be 0";
return RET_ERROR;
}
const double zeroPointFromMin = quantMinFloat - mMin / scale;
int zeroPoint = static_cast<int32_t>(std::round(zeroPointFromMin));
if (scale < SCALE_THREASHOLD) {
zeroPoint = 0;
}
// The zero point should always be in the range of quantized value,
// [qmin, qmax].
MS_ASSERT(zeroPoint >= quant_min);
MS_ASSERT(zeroPoint <= quant_max);
quantParam->inited = true;
quantParam->min = mMin;
quantParam->max = mMax;
quantParam->scale = scale;
quantParam->zeroPoint = zeroPoint;
quantParam->narrowRange = narrowRange;
quantParam->numBits = num_bits;
return RET_OK;
}
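
A worked example of the scale and zero-point math above, assuming an int8 range of [-128, 127]; the float range [-1.0, 3.0] is illustrative only:

#include <cmath>
#include <iostream>

int main() {
  const double m_min = -1.0, m_max = 3.0;
  const int quant_min = -128, quant_max = 127;
  // scale = (max - min) / (qmax - qmin) = 4 / 255 ~= 0.0157
  const double scale = (m_max - m_min) / (static_cast<double>(quant_max) - quant_min);
  // zero point = round(qmin - min / scale) = round(-128 + 63.75) = -64
  const int zero_point = static_cast<int>(std::round(quant_min - m_min / scale));
  std::cout << "scale=" << scale << " zeroPoint=" << zero_point << std::endl;
  return 0;
}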
STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int numBits) {
MS_ASSERT(quantParam != nullptr);
if (mMin > 0.0f) {
@ -999,26 +938,6 @@ STATUS UpdateTensorDataAndSize(const tensor::TensorPtr &weight, void *quant_data
return RET_OK;
}
void GetMaxMinPerchannel(int channels, int one_filter_size, int i, int elem_count, const float *raw_datas,
bool channel_at_first, float *desired_max, float *desired_min) {
float min = FLT_MAX;
float max = -FLT_MAX;
// find min and max
for (int j = 0; j < one_filter_size; j++) {
auto index = j + i * one_filter_size;
if (!channel_at_first) {
index = j * channels + i;
}
if (index >= elem_count) {
MS_LOG(ERROR) << "over flow!";
}
min = std::min(min, raw_datas[index]);
max = std::max(max, raw_datas[index]);
}
*desired_max = max;
*desired_min = min;
}
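
The index arithmetic above can be restated in a single hypothetical helper (not part of the converter): with the channel as the leading dimension a filter is one contiguous block, otherwise elements of the same channel are strided by the channel count.

#include <cstddef>

std::size_t ElementIndex(bool channel_at_first, int channels, int one_filter_size,
                         int channel, int j) {
  return channel_at_first ? static_cast<std::size_t>(channel * one_filter_size + j)
                          : static_cast<std::size_t>(j * channels + channel);
}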
int CalChannels(const ShapeVector &dims, int channel_cnt, bool *channel_at_first) {
auto channels = dims[0];
if (!(*channel_at_first)) {

View File

@ -43,6 +43,7 @@
#include "src/lite_session.h"
#include "tools/converter/graphdef_transform.h"
#include "src/common/file_utils.h"
#include "src/common/quant_utils.h"
namespace mindspore::lite::quant {
static constexpr size_t UINT8_QUANTIZATION = 8;
@ -82,7 +83,7 @@ class QuantStrategy {
bool CanConvOpQuantized(const CNodePtr &node) const;
bool CanMulOpQuantized(const CNodePtr &node) const;
bool CanOpPostQuantized(const AnfNodePtr &node) const;
bool CanTensorQuantized(const AnfNodePtr &inputNode) const;
size_t m_weight_size_;
@ -100,9 +101,6 @@ constexpr int quant_param_size = 32 * 8;
QuantParamHolderPtr GetCNodeQuantHolder(const PrimitivePtr &primitive);
STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int quant_max,
int quant_min, int num_bits);
STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange = false,
int numBits = UINT8_QUANTIZATION);
@ -112,9 +110,6 @@ std::vector<int8_t> KMeans(float *data, size_t elem_count, size_t k, size_t epoc
STATUS UpdateTensorDataAndSize(const tensor::TensorPtr &weight, void *quant_datas, int new_size, TypeId new_data_type);
void GetMaxMinPerchannel(int channels, int one_filter_size, int i, int elem_count, const float *raw_datas,
bool channel_at_first, float *desired_max, float *desired_min);
int CalChannels(const ShapeVector &dims, int channel_cnt, bool *channel_at_first);
void CalQuantAssitInfo(const PrimitivePtr &primitive, const ShapeVector &shapes, int index, bool *channel_at_first,
@ -123,193 +118,10 @@ void CalQuantAssitInfo(const PrimitivePtr &primitive, const ShapeVector &shapes,
void CalQuantAssitInfo(const schema::PrimitiveT &primitive, const std::vector<int> &shapes, int index,
bool *channel_at_first, int *channel_cnt);
template <typename T>
T QuantizeData(const float originData, const schema::QuantParamT *quantParam) {
MS_ASSERT(quantParam != nullptr);
MS_ASSERT(quantParam->inited);
const auto scale = quantParam->scale;
const auto zeroPoint = quantParam->zeroPoint;
const auto numBit = quantParam->numBits;
const auto narrowRange = quantParam->narrowRange;
double maxLimitTemp = static_cast<float>((1 << (unsigned int)numBit) - 1);
const double maxLimit = static_cast<float>(maxLimitTemp - zeroPoint + std::numeric_limits<T>::min()) * scale;
double minLimit;
if (narrowRange) {
minLimit = static_cast<float>(std::numeric_limits<T>::min() + 1 - zeroPoint) * scale;
} else {
minLimit = static_cast<float>(std::numeric_limits<T>::min() - zeroPoint) * scale;
}
return [maxLimit, minLimit, zeroPoint, scale, narrowRange, originData] {
double tmp;
if (originData > maxLimit) {
tmp = maxLimit;
} else if (originData < minLimit) {
tmp = minLimit;
} else {
tmp = originData;
}
auto quantData = static_cast<T>(std::round(zeroPoint + tmp / scale));
return quantData;
}();
}
template <typename T>
T QuantizeData(float originData, const schema::QuantParamT &quantParam, int quant_max, int quant_min) {
MS_ASSERT(quantParam.inited);
const auto scale = quantParam.scale;
const int zeroPoint = quantParam.zeroPoint;
const auto narrowRange = quantParam.narrowRange;
const int maxLimit = quant_max;
const int minLimit = quant_min;
if (scale <= SCALE_THREASHOLD) {
return 0;
}
return [maxLimit, minLimit, zeroPoint, scale, narrowRange, originData] {
auto quant_data = std::round(originData / scale + zeroPoint);
if (quant_data > maxLimit) {
quant_data = maxLimit;
} else if (quant_data < minLimit) {
quant_data = minLimit;
}
return static_cast<T>(quant_data);
}();
}
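
A self-contained sketch of the clamp-then-round behaviour of the QuantizeData template above, with plain arguments instead of schema::QuantParamT (illustrative only, not the converter's API):

#include <algorithm>
#include <cmath>
#include <cstdint>

int8_t QuantizeOne(float value, double scale, int zero_point,
                   int quant_min = -128, int quant_max = 127) {
  if (scale <= 0.0) {
    return 0;  // mirrors the SCALE_THREASHOLD guard above
  }
  double q = std::round(value / scale + zero_point);
  q = std::min<double>(std::max<double>(q, quant_min), quant_max);  // clamp to [qmin, qmax]
  return static_cast<int8_t>(q);
}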
bool QuantParamEqual(const schema::QuantParamT &quant_param1, const schema::QuantParamT &quant_param2);
bool TensorQuantParamsInited(const schema::TensorT &tensor);
template <typename T>
STATUS DoPerChannelQuant(const tensor::TensorPtr &weight, const QuantType &quant_type,
std::vector<schema::QuantParamT> *quant_params, const int &quant_max, const int &quant_min,
const size_t &bit_num, const bool &k_means, std::vector<T> *quant_datas,
std::vector<float> *dequant_datas, TypeId quant_data_type, bool channel_at_first = true,
int channel_cnt = -1) {
auto dims = weight->shape();
size_t elem_count = weight->DataSize();
auto *raw_datas = static_cast<float *>(weight->data_c());
auto channels = CalChannels(dims, channel_cnt, &channel_at_first);
if (channels == 0) {
MS_LOG(ERROR) << "channels is zero";
return RET_ERROR;
}
size_t one_filter_size = elem_count / channels;
bool do_quant = quant_param_size / (sizeof(float) * 8 - bit_num) < one_filter_size;
if (!do_quant && quant_type == QuantType_WeightQuant) {
MS_LOG(INFO) << "too few elements in a filter, no need to quantize. " << one_filter_size;
return RET_CONTINUE;
}
for (int i = 0; i < channels; i++) {
float min = FLT_MAX;
float max = -FLT_MAX;
GetMaxMinPerchannel(channels, one_filter_size, i, elem_count, raw_datas, channel_at_first, &max, &min);
schema::QuantParamT quant_param;
STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bit_num);
if (status != RET_OK) {
MS_LOG(ERROR) << "CalQuantizationParams failed" << status;
return status;
}
// do quantization
double average_dequant = 0;
double average_raw = 0;
for (uint32_t j = 0; j < one_filter_size; j++) {
auto index = j + i * one_filter_size;
if (!channel_at_first) {
index = j * channels + i;
}
MS_ASSERT(index < elem_count);
float raw_data = raw_datas[index];
auto quant_data = QuantizeData<T>(raw_data, quant_param, quant_max, quant_min);
(*quant_datas)[index] = quant_data;
if (quant_type == QuantType_WeightQuant) {
float dequant_data = quant_param.scale * (quant_data - quant_param.zeroPoint);
(*dequant_datas)[index] = dequant_data;
average_dequant += dequant_data;
average_raw += raw_data;
}
}
if (quant_type == QuantType_WeightQuant && !k_means) {
// mean
average_dequant = average_dequant / one_filter_size;
average_raw = average_raw / one_filter_size;
// std
double variance_dequant = 0;
double variance_raw = 0;
for (uint32_t j = 0; j < one_filter_size; j++) {
auto index = j + i * one_filter_size;
if (!channel_at_first) {
index = j * channels + i;
}
MS_ASSERT(index < elem_count);
variance_dequant += std::pow((*dequant_datas)[index] - average_dequant, 2);
variance_raw += std::pow(raw_datas[index] - average_raw, 2);
}
variance_dequant = std::sqrt(variance_dequant / one_filter_size);
variance_raw = std::sqrt(variance_raw / one_filter_size);
quant_param.varCorr = 1;
if (variance_raw != 0 && variance_dequant != 0) {
auto temp_var_corr = variance_raw / variance_dequant;
if (temp_var_corr > 0 && temp_var_corr < 10) {
quant_param.varCorr = temp_var_corr;
} else {
MS_LOG(WARNING) << "unexpected var_corr: " << temp_var_corr;
}
}
quant_param.meanCorr = average_raw - average_dequant * quant_param.varCorr;
}
quant_params->emplace_back(quant_param);
}
auto status = UpdateTensorDataAndSize(weight, quant_datas->data(), quant_datas->size() * sizeof(T), quant_data_type);
if (status != RET_OK) {
MS_LOG(ERROR) << "UpdateTensorDataAndSize error";
return RET_ERROR;
}
return RET_OK;
}
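
The varCorr / meanCorr statistics computed above rescale and shift the dequantized weights toward the raw distribution. A standalone restatement of that arithmetic, as a hypothetical helper rather than the converter's interface:

#include <cmath>
#include <cstddef>
#include <vector>

void CalcCorrection(const std::vector<float> &raw, const std::vector<float> &dequant,
                    float *var_corr, float *mean_corr) {
  const std::size_t n = raw.size();
  *var_corr = 1.0f;
  *mean_corr = 0.0f;
  if (n == 0 || dequant.size() != n) {
    return;
  }
  double mean_raw = 0.0, mean_deq = 0.0;
  for (std::size_t i = 0; i < n; ++i) {
    mean_raw += raw[i];
    mean_deq += dequant[i];
  }
  mean_raw /= n;
  mean_deq /= n;
  double std_raw = 0.0, std_deq = 0.0;
  for (std::size_t i = 0; i < n; ++i) {
    std_raw += std::pow(raw[i] - mean_raw, 2);
    std_deq += std::pow(dequant[i] - mean_deq, 2);
  }
  std_raw = std::sqrt(std_raw / n);
  std_deq = std::sqrt(std_deq / n);
  if (std_raw != 0.0 && std_deq != 0.0) {
    const double corr = std_raw / std_deq;
    if (corr > 0.0 && corr < 10.0) {
      *var_corr = static_cast<float>(corr);  // keep the same sanity bound as above
    }
  }
  *mean_corr = static_cast<float>(mean_raw - mean_deq * (*var_corr));
}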
template <typename T>
STATUS DoPerLayerQuant(const tensor::TensorPtr &weight, const QuantType &quant_type,
std::vector<schema::QuantParamT> *quant_params, const int &quant_max, const int &quant_min,
const size_t &bit_num, const bool &k_means, std::vector<T> *quant_datas,
TypeId quant_data_type) {
auto dims = weight->shape();
size_t elem_count = weight->DataSize();
auto *raw_datas = static_cast<float *>(weight->data_c());
float min = FLT_MAX;
float max = -FLT_MAX;
for (uint32_t i = 0; i < elem_count; i++) {
// find max min
min = std::min(min, raw_datas[i]);
max = std::max(max, raw_datas[i]);
}
schema::QuantParamT quant_param;
if (!k_means) {
STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bit_num);
if (status != RET_OK) {
MS_LOG(ERROR) << "CalQuantizationParams failed" << status;
return status;
}
}
quant_params->emplace_back(quant_param);
// update data and datatype
for (uint32_t i = 0; i < elem_count; i++) {
float raw_data = raw_datas[i];
if (!k_means) {
auto quant_data = QuantizeData<T>(raw_data, quant_param, quant_max, quant_min);
(*quant_datas)[i] = quant_data;
}
}
auto status = UpdateTensorDataAndSize(weight, quant_datas->data(), quant_datas->size() * sizeof(T), quant_data_type);
if (status != RET_OK) {
MS_LOG(ERROR) << "UpdateTensorDataAndSize error";
return RET_ERROR;
}
return RET_OK;
}
template <typename T>
STATUS DoBitPack(const tensor::TensorPtr &weight, const size_t &bit_num, const std::vector<T> &quant_datas) {
if (bit_num != 8 && bit_num != 16) {
@ -363,15 +175,19 @@ STATUS QuantFilter(const tensor::TensorPtr &weight, const PrimitivePtr &primitiv
}
std::vector<T> quant_data(elem_count);
std::vector<float> dequant_datas(elem_count);
int ret = RET_OK;
if (per_channel) {
bool channel_at_first = true;
int channel_cnt = -1;
CalQuantAssitInfo(primitive, dims, index, &channel_at_first, &channel_cnt);
// channel at first
auto channels = CalChannels(dims, channel_cnt, &channel_at_first);
if (channels == 0) {
MS_LOG(ERROR) << "channels is zero";
return RET_ERROR;
}
ret = DoPerChannelQuant<T>(static_cast<float *>(weight->data_c()), weight->DataSize(),
static_cast<mindspore::schema::QuantType>(quant_type), &quant_params, quant_max,
quant_min, bit_num, k_means, &quant_data, channels, channel_at_first);
if (ret == RET_CONTINUE) {
return ret;
} else if (ret != RET_OK) {
@ -379,13 +195,18 @@ STATUS QuantFilter(const tensor::TensorPtr &weight, const PrimitivePtr &primitiv
return ret;
}
} else {
ret = DoPerLayerQuant<T>(static_cast<float *>(weight->data_c()), weight->DataSize(), &quant_params, quant_max,
quant_min, bit_num, k_means, &quant_data);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Do per layer quant failed.";
return ret;
}
}
auto status = UpdateTensorDataAndSize(weight, quant_data.data(), quant_data.size() * sizeof(T), quant_data_type);
if (status != RET_OK) {
MS_LOG(ERROR) << "UpdateTensorDataAndSize error";
return RET_ERROR;
}
#ifdef HUFFMAN_ENCODE
auto huffman_encode = std::make_unique<lite::HuffmanEncode>();