From 7dfac75de27d7caa58cb57954b788c60949d0cb5 Mon Sep 17 00:00:00 2001
From: kai00 <wangchangkai@huawei.com>
Date: Fri, 11 Sep 2020 12:54:04 +0800
Subject: [PATCH] weight quant fix

---
 mindspore/lite/src/lite_kernel.cc             | 47 ++++++++++++
 mindspore/lite/src/lite_kernel.h              |  4 ++
 mindspore/lite/src/ops/primitive_c.cc         |  4 +-
 .../kernel/arm/base/convolution_base.cc       | 47 ------------
 .../kernel/arm/base/convolution_base.h        |  2 -
 .../kernel/arm/base/fullconnection_base.cc    | 65 ++++-------------
 .../kernel/arm/base/fullconnection_base.h     |  1 -
 .../runtime/kernel/arm/base/matmul_base.cc    | 69 +++++-------------
 .../src/runtime/kernel/arm/base/matmul_base.h |  1 -
 .../kernel/arm/fp32/arithmetic_self.cc        | 46 ------------
 .../runtime/kernel/arm/fp32/arithmetic_self.h |  1 -
 .../runtime/kernel/arm/fp32/convolution.cc    | 15 +++-
 .../kernel/arm/fp32/convolution_depthwise.cc  | 15 +++-
 .../lite/src/runtime/kernel/arm/fp32/scale.cc | 71 +++++--------------
 .../lite/src/runtime/kernel/arm/fp32/scale.h  |  1 -
 .../kernel/arm/fp32_grad/apply_momentum.cc    |  2 +-
 .../lite/test/models_tflite_weightquant.cfg   |  1 +
 mindspore/lite/test/run_benchmark_nets.sh     | 33 +++++++++
 .../anf_importer/import_from_meta_graphT.cc   |  2 +-
 .../lite/tools/converter/anf_transform.cc     |  7 +-
 .../lite/tools/converter/converter_flags.cc   |  2 +-
 .../graph/weight_format_hardcode_pass.cc      |  3 +
 .../converter/quantizer/weight_quantizer.cc   | 14 ++--
 .../converter/quantizer/weight_quantizer.h    |  3 +-
 24 files changed, 186 insertions(+), 270 deletions(-)
 create mode 100644 mindspore/lite/test/models_tflite_weightquant.cfg
diff --git a/mindspore/lite/src/lite_kernel.cc b/mindspore/lite/src/lite_kernel.cc
index 7fa5420e968..7e6752b26c4 100644
--- a/mindspore/lite/src/lite_kernel.cc
+++ b/mindspore/lite/src/lite_kernel.cc
@@ -163,4 +163,51 @@ void LiteKernelUtil::InitTensorRefCount(std::vector<kernel::LiteKernel *> &kerne
 }
 
 int LiteKernelUtil::SetInput(LiteKernel &kernelMod, std::vector<lite::Tensor *> inputs) { return -1; }
+
+// Dequantizes an int8 weight tensor into a newly malloc'ed float buffer holding ElementsNum() values.
+// Handles per-tensor (a single quant param) and per-channel (one param per Batch() slice) quantization.
+// Returns nullptr on wrong data type, missing quant params or data, channel mismatch, or failed malloc.
+float *LiteKernelUtil::DequantWeight(lite::Tensor *input_tensor) {
+  MS_ASSERT(input_tensor != nullptr);
+  if (input_tensor->data_type() != kNumberTypeInt8) {
+    MS_LOG(ERROR) << "weight input type error: " << input_tensor->data_type();
+    return nullptr;
+  }
+  const auto &quant_params = input_tensor->GetQuantParams();
+  if (quant_params.empty()) {
+    MS_LOG(ERROR) << "no quant param";
+    return nullptr;
+  }
+  const auto *quant_data = static_cast<const int8_t *>(input_tensor->MutableData());
+  // Reject a tensor without a data buffer before anything is allocated or dereferenced.
+  if (quant_data == nullptr) {
+    MS_LOG(ERROR) << "input_tensor MutableData is nullptr.";
+    return nullptr;
+  }
+  const auto element_num = static_cast<size_t>(input_tensor->ElementsNum());
+  // One quant param means per-tensor quantization; otherwise expect one param per channel.
+  const size_t channels = quant_params.size() == kPerTensor ? 1 : static_cast<size_t>(input_tensor->Batch());
+  if (quant_params.size() != channels) {
+    MS_LOG(ERROR) << "Quant param not equal channel num " << quant_params.size() << " vs " << channels;
+    return nullptr;
+  }
+  // Validation is complete, so no error path below has to release this buffer; the caller owns it.
+  auto *dequant_data = static_cast<float *>(malloc(element_num * sizeof(float)));
+  if (dequant_data == nullptr) {
+    MS_LOG(ERROR) << "malloc failed";
+    return nullptr;
+  }
+  // The per-tensor case runs with channels == 1, so this single loop nest covers both layouts.
+  const size_t per_channel_size = element_num / channels;
+  for (size_t i = 0; i < channels; i++) {
+    const auto &param = quant_params.at(i);
+    const auto scale = param.scale;
+    const auto zero_point = param.zeroPoint;
+    const size_t offset = per_channel_size * i;
+    for (size_t j = 0; j < per_channel_size; j++) {
+      dequant_data[offset + j] = static_cast<float>((quant_data[offset + j] - zero_point) * scale);
+    }
+  }
+  return dequant_data;
+}
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/lite_kernel.h b/mindspore/lite/src/lite_kernel.h
index e1196b82616..1ff4314ea84 100644
--- a/mindspore/lite/src/lite_kernel.h
+++ b/mindspore/lite/src/lite_kernel.h
@@ -28,6 +28,8 @@
 #include "src/tensor.h"
 #include "include/errorcode.h"
 
+static constexpr int kPerTensor = 1;  // quant-param count that denotes per-tensor (not per-channel) quantization
+
 // using mindspore::kernel::AddressPtr;
 namespace mindspore::kernel {
 using mindspore::lite::RET_ERROR;
@@ -202,6 +204,8 @@ class LiteKernelUtil {
   static void InitTensorRefCount(std::vector<kernel::LiteKernel *> &kernels);
 
   static int SetInput(LiteKernel &kernelMod, std::vector<lite::Tensor *> inputs);
+
+  static float *DequantWeight(lite::Tensor *input_tensor);
 };
 }  // namespace mindspore::kernel
 
diff --git a/mindspore/lite/src/ops/primitive_c.cc b/mindspore/lite/src/ops/primitive_c.cc
index 912db6aef2f..d81e217847b 100644
--- a/mindspore/lite/src/ops/primitive_c.cc
+++ b/mindspore/lite/src/ops/primitive_c.cc
@@ -657,8 +657,8 @@ PrimitiveC *PrimitiveC::Create(mindspore::schema::PrimitiveT *primitive) {
       return new PowerGrad(primitive);
     case schema::PrimitiveType_BNGradInput:
       return new BNGradInput(primitive);
-    case schema::PrimitiveType_SoftmaxCrossEntroy:
-      return new SoftmaxCrossEntroy(primitive);
+    case schema::PrimitiveType_SoftmaxCrossEntropy:
+      return new SoftmaxCrossEntropy(primitive);
     case schema::PrimitiveType_Depend:
       return new Depend(primitive);
     case schema::PrimitiveType_FlattenGrad:
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc
index 3c905ba8f8d..52aea1ad9c4 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc
@@ -324,51 +324,4 @@ int ConvolutionBaseCPUKernel::SetQuantParam() {
                                     &conv_param_->conv_quant_arg_.out_act_max_[0]);
   return RET_OK;
 }
-int ConvolutionBaseCPUKernel::RestoreFilter(lite::Tensor *input_tensor) {
-  MS_ASSERT(input_tensor != nullptr);
-  if (input_tensor->data_type() != kNumberTypeUInt8) {
-    MS_LOG(ERROR) << "conv weight input type error" << input_tensor->data_type();
-    return RET_ERROR;
-  }
-  if (input_tensor->GetQuantParams().empty()) {
-    MS_LOG(ERROR) << "no quant param";
-    return RET_ERROR;
-  }
-  const auto *quant_data = static_cast<const uint8_t *>(input_tensor->MutableData());
-  auto *dequant_data = static_cast<float *>(malloc(input_tensor->ElementsNum() * sizeof(float)));
-  if (dequant_data == nullptr) {
-    MS_LOG(ERROR) << "malloc faile";
-    return RET_ERROR;
-  }
-
-  if (input_tensor->GetQuantParams().size() != kPerTensor) {
-    size_t channels = static_cast<size_t>(input_tensor->Batch());
-    if (input_tensor->GetQuantParams().size() != channels) {
-      MS_LOG(ERROR) << "Quant param not equal channel num " << input_tensor->GetQuantParams().size() << channels;
-      free(dequant_data);
-      return RET_ERROR;
-    }
-    size_t per_channel_size = input_tensor->ElementsNum() / channels;
-    auto quant_param = input_tensor->GetQuantParams();
-    for (size_t i = 0; i < channels; i++) {
-      auto param = quant_param.at(i);
-      auto scale = param.scale;
-      auto zero_point = param.zeroPoint;
-      for (size_t j = 0; j < per_channel_size; j++) {
-        dequant_data[per_channel_size * i + j] =
-          static_cast<float>((quant_data[per_channel_size * i + j] - zero_point) * scale);
-      }
-    }
-  } else {
-    auto quant_param = input_tensor->GetQuantParams();
-    auto param = quant_param.front();
-    auto scale = param.scale;
-    auto zero_point = param.zeroPoint;
-    for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) {
-      dequant_data[j] = static_cast<float>((quant_data[j] - zero_point) * scale);
-    }
-  }
-  input_tensor->SetData(dequant_data);
-  return RET_OK;
-}
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h b/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h
index 447e61533d8..9f54577d178 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h
@@ -32,7 +32,6 @@
 using mindspore::lite::Context;
 using mindspore::schema::PadMode;
 using mindspore::schema::QuantType;
-static constexpr int kPerTensor = 1;
 
 namespace mindspore::kernel {
 class ConvolutionBaseCPUKernel : public LiteKernel {
@@ -60,7 +59,6 @@ class ConvolutionBaseCPUKernel : public LiteKernel {
   int SetQuantMultiplier();
   int CheckResizeValid();
   void FreeQuantParam();
-  static int RestoreFilter(lite::Tensor *input_tensor);
 
  protected:
   int tile_num_;
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.cc
index dfd5d88125e..7ff75ba0b4c 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.cc
@@ -53,56 +53,6 @@ kernel::LiteKernel *CpuFullConnectionInt8KernelCreator(const std::vector<lite::T
   }
   return kernel;
 }
-int RestoreFullconnectWeight(lite::Tensor *input_tensor) {
-  MS_ASSERT(input_tensor != nullptr);
-  if (input_tensor->data_type() != kNumberTypeInt8) {
-    MS_LOG(ERROR) << "full connect input type error" << input_tensor->data_type();
-    return RET_ERROR;
-  }
-  if (input_tensor->GetQuantParams().empty()) {
-    MS_LOG(ERROR) << "no quant param";
-    return RET_ERROR;
-  }
-  const auto *quant_data = static_cast<const int8_t *>(input_tensor->MutableData());
-  if (quant_data == nullptr) {
-    MS_LOG(ERROR) << "input_tensor MutableData is nullptr.";
-    return RET_ERROR;
-  }
-  auto *dequant_data = static_cast<float *>(malloc(input_tensor->ElementsNum() * sizeof(float)));
-  if (dequant_data == nullptr) {
-    MS_LOG(ERROR) << "malloc faile";
-    return RET_ERROR;
-  }
-
-  if (input_tensor->GetQuantParams().size() != kPerTensor) {
-    size_t channels = static_cast<size_t>(input_tensor->Batch());
-    if (input_tensor->GetQuantParams().size() != channels) {
-      MS_LOG(ERROR) << "Quant param not equal channel num " << input_tensor->GetQuantParams().size() << channels;
-      return RET_ERROR;
-    }
-    size_t per_channel_size = input_tensor->ElementsNum() / channels;
-    auto quant_param = input_tensor->GetQuantParams();
-    for (size_t i = 0; i < channels; i++) {
-      auto param = quant_param.at(i);
-      auto scale = param.scale;
-      auto zero_point = param.zeroPoint;
-      for (size_t j = 0; j < per_channel_size; j++) {
-        dequant_data[per_channel_size * i + j] =
-          static_cast<float>((quant_data[per_channel_size * i + j] - zero_point) * scale);
-      }
-    }
-  } else {
-    auto quant_param = input_tensor->GetQuantParams();
-    auto param = quant_param.front();
-    auto scale = param.scale;
-    auto zero_point = param.zeroPoint;
-    for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) {
-      dequant_data[j] = static_cast<float>((quant_data[j] - zero_point) * scale);
-    }
-  }
-  input_tensor->SetData(dequant_data);
-  return RET_OK;
-}
 kernel::LiteKernel *CpuFullConnectionFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                        const std::vector<lite::Tensor *> &outputs,
                                                        OpParameter *opParameter, const lite::Context *ctx,
@@ -114,11 +64,20 @@ kernel::LiteKernel *CpuFullConnectionFp32KernelCreator(const std::vector<lite::T
   // data of second tensor of fc may be nullptr
   auto *restore_data = weight_tensor->data_c();
   if (!weight_tensor->GetQuantParams().empty()) {
-    RestoreFullconnectWeight(inputs.at(kWeightIndex));
+    auto *dequant_weight = kernel::LiteKernelUtil::DequantWeight(weight_tensor);
+    if (dequant_weight == nullptr) {
+      MS_LOG(ERROR) << "dequant data is nullptr.";
+      return nullptr;
+    }
+    weight_tensor->SetData(dequant_weight);
   }
   auto kernel = new (std::nothrow) FullconnectionCPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (!kernel) {
     MS_LOG(ERROR) << "kernel is nullptr.";
+    if (!weight_tensor->GetQuantParams().empty()) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
     return nullptr;
   }
   auto ret = kernel->Init();
@@ -126,6 +85,10 @@ kernel::LiteKernel *CpuFullConnectionFp32KernelCreator(const std::vector<lite::T
     delete kernel;
     MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                   << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
+    if (!weight_tensor->GetQuantParams().empty()) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
     return nullptr;
   }
   if (!weight_tensor->GetQuantParams().empty()) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.h b/mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.h
index c9e6b42f614..924915b391d 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.h
@@ -23,7 +23,6 @@
 #include "nnacl/matmul_parameter.h"
 
 using mindspore::lite::Context;
-static constexpr int kPerTensor = 1;
 
 namespace mindspore::kernel {
 class FullconnectionBaseCPUKernel : public LiteKernel {
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.cc
index b10a3e52126..ab1a4996490 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.cc
@@ -26,56 +26,6 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_MatMul;
 
 namespace mindspore::kernel {
-int RestoreMatmulWeight(lite::Tensor *input_tensor) {
-  MS_ASSERT(input_tensor != nullptr);
-  if (input_tensor->data_type() != kNumberTypeUInt8) {
-    MS_LOG(ERROR) << "mat mul input type error" << input_tensor->data_type();
-    return RET_ERROR;
-  }
-  if (input_tensor->GetQuantParams().empty()) {
-    MS_LOG(ERROR) << "no quant param";
-    return RET_ERROR;
-  }
-  const auto *quant_data = static_cast<const uint8_t *>(input_tensor->MutableData());
-  if (quant_data == nullptr) {
-    MS_LOG(ERROR) << "input_tensor MutableData is nullptr.";
-    return RET_ERROR;
-  }
-  auto *dequant_data = static_cast<float *>(malloc(input_tensor->ElementsNum() * sizeof(float)));
-  if (dequant_data == nullptr) {
-    MS_LOG(ERROR) << "malloc faile";
-    return RET_ERROR;
-  }
-
-  if (input_tensor->GetQuantParams().size() != kPerTensor) {
-    size_t channels = static_cast<size_t>(input_tensor->Batch());
-    if (input_tensor->GetQuantParams().size() != channels) {
-      MS_LOG(ERROR) << "Quant param not equal channel num " << input_tensor->GetQuantParams().size() << channels;
-      return RET_ERROR;
-    }
-    size_t per_channel_size = input_tensor->ElementsNum() / channels;
-    auto quant_param = input_tensor->GetQuantParams();
-    for (size_t i = 0; i < channels; i++) {
-      auto param = quant_param.at(i);
-      auto scale = param.scale;
-      auto zero_point = param.zeroPoint;
-      for (size_t j = 0; j < per_channel_size; j++) {
-        dequant_data[per_channel_size * i + j] =
-          static_cast<float>((quant_data[per_channel_size * i + j] - zero_point) * scale);
-      }
-    }
-  } else {
-    auto quant_param = input_tensor->GetQuantParams();
-    auto param = quant_param.front();
-    auto scale = param.scale;
-    auto zero_point = param.zeroPoint;
-    for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) {
-      dequant_data[j] = static_cast<float>((quant_data[j] - zero_point) * scale);
-    }
-  }
-  input_tensor->SetData(dequant_data);
-  return RET_OK;
-}
 kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::Tensor *> &inputs,
                                            const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
                                            const lite::Context *ctx, const kernel::KernelKey &desc,
@@ -89,8 +39,13 @@ kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::Tensor *> &in
     MS_LOG(ERROR) << "weight_tensor MutableData is nullptr.";
     return nullptr;
   }
-  if (primitive->GetQuantType() == schema::QuantType_WeightQuant) {
-    RestoreMatmulWeight(inputs.at(kWeightIndex));
+  if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+    auto *dequant_weight = kernel::LiteKernelUtil::DequantWeight(weight_tensor);
+    if (dequant_weight == nullptr) {
+      MS_LOG(ERROR) << "dequant data is nullptr.";
+      return nullptr;
+    }
+    weight_tensor->SetData(dequant_weight);
   }
 
   auto input_tensor = inputs.at(kInputIndex);
@@ -103,6 +58,10 @@ kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::Tensor *> &in
   }
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "kernel is nullptr.";
+    if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
     return nullptr;
   }
   auto ret = kernel->Init();
@@ -110,10 +69,14 @@ kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::Tensor *> &in
     delete kernel;
     MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                   << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
+    if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
     return nullptr;
   }
 
-  if (primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+  if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
     weight_tensor->FreeData();
     weight_tensor->SetData(restore_data);
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.h b/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.h
index f9c07c38ea1..3cec23c4625 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.h
@@ -23,7 +23,6 @@
 #include "nnacl/matmul_parameter.h"
 
 using mindspore::lite::Context;
-static constexpr int kPerTensor = 1;
 
 namespace mindspore::kernel {
 class MatmulBaseCPUKernel : public LiteKernel {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.cc
index 9fdcb21bdb4..026c4047ca1 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.cc
@@ -69,52 +69,6 @@ int ArithmeticSelfCPUKernel::DoArithmeticSelf(int task_id) {
   }
   return RET_OK;
 }
-int RestoreMulWeight(lite::Tensor *input_tensor) {
-  MS_ASSERT(input_tensor != nullptr);
-  if (input_tensor->data_type() != kNumberTypeUInt8) {
-    MS_LOG(ERROR) << "full connect input type error" << input_tensor->data_type();
-    return RET_ERROR;
-  }
-  if (input_tensor->GetQuantParams().empty()) {
-    MS_LOG(ERROR) << "no quant param";
-    return RET_ERROR;
-  }
-  const auto *quant_data = static_cast<const uint8_t *>(input_tensor->MutableData());
-  auto *dequant_data = static_cast<float *>(malloc(input_tensor->ElementsNum() * sizeof(float)));
-  if (dequant_data == nullptr) {
-    MS_LOG(ERROR) << "malloc faile";
-    return RET_ERROR;
-  }
-
-  if (input_tensor->GetQuantParams().size() != kPerTensor) {
-    size_t channels = static_cast<size_t>(input_tensor->Batch());
-    if (input_tensor->GetQuantParams().size() != channels) {
-      MS_LOG(ERROR) << "Quant param not equal channel num " << input_tensor->GetQuantParams().size() << channels;
-      return RET_ERROR;
-    }
-    size_t per_channel_size = input_tensor->ElementsNum() / channels;
-    auto quant_param = input_tensor->GetQuantParams();
-    for (size_t i = 0; i < channels; i++) {
-      auto param = quant_param.at(i);
-      auto scale = param.scale;
-      auto zero_point = param.zeroPoint;
-      for (size_t j = 0; j < per_channel_size; j++) {
-        dequant_data[per_channel_size * i + j] =
-          static_cast<float>((quant_data[per_channel_size * i + j] - zero_point) * scale);
-      }
-    }
-  } else {
-    auto quant_param = input_tensor->GetQuantParams();
-    auto param = quant_param.front();
-    auto scale = param.scale;
-    auto zero_point = param.zeroPoint;
-    for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) {
-      dequant_data[j] = static_cast<float>((quant_data[j] - zero_point) * scale);
-    }
-  }
-  input_tensor->SetData(dequant_data);
-  return RET_OK;
-}
 int ArithmeticSelfCPUKernel::Run() {
   auto ret = Prepare();
   if (ret != RET_OK) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.h b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.h
index 7de629b6fa8..254c529f588 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.h
@@ -37,7 +37,6 @@ using mindspore::schema::PrimitiveType_Sin;
 using mindspore::schema::PrimitiveType_Sqrt;
 using mindspore::schema::PrimitiveType_Square;
 using mindspore::schema::PrimitiveType_Neg;
-static constexpr int kPerTensor = 1;
 
 namespace mindspore::kernel {
 class ArithmeticSelfCPUKernel : public LiteKernel {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc
index 4c1198013a9..ba2dfc8a7c9 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc
@@ -235,7 +235,12 @@ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::Tensor *> &
   auto *weight_tensor = inputs.at(kWeightIndex);
   auto *restore_data = weight_tensor->MutableData();
   if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
-    ConvolutionBaseCPUKernel::RestoreFilter(inputs.at(kWeightIndex));
+    auto *dequant_weight = kernel::LiteKernelUtil::DequantWeight(weight_tensor);
+    if (dequant_weight == nullptr) {
+      MS_LOG(ERROR) << "dequant data is nullptr.";
+      return nullptr;
+    }
+    weight_tensor->SetData(dequant_weight);
   }
 
   kernel::LiteKernel *kernel;
@@ -253,6 +258,10 @@ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::Tensor *> &
   }
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "kernel is nullptr.";
+    if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
     return nullptr;
   }
   auto ret = kernel->Init();
@@ -260,6 +269,10 @@ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::Tensor *> &
     delete kernel;
     MS_LOG(ERROR) << "Init kernel failed, name: " << op_parameter->name_ << ", type: "
                   << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(op_parameter->type_));
+    if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
     return nullptr;
   }
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc
index b24e05cf9b7..5588e315616 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc
@@ -134,7 +134,12 @@ kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector<lite::Tensor *>
   auto *weight_tensor = inputs.at(kWeightIndex);
   auto *restore_data = weight_tensor->MutableData();
   if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
-    ConvolutionBaseCPUKernel::RestoreFilter(inputs.at(kWeightIndex));
+    auto *dequant_weight = kernel::LiteKernelUtil::DequantWeight(weight_tensor);
+    if (dequant_weight == nullptr) {
+      MS_LOG(ERROR) << "dequant data is nullptr.";
+      return nullptr;
+    }
+    weight_tensor->SetData(dequant_weight);
   }
 
   auto conv_param = reinterpret_cast<ConvParameter *>(opParameter);
@@ -146,6 +151,10 @@ kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector<lite::Tensor *>
   }
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "kernel is nullptr.";
+    if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
     return nullptr;
   }
   auto ret = kernel->Init();
@@ -153,6 +162,10 @@ kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector<lite::Tensor *>
     delete kernel;
     MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                   << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
+    if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
     return nullptr;
   }
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc
index b9a41da299c..06ce5321512 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc
@@ -169,69 +169,32 @@ int ScaleCPUKernel::Run() {
   }
   return RET_OK;
 }
-int RestoreScaleWeight(lite::Tensor *input_tensor) {
-  MS_ASSERT(input_tensor != nullptr);
-  if (input_tensor->data_type() != kNumberTypeUInt8) {
-    MS_LOG(ERROR) << "mat mul input type error" << input_tensor->data_type();
-    return RET_ERROR;
-  }
-  if (input_tensor->GetQuantParams().empty()) {
-    MS_LOG(ERROR) << "no quant param";
-    return RET_ERROR;
-  }
-  const auto *quant_data = static_cast<const uint8_t *>(input_tensor->MutableData());
-  auto *dequant_data = static_cast<float *>(malloc(input_tensor->ElementsNum() * sizeof(float)));
-  if (dequant_data == nullptr) {
-    MS_LOG(ERROR) << "malloc faile";
-    return RET_ERROR;
-  }
-
-  if (input_tensor->GetQuantParams().size() != kPerTensor) {
-    size_t channels = static_cast<size_t>(input_tensor->Batch());
-    if (input_tensor->GetQuantParams().size() != channels) {
-      MS_LOG(ERROR) << "Quant param not equal channel num " << input_tensor->GetQuantParams().size() << channels;
-      return RET_ERROR;
-    }
-    size_t per_channel_size = input_tensor->ElementsNum() / channels;
-    auto quant_param = input_tensor->GetQuantParams();
-    for (size_t i = 0; i < channels; i++) {
-      auto param = quant_param.at(i);
-      auto scale = param.scale;
-      auto zero_point = param.zeroPoint;
-      for (size_t j = 0; j < per_channel_size; j++) {
-        dequant_data[per_channel_size * i + j] =
-          static_cast<float>((quant_data[per_channel_size * i + j] - zero_point) * scale);
-      }
-    }
-  } else {
-    auto quant_param = input_tensor->GetQuantParams();
-    auto param = quant_param.front();
-    auto scale = param.scale;
-    auto zero_point = param.zeroPoint;
-    for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) {
-      dequant_data[j] = static_cast<float>((quant_data[j] - zero_point) * scale);
-    }
-  }
-  input_tensor->SetData(dequant_data);
-  return RET_OK;
-}
 kernel::LiteKernel *CpuScaleFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                               const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
                                               const lite::Context *ctx, const kernel::KernelKey &desc,
                                               const mindspore::lite::PrimitiveC *primitive) {
   MS_ASSERT(desc.type == schema::PrimitiveType_Scale);
-  auto *weight_tensor = inputs.at(kWeightIndex);
-  auto *restore_data = weight_tensor->MutableData();
-  if (primitive->GetQuantType() == schema::QuantType_WeightQuant) {
-    RestoreScaleWeight(inputs.at(kWeightIndex));
-  }
   if (opParameter == nullptr) {
     MS_LOG(ERROR) << "opParameter is nullptr";
     return nullptr;
   }
+  auto *weight_tensor = inputs.at(kWeightIndex);
+  auto *restore_data = weight_tensor->MutableData();
+  if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+    auto *dequant_weight = kernel::LiteKernelUtil::DequantWeight(weight_tensor);
+    if (dequant_weight == nullptr) {
+      MS_LOG(ERROR) << "dequant data is nullptr.";
+      return nullptr;
+    }
+    weight_tensor->SetData(dequant_weight);
+  }
   auto *kernel = new (std::nothrow) ScaleCPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "New kernel fails.";
+    if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
     return nullptr;
   }
 
@@ -240,9 +203,13 @@ kernel::LiteKernel *CpuScaleFp32KernelCreator(const std::vector<lite::Tensor *>
     MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                   << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
     delete kernel;
+    if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
     return nullptr;
   }
-  if (primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+  if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
     weight_tensor->FreeData();
     weight_tensor->SetData(restore_data);
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h
index 01b334f10bb..e4492fc591d 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h
@@ -21,7 +21,6 @@
 #include "src/lite_kernel.h"
 #include "nnacl/fp32/scale.h"
 
-static constexpr int kPerTensor = 1;
 namespace mindspore::kernel {
 
 class ScaleCPUKernel : public LiteKernel {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/apply_momentum.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/apply_momentum.cc
index 4c88b3e7ca2..41e45c0d82c 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/apply_momentum.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/apply_momentum.cc
@@ -65,7 +65,7 @@ int ApplyMomentumCPUKernel::Init() {
   // Only for test with uninitialized Data
   size_t elem_num = in_tensors_[0]->ElementsNum();
   auto accumulate = reinterpret_cast<float *>(in_tensors_[1]->MutableData());
-  for (int i = 0; i < elem_num; i++) accumulate[i] = 0.0;
+  for (size_t i = 0; i < elem_num; i++) accumulate[i] = 0.0;
 
   workspace = new float[elem_num];
   return 0;
diff --git a/mindspore/lite/test/models_tflite_weightquant.cfg b/mindspore/lite/test/models_tflite_weightquant.cfg
new file mode 100644
index 00000000000..dfc9d8397ae
--- /dev/null
+++ b/mindspore/lite/test/models_tflite_weightquant.cfg
@@ -0,0 +1 @@
+ml_face_openclose.tflite
diff --git a/mindspore/lite/test/run_benchmark_nets.sh b/mindspore/lite/test/run_benchmark_nets.sh
index a0ce9217f97..797483aa70b 100644
--- a/mindspore/lite/test/run_benchmark_nets.sh
+++ b/mindspore/lite/test/run_benchmark_nets.sh
@@ -127,6 +127,27 @@ function Run_x86() {
             return 1
         fi
     done < ${models_mindspore_config}
+
+    # Run the weight-quantized tflite models (benchmarks <model>_weightquant.ms); the logged command mirrors the executed one:
+    while read line; do
+        model_name=${line}
+        if [[ $model_name == \#* ]]; then
+          continue
+        fi
+        echo ${model_name} >> "${run_benchmark_log_file}"
+        echo 'cd  '${convertor_path}'/mindspore-lite-'${version}'-runtime-x86-'${process_unit_x86} >> "${run_benchmark_log_file}"
+        cd ${convertor_path}/mindspore-lite-${version}-runtime-x86-${process_unit_x86} || return 1
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelPath='${ms_models_path}'/'${model_name}'_weightquant.ms --inDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --calibDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --warmUpLoopCount=1 --loopCount=1' >> "${run_benchmark_log_file}"
+        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelPath=${ms_models_path}/${model_name}_weightquant.ms --inDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --calibDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --warmUpLoopCount=1 --loopCount=1 >> "${run_benchmark_log_file}"
+        if [ $? = 0 ]; then
+            run_result='x86: '${model_name}'_weightquant pass'
+            echo ${run_result} >> ${run_benchmark_result_file}
+        else
+            run_result='x86: '${model_name}'_weightquant failed'
+            echo ${run_result} >> ${run_benchmark_result_file}
+            return 1
+        fi
+    done < ${models_tflite_weightquant_config}
 }
 
 # Run on arm64 platform:
@@ -432,6 +453,7 @@ models_tflite_config=${basepath}/models_tflite.cfg
 models_caffe_config=${basepath}/models_caffe.cfg
 models_tflite_awaretraining_config=${basepath}/models_tflite_awaretraining.cfg
 models_tflite_posttraining_config=${basepath}/models_tflite_posttraining.cfg
+models_tflite_weightquant_config=${basepath}/models_tflite_weightquant.cfg
 models_onnx_config=${basepath}/models_onnx.cfg
 models_fp16_config=${basepath}/models_fp16.cfg
 models_mindspore_config=${basepath}/models_mindspore.cfg
@@ -522,6 +544,17 @@ while read line; do
   cp ${ms_models_path}/${model_name}.ms ${ms_models_path}/${model_name}.fp16.ms
 done < ${models_fp16_config}
 
+# Convert weightquant models (output file is <model>_weightquant); the logged command mirrors the executed one:
+while read line; do
+    model_name=${line}
+    if [[ $model_name == \#* ]]; then
+      continue
+    fi
+    echo ${model_name} >> "${run_benchmark_log_file}"
+    echo './converter_lite  --fmk=TFLITE --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}'_weightquant --quantType=WeightQuant --bitNum=8 --quantSize=500 --convWeightQuantChannelThreshold=16' >> "${run_benchmark_log_file}"
+    ./converter_lite  --fmk=TFLITE --modelFile=$models_path/${model_name} --outputFile=${ms_models_path}/${model_name}_weightquant --quantType=WeightQuant --bitNum=8 --quantSize=500 --convWeightQuantChannelThreshold=16 || Convert_status=$?
+done < ${models_tflite_weightquant_config}
+
 # Check all result and return value
 if [[ ${Convert_status} = 0 ]];then
     echo "convert is ended"
diff --git a/mindspore/lite/tools/anf_importer/import_from_meta_graphT.cc b/mindspore/lite/tools/anf_importer/import_from_meta_graphT.cc
index 907f219ccb4..682122c142a 100644
--- a/mindspore/lite/tools/anf_importer/import_from_meta_graphT.cc
+++ b/mindspore/lite/tools/anf_importer/import_from_meta_graphT.cc
@@ -72,7 +72,7 @@ ValueNodePtr AnfImporterFromMetaGraphT::ConvertPrimitive(const std::unique_ptr<s
   auto primitiveCValue = PrimitiveC::Create(cNode->primitive.release());
   cNode->primitive = nullptr;
   // add quant parameter
-  if (cNode->quantType != schema::QuantType_PostTraining) {
+  if (cNode->quantType != schema::QuantType_PostTraining && cNode->quantType != schema::QuantType_WeightQuant) {
     primitiveCValue->SetQuantType(cNode->quantType);
     for (int index : cNode->inputIndex) {
       if (meta_graph_->allTensors[index]->quantParams.size() > 0) {
diff --git a/mindspore/lite/tools/converter/anf_transform.cc b/mindspore/lite/tools/converter/anf_transform.cc
index 1d731543030..12d89535c19 100644
--- a/mindspore/lite/tools/converter/anf_transform.cc
+++ b/mindspore/lite/tools/converter/anf_transform.cc
@@ -64,10 +64,15 @@ FuncGraphPtr AnfTransform::Transform(const FuncGraphPtr &old_graph, const conver
         return nullptr;
       }
     } else if (config->quantType == schema::QuantType_WeightQuant) {
+      auto bitNum = static_cast<size_t>(std::stoull(config->bitNum));
+      if (bitNum != quant::UINT8_QUANTIZATION) {
+        MS_LOG(ERROR) << "Current Only Support 8 bit weight quant";
+        return nullptr;
+      }
       this->mQuantizer = std::make_unique<quant::WeightQuantizer>(
         new_graph, config->quantSize, config->convWeightQuantChannelThreshold, config->bitNum);
       if (mQuantizer == nullptr) {
-        MS_LOG(ERROR) << "New PostTrainingQuantizer failed";
+        MS_LOG(ERROR) << "New WeightQuantizer failed";
         return nullptr;
       }
     }
diff --git a/mindspore/lite/tools/converter/converter_flags.cc b/mindspore/lite/tools/converter/converter_flags.cc
index 0afb39fdaf5..8e17a68cfe0 100644
--- a/mindspore/lite/tools/converter/converter_flags.cc
+++ b/mindspore/lite/tools/converter/converter_flags.cc
@@ -31,7 +31,7 @@ Flags::Flags() {
           "Input model weight file path. Needed when fmk is CAFFE. CAFFE: *.caffemodel", "");
   AddFlag(&Flags::inferenceTypeIn, "inferenceType",
           "Real data type saved in output file, reserved param, NOT used for now. FLOAT | INT8", "FLOAT");
-  AddFlag(&Flags::quantTypeIn, "quantType", "Quantization Type. AwareTraining | PostTraining", "");
+  AddFlag(&Flags::quantTypeIn, "quantType", "Quantization Type. AwareTraining | PostTraining | WeightQuant", "");
   AddFlag(&Flags::inputInferenceTypeIn, "inputInferenceType", "Input inference data type. FLOAT | INT8", "FLOAT");
   AddFlag(&Flags::stdDev, "stdDev", "Standard deviation value for aware-quantization", "128");
   AddFlag(&Flags::mean, "mean", "Mean value for aware-quantization", "-0.5");
diff --git a/mindspore/lite/tools/converter/legacy_optimizer/graph/weight_format_hardcode_pass.cc b/mindspore/lite/tools/converter/legacy_optimizer/graph/weight_format_hardcode_pass.cc
index 7c396c2d3e5..a4bea1892b6 100644
--- a/mindspore/lite/tools/converter/legacy_optimizer/graph/weight_format_hardcode_pass.cc
+++ b/mindspore/lite/tools/converter/legacy_optimizer/graph/weight_format_hardcode_pass.cc
@@ -86,6 +86,7 @@ STATUS WeightFormatHardCodePass::HardCodeCAFFE(const std::unique_ptr<CNodeT> &no
   MS_ASSERT(node->primitive != nullptr);
   auto opType = node->primitive->value.type;
   switch (this->quantType) {
+    case QuantType_WeightQuant:
     case QuantType_QUANT_NONE: {
       if (opType == schema::PrimitiveType_Conv2D || opType == schema::PrimitiveType_DepthwiseConv2D ||
           opType == schema::PrimitiveType_DeConv2D || opType == schema::PrimitiveType_DeDepthwiseConv2D) {
@@ -123,6 +124,7 @@ STATUS WeightFormatHardCodePass::HardCodeONNX(const std::unique_ptr<CNodeT> &nod
         return RET_ERROR;
       }
     } break;
+    case QuantType_WeightQuant:
     case QuantType_QUANT_NONE: {
       // conv (K x C/group x kH x kW) group = 1
       // depth (K x C/group x kH x kW) group = channelOut ==> (K, multiplier, H, W)
@@ -162,6 +164,7 @@ STATUS WeightFormatHardCodePass::HardCodeMS(const std::unique_ptr<CNodeT> &node,
         weightTensor->format = schema::Format::Format_KCHW;
       }
     } break;
+    case QuantType_WeightQuant:
     case QuantType_QUANT_NONE: {
       // sum up from current ms quant models
       if (opType == PrimitiveType_Conv2D) {
diff --git a/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc b/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc
index 1fa02f84b76..e6c1110d7e4 100644
--- a/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc
+++ b/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc
@@ -66,13 +66,14 @@ STATUS WeightQuantizer::DoConvQuantize(const std::list<CNodePtr> &nodes) {
 
     ParamValueLitePtr param_value = std::static_pointer_cast<ParamValueLite>(param_node->default_param());
     auto status =
-      QuantFilter<uint8_t>(param_value, primitive_c, QuantType_WeightQuant, 255, 0, bitNum, true, depthwise);
+      QuantFilter<int8_t>(param_value, primitive_c, QuantType_WeightQuant,
+        quant_max, quant_min, bitNum, true, depthwise);
     if (status != RET_OK) {
       MS_LOG(ERROR) << "QuantFilter failed : " << status;
       return status;
     }
     // set dtype
-    param_value->set_tensor_type(kNumberTypeUInt8);
+    param_value->set_tensor_type(kNumberTypeInt8);
     auto abstractBase = param_node->abstract();
     if (abstractBase == nullptr) {
       MS_LOG(ERROR) << "Abstract of parameter is nullptr, " << param_node->name();
@@ -83,7 +84,7 @@ STATUS WeightQuantizer::DoConvQuantize(const std::list<CNodePtr> &nodes) {
       return RET_ERROR;
     }
     auto abstractTensor = utils::cast<abstract::AbstractTensorPtr>(abstractBase);
-    abstractTensor->element()->set_type(TypeIdToType(kNumberTypeUInt8));
+    abstractTensor->element()->set_type(TypeIdToType(kNumberTypeInt8));
     primitive_c->SetQuantType(schema::QuantType_WeightQuant);
   }
 
@@ -128,12 +129,13 @@ STATUS WeightQuantizer::DoMulQuantize(const std::list<CNodePtr> &nodes) {
 
     std::vector<schema::QuantParamT> quant_params;
     primitive_c->AddInputQuantParam(quant_params);
-    auto status = QuantFilter<uint8_t>(param_value, primitive_c, QuantType_WeightQuant, 255, 0, bitNum, true, false);
+    auto status = QuantFilter<int8_t>(param_value, primitive_c, QuantType_WeightQuant,
+      quant_max, quant_min, bitNum, true, false);
     if (status != RET_OK) {
       MS_LOG(ERROR) << "QuantFilter failed : " << status;
       return status;
     }
-    param_value->set_tensor_type(kNumberTypeUInt8);
+    param_value->set_tensor_type(kNumberTypeInt8);
     // set dtype
     auto abstractBase = param_node->abstract();
     if (abstractBase == nullptr) {
@@ -145,7 +147,7 @@ STATUS WeightQuantizer::DoMulQuantize(const std::list<CNodePtr> &nodes) {
       return RET_ERROR;
     }
     auto abstractTensor = utils::cast<abstract::AbstractTensorPtr>(abstractBase);
-    abstractTensor->element()->set_type(TypeIdToType(kNumberTypeUInt8));
+    abstractTensor->element()->set_type(TypeIdToType(kNumberTypeInt8));
     primitive_c->SetQuantType(schema::QuantType_WeightQuant);
   }
 
diff --git a/mindspore/lite/tools/converter/quantizer/weight_quantizer.h b/mindspore/lite/tools/converter/quantizer/weight_quantizer.h
index d91c6b7b88e..7485343873b 100644
--- a/mindspore/lite/tools/converter/quantizer/weight_quantizer.h
+++ b/mindspore/lite/tools/converter/quantizer/weight_quantizer.h
@@ -41,7 +41,8 @@ class WeightQuantizer : public Quantizer {
   STATUS DoQuantize(FuncGraphPtr funcGraph) override;
   STATUS DoConvQuantize(const std::list<CNodePtr> &nodes);
   STATUS DoMulQuantize(const std::list<CNodePtr> &nodes);
-
+  int quant_max{INT8_MAX};
+  int quant_min{INT8_MIN};
  private:
   std::unique_ptr<QuantStrategy> mStrategy;
   size_t bitNum;