From 7dfac75de27d7caa58cb57954b788c60949d0cb5 Mon Sep 17 00:00:00 2001 From: kai00 Date: Fri, 11 Sep 2020 12:54:04 +0800 Subject: [PATCH] weight quant fix --- mindspore/lite/src/lite_kernel.cc | 47 ++++++++++++ mindspore/lite/src/lite_kernel.h | 4 ++ mindspore/lite/src/ops/primitive_c.cc | 4 +- .../kernel/arm/base/convolution_base.cc | 47 ------------ .../kernel/arm/base/convolution_base.h | 2 - .../kernel/arm/base/fullconnection_base.cc | 65 ++++------------- .../kernel/arm/base/fullconnection_base.h | 1 - .../runtime/kernel/arm/base/matmul_base.cc | 69 +++++------------- .../src/runtime/kernel/arm/base/matmul_base.h | 1 - .../kernel/arm/fp32/arithmetic_self.cc | 46 ------------ .../runtime/kernel/arm/fp32/arithmetic_self.h | 1 - .../runtime/kernel/arm/fp32/convolution.cc | 15 +++- .../kernel/arm/fp32/convolution_depthwise.cc | 15 +++- .../lite/src/runtime/kernel/arm/fp32/scale.cc | 71 +++++-------------- .../lite/src/runtime/kernel/arm/fp32/scale.h | 1 - .../kernel/arm/fp32_grad/apply_momentum.cc | 2 +- .../lite/test/models_tflite_weightquant.cfg | 1 + mindspore/lite/test/run_benchmark_nets.sh | 33 +++++++++ .../anf_importer/import_from_meta_graphT.cc | 2 +- .../lite/tools/converter/anf_transform.cc | 7 +- .../lite/tools/converter/converter_flags.cc | 2 +- .../graph/weight_format_hardcode_pass.cc | 3 + .../converter/quantizer/weight_quantizer.cc | 14 ++-- .../converter/quantizer/weight_quantizer.h | 3 +- 24 files changed, 186 insertions(+), 270 deletions(-) create mode 100644 mindspore/lite/test/models_tflite_weightquant.cfg diff --git a/mindspore/lite/src/lite_kernel.cc b/mindspore/lite/src/lite_kernel.cc index 7fa5420e968..7e6752b26c4 100644 --- a/mindspore/lite/src/lite_kernel.cc +++ b/mindspore/lite/src/lite_kernel.cc @@ -163,4 +163,51 @@ void LiteKernelUtil::InitTensorRefCount(std::vector &kerne } int LiteKernelUtil::SetInput(LiteKernel &kernelMod, std::vector inputs) { return -1; } + +float *LiteKernelUtil::DequantWeight(lite::Tensor *input_tensor) { + 
MS_ASSERT(input_tensor != nullptr); + if (input_tensor->data_type() != kNumberTypeInt8) { + MS_LOG(ERROR) << "conv weight input type error" << input_tensor->data_type(); + return nullptr; + } + if (input_tensor->GetQuantParams().empty()) { + MS_LOG(ERROR) << "no quant param"; + return nullptr; + } + const auto *quant_data = static_cast<const int8_t *>(input_tensor->MutableData()); + auto *dequant_data = static_cast<float *>(malloc(input_tensor->ElementsNum() * sizeof(float))); + if (dequant_data == nullptr) { + MS_LOG(ERROR) << "malloc faile"; + return nullptr; + } + + if (input_tensor->GetQuantParams().size() != kPerTensor) { + size_t channels = static_cast<size_t>(input_tensor->Batch()); + if (input_tensor->GetQuantParams().size() != channels) { + MS_LOG(ERROR) << "Quant param not equal channel num " << input_tensor->GetQuantParams().size() << channels; + free(dequant_data); + return nullptr; + } + size_t per_channel_size = input_tensor->ElementsNum() / channels; + auto quant_param = input_tensor->GetQuantParams(); + for (size_t i = 0; i < channels; i++) { + auto param = quant_param.at(i); + auto scale = param.scale; + auto zero_point = param.zeroPoint; + for (size_t j = 0; j < per_channel_size; j++) { + dequant_data[per_channel_size * i + j] = + static_cast<float>((quant_data[per_channel_size * i + j] - zero_point) * scale); + } + } + } else { + auto quant_param = input_tensor->GetQuantParams(); + auto param = quant_param.front(); + auto scale = param.scale; + auto zero_point = param.zeroPoint; + for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) { + dequant_data[j] = static_cast<float>((quant_data[j] - zero_point) * scale); + } + } + return dequant_data; +} } // namespace mindspore::kernel diff --git a/mindspore/lite/src/lite_kernel.h b/mindspore/lite/src/lite_kernel.h index e1196b82616..1ff4314ea84 100644 --- a/mindspore/lite/src/lite_kernel.h +++ b/mindspore/lite/src/lite_kernel.h @@ -28,6 +28,8 @@ #include "src/tensor.h" #include "include/errorcode.h" +static constexpr int kPerTensor = 1; + // 
using mindspore::kernel::AddressPtr; namespace mindspore::kernel { using mindspore::lite::RET_ERROR; @@ -202,6 +204,8 @@ class LiteKernelUtil { static void InitTensorRefCount(std::vector &kernels); static int SetInput(LiteKernel &kernelMod, std::vector inputs); + + static float *DequantWeight(lite::Tensor *input_tensor); }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/ops/primitive_c.cc b/mindspore/lite/src/ops/primitive_c.cc index 912db6aef2f..d81e217847b 100644 --- a/mindspore/lite/src/ops/primitive_c.cc +++ b/mindspore/lite/src/ops/primitive_c.cc @@ -657,8 +657,8 @@ PrimitiveC *PrimitiveC::Create(mindspore::schema::PrimitiveT *primitive) { return new PowerGrad(primitive); case schema::PrimitiveType_BNGradInput: return new BNGradInput(primitive); - case schema::PrimitiveType_SoftmaxCrossEntroy: - return new SoftmaxCrossEntroy(primitive); + case schema::PrimitiveType_SoftmaxCrossEntropy: + return new SoftmaxCrossEntropy(primitive); case schema::PrimitiveType_Depend: return new Depend(primitive); case schema::PrimitiveType_FlattenGrad: diff --git a/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc index 3c905ba8f8d..52aea1ad9c4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc @@ -324,51 +324,4 @@ int ConvolutionBaseCPUKernel::SetQuantParam() { &conv_param_->conv_quant_arg_.out_act_max_[0]); return RET_OK; } -int ConvolutionBaseCPUKernel::RestoreFilter(lite::Tensor *input_tensor) { - MS_ASSERT(input_tensor != nullptr); - if (input_tensor->data_type() != kNumberTypeUInt8) { - MS_LOG(ERROR) << "conv weight input type error" << input_tensor->data_type(); - return RET_ERROR; - } - if (input_tensor->GetQuantParams().empty()) { - MS_LOG(ERROR) << "no quant param"; - return RET_ERROR; - } - const auto *quant_data = static_cast(input_tensor->MutableData()); - auto *dequant_data = 
static_cast(malloc(input_tensor->ElementsNum() * sizeof(float))); - if (dequant_data == nullptr) { - MS_LOG(ERROR) << "malloc faile"; - return RET_ERROR; - } - - if (input_tensor->GetQuantParams().size() != kPerTensor) { - size_t channels = static_cast(input_tensor->Batch()); - if (input_tensor->GetQuantParams().size() != channels) { - MS_LOG(ERROR) << "Quant param not equal channel num " << input_tensor->GetQuantParams().size() << channels; - free(dequant_data); - return RET_ERROR; - } - size_t per_channel_size = input_tensor->ElementsNum() / channels; - auto quant_param = input_tensor->GetQuantParams(); - for (size_t i = 0; i < channels; i++) { - auto param = quant_param.at(i); - auto scale = param.scale; - auto zero_point = param.zeroPoint; - for (size_t j = 0; j < per_channel_size; j++) { - dequant_data[per_channel_size * i + j] = - static_cast((quant_data[per_channel_size * i + j] - zero_point) * scale); - } - } - } else { - auto quant_param = input_tensor->GetQuantParams(); - auto param = quant_param.front(); - auto scale = param.scale; - auto zero_point = param.zeroPoint; - for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) { - dequant_data[j] = static_cast((quant_data[j] - zero_point) * scale); - } - } - input_tensor->SetData(dequant_data); - return RET_OK; -} } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h b/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h index 447e61533d8..9f54577d178 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h @@ -32,7 +32,6 @@ using mindspore::lite::Context; using mindspore::schema::PadMode; using mindspore::schema::QuantType; -static constexpr int kPerTensor = 1; namespace mindspore::kernel { class ConvolutionBaseCPUKernel : public LiteKernel { @@ -60,7 +59,6 @@ class ConvolutionBaseCPUKernel : public LiteKernel { int SetQuantMultiplier(); int 
CheckResizeValid(); void FreeQuantParam(); - static int RestoreFilter(lite::Tensor *input_tensor); protected: int tile_num_; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.cc index dfd5d88125e..7ff75ba0b4c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.cc @@ -53,56 +53,6 @@ kernel::LiteKernel *CpuFullConnectionInt8KernelCreator(const std::vectordata_type() != kNumberTypeInt8) { - MS_LOG(ERROR) << "full connect input type error" << input_tensor->data_type(); - return RET_ERROR; - } - if (input_tensor->GetQuantParams().empty()) { - MS_LOG(ERROR) << "no quant param"; - return RET_ERROR; - } - const auto *quant_data = static_cast(input_tensor->MutableData()); - if (quant_data == nullptr) { - MS_LOG(ERROR) << "input_tensor MutableData is nullptr."; - return RET_ERROR; - } - auto *dequant_data = static_cast(malloc(input_tensor->ElementsNum() * sizeof(float))); - if (dequant_data == nullptr) { - MS_LOG(ERROR) << "malloc faile"; - return RET_ERROR; - } - - if (input_tensor->GetQuantParams().size() != kPerTensor) { - size_t channels = static_cast(input_tensor->Batch()); - if (input_tensor->GetQuantParams().size() != channels) { - MS_LOG(ERROR) << "Quant param not equal channel num " << input_tensor->GetQuantParams().size() << channels; - return RET_ERROR; - } - size_t per_channel_size = input_tensor->ElementsNum() / channels; - auto quant_param = input_tensor->GetQuantParams(); - for (size_t i = 0; i < channels; i++) { - auto param = quant_param.at(i); - auto scale = param.scale; - auto zero_point = param.zeroPoint; - for (size_t j = 0; j < per_channel_size; j++) { - dequant_data[per_channel_size * i + j] = - static_cast((quant_data[per_channel_size * i + j] - zero_point) * scale); - } - } - } else { - auto quant_param = input_tensor->GetQuantParams(); - auto param = 
quant_param.front(); - auto scale = param.scale; - auto zero_point = param.zeroPoint; - for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) { - dequant_data[j] = static_cast((quant_data[j] - zero_point) * scale); - } - } - input_tensor->SetData(dequant_data); - return RET_OK; -} kernel::LiteKernel *CpuFullConnectionFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, const lite::Context *ctx, @@ -114,11 +64,20 @@ kernel::LiteKernel *CpuFullConnectionFp32KernelCreator(const std::vectordata_c(); if (!weight_tensor->GetQuantParams().empty()) { - RestoreFullconnectWeight(inputs.at(kWeightIndex)); + auto *dequant_weight = kernel::LiteKernelUtil::DequantWeight(weight_tensor); + if (dequant_weight == nullptr) { + MS_LOG(ERROR) << "dequant data is nullptr."; + return nullptr; + } + weight_tensor->SetData(dequant_weight); } auto kernel = new (std::nothrow) FullconnectionCPUKernel(opParameter, inputs, outputs, ctx, primitive); if (!kernel) { MS_LOG(ERROR) << "kernel is nullptr."; + if (!weight_tensor->GetQuantParams().empty()) { + weight_tensor->FreeData(); + weight_tensor->SetData(restore_data); + } return nullptr; } auto ret = kernel->Init(); @@ -126,6 +85,10 @@ kernel::LiteKernel *CpuFullConnectionFp32KernelCreator(const std::vectorname_ << ", type: " << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); + if (!weight_tensor->GetQuantParams().empty()) { + weight_tensor->FreeData(); + weight_tensor->SetData(restore_data); + } return nullptr; } if (!weight_tensor->GetQuantParams().empty()) { diff --git a/mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.h b/mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.h index c9e6b42f614..924915b391d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.h @@ -23,7 +23,6 @@ #include "nnacl/matmul_parameter.h" using mindspore::lite::Context; -static 
constexpr int kPerTensor = 1; namespace mindspore::kernel { class FullconnectionBaseCPUKernel : public LiteKernel { diff --git a/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.cc index b10a3e52126..ab1a4996490 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.cc @@ -26,56 +26,6 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_MatMul; namespace mindspore::kernel { -int RestoreMatmulWeight(lite::Tensor *input_tensor) { - MS_ASSERT(input_tensor != nullptr); - if (input_tensor->data_type() != kNumberTypeUInt8) { - MS_LOG(ERROR) << "mat mul input type error" << input_tensor->data_type(); - return RET_ERROR; - } - if (input_tensor->GetQuantParams().empty()) { - MS_LOG(ERROR) << "no quant param"; - return RET_ERROR; - } - const auto *quant_data = static_cast(input_tensor->MutableData()); - if (quant_data == nullptr) { - MS_LOG(ERROR) << "input_tensor MutableData is nullptr."; - return RET_ERROR; - } - auto *dequant_data = static_cast(malloc(input_tensor->ElementsNum() * sizeof(float))); - if (dequant_data == nullptr) { - MS_LOG(ERROR) << "malloc faile"; - return RET_ERROR; - } - - if (input_tensor->GetQuantParams().size() != kPerTensor) { - size_t channels = static_cast(input_tensor->Batch()); - if (input_tensor->GetQuantParams().size() != channels) { - MS_LOG(ERROR) << "Quant param not equal channel num " << input_tensor->GetQuantParams().size() << channels; - return RET_ERROR; - } - size_t per_channel_size = input_tensor->ElementsNum() / channels; - auto quant_param = input_tensor->GetQuantParams(); - for (size_t i = 0; i < channels; i++) { - auto param = quant_param.at(i); - auto scale = param.scale; - auto zero_point = param.zeroPoint; - for (size_t j = 0; j < per_channel_size; j++) { - dequant_data[per_channel_size * i + j] = - static_cast((quant_data[per_channel_size * i + j] - zero_point) * 
scale); - } - } - } else { - auto quant_param = input_tensor->GetQuantParams(); - auto param = quant_param.front(); - auto scale = param.scale; - auto zero_point = param.zeroPoint; - for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) { - dequant_data[j] = static_cast((quant_data[j] - zero_point) * scale); - } - } - input_tensor->SetData(dequant_data); - return RET_OK; -} kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, const lite::Context *ctx, const kernel::KernelKey &desc, @@ -89,8 +39,13 @@ kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector &in MS_LOG(ERROR) << "weight_tensor MutableData is nullptr."; return nullptr; } - if (primitive->GetQuantType() == schema::QuantType_WeightQuant) { - RestoreMatmulWeight(inputs.at(kWeightIndex)); + if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) { + auto *dequant_weight = kernel::LiteKernelUtil::DequantWeight(weight_tensor); + if (dequant_weight == nullptr) { + MS_LOG(ERROR) << "dequant data is nullptr."; + return nullptr; + } + weight_tensor->SetData(dequant_weight); } auto input_tensor = inputs.at(kInputIndex); @@ -103,6 +58,10 @@ kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector &in } if (kernel == nullptr) { MS_LOG(ERROR) << "kernel is nullptr."; + if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) { + weight_tensor->FreeData(); + weight_tensor->SetData(restore_data); + } return nullptr; } auto ret = kernel->Init(); @@ -110,10 +69,14 @@ kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector &in delete kernel; MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); + if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) { + 
weight_tensor->FreeData(); + weight_tensor->SetData(restore_data); + } return nullptr; } - if (primitive->GetQuantType() == schema::QuantType_WeightQuant) { + if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) { weight_tensor->FreeData(); weight_tensor->SetData(restore_data); } diff --git a/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.h b/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.h index f9c07c38ea1..3cec23c4625 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/matmul_base.h @@ -23,7 +23,6 @@ #include "nnacl/matmul_parameter.h" using mindspore::lite::Context; -static constexpr int kPerTensor = 1; namespace mindspore::kernel { class MatmulBaseCPUKernel : public LiteKernel { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.cc index 9fdcb21bdb4..026c4047ca1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.cc @@ -69,52 +69,6 @@ int ArithmeticSelfCPUKernel::DoArithmeticSelf(int task_id) { } return RET_OK; } -int RestoreMulWeight(lite::Tensor *input_tensor) { - MS_ASSERT(input_tensor != nullptr); - if (input_tensor->data_type() != kNumberTypeUInt8) { - MS_LOG(ERROR) << "full connect input type error" << input_tensor->data_type(); - return RET_ERROR; - } - if (input_tensor->GetQuantParams().empty()) { - MS_LOG(ERROR) << "no quant param"; - return RET_ERROR; - } - const auto *quant_data = static_cast(input_tensor->MutableData()); - auto *dequant_data = static_cast(malloc(input_tensor->ElementsNum() * sizeof(float))); - if (dequant_data == nullptr) { - MS_LOG(ERROR) << "malloc faile"; - return RET_ERROR; - } - - if (input_tensor->GetQuantParams().size() != kPerTensor) { - size_t channels = static_cast(input_tensor->Batch()); - if 
(input_tensor->GetQuantParams().size() != channels) { - MS_LOG(ERROR) << "Quant param not equal channel num " << input_tensor->GetQuantParams().size() << channels; - return RET_ERROR; - } - size_t per_channel_size = input_tensor->ElementsNum() / channels; - auto quant_param = input_tensor->GetQuantParams(); - for (size_t i = 0; i < channels; i++) { - auto param = quant_param.at(i); - auto scale = param.scale; - auto zero_point = param.zeroPoint; - for (size_t j = 0; j < per_channel_size; j++) { - dequant_data[per_channel_size * i + j] = - static_cast((quant_data[per_channel_size * i + j] - zero_point) * scale); - } - } - } else { - auto quant_param = input_tensor->GetQuantParams(); - auto param = quant_param.front(); - auto scale = param.scale; - auto zero_point = param.zeroPoint; - for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) { - dequant_data[j] = static_cast((quant_data[j] - zero_point) * scale); - } - } - input_tensor->SetData(dequant_data); - return RET_OK; -} int ArithmeticSelfCPUKernel::Run() { auto ret = Prepare(); if (ret != RET_OK) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.h b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.h index 7de629b6fa8..254c529f588 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.h @@ -37,7 +37,6 @@ using mindspore::schema::PrimitiveType_Sin; using mindspore::schema::PrimitiveType_Sqrt; using mindspore::schema::PrimitiveType_Square; using mindspore::schema::PrimitiveType_Neg; -static constexpr int kPerTensor = 1; namespace mindspore::kernel { class ArithmeticSelfCPUKernel : public LiteKernel { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc index 4c1198013a9..ba2dfc8a7c9 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc @@ 
-235,7 +235,12 @@ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector & auto *weight_tensor = inputs.at(kWeightIndex); auto *restore_data = weight_tensor->MutableData(); if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) { - ConvolutionBaseCPUKernel::RestoreFilter(inputs.at(kWeightIndex)); + auto *dequant_weight = kernel::LiteKernelUtil::DequantWeight(weight_tensor); + if (dequant_weight == nullptr) { + MS_LOG(ERROR) << "dequant data is nullptr."; + return nullptr; + } + weight_tensor->SetData(dequant_weight); } kernel::LiteKernel *kernel; @@ -253,6 +258,10 @@ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector & } if (kernel == nullptr) { MS_LOG(ERROR) << "kernel is nullptr."; + if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) { + weight_tensor->FreeData(); + weight_tensor->SetData(restore_data); + } return nullptr; } auto ret = kernel->Init(); @@ -260,6 +269,10 @@ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector & delete kernel; MS_LOG(ERROR) << "Init kernel failed, name: " << op_parameter->name_ << ", type: " << schema::EnumNamePrimitiveType(static_cast(op_parameter->type_)); + if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) { + weight_tensor->FreeData(); + weight_tensor->SetData(restore_data); + } return nullptr; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc index b24e05cf9b7..5588e315616 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc @@ -134,7 +134,12 @@ kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector auto *weight_tensor = inputs.at(kWeightIndex); auto *restore_data = weight_tensor->MutableData(); if 
(weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) { - ConvolutionBaseCPUKernel::RestoreFilter(inputs.at(kWeightIndex)); + auto *dequant_weight = kernel::LiteKernelUtil::DequantWeight(weight_tensor); + if (dequant_weight == nullptr) { + MS_LOG(ERROR) << "dequant data is nullptr."; + return nullptr; + } + weight_tensor->SetData(dequant_weight); } auto conv_param = reinterpret_cast(opParameter); @@ -146,6 +151,10 @@ kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector } if (kernel == nullptr) { MS_LOG(ERROR) << "kernel is nullptr."; + if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) { + weight_tensor->FreeData(); + weight_tensor->SetData(restore_data); + } return nullptr; } auto ret = kernel->Init(); @@ -153,6 +162,10 @@ kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector delete kernel; MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); + if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) { + weight_tensor->FreeData(); + weight_tensor->SetData(restore_data); + } return nullptr; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc index b9a41da299c..06ce5321512 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc @@ -169,69 +169,32 @@ int ScaleCPUKernel::Run() { } return RET_OK; } -int RestoreScaleWeight(lite::Tensor *input_tensor) { - MS_ASSERT(input_tensor != nullptr); - if (input_tensor->data_type() != kNumberTypeUInt8) { - MS_LOG(ERROR) << "mat mul input type error" << input_tensor->data_type(); - return RET_ERROR; - } - if (input_tensor->GetQuantParams().empty()) { - MS_LOG(ERROR) << "no quant param"; - return RET_ERROR; - } 
- const auto *quant_data = static_cast(input_tensor->MutableData()); - auto *dequant_data = static_cast(malloc(input_tensor->ElementsNum() * sizeof(float))); - if (dequant_data == nullptr) { - MS_LOG(ERROR) << "malloc faile"; - return RET_ERROR; - } - - if (input_tensor->GetQuantParams().size() != kPerTensor) { - size_t channels = static_cast(input_tensor->Batch()); - if (input_tensor->GetQuantParams().size() != channels) { - MS_LOG(ERROR) << "Quant param not equal channel num " << input_tensor->GetQuantParams().size() << channels; - return RET_ERROR; - } - size_t per_channel_size = input_tensor->ElementsNum() / channels; - auto quant_param = input_tensor->GetQuantParams(); - for (size_t i = 0; i < channels; i++) { - auto param = quant_param.at(i); - auto scale = param.scale; - auto zero_point = param.zeroPoint; - for (size_t j = 0; j < per_channel_size; j++) { - dequant_data[per_channel_size * i + j] = - static_cast((quant_data[per_channel_size * i + j] - zero_point) * scale); - } - } - } else { - auto quant_param = input_tensor->GetQuantParams(); - auto param = quant_param.front(); - auto scale = param.scale; - auto zero_point = param.zeroPoint; - for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) { - dequant_data[j] = static_cast((quant_data[j] - zero_point) * scale); - } - } - input_tensor->SetData(dequant_data); - return RET_OK; -} kernel::LiteKernel *CpuScaleFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, const lite::Context *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { MS_ASSERT(desc.type == schema::PrimitiveType_Scale); - auto *weight_tensor = inputs.at(kWeightIndex); - auto *restore_data = weight_tensor->MutableData(); - if (primitive->GetQuantType() == schema::QuantType_WeightQuant) { - RestoreScaleWeight(inputs.at(kWeightIndex)); - } if (opParameter == nullptr) { MS_LOG(ERROR) << "opParameter is nullptr"; return nullptr; } + auto *weight_tensor = 
inputs.at(kWeightIndex); + auto *restore_data = weight_tensor->MutableData(); + if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) { + auto *dequant_weight = kernel::LiteKernelUtil::DequantWeight(weight_tensor); + if (dequant_weight == nullptr) { + MS_LOG(ERROR) << "dequant data is nullptr."; + return nullptr; + } + weight_tensor->SetData(dequant_weight); + } auto *kernel = new (std::nothrow) ScaleCPUKernel(opParameter, inputs, outputs, ctx, primitive); if (kernel == nullptr) { MS_LOG(ERROR) << "New kernel fails."; + if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) { + weight_tensor->FreeData(); + weight_tensor->SetData(restore_data); + } return nullptr; } @@ -240,9 +203,13 @@ kernel::LiteKernel *CpuScaleFp32KernelCreator(const std::vector MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); delete kernel; + if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) { + weight_tensor->FreeData(); + weight_tensor->SetData(restore_data); + } return nullptr; } - if (primitive->GetQuantType() == schema::QuantType_WeightQuant) { + if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) { weight_tensor->FreeData(); weight_tensor->SetData(restore_data); } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h index 01b334f10bb..e4492fc591d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h @@ -21,7 +21,6 @@ #include "src/lite_kernel.h" #include "nnacl/fp32/scale.h" -static constexpr int kPerTensor = 1; namespace mindspore::kernel { class ScaleCPUKernel : public LiteKernel { diff --git 
a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/apply_momentum.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/apply_momentum.cc index 4c88b3e7ca2..41e45c0d82c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/apply_momentum.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/apply_momentum.cc @@ -65,7 +65,7 @@ int ApplyMomentumCPUKernel::Init() { // Only for test with uninitialized Data size_t elem_num = in_tensors_[0]->ElementsNum(); auto accumulate = reinterpret_cast(in_tensors_[1]->MutableData()); - for (int i = 0; i < elem_num; i++) accumulate[i] = 0.0; + for (size_t i = 0; i < elem_num; i++) accumulate[i] = 0.0; workspace = new float[elem_num]; return 0; diff --git a/mindspore/lite/test/models_tflite_weightquant.cfg b/mindspore/lite/test/models_tflite_weightquant.cfg new file mode 100644 index 00000000000..dfc9d8397ae --- /dev/null +++ b/mindspore/lite/test/models_tflite_weightquant.cfg @@ -0,0 +1 @@ +ml_face_openclose.tflite diff --git a/mindspore/lite/test/run_benchmark_nets.sh b/mindspore/lite/test/run_benchmark_nets.sh index a0ce9217f97..797483aa70b 100644 --- a/mindspore/lite/test/run_benchmark_nets.sh +++ b/mindspore/lite/test/run_benchmark_nets.sh @@ -127,6 +127,27 @@ function Run_x86() { return 1 fi done < ${models_mindspore_config} + + # Run tflite weight quantization converted models: + while read line; do + model_name=${line} + if [[ $model_name == \#* ]]; then + continue + fi + echo ${model_name} >> "${run_benchmark_log_file}" + echo 'cd '${convertor_path}'/mindspore-lite-'${version}'-runtime-x86-'${process_unit_x86} >> "${run_benchmark_log_file}" + cd ${convertor_path}/mindspore-lite-${version}-runtime-x86-${process_unit_x86} || return 1 + echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelPath='${ms_models_path}'/'${model_name}'.ms --inDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin 
--calibDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --warmUpLoopCount=1 --loopCount=1' >> "${run_benchmark_log_file}" + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelPath=${ms_models_path}/${model_name}_weightquant.ms --inDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --calibDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --warmUpLoopCount=1 --loopCount=1 >> "${run_benchmark_log_file}" + if [ $? = 0 ]; then + run_result='x86: '${model_name}'_weightquant pass' + echo ${run_result} >> ${run_benchmark_result_file} + else + run_result='x86: '${model_name}'_weightquant failed' + echo ${run_result} >> ${run_benchmark_result_file} + return 1 + fi + done < ${models_tflite_weightquant_config} } # Run on arm64 platform: @@ -432,6 +453,7 @@ models_tflite_config=${basepath}/models_tflite.cfg models_caffe_config=${basepath}/models_caffe.cfg models_tflite_awaretraining_config=${basepath}/models_tflite_awaretraining.cfg models_tflite_posttraining_config=${basepath}/models_tflite_posttraining.cfg +models_tflite_weightquant_config=${basepath}/models_tflite_weightquant.cfg models_onnx_config=${basepath}/models_onnx.cfg models_fp16_config=${basepath}/models_fp16.cfg models_mindspore_config=${basepath}/models_mindspore.cfg @@ -522,6 +544,17 @@ while read line; do cp ${ms_models_path}/${model_name}.ms ${ms_models_path}/${model_name}.fp16.ms done < ${models_fp16_config} +# Convert weightquant models: +while read line; do + model_name=${line} + if [[ $model_name == \#* ]]; then + continue + fi + echo ${model_name} >> "${run_benchmark_log_file}" + echo './converter_lite --fmk=TFLITE --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}'--quantType=WeightQuant --bitNum=8 --quantSize=500 
--convWeightQuantChannelThreshold=16' >> "${run_benchmark_log_file}" + ./converter_lite --fmk=TFLITE --modelFile=$models_path/${model_name} --outputFile=${ms_models_path}/${model_name}_weightquant --quantType=WeightQuant --bitNum=8 --quantSize=500 --convWeightQuantChannelThreshold=16 || Convert_status=$? +done < ${models_tflite_weightquant_config} + # Check all result and return value if [[ ${Convert_status} = 0 ]];then echo "convert is ended" diff --git a/mindspore/lite/tools/anf_importer/import_from_meta_graphT.cc b/mindspore/lite/tools/anf_importer/import_from_meta_graphT.cc index 907f219ccb4..682122c142a 100644 --- a/mindspore/lite/tools/anf_importer/import_from_meta_graphT.cc +++ b/mindspore/lite/tools/anf_importer/import_from_meta_graphT.cc @@ -72,7 +72,7 @@ ValueNodePtr AnfImporterFromMetaGraphT::ConvertPrimitive(const std::unique_ptrprimitive.release()); cNode->primitive = nullptr; // add quant parameter - if (cNode->quantType != schema::QuantType_PostTraining) { + if (cNode->quantType != schema::QuantType_PostTraining && cNode->quantType != schema::QuantType_WeightQuant) { primitiveCValue->SetQuantType(cNode->quantType); for (int index : cNode->inputIndex) { if (meta_graph_->allTensors[index]->quantParams.size() > 0) { diff --git a/mindspore/lite/tools/converter/anf_transform.cc b/mindspore/lite/tools/converter/anf_transform.cc index 1d731543030..12d89535c19 100644 --- a/mindspore/lite/tools/converter/anf_transform.cc +++ b/mindspore/lite/tools/converter/anf_transform.cc @@ -64,10 +64,15 @@ FuncGraphPtr AnfTransform::Transform(const FuncGraphPtr &old_graph, const conver return nullptr; } } else if (config->quantType == schema::QuantType_WeightQuant) { + auto bitNum = static_cast(std::stoull(config->bitNum)); + if (bitNum != quant::UINT8_QUANTIZATION) { + MS_LOG(ERROR) << "Current Only Support 8 bit weight quant"; + return nullptr; + } this->mQuantizer = std::make_unique( new_graph, config->quantSize, config->convWeightQuantChannelThreshold, 
config->bitNum); if (mQuantizer == nullptr) { - MS_LOG(ERROR) << "New PostTrainingQuantizer failed"; + MS_LOG(ERROR) << "New WeightQuantizer failed"; return nullptr; } } diff --git a/mindspore/lite/tools/converter/converter_flags.cc b/mindspore/lite/tools/converter/converter_flags.cc index 0afb39fdaf5..8e17a68cfe0 100644 --- a/mindspore/lite/tools/converter/converter_flags.cc +++ b/mindspore/lite/tools/converter/converter_flags.cc @@ -31,7 +31,7 @@ Flags::Flags() { "Input model weight file path. Needed when fmk is CAFFE. CAFFE: *.caffemodel", ""); AddFlag(&Flags::inferenceTypeIn, "inferenceType", "Real data type saved in output file, reserved param, NOT used for now. FLOAT | INT8", "FLOAT"); - AddFlag(&Flags::quantTypeIn, "quantType", "Quantization Type. AwareTraining | PostTraining", ""); + AddFlag(&Flags::quantTypeIn, "quantType", "Quantization Type. AwareTraining | PostTraining | WeightQuant", ""); AddFlag(&Flags::inputInferenceTypeIn, "inputInferenceType", "Input inference data type. 
FLOAT | INT8", "FLOAT"); AddFlag(&Flags::stdDev, "stdDev", "Standard deviation value for aware-quantization", "128"); AddFlag(&Flags::mean, "mean", "Mean value for aware-quantization", "-0.5"); diff --git a/mindspore/lite/tools/converter/legacy_optimizer/graph/weight_format_hardcode_pass.cc b/mindspore/lite/tools/converter/legacy_optimizer/graph/weight_format_hardcode_pass.cc index 7c396c2d3e5..a4bea1892b6 100644 --- a/mindspore/lite/tools/converter/legacy_optimizer/graph/weight_format_hardcode_pass.cc +++ b/mindspore/lite/tools/converter/legacy_optimizer/graph/weight_format_hardcode_pass.cc @@ -86,6 +86,7 @@ STATUS WeightFormatHardCodePass::HardCodeCAFFE(const std::unique_ptr &no MS_ASSERT(node->primitive != nullptr); auto opType = node->primitive->value.type; switch (this->quantType) { + case QuantType_WeightQuant: case QuantType_QUANT_NONE: { if (opType == schema::PrimitiveType_Conv2D || opType == schema::PrimitiveType_DepthwiseConv2D || opType == schema::PrimitiveType_DeConv2D || opType == schema::PrimitiveType_DeDepthwiseConv2D) { @@ -123,6 +124,7 @@ STATUS WeightFormatHardCodePass::HardCodeONNX(const std::unique_ptr &nod return RET_ERROR; } } break; + case QuantType_WeightQuant: case QuantType_QUANT_NONE: { // conv (K x C/group x kH x kW) group = 1 // depth (K x C/group x kH x kW) group = channelOut ==> (K, multiplier, H, W) @@ -162,6 +164,7 @@ STATUS WeightFormatHardCodePass::HardCodeMS(const std::unique_ptr &node, weightTensor->format = schema::Format::Format_KCHW; } } break; + case QuantType_WeightQuant: case QuantType_QUANT_NONE: { // sum up from current ms quant models if (opType == PrimitiveType_Conv2D) { diff --git a/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc b/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc index 1fa02f84b76..e6c1110d7e4 100644 --- a/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc +++ b/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc @@ -66,13 +66,14 @@ STATUS 
WeightQuantizer::DoConvQuantize(const std::list &nodes) { ParamValueLitePtr param_value = std::static_pointer_cast(param_node->default_param()); auto status = - QuantFilter(param_value, primitive_c, QuantType_WeightQuant, 255, 0, bitNum, true, depthwise); + QuantFilter(param_value, primitive_c, QuantType_WeightQuant, + quant_max, quant_min, bitNum, true, depthwise); if (status != RET_OK) { MS_LOG(ERROR) << "QuantFilter failed : " << status; return status; } // set dtype - param_value->set_tensor_type(kNumberTypeUInt8); + param_value->set_tensor_type(kNumberTypeInt8); auto abstractBase = param_node->abstract(); if (abstractBase == nullptr) { MS_LOG(ERROR) << "Abstract of parameter is nullptr, " << param_node->name(); @@ -83,7 +84,7 @@ STATUS WeightQuantizer::DoConvQuantize(const std::list &nodes) { return RET_ERROR; } auto abstractTensor = utils::cast(abstractBase); - abstractTensor->element()->set_type(TypeIdToType(kNumberTypeUInt8)); + abstractTensor->element()->set_type(TypeIdToType(kNumberTypeInt8)); primitive_c->SetQuantType(schema::QuantType_WeightQuant); } @@ -128,12 +129,13 @@ STATUS WeightQuantizer::DoMulQuantize(const std::list &nodes) { std::vector quant_params; primitive_c->AddInputQuantParam(quant_params); - auto status = QuantFilter(param_value, primitive_c, QuantType_WeightQuant, 255, 0, bitNum, true, false); + auto status = QuantFilter(param_value, primitive_c, QuantType_WeightQuant, + quant_max, quant_min, bitNum, true, false); if (status != RET_OK) { MS_LOG(ERROR) << "QuantFilter failed : " << status; return status; } - param_value->set_tensor_type(kNumberTypeUInt8); + param_value->set_tensor_type(kNumberTypeInt8); // set dtype auto abstractBase = param_node->abstract(); if (abstractBase == nullptr) { @@ -145,7 +147,7 @@ STATUS WeightQuantizer::DoMulQuantize(const std::list &nodes) { return RET_ERROR; } auto abstractTensor = utils::cast(abstractBase); - abstractTensor->element()->set_type(TypeIdToType(kNumberTypeUInt8)); + 
abstractTensor->element()->set_type(TypeIdToType(kNumberTypeInt8)); primitive_c->SetQuantType(schema::QuantType_WeightQuant); } diff --git a/mindspore/lite/tools/converter/quantizer/weight_quantizer.h b/mindspore/lite/tools/converter/quantizer/weight_quantizer.h index d91c6b7b88e..7485343873b 100644 --- a/mindspore/lite/tools/converter/quantizer/weight_quantizer.h +++ b/mindspore/lite/tools/converter/quantizer/weight_quantizer.h @@ -41,7 +41,8 @@ class WeightQuantizer : public Quantizer { STATUS DoQuantize(FuncGraphPtr funcGraph) override; STATUS DoConvQuantize(const std::list &nodes); STATUS DoMulQuantize(const std::list &nodes); - + int quant_max{INT8_MAX}; + int quant_min{INT8_MIN}; private: std::unique_ptr mStrategy; size_t bitNum;