forked from mindspore-Ecosystem/mindspore

weight quant fix

This commit is contained in:
parent 8862e38f46
commit 7dfac75de2
@@ -163,4 +163,51 @@ void LiteKernelUtil::InitTensorRefCount(std::vector<kernel::LiteKernel *> &kernels)
}

int LiteKernelUtil::SetInput(LiteKernel &kernelMod, std::vector<lite::Tensor *> inputs) { return -1; }
+
+float *LiteKernelUtil::DequantWeight(lite::Tensor *input_tensor) {
+  MS_ASSERT(input_tensor != nullptr);
+  if (input_tensor->data_type() != kNumberTypeInt8) {
+    MS_LOG(ERROR) << "conv weight input type error: " << input_tensor->data_type();
+    return nullptr;
+  }
+  if (input_tensor->GetQuantParams().empty()) {
+    MS_LOG(ERROR) << "no quant param";
+    return nullptr;
+  }
+  const auto *quant_data = static_cast<const int8_t *>(input_tensor->MutableData());
+  auto *dequant_data = static_cast<float *>(malloc(input_tensor->ElementsNum() * sizeof(float)));
+  if (dequant_data == nullptr) {
+    MS_LOG(ERROR) << "malloc failed";
+    return nullptr;
+  }
+
+  if (input_tensor->GetQuantParams().size() != kPerTensor) {
+    size_t channels = static_cast<size_t>(input_tensor->Batch());
+    if (input_tensor->GetQuantParams().size() != channels) {
+      MS_LOG(ERROR) << "quant param size " << input_tensor->GetQuantParams().size() << " != channel num " << channels;
+      free(dequant_data);
+      return nullptr;
+    }
+    size_t per_channel_size = input_tensor->ElementsNum() / channels;
+    auto quant_param = input_tensor->GetQuantParams();
+    for (size_t i = 0; i < channels; i++) {
+      auto param = quant_param.at(i);
+      auto scale = param.scale;
+      auto zero_point = param.zeroPoint;
+      for (size_t j = 0; j < per_channel_size; j++) {
+        dequant_data[per_channel_size * i + j] =
+          static_cast<float>((quant_data[per_channel_size * i + j] - zero_point) * scale);
+      }
+    }
+  } else {
+    auto quant_param = input_tensor->GetQuantParams();
+    auto param = quant_param.front();
+    auto scale = param.scale;
+    auto zero_point = param.zeroPoint;
+    for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) {
+      dequant_data[j] = static_cast<float>((quant_data[j] - zero_point) * scale);
+    }
+  }
+  return dequant_data;
+}
}  // namespace mindspore::kernel
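For reference, the dequantization applied above is the standard affine mapping real_value = (quantized_value - zero_point) * scale, applied per output channel when there is one quant param per channel and per tensor otherwise. A minimal standalone illustration of the per-tensor case (the scale, zero point, and data here are made-up values, not from the diff):

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  const float scale = 0.5f;      // made-up quant param
  const int32_t zero_point = 3;  // made-up quant param
  const std::vector<int8_t> quant = {3, 5, 1, -4};
  std::vector<float> dequant(quant.size());
  for (size_t i = 0; i < quant.size(); ++i) {
    // Same formula as DequantWeight above.
    dequant[i] = static_cast<float>((quant[i] - zero_point) * scale);
  }
  for (float v : dequant) std::cout << v << ' ';  // prints: 0 1 -1 -3.5
  std::cout << '\n';
  return 0;
}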
@@ -28,6 +28,8 @@
#include "src/tensor.h"
#include "include/errorcode.h"
+
+static constexpr int kPerTensor = 1;

// using mindspore::kernel::AddressPtr;
namespace mindspore::kernel {
using mindspore::lite::RET_ERROR;

@@ -202,6 +204,8 @@ class LiteKernelUtil {
  static void InitTensorRefCount(std::vector<kernel::LiteKernel *> &kernels);

  static int SetInput(LiteKernel &kernelMod, std::vector<lite::Tensor *> inputs);
+
+  static float *DequantWeight(lite::Tensor *input_tensor);
};
}  // namespace mindspore::kernel
@@ -657,8 +657,8 @@ PrimitiveC *PrimitiveC::Create(mindspore::schema::PrimitiveT *primitive) {
      return new PowerGrad(primitive);
    case schema::PrimitiveType_BNGradInput:
      return new BNGradInput(primitive);
-    case schema::PrimitiveType_SoftmaxCrossEntroy:
-      return new SoftmaxCrossEntroy(primitive);
+    case schema::PrimitiveType_SoftmaxCrossEntropy:
+      return new SoftmaxCrossEntropy(primitive);
    case schema::PrimitiveType_Depend:
      return new Depend(primitive);
    case schema::PrimitiveType_FlattenGrad:
@@ -324,51 +324,4 @@ int ConvolutionBaseCPUKernel::SetQuantParam() {
                                       &conv_param_->conv_quant_arg_.out_act_max_[0]);
  return RET_OK;
}
-int ConvolutionBaseCPUKernel::RestoreFilter(lite::Tensor *input_tensor) {
-  MS_ASSERT(input_tensor != nullptr);
-  if (input_tensor->data_type() != kNumberTypeUInt8) {
-    MS_LOG(ERROR) << "conv weight input type error" << input_tensor->data_type();
-    return RET_ERROR;
-  }
-  if (input_tensor->GetQuantParams().empty()) {
-    MS_LOG(ERROR) << "no quant param";
-    return RET_ERROR;
-  }
-  const auto *quant_data = static_cast<const uint8_t *>(input_tensor->MutableData());
-  auto *dequant_data = static_cast<float *>(malloc(input_tensor->ElementsNum() * sizeof(float)));
-  if (dequant_data == nullptr) {
-    MS_LOG(ERROR) << "malloc faile";
-    return RET_ERROR;
-  }
-
-  if (input_tensor->GetQuantParams().size() != kPerTensor) {
-    size_t channels = static_cast<size_t>(input_tensor->Batch());
-    if (input_tensor->GetQuantParams().size() != channels) {
-      MS_LOG(ERROR) << "Quant param not equal channel num " << input_tensor->GetQuantParams().size() << channels;
-      free(dequant_data);
-      return RET_ERROR;
-    }
-    size_t per_channel_size = input_tensor->ElementsNum() / channels;
-    auto quant_param = input_tensor->GetQuantParams();
-    for (size_t i = 0; i < channels; i++) {
-      auto param = quant_param.at(i);
-      auto scale = param.scale;
-      auto zero_point = param.zeroPoint;
-      for (size_t j = 0; j < per_channel_size; j++) {
-        dequant_data[per_channel_size * i + j] =
-          static_cast<float>((quant_data[per_channel_size * i + j] - zero_point) * scale);
-      }
-    }
-  } else {
-    auto quant_param = input_tensor->GetQuantParams();
-    auto param = quant_param.front();
-    auto scale = param.scale;
-    auto zero_point = param.zeroPoint;
-    for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) {
-      dequant_data[j] = static_cast<float>((quant_data[j] - zero_point) * scale);
-    }
-  }
-  input_tensor->SetData(dequant_data);
-  return RET_OK;
-}
}  // namespace mindspore::kernel
@@ -32,7 +32,6 @@
using mindspore::lite::Context;
using mindspore::schema::PadMode;
using mindspore::schema::QuantType;
-static constexpr int kPerTensor = 1;

namespace mindspore::kernel {
class ConvolutionBaseCPUKernel : public LiteKernel {

@@ -60,7 +59,6 @@ class ConvolutionBaseCPUKernel : public LiteKernel {
  int SetQuantMultiplier();
  int CheckResizeValid();
  void FreeQuantParam();
-  static int RestoreFilter(lite::Tensor *input_tensor);

 protected:
  int tile_num_;
@@ -53,56 +53,6 @@ kernel::LiteKernel *CpuFullConnectionInt8KernelCreator(const std::vector<lite::T
  }
  return kernel;
}
-int RestoreFullconnectWeight(lite::Tensor *input_tensor) {
-  MS_ASSERT(input_tensor != nullptr);
-  if (input_tensor->data_type() != kNumberTypeInt8) {
-    MS_LOG(ERROR) << "full connect input type error" << input_tensor->data_type();
-    return RET_ERROR;
-  }
-  if (input_tensor->GetQuantParams().empty()) {
-    MS_LOG(ERROR) << "no quant param";
-    return RET_ERROR;
-  }
-  const auto *quant_data = static_cast<const int8_t *>(input_tensor->MutableData());
-  if (quant_data == nullptr) {
-    MS_LOG(ERROR) << "input_tensor MutableData is nullptr.";
-    return RET_ERROR;
-  }
-  auto *dequant_data = static_cast<float *>(malloc(input_tensor->ElementsNum() * sizeof(float)));
-  if (dequant_data == nullptr) {
-    MS_LOG(ERROR) << "malloc faile";
-    return RET_ERROR;
-  }
-
-  if (input_tensor->GetQuantParams().size() != kPerTensor) {
-    size_t channels = static_cast<size_t>(input_tensor->Batch());
-    if (input_tensor->GetQuantParams().size() != channels) {
-      MS_LOG(ERROR) << "Quant param not equal channel num " << input_tensor->GetQuantParams().size() << channels;
-      return RET_ERROR;
-    }
-    size_t per_channel_size = input_tensor->ElementsNum() / channels;
-    auto quant_param = input_tensor->GetQuantParams();
-    for (size_t i = 0; i < channels; i++) {
-      auto param = quant_param.at(i);
-      auto scale = param.scale;
-      auto zero_point = param.zeroPoint;
-      for (size_t j = 0; j < per_channel_size; j++) {
-        dequant_data[per_channel_size * i + j] =
-          static_cast<float>((quant_data[per_channel_size * i + j] - zero_point) * scale);
-      }
-    }
-  } else {
-    auto quant_param = input_tensor->GetQuantParams();
-    auto param = quant_param.front();
-    auto scale = param.scale;
-    auto zero_point = param.zeroPoint;
-    for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) {
-      dequant_data[j] = static_cast<float>((quant_data[j] - zero_point) * scale);
-    }
-  }
-  input_tensor->SetData(dequant_data);
-  return RET_OK;
-}
kernel::LiteKernel *CpuFullConnectionFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                       const std::vector<lite::Tensor *> &outputs,
                                                       OpParameter *opParameter, const lite::Context *ctx,
@@ -114,11 +64,20 @@ kernel::LiteKernel *CpuFullConnectionFp32KernelCreator(const std::vector<lite::T
  // data of second tensor of fc may be nullptr
  auto *restore_data = weight_tensor->data_c();
  if (!weight_tensor->GetQuantParams().empty()) {
-    RestoreFullconnectWeight(inputs.at(kWeightIndex));
+    auto *dequant_weight = kernel::LiteKernelUtil::DequantWeight(weight_tensor);
+    if (dequant_weight == nullptr) {
+      MS_LOG(ERROR) << "dequant data is nullptr.";
+      return nullptr;
+    }
+    weight_tensor->SetData(dequant_weight);
  }
  auto kernel = new (std::nothrow) FullconnectionCPUKernel(opParameter, inputs, outputs, ctx, primitive);
  if (!kernel) {
    MS_LOG(ERROR) << "kernel is nullptr.";
+    if (!weight_tensor->GetQuantParams().empty()) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
    return nullptr;
  }
  auto ret = kernel->Init();

@@ -126,6 +85,10 @@ kernel::LiteKernel *CpuFullConnectionFp32KernelCreator(const std::vector<lite::T
    delete kernel;
    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
+    if (!weight_tensor->GetQuantParams().empty()) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
    return nullptr;
  }
  if (!weight_tensor->GetQuantParams().empty()) {
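The fullconnection, matmul, conv, and scale creators in this commit all wrap DequantWeight in the same backup-and-restore discipline: stash the original (still-quantized) buffer, swap in the dequantized copy for kernel construction, and free the copy and put the original back afterwards, including on every failure path. A self-contained sketch of that discipline in isolation (FakeTensor is a stand-in for lite::Tensor, not MindSpore code):

#include <cstdlib>
#include <iostream>

struct FakeTensor {                        // stand-in for lite::Tensor
  void *data = nullptr;
  void SetData(void *d) { data = d; }
  void FreeData() { free(data); data = nullptr; }
};

bool CreateKernelWithDequant(FakeTensor *weight, bool init_fails) {
  void *restore_data = weight->data;       // back up the quantized weights
  auto *dequant = static_cast<float *>(malloc(4 * sizeof(float)));
  if (dequant == nullptr) return false;
  weight->SetData(dequant);                // kernel construction sees float weights
  // ... construct the kernel and call Init() here ...
  if (init_fails) {
    weight->FreeData();                    // drop the dequant copy
    weight->SetData(restore_data);         // leave the tensor as we found it
    return false;
  }
  weight->FreeData();                      // kernel has consumed the float copy
  weight->SetData(restore_data);
  return true;
}

int main() {
  FakeTensor t;
  t.data = malloc(4);                      // pretend these are int8 weights
  std::cout << CreateKernelWithDequant(&t, true) << '\n';  // 0, tensor intact
  free(t.data);
  return 0;
}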
@@ -23,7 +23,6 @@
#include "nnacl/matmul_parameter.h"

using mindspore::lite::Context;
-static constexpr int kPerTensor = 1;

namespace mindspore::kernel {
class FullconnectionBaseCPUKernel : public LiteKernel {
@@ -26,56 +26,6 @@ using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_MatMul;

namespace mindspore::kernel {
-int RestoreMatmulWeight(lite::Tensor *input_tensor) {
-  MS_ASSERT(input_tensor != nullptr);
-  if (input_tensor->data_type() != kNumberTypeUInt8) {
-    MS_LOG(ERROR) << "mat mul input type error" << input_tensor->data_type();
-    return RET_ERROR;
-  }
-  if (input_tensor->GetQuantParams().empty()) {
-    MS_LOG(ERROR) << "no quant param";
-    return RET_ERROR;
-  }
-  const auto *quant_data = static_cast<const uint8_t *>(input_tensor->MutableData());
-  if (quant_data == nullptr) {
-    MS_LOG(ERROR) << "input_tensor MutableData is nullptr.";
-    return RET_ERROR;
-  }
-  auto *dequant_data = static_cast<float *>(malloc(input_tensor->ElementsNum() * sizeof(float)));
-  if (dequant_data == nullptr) {
-    MS_LOG(ERROR) << "malloc faile";
-    return RET_ERROR;
-  }
-
-  if (input_tensor->GetQuantParams().size() != kPerTensor) {
-    size_t channels = static_cast<size_t>(input_tensor->Batch());
-    if (input_tensor->GetQuantParams().size() != channels) {
-      MS_LOG(ERROR) << "Quant param not equal channel num " << input_tensor->GetQuantParams().size() << channels;
-      return RET_ERROR;
-    }
-    size_t per_channel_size = input_tensor->ElementsNum() / channels;
-    auto quant_param = input_tensor->GetQuantParams();
-    for (size_t i = 0; i < channels; i++) {
-      auto param = quant_param.at(i);
-      auto scale = param.scale;
-      auto zero_point = param.zeroPoint;
-      for (size_t j = 0; j < per_channel_size; j++) {
-        dequant_data[per_channel_size * i + j] =
-          static_cast<float>((quant_data[per_channel_size * i + j] - zero_point) * scale);
-      }
-    }
-  } else {
-    auto quant_param = input_tensor->GetQuantParams();
-    auto param = quant_param.front();
-    auto scale = param.scale;
-    auto zero_point = param.zeroPoint;
-    for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) {
-      dequant_data[j] = static_cast<float>((quant_data[j] - zero_point) * scale);
-    }
-  }
-  input_tensor->SetData(dequant_data);
-  return RET_OK;
-}
kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::Tensor *> &inputs,
                                           const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
                                           const lite::Context *ctx, const kernel::KernelKey &desc,
@@ -89,8 +39,13 @@ kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::Tensor *> &in
    MS_LOG(ERROR) << "weight_tensor MutableData is nullptr.";
    return nullptr;
  }
-  if (primitive->GetQuantType() == schema::QuantType_WeightQuant) {
-    RestoreMatmulWeight(inputs.at(kWeightIndex));
+  if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+    auto *dequant_weight = kernel::LiteKernelUtil::DequantWeight(weight_tensor);
+    if (dequant_weight == nullptr) {
+      MS_LOG(ERROR) << "dequant data is nullptr.";
+      return nullptr;
+    }
+    weight_tensor->SetData(dequant_weight);
  }

  auto input_tensor = inputs.at(kInputIndex);

@@ -103,6 +58,10 @@ kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::Tensor *> &in
  }
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "kernel is nullptr.";
+    if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
    return nullptr;
  }
  auto ret = kernel->Init();

@@ -110,10 +69,14 @@ kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::Tensor *> &in
    delete kernel;
    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
+    if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
    return nullptr;
  }

-  if (primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+  if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
    weight_tensor->FreeData();
    weight_tensor->SetData(restore_data);
  }
@@ -23,7 +23,6 @@
#include "nnacl/matmul_parameter.h"

using mindspore::lite::Context;
-static constexpr int kPerTensor = 1;

namespace mindspore::kernel {
class MatmulBaseCPUKernel : public LiteKernel {
@@ -69,52 +69,6 @@ int ArithmeticSelfCPUKernel::DoArithmeticSelf(int task_id) {
  }
  return RET_OK;
}
-int RestoreMulWeight(lite::Tensor *input_tensor) {
-  MS_ASSERT(input_tensor != nullptr);
-  if (input_tensor->data_type() != kNumberTypeUInt8) {
-    MS_LOG(ERROR) << "full connect input type error" << input_tensor->data_type();
-    return RET_ERROR;
-  }
-  if (input_tensor->GetQuantParams().empty()) {
-    MS_LOG(ERROR) << "no quant param";
-    return RET_ERROR;
-  }
-  const auto *quant_data = static_cast<const uint8_t *>(input_tensor->MutableData());
-  auto *dequant_data = static_cast<float *>(malloc(input_tensor->ElementsNum() * sizeof(float)));
-  if (dequant_data == nullptr) {
-    MS_LOG(ERROR) << "malloc faile";
-    return RET_ERROR;
-  }
-
-  if (input_tensor->GetQuantParams().size() != kPerTensor) {
-    size_t channels = static_cast<size_t>(input_tensor->Batch());
-    if (input_tensor->GetQuantParams().size() != channels) {
-      MS_LOG(ERROR) << "Quant param not equal channel num " << input_tensor->GetQuantParams().size() << channels;
-      return RET_ERROR;
-    }
-    size_t per_channel_size = input_tensor->ElementsNum() / channels;
-    auto quant_param = input_tensor->GetQuantParams();
-    for (size_t i = 0; i < channels; i++) {
-      auto param = quant_param.at(i);
-      auto scale = param.scale;
-      auto zero_point = param.zeroPoint;
-      for (size_t j = 0; j < per_channel_size; j++) {
-        dequant_data[per_channel_size * i + j] =
-          static_cast<float>((quant_data[per_channel_size * i + j] - zero_point) * scale);
-      }
-    }
-  } else {
-    auto quant_param = input_tensor->GetQuantParams();
-    auto param = quant_param.front();
-    auto scale = param.scale;
-    auto zero_point = param.zeroPoint;
-    for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) {
-      dequant_data[j] = static_cast<float>((quant_data[j] - zero_point) * scale);
-    }
-  }
-  input_tensor->SetData(dequant_data);
-  return RET_OK;
-}
int ArithmeticSelfCPUKernel::Run() {
  auto ret = Prepare();
  if (ret != RET_OK) {
@@ -37,7 +37,6 @@ using mindspore::schema::PrimitiveType_Sin;
using mindspore::schema::PrimitiveType_Sqrt;
using mindspore::schema::PrimitiveType_Square;
using mindspore::schema::PrimitiveType_Neg;
-static constexpr int kPerTensor = 1;

namespace mindspore::kernel {
class ArithmeticSelfCPUKernel : public LiteKernel {
@@ -235,7 +235,12 @@ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::Tensor *> &
  auto *weight_tensor = inputs.at(kWeightIndex);
  auto *restore_data = weight_tensor->MutableData();
  if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
-    ConvolutionBaseCPUKernel::RestoreFilter(inputs.at(kWeightIndex));
+    auto *dequant_weight = kernel::LiteKernelUtil::DequantWeight(weight_tensor);
+    if (dequant_weight == nullptr) {
+      MS_LOG(ERROR) << "dequant data is nullptr.";
+      return nullptr;
+    }
+    weight_tensor->SetData(dequant_weight);
  }

  kernel::LiteKernel *kernel;

@@ -253,6 +258,10 @@ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::Tensor *> &
  }
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "kernel is nullptr.";
+    if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
    return nullptr;
  }
  auto ret = kernel->Init();

@@ -260,6 +269,10 @@ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::Tensor *> &
    delete kernel;
    MS_LOG(ERROR) << "Init kernel failed, name: " << op_parameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(op_parameter->type_));
+    if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
    return nullptr;
  }
@@ -134,7 +134,12 @@ kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector<lite::Tensor *>
  auto *weight_tensor = inputs.at(kWeightIndex);
  auto *restore_data = weight_tensor->MutableData();
  if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
-    ConvolutionBaseCPUKernel::RestoreFilter(inputs.at(kWeightIndex));
+    auto *dequant_weight = kernel::LiteKernelUtil::DequantWeight(weight_tensor);
+    if (dequant_weight == nullptr) {
+      MS_LOG(ERROR) << "dequant data is nullptr.";
+      return nullptr;
+    }
+    weight_tensor->SetData(dequant_weight);
  }

  auto conv_param = reinterpret_cast<ConvParameter *>(opParameter);

@@ -146,6 +151,10 @@ kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector<lite::Tensor *>
  }
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "kernel is nullptr.";
+    if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
    return nullptr;
  }
  auto ret = kernel->Init();

@@ -153,6 +162,10 @@ kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector<lite::Tensor *>
    delete kernel;
    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
+    if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
    return nullptr;
  }
@@ -169,69 +169,32 @@ int ScaleCPUKernel::Run() {
  }
  return RET_OK;
}
-int RestoreScaleWeight(lite::Tensor *input_tensor) {
-  MS_ASSERT(input_tensor != nullptr);
-  if (input_tensor->data_type() != kNumberTypeUInt8) {
-    MS_LOG(ERROR) << "mat mul input type error" << input_tensor->data_type();
-    return RET_ERROR;
-  }
-  if (input_tensor->GetQuantParams().empty()) {
-    MS_LOG(ERROR) << "no quant param";
-    return RET_ERROR;
-  }
-  const auto *quant_data = static_cast<const uint8_t *>(input_tensor->MutableData());
-  auto *dequant_data = static_cast<float *>(malloc(input_tensor->ElementsNum() * sizeof(float)));
-  if (dequant_data == nullptr) {
-    MS_LOG(ERROR) << "malloc faile";
-    return RET_ERROR;
-  }
-
-  if (input_tensor->GetQuantParams().size() != kPerTensor) {
-    size_t channels = static_cast<size_t>(input_tensor->Batch());
-    if (input_tensor->GetQuantParams().size() != channels) {
-      MS_LOG(ERROR) << "Quant param not equal channel num " << input_tensor->GetQuantParams().size() << channels;
-      return RET_ERROR;
-    }
-    size_t per_channel_size = input_tensor->ElementsNum() / channels;
-    auto quant_param = input_tensor->GetQuantParams();
-    for (size_t i = 0; i < channels; i++) {
-      auto param = quant_param.at(i);
-      auto scale = param.scale;
-      auto zero_point = param.zeroPoint;
-      for (size_t j = 0; j < per_channel_size; j++) {
-        dequant_data[per_channel_size * i + j] =
-          static_cast<float>((quant_data[per_channel_size * i + j] - zero_point) * scale);
-      }
-    }
-  } else {
-    auto quant_param = input_tensor->GetQuantParams();
-    auto param = quant_param.front();
-    auto scale = param.scale;
-    auto zero_point = param.zeroPoint;
-    for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) {
-      dequant_data[j] = static_cast<float>((quant_data[j] - zero_point) * scale);
-    }
-  }
-  input_tensor->SetData(dequant_data);
-  return RET_OK;
-}
kernel::LiteKernel *CpuScaleFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                              const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
                                              const lite::Context *ctx, const kernel::KernelKey &desc,
                                              const mindspore::lite::PrimitiveC *primitive) {
  MS_ASSERT(desc.type == schema::PrimitiveType_Scale);
-  auto *weight_tensor = inputs.at(kWeightIndex);
-  auto *restore_data = weight_tensor->MutableData();
-  if (primitive->GetQuantType() == schema::QuantType_WeightQuant) {
-    RestoreScaleWeight(inputs.at(kWeightIndex));
-  }
  if (opParameter == nullptr) {
    MS_LOG(ERROR) << "opParameter is nullptr";
    return nullptr;
  }
+  auto *weight_tensor = inputs.at(kWeightIndex);
+  auto *restore_data = weight_tensor->MutableData();
+  if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+    auto *dequant_weight = kernel::LiteKernelUtil::DequantWeight(weight_tensor);
+    if (dequant_weight == nullptr) {
+      MS_LOG(ERROR) << "dequant data is nullptr.";
+      return nullptr;
+    }
+    weight_tensor->SetData(dequant_weight);
+  }
  auto *kernel = new (std::nothrow) ScaleCPUKernel(opParameter, inputs, outputs, ctx, primitive);
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "New kernel fails.";
+    if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
    return nullptr;
  }

@@ -240,9 +203,13 @@ kernel::LiteKernel *CpuScaleFp32KernelCreator(const std::vector<lite::Tensor *>
    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
    delete kernel;
+    if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+      weight_tensor->FreeData();
+      weight_tensor->SetData(restore_data);
+    }
    return nullptr;
  }
-  if (primitive->GetQuantType() == schema::QuantType_WeightQuant) {
+  if (weight_tensor->data_type() == kNumberTypeInt8 || primitive->GetQuantType() == schema::QuantType_WeightQuant) {
    weight_tensor->FreeData();
    weight_tensor->SetData(restore_data);
  }
@@ -21,7 +21,6 @@
#include "src/lite_kernel.h"
#include "nnacl/fp32/scale.h"

-static constexpr int kPerTensor = 1;
namespace mindspore::kernel {

class ScaleCPUKernel : public LiteKernel {
@@ -65,7 +65,7 @@ int ApplyMomentumCPUKernel::Init() {
  // Only for test with uninitialized Data
  size_t elem_num = in_tensors_[0]->ElementsNum();
  auto accumulate = reinterpret_cast<float *>(in_tensors_[1]->MutableData());
-  for (int i = 0; i < elem_num; i++) accumulate[i] = 0.0;
+  for (size_t i = 0; i < elem_num; i++) accumulate[i] = 0.0;

  workspace = new float[elem_num];
  return 0;
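A side fix unrelated to quantization: the loop counter above changes from int to size_t so it matches the type of elem_num. A minimal illustration of the mismatch the change removes (the values are made up):

#include <cstddef>
#include <iostream>

int main() {
  size_t elem_num = 3;
  // for (int i = 0; i < elem_num; i++)    // int vs size_t: -Wsign-compare
  for (size_t i = 0; i < elem_num; i++) {  // matching types, warning-free
    std::cout << i << ' ';                 // prints: 0 1 2
  }
  std::cout << '\n';
  return 0;
}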
@@ -0,0 +1 @@
+ml_face_openclose.tflite
@@ -127,6 +127,27 @@ function Run_x86() {
            return 1
        fi
    done < ${models_mindspore_config}
+
+    # Run tflite weight quantization converted models:
+    while read line; do
+        model_name=${line}
+        if [[ $model_name == \#* ]]; then
+            continue
+        fi
+        echo ${model_name} >> "${run_benchmark_log_file}"
+        echo 'cd '${convertor_path}'/mindspore-lite-'${version}'-runtime-x86-'${process_unit_x86} >> "${run_benchmark_log_file}"
+        cd ${convertor_path}/mindspore-lite-${version}-runtime-x86-${process_unit_x86} || return 1
+        echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelPath='${ms_models_path}'/'${model_name}'_weightquant.ms --inDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --calibDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --warmUpLoopCount=1 --loopCount=1' >> "${run_benchmark_log_file}"
+        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelPath=${ms_models_path}/${model_name}_weightquant.ms --inDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --calibDataPath=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --warmUpLoopCount=1 --loopCount=1 >> "${run_benchmark_log_file}"
+        if [ $? = 0 ]; then
+            run_result='x86: '${model_name}'_weightquant pass'
+            echo ${run_result} >> ${run_benchmark_result_file}
+        else
+            run_result='x86: '${model_name}'_weightquant failed'
+            echo ${run_result} >> ${run_benchmark_result_file}
+            return 1
+        fi
+    done < ${models_tflite_weightquant_config}
}

# Run on arm64 platform:
@@ -432,6 +453,7 @@ models_tflite_config=${basepath}/models_tflite.cfg
models_caffe_config=${basepath}/models_caffe.cfg
models_tflite_awaretraining_config=${basepath}/models_tflite_awaretraining.cfg
models_tflite_posttraining_config=${basepath}/models_tflite_posttraining.cfg
+models_tflite_weightquant_config=${basepath}/models_tflite_weightquant.cfg
models_onnx_config=${basepath}/models_onnx.cfg
models_fp16_config=${basepath}/models_fp16.cfg
models_mindspore_config=${basepath}/models_mindspore.cfg
@@ -522,6 +544,17 @@ while read line; do
    cp ${ms_models_path}/${model_name}.ms ${ms_models_path}/${model_name}.fp16.ms
done < ${models_fp16_config}

+# Convert weightquant models:
+while read line; do
+    model_name=${line}
+    if [[ $model_name == \#* ]]; then
+        continue
+    fi
+    echo ${model_name} >> "${run_benchmark_log_file}"
+    echo './converter_lite --fmk=TFLITE --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}'_weightquant --quantType=WeightQuant --bitNum=8 --quantSize=500 --convWeightQuantChannelThreshold=16' >> "${run_benchmark_log_file}"
+    ./converter_lite --fmk=TFLITE --modelFile=$models_path/${model_name} --outputFile=${ms_models_path}/${model_name}_weightquant --quantType=WeightQuant --bitNum=8 --quantSize=500 --convWeightQuantChannelThreshold=16 || Convert_status=$?
+done < ${models_tflite_weightquant_config}

# Check all result and return value
if [[ ${Convert_status} = 0 ]];then
    echo "convert is ended"
@@ -72,7 +72,7 @@ ValueNodePtr AnfImporterFromMetaGraphT::ConvertPrimitive(const std::unique_ptr<s
  auto primitiveCValue = PrimitiveC::Create(cNode->primitive.release());
  cNode->primitive = nullptr;
  // add quant parameter
-  if (cNode->quantType != schema::QuantType_PostTraining) {
+  if (cNode->quantType != schema::QuantType_PostTraining && cNode->quantType != schema::QuantType_WeightQuant) {
    primitiveCValue->SetQuantType(cNode->quantType);
    for (int index : cNode->inputIndex) {
      if (meta_graph_->allTensors[index]->quantParams.size() > 0) {
@@ -64,10 +64,15 @@ FuncGraphPtr AnfTransform::Transform(const FuncGraphPtr &old_graph, const conver
      return nullptr;
    }
  } else if (config->quantType == schema::QuantType_WeightQuant) {
+    auto bitNum = static_cast<size_t>(std::stoull(config->bitNum));
+    if (bitNum != quant::UINT8_QUANTIZATION) {
+      MS_LOG(ERROR) << "Currently only 8 bit weight quant is supported";
+      return nullptr;
+    }
    this->mQuantizer = std::make_unique<quant::WeightQuantizer>(
      new_graph, config->quantSize, config->convWeightQuantChannelThreshold, config->bitNum);
    if (mQuantizer == nullptr) {
-      MS_LOG(ERROR) << "New PostTrainingQuantizer failed";
+      MS_LOG(ERROR) << "New WeightQuantizer failed";
      return nullptr;
    }
  }
@@ -31,7 +31,7 @@ Flags::Flags() {
          "Input model weight file path. Needed when fmk is CAFFE. CAFFE: *.caffemodel", "");
  AddFlag(&Flags::inferenceTypeIn, "inferenceType",
          "Real data type saved in output file, reserved param, NOT used for now. FLOAT | INT8", "FLOAT");
-  AddFlag(&Flags::quantTypeIn, "quantType", "Quantization Type. AwareTraining | PostTraining", "");
+  AddFlag(&Flags::quantTypeIn, "quantType", "Quantization Type. AwareTraining | PostTraining | WeightQuant", "");
  AddFlag(&Flags::inputInferenceTypeIn, "inputInferenceType", "Input inference data type. FLOAT | INT8", "FLOAT");
  AddFlag(&Flags::stdDev, "stdDev", "Standard deviation value for aware-quantization", "128");
  AddFlag(&Flags::mean, "mean", "Mean value for aware-quantization", "-0.5");
@@ -86,6 +86,7 @@ STATUS WeightFormatHardCodePass::HardCodeCAFFE(const std::unique_ptr<CNodeT> &no
  MS_ASSERT(node->primitive != nullptr);
  auto opType = node->primitive->value.type;
  switch (this->quantType) {
+    case QuantType_WeightQuant:
    case QuantType_QUANT_NONE: {
      if (opType == schema::PrimitiveType_Conv2D || opType == schema::PrimitiveType_DepthwiseConv2D ||
          opType == schema::PrimitiveType_DeConv2D || opType == schema::PrimitiveType_DeDepthwiseConv2D) {

@@ -123,6 +124,7 @@ STATUS WeightFormatHardCodePass::HardCodeONNX(const std::unique_ptr<CNodeT> &nod
        return RET_ERROR;
      }
    } break;
+    case QuantType_WeightQuant:
    case QuantType_QUANT_NONE: {
      // conv (K x C/group x kH x kW) group = 1
      // depth (K x C/group x kH x kW) group = channelOut ==> (K, multiplier, H, W)

@@ -162,6 +164,7 @@ STATUS WeightFormatHardCodePass::HardCodeMS(const std::unique_ptr<CNodeT> &node,
        weightTensor->format = schema::Format::Format_KCHW;
      }
    } break;
+    case QuantType_WeightQuant:
    case QuantType_QUANT_NONE: {
      // sum up from current ms quant models
      if (opType == PrimitiveType_Conv2D) {
@@ -66,13 +66,14 @@ STATUS WeightQuantizer::DoConvQuantize(const std::list<CNodePtr> &nodes) {

    ParamValueLitePtr param_value = std::static_pointer_cast<ParamValueLite>(param_node->default_param());
    auto status =
-      QuantFilter<uint8_t>(param_value, primitive_c, QuantType_WeightQuant, 255, 0, bitNum, true, depthwise);
+      QuantFilter<int8_t>(param_value, primitive_c, QuantType_WeightQuant,
+                          quant_max, quant_min, bitNum, true, depthwise);
    if (status != RET_OK) {
      MS_LOG(ERROR) << "QuantFilter failed : " << status;
      return status;
    }
    // set dtype
-    param_value->set_tensor_type(kNumberTypeUInt8);
+    param_value->set_tensor_type(kNumberTypeInt8);
    auto abstractBase = param_node->abstract();
    if (abstractBase == nullptr) {
      MS_LOG(ERROR) << "Abstract of parameter is nullptr, " << param_node->name();

@@ -83,7 +84,7 @@ STATUS WeightQuantizer::DoConvQuantize(const std::list<CNodePtr> &nodes) {
      return RET_ERROR;
    }
    auto abstractTensor = utils::cast<abstract::AbstractTensorPtr>(abstractBase);
-    abstractTensor->element()->set_type(TypeIdToType(kNumberTypeUInt8));
+    abstractTensor->element()->set_type(TypeIdToType(kNumberTypeInt8));
    primitive_c->SetQuantType(schema::QuantType_WeightQuant);
  }
@@ -128,12 +129,13 @@ STATUS WeightQuantizer::DoMulQuantize(const std::list<CNodePtr> &nodes) {

    std::vector<schema::QuantParamT> quant_params;
    primitive_c->AddInputQuantParam(quant_params);
-    auto status = QuantFilter<uint8_t>(param_value, primitive_c, QuantType_WeightQuant, 255, 0, bitNum, true, false);
+    auto status = QuantFilter<int8_t>(param_value, primitive_c, QuantType_WeightQuant,
+                                      quant_max, quant_min, bitNum, true, false);
    if (status != RET_OK) {
      MS_LOG(ERROR) << "QuantFilter failed : " << status;
      return status;
    }
-    param_value->set_tensor_type(kNumberTypeUInt8);
+    param_value->set_tensor_type(kNumberTypeInt8);
    // set dtype
    auto abstractBase = param_node->abstract();
    if (abstractBase == nullptr) {

@@ -145,7 +147,7 @@ STATUS WeightQuantizer::DoMulQuantize(const std::list<CNodePtr> &nodes) {
      return RET_ERROR;
    }
    auto abstractTensor = utils::cast<abstract::AbstractTensorPtr>(abstractBase);
-    abstractTensor->element()->set_type(TypeIdToType(kNumberTypeUInt8));
+    abstractTensor->element()->set_type(TypeIdToType(kNumberTypeInt8));
    primitive_c->SetQuantType(schema::QuantType_WeightQuant);
  }
@@ -41,7 +41,8 @@ class WeightQuantizer : public Quantizer {
  STATUS DoQuantize(FuncGraphPtr funcGraph) override;
  STATUS DoConvQuantize(const std::list<CNodePtr> &nodes);
  STATUS DoMulQuantize(const std::list<CNodePtr> &nodes);

+  int quant_max{INT8_MAX};
+  int quant_min{INT8_MIN};
 private:
  std::unique_ptr<QuantStrategy> mStrategy;
  size_t bitNum;
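The quant_max/quant_min members introduced here are what the QuantFilter<int8_t> calls above consume: the target range moves from the uint8 span [0, 255] to the int8 span [-128, 127]. A rough sketch of how a scale/zero-point pair falls out of such a range, using the standard asymmetric-quantization arithmetic rather than code from this diff (the weight range and sample weight are made up):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

int main() {
  const int quant_min = INT8_MIN, quant_max = INT8_MAX;  // [-128, 127]
  const float w_min = -2.0f, w_max = 6.0f;               // observed weight range
  const float scale = (w_max - w_min) / (quant_max - quant_min);
  const int zero_point = static_cast<int>(std::round(quant_min - w_min / scale));
  const float w = 1.5f;                                  // one weight to quantize
  int q = static_cast<int>(std::round(w / scale)) + zero_point;
  q = std::min(std::max(q, quant_min), quant_max);       // clamp into the int8 range
  std::cout << "scale=" << scale << " zero_point=" << zero_point << " q=" << q << '\n';
  std::cout << "dequant=" << (q - zero_point) * scale << '\n';  // approximates 1.5
  return 0;
}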