[MS][LITE] Support per-channel MindIR quant models

This commit is contained in:
cjh9368 2021-04-12 18:40:51 +08:00
parent 7cba27ba22
commit 33ab40af78
9 changed files with 140 additions and 27 deletions

View File

@ -2,3 +2,4 @@ deeplabv3.r1.1.mindir 1.5
mobilenetv2.r1.1.mindir 0.5
ssd.r1.1.mindir 0.5
ssd_ghostnet.r1.1.mindir 2.0
lenet_quant.mindir 0.5

View File

@ -382,6 +382,16 @@ STATUS NodeInferShpae(const schema::CNodeT &node, const std::vector<Tensor *> &i
return ret;
}
// Looks up where `tensor_index` sits among the inputs of `cnode`.
// Returns the position inside `cnode.inputIndex`; if the tensor is listed more than once the highest position is
// returned, and if it is not listed at all the result is static_cast<size_t>(-1).
size_t GetTensorInputIndexInCNode(const uint32_t &tensor_index, const schema::CNodeT &cnode) {
  const auto &inputs = cnode.inputIndex;
  // Scan from the back so that the first hit is the last occurrence.
  for (size_t pos = inputs.size(); pos > 0; --pos) {
    if (inputs.at(pos - 1) == tensor_index) {
      return pos - 1;
    }
  }
  return static_cast<size_t>(-1);
}
STATUS TransFilterFormat(schema::TensorT *tensor, schema::Format dstFormat) {
if (tensor == nullptr) {
MS_LOG(ERROR) << "tensor is null";

View File

@ -71,11 +71,12 @@ std::unordered_map<schema::PrimitiveType, std::vector<int>> GetExtNhwcIndexes();
std::vector<schema::PrimitiveType> Getfp32FullOpList();
std::vector<schema::PrimitiveType> GetUint8NhwcOpList();
std::vector<schema::PrimitiveType> GetInt8OpList();
const schema::Primitive *ConvertToPrimitive(schema::PrimitiveT *primitive_t, flatbuffers::FlatBufferBuilder *fbb);
// Returns the position of tensor_index in cnode.inputIndex (the last one if it occurs more than once), or
// static_cast<size_t>(-1) when tensor_index is not among the node's inputs.
size_t GetTensorInputIndexInCNode(const uint32_t &tensor_index, const schema::CNodeT &cnode);
class NodeUtils {
public:
static STATUS ConvertDims(schema::Format src_format, const std::vector<int32_t> &src_dims, schema::Format dst_format,

View File

@ -20,6 +20,8 @@
#include "tools/converter/converter_context.h"
#include "tools/converter/quantizer/quantize_util.h"
#include "tools/common/tensor_util.h"
#include "tools/common/graph_util.h"
#include "tools/common/node_util.h"
namespace mindspore::lite {
namespace {
@ -112,6 +114,62 @@ STATUS ComputeDataToInt32(const std::unique_ptr<TensorT> &tensor) {
}
return RET_OK;
}
// Quantizes a tensor in place using one quant param per channel.
//
// tensor:       its `data` is read as packed floats and overwritten with int8 or int32 values; `dataType` is updated
//               to the destination type. NOTE(review): assumes the incoming data really is float32 - confirm at caller.
// tensor_index: index of `tensor` inside `graph`, used to find the single node that consumes it.
// graph:        graph holding the consuming node.
//
// Returns RET_OK on success. Returns RET_ERROR when the tensor is not consumed by exactly one node, the consuming
// node or its primitive is null, the channel count is zero, there are fewer quant params than channels, or the final
// copy fails.
STATUS ComputeQuantTensorPerChannel(TensorT *tensor, const int &tensor_index, const schema::MetaGraphT &graph) {
  MS_ASSERT(tensor != nullptr);
  bool channel_at_first = true;
  int channel_cnt = -1;
  auto used_nodes_idx = GetLinkedPostIdx(graph, tensor_index);
  if (used_nodes_idx.size() != 1) {
    MS_LOG(ERROR) << "Tensor is used by nodes more than one";
    return RET_ERROR;
  }
  auto &used_node = graph.nodes.at(used_nodes_idx.front());
  if (used_node == nullptr || used_node->primitive == nullptr) {
    MS_LOG(ERROR) << "node using the tensor or its primitive is null";
    return RET_ERROR;
  }
  auto &primitive = used_node->primitive;
  int input_index = static_cast<int>(GetTensorInputIndexInCNode(tensor_index, *used_node));
  quant::CalQuantAssitInfo(*primitive, tensor->dims, input_index, &channel_at_first, &channel_cnt);
  auto *raw_datas = reinterpret_cast<float *>(tensor->data.data());
  ShapeVector dims;
  std::transform(tensor->dims.begin(), tensor->dims.end(), std::back_inserter(dims),
                 [](int32_t dim) { return static_cast<int64_t>(dim); });
  auto channels = quant::CalChannels(dims, channel_cnt, &channel_at_first);
  if (channels == 0) {
    MS_LOG(ERROR) << "channels is zero";
    return RET_ERROR;
  }
  // Every channel indexes its own quant param below, so the list must cover all channels (this also guarantees
  // that front() is valid and rejects a negative channel count).
  if (channels < 0 || tensor->quantParams.size() < static_cast<size_t>(channels)) {
    MS_LOG(ERROR) << "quant param count " << tensor->quantParams.size() << " does not cover channels " << channels;
    return RET_ERROR;
  }
  const size_t channel_num = static_cast<size_t>(channels);
  // The destination type is decided once from the first quant param; the output buffer is sized for it, so the
  // same decision is used for every element to keep writes inside the buffer.
  const bool to_int32 = tensor->quantParams.front()->dstDtype == kNumberTypeInt32;
  int32_t dst_dtype = to_int32 ? kNumberTypeInt32 : kNumberTypeInt8;
  size_t elem_count = tensor->data.size() / sizeof(float);
  size_t data_size = to_int32 ? elem_count * sizeof(int32_t) : elem_count * sizeof(int8_t);
  std::vector<int8_t> dst_data(data_size);
  auto *dst_data_int32 = reinterpret_cast<int32_t *>(dst_data.data());
  size_t one_filter_size = elem_count / channel_num;
  for (size_t i = 0; i < channel_num; i++) {
    const auto &quant_param = tensor->quantParams.at(i);
    // do quantization
    for (size_t j = 0; j < one_filter_size; j++) {
      // Channel-first data keeps each channel contiguous; otherwise channels are interleaved.
      const size_t index = channel_at_first ? j + i * one_filter_size : j * channel_num + i;
      MS_ASSERT(index < elem_count);
      float raw_data = raw_datas[index];
      if (to_int32) {
        // Quantize the element itself (not the value at the channel index) with this channel's scale.
        dst_data_int32[index] = static_cast<int32_t>(std::round(raw_data / quant_param->scale));
      } else {
        dst_data[index] = quant::QuantizeData<int8_t>(raw_data, quant_param.get());
      }
    }
  }
  tensor->data.clear();
  tensor->data.resize(data_size);
  tensor->dataType = dst_dtype;
  if (memcpy_s(tensor->data.data(), data_size, dst_data.data(), data_size) != EOK) {
    MS_LOG(ERROR) << "memcpy_s failed";
    return RET_ERROR;
  }
  return RET_OK;
}
} // namespace
STATUS TensorQuantPass::Run(schema::MetaGraphT *graph) {
@ -133,8 +191,13 @@ STATUS TensorQuantPass::Run(schema::MetaGraphT *graph) {
continue;
}
if (tensor->quantParams.size() != 1) { // perchannel
MS_LOG(ERROR) << "perchannel do quant is not supported yet";
return RET_ERROR;
status = ComputeQuantTensorPerChannel(tensor.get(), index, *graph);
if (status != RET_OK) {
MS_LOG(ERROR) << "compute tensor to int8 prechannel failed.";
return RET_ERROR;
}
index++;
continue;
}
// perlayer
auto &quantParam = tensor->quantParams.front();

View File

@ -52,6 +52,12 @@ STATUS ConcatQuantParamPropogator::PropogateQuantParams(mindspore::schema::MetaG
MS_ASSERT(narrow_range == quantParam->narrowRange);
MS_ASSERT(num_bits == quantParam->numBits);
}
if (in_quant_param->max < in_quant_param->min) {
MS_LOG(DEBUG) << "Input quant param is invalid for propogator";
return RET_ERROR;
}
if (min_min > in_quant_param->min) {
min_min = in_quant_param->min;
}

View File

@ -35,23 +35,22 @@ STATUS ConvQuantParamPropogator::PropogateQuantParams(mindspore::schema::MetaGra
return RET_OK;
}
auto &input_quant_param = input_tensor->quantParams.at(0);
auto &weight_quant_param = weight_tensor->quantParams.at(0);
if (bias_tensor->quantParams.empty()) {
auto tmp_quant_param = std::make_unique<schema::QuantParamT>();
bias_tensor->quantParams.emplace_back(std::move(tmp_quant_param));
std::vector<std::unique_ptr<schema::QuantParamT>> bias_quant_params;
for (auto &weight_quant_param : weight_tensor->quantParams) {
auto bias_quant_param = std::make_unique<schema::QuantParamT>();
bias_quant_param->min = 0.0;
bias_quant_param->max = 0.0;
bias_quant_param->dstDtype = kNumberTypeInt32;
bias_quant_param->inited = input_quant_param->inited && weight_quant_param->inited;
bias_quant_param->zeroPoint = 0;
if (bias_quant_param->inited) {
bias_quant_param->scale = input_quant_param->scale * weight_quant_param->scale;
}
bias_quant_param->roundType = 1;
bias_quant_param->multiplier = 1;
bias_quant_params.emplace_back(std::move(bias_quant_param));
}
auto &bias_quant_param = bias_tensor->quantParams.front();
bias_quant_param->min = 0.0;
bias_quant_param->max = 0.0;
bias_quant_param->dstDtype = kNumberTypeInt32;
bias_quant_param->inited = input_quant_param->inited && weight_quant_param->inited;
bias_quant_param->zeroPoint = 0;
if (bias_quant_param->inited) {
bias_quant_param->scale = input_quant_param->scale * weight_quant_param->scale;
}
bias_quant_param->roundType = 1;
bias_quant_param->multiplier = 1;
bias_tensor->quantParams = std::move(bias_quant_params);
}
for (auto &quantParam : bias_tensor->quantParams) {
quantParam->dstDtype = TypeId::kNumberTypeInt32;

View File

@ -1042,4 +1042,32 @@ void CalQuantAssitInfo(const PrimitivePtr &primitive, const ShapeVector &shapes,
}
}
// Fills in per-channel layout hints for the input at position `index` of `primitive`.
//  - MatMul with a 2-D shape: writes *channel_at_first (false only for input 1 without transpose_b).
//  - LSTM: for inputs 1 and 2 with a 3-D shape writes *channel_cnt = shapes[0] * shapes[1]; for input 3 with a 2-D
//    shape writes *channel_cnt = 4 when the element count is a multiple of 4; anything else only logs a warning.
//  - Any other primitive: both outputs are left untouched.
void CalQuantAssitInfo(const schema::PrimitiveT &primitive, const std::vector<int> &shapes, int index,
                       bool *channel_at_first, int *channel_cnt) {
  const auto prim_type = primitive.value.type;
  if (prim_type == schema::PrimitiveType_MatMul && static_cast<int>(shapes.size()) == 2) {
    auto matmul_prim = primitive.value.AsMatMul();
    MS_ASSERT(matmul_prim != nullptr);
    *channel_at_first = index != 1 || matmul_prim->transpose_b;
    return;
  }
  if (prim_type != schema::PrimitiveType_LSTM) {
    return;
  }
  switch (index) {
    case 1:
    case 2:
      if (shapes.size() == 3) {
        *channel_cnt = shapes[0] * shapes[1];
      } else {
        MS_LOG(WARNING) << "unexpected lstm shape size: " << shapes.size();
      }
      break;
    case 3:
      if (shapes.size() == 2) {
        const auto tensor_elem_cnt = shapes[0] * shapes[1];
        if (tensor_elem_cnt % 4 == 0) {
          *channel_cnt = 4;
        }
      } else {
        MS_LOG(WARNING) << "unexpected lstm shape size: " << shapes.size();
      }
      break;
    default:
      MS_LOG(WARNING) << "unexpected index of lstm: " << index;
      break;
  }
}
} // namespace mindspore::lite::quant

View File

@ -120,6 +120,9 @@ int CalChannels(const ShapeVector &dims, int channel_cnt, bool *channel_at_first
void CalQuantAssitInfo(const PrimitivePtr &primitive, const ShapeVector &shapes, int index, bool *channel_at_first,
int *channel_cnt);
// Overload for schema primitives: for a MatMul with a 2-D shape it sets *channel_at_first; for LSTM inputs 1-3 it
// may set *channel_cnt from shapes. For other primitives neither output is written.
void CalQuantAssitInfo(const schema::PrimitiveT &primitive, const std::vector<int> &shapes, int index,
bool *channel_at_first, int *channel_cnt);
template <typename T>
T QuantizeData(const float originData, const schema::QuantParamT *quantParam) {
MS_ASSERT(quantParam != nullptr);

View File

@ -69,17 +69,19 @@ int ConvertInputQuantParam(const PrimitivePtr &prim, bool narrow_range, int32_t
quant_param.min = FLT_MAX;
quant_param.max = FLT_MIN;
for (int i = 0; i < filterMinPtr->ElementsNum(); ++i) {
quant_param.min = (*(minBuf) < quant_param.min) ? (*minBuf) : quant_param.min;
quant_param.max = (*(maxBuf) > quant_param.max) ? (*maxBuf) : quant_param.max;
schema::QuantParamT tmp_quant_param;
tmp_quant_param.min = *minBuf;
tmp_quant_param.max = *maxBuf;
auto ret =
lite::quant::CalQuantizationParams(&tmp_quant_param, tmp_quant_param.min, tmp_quant_param.max, true, numbits);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Can't calculate quant parameters";
return ret;
}
quants.emplace_back(tmp_quant_param);
minBuf++;
maxBuf++;
}
auto ret = lite::quant::CalQuantizationParams(&quant_param, quant_param.min, quant_param.max, true, numbits);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Can't calculate quant parameters";
return ret;
}
quants.emplace_back(quant_param);
quant_param_holder->set_input_quant_param(1, quants);
}
return lite::RET_OK;