forked from mindspore-Ecosystem/mindspore
[MS][LITE] support perchannel mindir quant model
This commit is contained in:
parent
7cba27ba22
commit
33ab40af78
|
@ -2,3 +2,4 @@ deeplabv3.r1.1.mindir 1.5
|
|||
mobilenetv2.r1.1.mindir 0.5
|
||||
ssd.r1.1.mindir 0.5
|
||||
ssd_ghostnet.r1.1.mindir 2.0
|
||||
lenet_quant.mindir 0.5
|
||||
|
|
|
@ -382,6 +382,16 @@ STATUS NodeInferShpae(const schema::CNodeT &node, const std::vector<Tensor *> &i
|
|||
return ret;
|
||||
}
|
||||
|
||||
// Looks up tensor_index in cnode.inputIndex and reports where it sits among the node's inputs.
// When the tensor is listed more than once, the position of the last occurrence is returned.
// When it is not listed at all, the result is static_cast<size_t>(-1).
size_t GetTensorInputIndexInCNode(const uint32_t &tensor_index, const schema::CNodeT &cnode) {
  const auto &node_inputs = cnode.inputIndex;
  // Walk from the back so the first hit is the last occurrence.
  for (size_t pos = node_inputs.size(); pos > 0; --pos) {
    if (node_inputs[pos - 1] == tensor_index) {
      return pos - 1;
    }
  }
  return static_cast<size_t>(-1);
}
|
||||
|
||||
STATUS TransFilterFormat(schema::TensorT *tensor, schema::Format dstFormat) {
|
||||
if (tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "tensor is null";
|
||||
|
|
|
@ -71,11 +71,12 @@ std::unordered_map<schema::PrimitiveType, std::vector<int>> GetExtNhwcIndexes();
|
|||
std::vector<schema::PrimitiveType> Getfp32FullOpList();
|
||||
|
||||
std::vector<schema::PrimitiveType> GetUint8NhwcOpList();
|
||||
|
||||
std::vector<schema::PrimitiveType> GetInt8OpList();
|
||||
|
||||
const schema::Primitive *ConvertToPrimitive(schema::PrimitiveT *primitive_t, flatbuffers::FlatBufferBuilder *fbb);
|
||||
|
||||
// Returns the position of tensor_index inside cnode.inputIndex (the last one if it occurs several times),
// or static_cast<size_t>(-1) when the node does not take that tensor as an input.
size_t GetTensorInputIndexInCNode(const uint32_t &tensor_index, const schema::CNodeT &cnode);
|
||||
|
||||
class NodeUtils {
|
||||
public:
|
||||
static STATUS ConvertDims(schema::Format src_format, const std::vector<int32_t> &src_dims, schema::Format dst_format,
|
||||
|
|
|
@ -20,6 +20,8 @@
|
|||
#include "tools/converter/converter_context.h"
|
||||
#include "tools/converter/quantizer/quantize_util.h"
|
||||
#include "tools/common/tensor_util.h"
|
||||
#include "tools/common/graph_util.h"
|
||||
#include "tools/common/node_util.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
namespace {
|
||||
|
@ -112,6 +114,62 @@ STATUS ComputeDataToInt32(const std::unique_ptr<TensorT> &tensor) {
|
|||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
// Quantizes a float tensor in place, one channel at a time.
//
// The tensor has to be consumed by exactly one node of the graph. That node's primitive, together with the
// position of the tensor among the node's inputs, is handed to quant::CalQuantAssitInfo and quant::CalChannels
// to obtain the channel count and whether the channel axis comes first. Each element is then quantized with the
// quant param of its channel, and tensor->data / tensor->dataType are replaced by the quantized result.
//
// tensor:       tensor to quantize; its data is read as float and it must carry at least one quant param per
//               channel.
// tensor_index: index identifying the tensor in the graph, as expected by GetLinkedPostIdx.
// graph:        graph used to find the consuming node.
//
// The destination type is int32 when the first quant param asks for kNumberTypeInt32, int8 otherwise; all
// channels must agree with that choice because the output buffer is sized from it.
//
// Returns RET_OK on success and RET_ERROR on any failure.
STATUS ComputeQuantTensorPerChannel(TensorT *tensor, const int &tensor_index, const schema::MetaGraphT &graph) {
  if (tensor == nullptr) {
    MS_LOG(ERROR) << "tensor is null";
    return RET_ERROR;
  }
  if (tensor->quantParams.empty() || tensor->quantParams.front() == nullptr) {
    MS_LOG(ERROR) << "tensor has no valid quant param";
    return RET_ERROR;
  }

  bool channel_at_first = true;
  int channel_cnt = -1;
  auto used_nodes_idx = GetLinkedPostIdx(graph, tensor_index);
  if (used_nodes_idx.size() != 1) {
    MS_LOG(ERROR) << "Tensor is used by nodes more than one";
    return RET_ERROR;
  }
  auto &used_node = graph.nodes.at(used_nodes_idx.front());
  if (used_node == nullptr || used_node->primitive == nullptr) {
    MS_LOG(ERROR) << "node or primitive is null";
    return RET_ERROR;
  }
  // A "not found" result (size_t(-1)) deliberately becomes -1 here, as it did before.
  int input_index = static_cast<int>(GetTensorInputIndexInCNode(tensor_index, *used_node));
  quant::CalQuantAssitInfo(*used_node->primitive, tensor->dims, input_index, &channel_at_first, &channel_cnt);

  auto *raw_datas = reinterpret_cast<float *>(tensor->data.data());
  ShapeVector dims;
  std::transform(tensor->dims.begin(), tensor->dims.end(), std::back_inserter(dims),
                 [](int32_t dim) { return static_cast<int64_t>(dim); });
  auto channels = quant::CalChannels(dims, channel_cnt, &channel_at_first);
  if (channels == 0) {
    MS_LOG(ERROR) << "channels is zero";
    return RET_ERROR;
  }
  // Every channel indexes quantParams below; reject the tensor instead of letting at() throw.
  if (channels < 0 || static_cast<size_t>(channels) > tensor->quantParams.size()) {
    MS_LOG(ERROR) << "quant param count " << tensor->quantParams.size() << " does not cover channel count "
                  << channels;
    return RET_ERROR;
  }

  const bool to_int32 = tensor->quantParams.front()->dstDtype == kNumberTypeInt32;
  int32_t dst_dtype = to_int32 ? kNumberTypeInt32 : kNumberTypeInt8;
  size_t elem_count = tensor->data.size() / sizeof(float);
  size_t data_size = to_int32 ? elem_count * sizeof(int32_t) : elem_count * sizeof(int8_t);
  std::vector<int8_t> dst_data(data_size);
  auto *dst_data_int32 = reinterpret_cast<int32_t *>(dst_data.data());
  size_t one_filter_size = elem_count / static_cast<size_t>(channels);

  for (int i = 0; i < channels; i++) {
    const auto &quant_param = tensor->quantParams.at(i);
    if (quant_param == nullptr) {
      MS_LOG(ERROR) << "quant param of channel " << i << " is null";
      return RET_ERROR;
    }
    // The buffer was sized from the first quant param, so a channel asking for another width would corrupt it.
    if ((quant_param->dstDtype == kNumberTypeInt32) != to_int32) {
      MS_LOG(ERROR) << "quant params of one tensor have different dst data types";
      return RET_ERROR;
    }
    // do quantization
    for (size_t j = 0; j < one_filter_size; j++) {
      const size_t index = channel_at_first ? j + static_cast<size_t>(i) * one_filter_size
                                            : j * static_cast<size_t>(channels) + static_cast<size_t>(i);
      MS_ASSERT(index < elem_count);
      float raw_data = raw_datas[index];
      if (to_int32) {
        // Quantize the element being written, not the element at the channel index.
        dst_data_int32[index] = static_cast<int32_t>(std::round(raw_data / quant_param->scale));
      } else {
        dst_data[index] = quant::QuantizeData<int8_t>(raw_data, quant_param.get());
      }
    }
  }

  tensor->data.clear();
  tensor->data.resize(data_size);
  tensor->dataType = dst_dtype;
  if (memcpy_s(tensor->data.data(), data_size, dst_data.data(), data_size) != EOK) {
    MS_LOG(ERROR) << "memcpy_s failed";
    return RET_ERROR;
  }
  return RET_OK;
}
|
||||
} // namespace
|
||||
|
||||
STATUS TensorQuantPass::Run(schema::MetaGraphT *graph) {
|
||||
|
@ -133,8 +191,13 @@ STATUS TensorQuantPass::Run(schema::MetaGraphT *graph) {
|
|||
continue;
|
||||
}
|
||||
if (tensor->quantParams.size() != 1) { // perchannel
|
||||
MS_LOG(ERROR) << "perchannel do quant is not supported yet";
|
||||
return RET_ERROR;
|
||||
status = ComputeQuantTensorPerChannel(tensor.get(), index, *graph);
|
||||
if (status != RET_OK) {
|
||||
MS_LOG(ERROR) << "compute tensor to int8 prechannel failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
index++;
|
||||
continue;
|
||||
}
|
||||
// perlayer
|
||||
auto &quantParam = tensor->quantParams.front();
|
||||
|
|
|
@ -52,6 +52,12 @@ STATUS ConcatQuantParamPropogator::PropogateQuantParams(mindspore::schema::MetaG
|
|||
MS_ASSERT(narrow_range == quantParam->narrowRange);
|
||||
MS_ASSERT(num_bits == quantParam->numBits);
|
||||
}
|
||||
|
||||
if (in_quant_param->max < in_quant_param->min) {
|
||||
MS_LOG(DEBUG) << "Input quant param is invalid for propogator";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
if (min_min > in_quant_param->min) {
|
||||
min_min = in_quant_param->min;
|
||||
}
|
||||
|
|
|
@ -35,23 +35,22 @@ STATUS ConvQuantParamPropogator::PropogateQuantParams(mindspore::schema::MetaGra
|
|||
return RET_OK;
|
||||
}
|
||||
auto &input_quant_param = input_tensor->quantParams.at(0);
|
||||
auto &weight_quant_param = weight_tensor->quantParams.at(0);
|
||||
|
||||
if (bias_tensor->quantParams.empty()) {
|
||||
auto tmp_quant_param = std::make_unique<schema::QuantParamT>();
|
||||
bias_tensor->quantParams.emplace_back(std::move(tmp_quant_param));
|
||||
std::vector<std::unique_ptr<schema::QuantParamT>> bias_quant_params;
|
||||
for (auto &weight_quant_param : weight_tensor->quantParams) {
|
||||
auto bias_quant_param = std::make_unique<schema::QuantParamT>();
|
||||
bias_quant_param->min = 0.0;
|
||||
bias_quant_param->max = 0.0;
|
||||
bias_quant_param->dstDtype = kNumberTypeInt32;
|
||||
bias_quant_param->inited = input_quant_param->inited && weight_quant_param->inited;
|
||||
bias_quant_param->zeroPoint = 0;
|
||||
if (bias_quant_param->inited) {
|
||||
bias_quant_param->scale = input_quant_param->scale * weight_quant_param->scale;
|
||||
}
|
||||
bias_quant_param->roundType = 1;
|
||||
bias_quant_param->multiplier = 1;
|
||||
bias_quant_params.emplace_back(std::move(bias_quant_param));
|
||||
}
|
||||
auto &bias_quant_param = bias_tensor->quantParams.front();
|
||||
bias_quant_param->min = 0.0;
|
||||
bias_quant_param->max = 0.0;
|
||||
bias_quant_param->dstDtype = kNumberTypeInt32;
|
||||
bias_quant_param->inited = input_quant_param->inited && weight_quant_param->inited;
|
||||
bias_quant_param->zeroPoint = 0;
|
||||
if (bias_quant_param->inited) {
|
||||
bias_quant_param->scale = input_quant_param->scale * weight_quant_param->scale;
|
||||
}
|
||||
bias_quant_param->roundType = 1;
|
||||
bias_quant_param->multiplier = 1;
|
||||
bias_tensor->quantParams = std::move(bias_quant_params);
|
||||
}
|
||||
for (auto &quantParam : bias_tensor->quantParams) {
|
||||
quantParam->dstDtype = TypeId::kNumberTypeInt32;
|
||||
|
|
|
@ -1042,4 +1042,32 @@ void CalQuantAssitInfo(const PrimitivePtr &primitive, const ShapeVector &shapes,
|
|||
}
|
||||
}
|
||||
|
||||
// Derives per-channel layout hints for one input tensor of a schema primitive.
//
// primitive:        primitive of the node that consumes the tensor.
// shapes:           dims of the tensor.
// index:            position of the tensor among the node's inputs.
// channel_at_first: [out] for a MatMul with a 2-D tensor, set to false only when the tensor is input 1 and
//                   transpose_b is false, and to true otherwise; left untouched for every other primitive.
// channel_cnt:      [out] for LSTM inputs 1 and 2 with a 3-D shape, set to shapes[0] * shapes[1]; for LSTM input 3
//                   with a 2-D shape whose element count is a multiple of 4, set to 4; left untouched otherwise.
//
// Unexpected LSTM shapes or indexes only produce a warning.
void CalQuantAssitInfo(const schema::PrimitiveT &primitive, const std::vector<int> &shapes, int index,
                       bool *channel_at_first, int *channel_cnt) {
  if (channel_at_first == nullptr || channel_cnt == nullptr) {
    MS_LOG(ERROR) << "channel_at_first or channel_cnt is null";
    return;
  }
  const auto prim_type = primitive.value.type;
  if (prim_type == schema::PrimitiveType_MatMul && shapes.size() == 2) {
    auto matmul_prim = primitive.value.AsMatMul();
    // MS_ASSERT is compiled out in release builds, so guard the dereference explicitly.
    if (matmul_prim == nullptr) {
      MS_LOG(ERROR) << "matmul primitive is null";
      return;
    }
    *channel_at_first = index != 1 || matmul_prim->transpose_b;
    return;
  }
  if (prim_type != schema::PrimitiveType_LSTM) {
    return;
  }

  if (index == 1 || index == 2) {
    if (shapes.size() != 3) {
      MS_LOG(WARNING) << "unexpected lstm shape size: " << shapes.size();
    } else {
      *channel_cnt = shapes[0] * shapes[1];
    }
  } else if (index == 3) {
    if (shapes.size() != 2) {
      MS_LOG(WARNING) << "unexpected lstm shape size: " << shapes.size();
    } else {
      auto tensor_elem_cnt = shapes[0] * shapes[1];
      if (tensor_elem_cnt % 4 == 0) {
        *channel_cnt = 4;
      }
    }
  } else {
    MS_LOG(WARNING) << "unexpected index of lstm: " << index;
  }
}
|
||||
|
||||
} // namespace mindspore::lite::quant
|
||||
|
|
|
@ -120,6 +120,9 @@ int CalChannels(const ShapeVector &dims, int channel_cnt, bool *channel_at_first
|
|||
void CalQuantAssitInfo(const PrimitivePtr &primitive, const ShapeVector &shapes, int index, bool *channel_at_first,
|
||||
int *channel_cnt);
|
||||
|
||||
// Overload working on a schema primitive: for a MatMul with a 2-D tensor it sets *channel_at_first, for LSTM
// inputs 1 to 3 with the expected rank it may set *channel_cnt; other primitives leave both outputs untouched.
void CalQuantAssitInfo(const schema::PrimitiveT &primitive, const std::vector<int> &shapes, int index,
|
||||
bool *channel_at_first, int *channel_cnt);
|
||||
|
||||
template <typename T>
|
||||
T QuantizeData(const float originData, const schema::QuantParamT *quantParam) {
|
||||
MS_ASSERT(quantParam != nullptr);
|
||||
|
|
|
@ -69,17 +69,19 @@ int ConvertInputQuantParam(const PrimitivePtr &prim, bool narrow_range, int32_t
|
|||
quant_param.min = FLT_MAX;
|
||||
quant_param.max = FLT_MIN;
|
||||
for (int i = 0; i < filterMinPtr->ElementsNum(); ++i) {
|
||||
quant_param.min = (*(minBuf) < quant_param.min) ? (*minBuf) : quant_param.min;
|
||||
quant_param.max = (*(maxBuf) > quant_param.max) ? (*maxBuf) : quant_param.max;
|
||||
schema::QuantParamT tmp_quant_param;
|
||||
tmp_quant_param.min = *minBuf;
|
||||
tmp_quant_param.max = *maxBuf;
|
||||
auto ret =
|
||||
lite::quant::CalQuantizationParams(&tmp_quant_param, tmp_quant_param.min, tmp_quant_param.max, true, numbits);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Can't calculate quant parameters";
|
||||
return ret;
|
||||
}
|
||||
quants.emplace_back(tmp_quant_param);
|
||||
minBuf++;
|
||||
maxBuf++;
|
||||
}
|
||||
auto ret = lite::quant::CalQuantizationParams(&quant_param, quant_param.min, quant_param.max, true, numbits);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Can't calculate quant parameters";
|
||||
return ret;
|
||||
}
|
||||
quants.emplace_back(quant_param);
|
||||
quant_param_holder->set_input_quant_param(1, quants);
|
||||
}
|
||||
return lite::RET_OK;
|
||||
|
|
Loading…
Reference in New Issue