forked from mindspore-Ecosystem/mindspore
!15024 [MS][LITE] Support perchannel
From: @cjh9368 Reviewed-by: @hangangqiang,@zhanghaibo5 Signed-off-by: @hangangqiang
Commit: 2396c76fe0

@@ -2,3 +2,4 @@ deeplabv3.r1.1.mindir 1.5
 mobilenetv2.r1.1.mindir 0.5
 ssd.r1.1.mindir 0.5
 ssd_ghostnet.r1.1.mindir 2.0
+lenet_quant.mindir 0.5

@@ -382,6 +382,16 @@ STATUS NodeInferShpae(const schema::CNodeT &node, const std::vector<Tensor *> &i
   return ret;
 }
 
+size_t GetTensorInputIndexInCNode(const uint32_t &tensor_index, const schema::CNodeT &cnode) {
+  size_t ret = -1;
+  for (size_t i = 0; i < cnode.inputIndex.size(); i++) {
+    if (cnode.inputIndex.at(i) == tensor_index) {
+      ret = i;
+    }
+  }
+  return ret;
+}
+
 STATUS TransFilterFormat(schema::TensorT *tensor, schema::Format dstFormat) {
   if (tensor == nullptr) {
     MS_LOG(ERROR) << "tensor is null";

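The helper added above is a linear scan that records the position of a tensor among a node's inputs, returning size_t(-1) when the tensor is absent; callers in this commit store the result in an int, so a miss surfaces as -1. A minimal self-contained sketch of the same lookup, using a plain std::vector<uint32_t> as a stand-in for schema::CNodeT::inputIndex and a hypothetical name FindInputPosition:

    #include <cstdint>
    #include <vector>

    // Return the position of tensor_index inside input_index, or size_t(-1) if absent.
    // As in the patch, the loop keeps scanning, so the last match wins.
    size_t FindInputPosition(uint32_t tensor_index, const std::vector<uint32_t> &input_index) {
      size_t pos = static_cast<size_t>(-1);
      for (size_t i = 0; i < input_index.size(); i++) {
        if (input_index.at(i) == tensor_index) {
          pos = i;
        }
      }
      return pos;
    }
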
@@ -71,11 +71,12 @@ std::unordered_map<schema::PrimitiveType, std::vector<int>> GetExtNhwcIndexes();
 std::vector<schema::PrimitiveType> Getfp32FullOpList();
 
 std::vector<schema::PrimitiveType> GetUint8NhwcOpList();
 
 std::vector<schema::PrimitiveType> GetInt8OpList();
 
 const schema::Primitive *ConvertToPrimitive(schema::PrimitiveT *primitive_t, flatbuffers::FlatBufferBuilder *fbb);
 
+size_t GetTensorInputIndexInCNode(const uint32_t &tensor_index, const schema::CNodeT &cnode);
+
 class NodeUtils {
  public:
  static STATUS ConvertDims(schema::Format src_format, const std::vector<int32_t> &src_dims, schema::Format dst_format,

@@ -20,6 +20,8 @@
 #include "tools/converter/converter_context.h"
 #include "tools/converter/quantizer/quantize_util.h"
 #include "tools/common/tensor_util.h"
+#include "tools/common/graph_util.h"
+#include "tools/common/node_util.h"
 
 namespace mindspore::lite {
 namespace {

@@ -112,6 +114,62 @@ STATUS ComputeDataToInt32(const std::unique_ptr<TensorT> &tensor) {
   }
   return RET_OK;
 }
+
+STATUS ComputeQuantTensorPerChannel(TensorT *tensor, const int &tensor_index, const schema::MetaGraphT &graph) {
+  bool channel_at_first = true;
+  int channel_cnt = -1;
+  auto used_nodes_idx = GetLinkedPostIdx(graph, tensor_index);
+  if (used_nodes_idx.size() != 1) {
+    MS_LOG(ERROR) << "Tensor is used by more than one node";
+    return RET_ERROR;
+  }
+  auto &used_node = graph.nodes.at(used_nodes_idx.front());
+  auto &primitive = used_node->primitive;
+  int input_index = GetTensorInputIndexInCNode(tensor_index, *used_node);
+  quant::CalQuantAssitInfo(*primitive, tensor->dims, input_index, &channel_at_first, &channel_cnt);
+
+  auto *raw_datas = reinterpret_cast<float *>(tensor->data.data());
+  ShapeVector dims;
+  std::transform(tensor->dims.begin(), tensor->dims.end(), std::back_inserter(dims),
+                 [&](int32_t dim) { return (int64_t)dim; });
+  auto channels = quant::CalChannels(dims, channel_cnt, &channel_at_first);
+  if (channels == 0) {
+    MS_LOG(ERROR) << "channels is zero";
+    return RET_ERROR;
+  }
+  int32_t dst_dtype = tensor->quantParams.front()->dstDtype == kNumberTypeInt32 ? kNumberTypeInt32 : kNumberTypeInt8;
+  size_t elem_count = tensor->data.size() / sizeof(float);
+  size_t data_size = dst_dtype == kNumberTypeInt32 ? elem_count * sizeof(int32_t) : elem_count * sizeof(int8_t);
+  std::vector<int8_t> dst_data(data_size);
+  size_t one_filter_size = elem_count / channels;
+  for (int i = 0; i < channels; i++) {
+    // do quantization
+    for (uint32_t j = 0; j < one_filter_size; j++) {
+      auto index = j + i * one_filter_size;
+      if (!channel_at_first) {
+        index = j * channels + i;
+      }
+      MS_ASSERT(index < elem_count);
+      float raw_data = raw_datas[index];
+      if (tensor->quantParams.at(i)->dstDtype == kNumberTypeInt32) {
+        auto quant_data = (int32_t)std::round(raw_data / tensor->quantParams.at(i)->scale);
+        auto *dst_data_int32 = reinterpret_cast<int32_t *>(dst_data.data());
+        dst_data_int32[index] = quant_data;
+      } else {
+        auto quant_data = quant::QuantizeData<int8_t>(raw_data, tensor->quantParams.at(i).get());
+        dst_data[index] = quant_data;
+      }
+    }
+  }
+  tensor->data.clear();
+  tensor->data.resize(data_size);
+  tensor->dataType = dst_dtype;
+  if (memcpy_s(tensor->data.data(), data_size, dst_data.data(), data_size) != EOK) {
+    MS_LOG(ERROR) << "memcpy_s failed";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
 }  // namespace
 
 STATUS TensorQuantPass::Run(schema::MetaGraphT *graph) {

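ComputeQuantTensorPerChannel applies one quant parameter per output channel, and the indexing is the heart of it: with the channel axis first, each channel's one_filter_size elements are contiguous; with the channel axis last, a channel's elements are strided by the channel count. A compact sketch of that arithmetic with hypothetical names (QuantizePerChannel, scales, and zero_points stand in for tensor->quantParams):

    #include <cmath>
    #include <cstdint>
    #include <vector>

    std::vector<int8_t> QuantizePerChannel(const std::vector<float> &raw, int channels,
                                           const std::vector<float> &scales,
                                           const std::vector<int32_t> &zero_points,
                                           bool channel_at_first) {
      size_t one_filter_size = raw.size() / channels;
      std::vector<int8_t> out(raw.size());
      for (int i = 0; i < channels; i++) {
        for (size_t j = 0; j < one_filter_size; j++) {
          // channel-first: channel i occupies [i * one_filter_size, (i + 1) * one_filter_size)
          // channel-last: channel i is every channels-th element, starting at offset i
          size_t index = channel_at_first ? j + i * one_filter_size : j * channels + i;
          int32_t q = static_cast<int32_t>(std::round(raw[index] / scales[i])) + zero_points[i];
          if (q > 127) q = 127;    // clamp to the int8 range
          if (q < -128) q = -128;
          out[index] = static_cast<int8_t>(q);
        }
      }
      return out;
    }
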
@@ -133,8 +191,13 @@ STATUS TensorQuantPass::Run(schema::MetaGraphT *graph) {
       continue;
     }
     if (tensor->quantParams.size() != 1) {  // perchannel
-      MS_LOG(ERROR) << "perchannel do quant is not supported yet";
-      return RET_ERROR;
+      status = ComputeQuantTensorPerChannel(tensor.get(), index, *graph);
+      if (status != RET_OK) {
+        MS_LOG(ERROR) << "compute tensor to int8 perchannel failed.";
+        return RET_ERROR;
+      }
+      index++;
+      continue;
     }
     // perlayer
     auto &quantParam = tensor->quantParams.front();

|
@ -52,6 +52,12 @@ STATUS ConcatQuantParamPropogator::PropogateQuantParams(mindspore::schema::MetaG
|
|||
MS_ASSERT(narrow_range == quantParam->narrowRange);
|
||||
MS_ASSERT(num_bits == quantParam->numBits);
|
||||
}
|
||||
|
||||
if (in_quant_param->max < in_quant_param->min) {
|
||||
MS_LOG(DEBUG) << "Input quant param is invalid for propogator";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
if (min_min > in_quant_param->min) {
|
||||
min_min = in_quant_param->min;
|
||||
}
|
||||
|
|
|
@@ -35,23 +35,22 @@ STATUS ConvQuantParamPropogator::PropogateQuantParams(mindspore::schema::MetaGra
     return RET_OK;
   }
   auto &input_quant_param = input_tensor->quantParams.at(0);
-  auto &weight_quant_param = weight_tensor->quantParams.at(0);
 
   if (bias_tensor->quantParams.empty()) {
-    auto tmp_quant_param = std::make_unique<schema::QuantParamT>();
-    bias_tensor->quantParams.emplace_back(std::move(tmp_quant_param));
-    auto &bias_quant_param = bias_tensor->quantParams.front();
-    bias_quant_param->min = 0.0;
-    bias_quant_param->max = 0.0;
-    bias_quant_param->dstDtype = kNumberTypeInt32;
-    bias_quant_param->inited = input_quant_param->inited && weight_quant_param->inited;
-    bias_quant_param->zeroPoint = 0;
-    if (bias_quant_param->inited) {
-      bias_quant_param->scale = input_quant_param->scale * weight_quant_param->scale;
-    }
-    bias_quant_param->roundType = 1;
-    bias_quant_param->multiplier = 1;
+    std::vector<std::unique_ptr<schema::QuantParamT>> bias_quant_params;
+    for (auto &weight_quant_param : weight_tensor->quantParams) {
+      auto bias_quant_param = std::make_unique<schema::QuantParamT>();
+      bias_quant_param->min = 0.0;
+      bias_quant_param->max = 0.0;
+      bias_quant_param->dstDtype = kNumberTypeInt32;
+      bias_quant_param->inited = input_quant_param->inited && weight_quant_param->inited;
+      bias_quant_param->zeroPoint = 0;
+      if (bias_quant_param->inited) {
+        bias_quant_param->scale = input_quant_param->scale * weight_quant_param->scale;
+      }
+      bias_quant_param->roundType = 1;
+      bias_quant_param->multiplier = 1;
+      bias_quant_params.emplace_back(std::move(bias_quant_param));
+    }
+    bias_tensor->quantParams = std::move(bias_quant_params);
   }
   for (auto &quantParam : bias_tensor->quantParams) {
     quantParam->dstDtype = TypeId::kNumberTypeInt32;

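The per-channel bias parameters are not free choices: an int32 conv accumulator holds sums of products of quantized inputs and weights, and each product represents real_in * real_w = (q_in * input_scale) * (q_w * weight_scale) up to zero points, so the bias must be quantized with scale input_scale * weight_scale to live in the same domain. A one-line sketch of that rule (QuantizeBias is a hypothetical name, not part of the propagator):

    #include <cmath>
    #include <cstdint>

    // Bias shares the accumulator's scale: input_scale * weight_scale (per weight channel).
    int32_t QuantizeBias(float bias, float input_scale, float weight_scale) {
      return static_cast<int32_t>(std::round(bias / (input_scale * weight_scale)));
    }
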
@@ -1042,4 +1042,32 @@ void CalQuantAssitInfo(const PrimitivePtr &primitive, const ShapeVector &shapes,
   }
 }
 
+void CalQuantAssitInfo(const schema::PrimitiveT &primitive, const std::vector<int> &shapes, int index,
+                       bool *channel_at_first, int *channel_cnt) {
+  if (primitive.value.type == schema::PrimitiveType_MatMul && static_cast<int>(shapes.size()) == 2) {
+    auto matmul_prim = primitive.value.AsMatMul();
+    MS_ASSERT(matmul_prim != nullptr);
+    *channel_at_first = index != 1 || matmul_prim->transpose_b;
+  } else if (primitive.value.type == schema::PrimitiveType_LSTM) {
+    if (index == 1 || index == 2) {
+      if (shapes.size() != 3) {
+        MS_LOG(WARNING) << "unexpected lstm shape size: " << shapes.size();
+      } else {
+        *channel_cnt = shapes[0] * shapes[1];
+      }
+    } else if (index == 3) {
+      if (shapes.size() != 2) {
+        MS_LOG(WARNING) << "unexpected lstm shape size: " << shapes.size();
+      } else {
+        auto tensor_elem_cnt = shapes[0] * shapes[1];
+        if (tensor_elem_cnt / 4 * 4 == tensor_elem_cnt) {
+          *channel_cnt = 4;
+        }
+      }
+    } else {
+      MS_LOG(WARNING) << "unexpected index of lstm: " << index;
+    }
+  }
+}
+
 }  // namespace mindspore::lite::quant

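The new CalQuantAssitInfo overload answers two questions for a weight tensor: which axis is the channel axis (channel_at_first), and how many channels there are when the shape alone does not say (channel_cnt). For a 2-D MatMul weight fed as the second input, the rows are the reduce axis unless the weight is transposed, which is exactly the condition in the hunk above. A stand-alone restatement with a hypothetical name:

    // The channel axis is first only when the tensor is not MatMul's second
    // input (index 1), or when transpose_b flips the 2-D weight's layout.
    bool MatMulChannelAtFirst(int input_index, bool transpose_b) {
      return input_index != 1 || transpose_b;
    }

For LSTM, the weights at inputs 1 and 2 collapse the leading two dims into one channel count, and the input-3 tensor is treated as four channels only when its element count divides evenly by four.
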
@@ -120,6 +120,9 @@ int CalChannels(const ShapeVector &dims, int channel_cnt, bool *channel_at_first
 void CalQuantAssitInfo(const PrimitivePtr &primitive, const ShapeVector &shapes, int index, bool *channel_at_first,
                        int *channel_cnt);
 
+void CalQuantAssitInfo(const schema::PrimitiveT &primitive, const std::vector<int> &shapes, int index,
+                       bool *channel_at_first, int *channel_cnt);
+
 template <typename T>
 T QuantizeData(const float originData, const schema::QuantParamT *quantParam) {
   MS_ASSERT(quantParam != nullptr);

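The body of QuantizeData<T> lies outside this hunk. A common affine form consistent with how it is called in this commit, assuming the quant param supplies scale and zeroPoint (an assumption, not the verbatim implementation; QuantizeDataSketch is a hypothetical name):

    #include <cmath>
    #include <cstdint>
    #include <limits>

    template <typename T>
    T QuantizeDataSketch(float value, double scale, int32_t zero_point) {
      // affine quantization: q = round(r / scale) + zero_point, clamped to T's range
      int32_t q = static_cast<int32_t>(std::round(value / scale)) + zero_point;
      const int32_t qmin = std::numeric_limits<T>::min();
      const int32_t qmax = std::numeric_limits<T>::max();
      if (q < qmin) q = qmin;
      if (q > qmax) q = qmax;
      return static_cast<T>(q);
    }
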
@@ -69,17 +69,19 @@ int ConvertInputQuantParam(const PrimitivePtr &prim, bool narrow_range, int32_t
     quant_param.min = FLT_MAX;
     quant_param.max = FLT_MIN;
     for (int i = 0; i < filterMinPtr->ElementsNum(); ++i) {
-      quant_param.min = (*(minBuf) < quant_param.min) ? (*minBuf) : quant_param.min;
-      quant_param.max = (*(maxBuf) > quant_param.max) ? (*maxBuf) : quant_param.max;
+      schema::QuantParamT tmp_quant_param;
+      tmp_quant_param.min = *minBuf;
+      tmp_quant_param.max = *maxBuf;
+      auto ret =
+        lite::quant::CalQuantizationParams(&tmp_quant_param, tmp_quant_param.min, tmp_quant_param.max, true, numbits);
+      if (ret != RET_OK) {
+        MS_LOG(ERROR) << "Can't calculate quant parameters";
+        return ret;
+      }
+      quants.emplace_back(tmp_quant_param);
       minBuf++;
       maxBuf++;
     }
-    auto ret = lite::quant::CalQuantizationParams(&quant_param, quant_param.min, quant_param.max, true, numbits);
-    if (ret != RET_OK) {
-      MS_LOG(ERROR) << "Can't calculate quant parameters";
-      return ret;
-    }
-    quants.emplace_back(quant_param);
     quant_param_holder->set_input_quant_param(1, quants);
   }
   return lite::RET_OK;

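The rewritten loop derives one QuantParamT per channel from that channel's (min, max) pair rather than a single parameter from the global range, which is what makes these weights per-channel. CalQuantizationParams itself is not shown; a standard derivation of scale and zero point mapping a real range onto a signed num_bits integer range would look roughly like this (a sketch under those assumptions; the struct and function names are hypothetical):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    struct QuantParamSketch {
      double scale = 1.0;
      int32_t zeroPoint = 0;
    };

    // Hypothetical stand-in for lite::quant::CalQuantizationParams.
    void CalQuantizationParamsSketch(QuantParamSketch *param, float min, float max,
                                     bool narrow_range, int num_bits) {
      min = std::min(min, 0.0f);  // the representable range must bracket zero
      max = std::max(max, 0.0f);
      const int32_t qmin = -(1 << (num_bits - 1)) + (narrow_range ? 1 : 0);
      const int32_t qmax = (1 << (num_bits - 1)) - 1;
      param->scale = (static_cast<double>(max) - min) / (qmax - qmin);
      if (param->scale == 0) param->scale = 1.0;  // degenerate all-zero range
      param->zeroPoint = static_cast<int32_t>(std::round(qmin - min / param->scale));
    }
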