From 45268c528954b1f7bb0cf4a24e171fa7e83a6d8d Mon Sep 17 00:00:00 2001 From: xutianchun Date: Tue, 18 Aug 2020 21:24:16 +0800 Subject: [PATCH] PerChannel Quantization --- .../lite/tools/anf_exporter/anf_exporter.cc | 4 +- .../quantizer/post_training_quantizer.cc | 53 ++++++++++++++++--- .../quantizer/post_training_quantizer.h | 5 +- .../converter/quantizer/quantize_util.cc | 2 +- 4 files changed, 51 insertions(+), 13 deletions(-) diff --git a/mindspore/lite/tools/anf_exporter/anf_exporter.cc b/mindspore/lite/tools/anf_exporter/anf_exporter.cc index 97b178982f6..ed0619cfc32 100644 --- a/mindspore/lite/tools/anf_exporter/anf_exporter.cc +++ b/mindspore/lite/tools/anf_exporter/anf_exporter.cc @@ -134,7 +134,7 @@ int AnfExporter::ConvertQuantParam(const std::unique_ptr &me for (auto input_quant_param : input_quant_params[i]) { std::unique_ptr input_quant_param_ptr = std::make_unique(input_quant_param); - MS_LOG(DEBUG) << "[input]node: " << dst_node->name << " scale: " << input_quant_param_ptr->scale + MS_LOG(DEBUG) << "[input][" << i << "]node: " << dst_node->name << " scale: " << input_quant_param_ptr->scale << " zp: " << input_quant_param_ptr->zeroPoint; tensor_input->quantParams.emplace_back(std::move(input_quant_param_ptr)); } @@ -152,7 +152,7 @@ int AnfExporter::ConvertQuantParam(const std::unique_ptr &me if (tensor_output->quantParams.empty()) { std::unique_ptr output_quant_param_ptr = std::make_unique(output_quant_param); - MS_LOG(DEBUG) << "[input]node: " << dst_node->name << " scale: " << output_quant_param_ptr->scale + MS_LOG(DEBUG) << "[output]node: " << dst_node->name << " scale: " << output_quant_param_ptr->scale << " zp: " << output_quant_param_ptr->zeroPoint; tensor_output->quantParams.emplace_back(std::move(output_quant_param_ptr)); } diff --git a/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc b/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc index 8ff8b111942..8538c6cf9bf 100644 --- 
a/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc +++ b/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc @@ -536,7 +536,7 @@ STATUS PostTrainingQuantizer::DoQuantOutput(double scale, int zeropoint, struct } STATUS PostTrainingQuantizer::DoWeightQuant(AnfNodePtr weight, std::shared_ptr primitiveT_value, - bool depthwise) { + bool perchannel, bool depthwise) { // const vector dims = filter->dims; // perlayer if (!weight->isa()) { @@ -544,9 +544,17 @@ STATUS PostTrainingQuantizer::DoWeightQuant(AnfNodePtr weight, std::shared_ptr

(weight); + if (parameter == nullptr) { + MS_LOG(ERROR) << weight->fullname_with_scope() << " can not cast to Parameter"; + return RET_ERROR; + } ParamValueLitePtr paramValue = std::dynamic_pointer_cast(parameter->default_param()); + if (paramValue == nullptr) { + MS_LOG(ERROR) << weight->fullname_with_scope() << " can not get value"; + return RET_ERROR; + } auto status = QuantFilter(paramValue, primitiveT_value, QuantType_PostTraining, quant_max, quant_min, bit_num, - per_channel_, depthwise); + perchannel, depthwise); if (status != RET_OK) { MS_LOG(ERROR) << "QuantFilter failed: " << status; return status; @@ -690,11 +698,29 @@ STATUS PostTrainingQuantizer::QuantNode() { auto op_name = cnode->fullname_with_scope(); auto op_type = primitiveT_value->GetPrimitiveT()->value.type; MS_LOG(INFO) << "OpName: " << op_name; - if (op_type != PrimitiveType_Conv2D && op_type != PrimitiveType_DepthwiseConv2D) { + if (op_type != PrimitiveType_Conv2D && op_type != PrimitiveType_DepthwiseConv2D && + op_type != PrimitiveType_FullConnection) { for (size_t i = 1; i < cnode->inputs().size(); i++) { auto input_node = cnode->input(i); if (!input_node->isa()) { - MS_LOG(WARNING) << "node: " << cnode_name << " input " << i << " not a cnode"; + MS_LOG(DEBUG) << "node: " << cnode_name << " input " << i << " not a cnode"; + // get dtype + auto abstractBase = input_node->abstract(); + if (abstractBase == nullptr) { + MS_LOG(ERROR) << "Abstract of parameter is nullptr, " << input_node->fullname_with_scope(); + return RET_ERROR; + } + if (!utils::isa(abstractBase)) { + MS_LOG(ERROR) << "Abstract of parameter should be abstract tensor, " << input_node->fullname_with_scope(); + return RET_ERROR; + } + auto abstractTensor = utils::cast(abstractBase); + if (abstractTensor->element()->GetTypeTrack()->type_id() == kNumberTypeFloat32) { + MS_LOG(DEBUG) << "this parameter do quant"; + DoWeightQuant(input_node, primitiveT_value, false, false); + } else { + MS_LOG(DEBUG) << "this parameter no need to do 
quant"; + } continue; } auto input_cnode = std::dynamic_pointer_cast(input_node); @@ -704,8 +730,15 @@ STATUS PostTrainingQuantizer::QuantNode() { << " PrimitiveTValue is null"; continue; } - for (auto &quant_param : input_cnode_primitiveT_value->GetOutputQuantParams()) { - primitiveT_value->AddInputQuantParam(quant_param); + if (!input_cnode_primitiveT_value->GetOutputQuantParams().empty()) { + for (auto &quant_param : input_cnode_primitiveT_value->GetOutputQuantParams()) { + primitiveT_value->AddInputQuantParam(quant_param); + } + } else { + // do input quant + double scale = input_scale[cnode]; + int32_t zp = input_zero_point[cnode]; + DoQuantInput(scale, zp, &input_min_max[cnode], primitiveT_value); } } } else { @@ -715,8 +748,12 @@ STATUS PostTrainingQuantizer::QuantNode() { DoQuantInput(scale, convInputzeropoint, &input_min_max[cnode], primitiveT_value); // do weight quant auto weight = cnode->input(2); - bool depthwise = op_type == PrimitiveType_DeDepthwiseConv2D; - DoWeightQuant(weight, primitiveT_value, depthwise); + bool depthwise = op_type == PrimitiveType_DepthwiseConv2D; + bool perchannel = per_channel_; + if (op_type == PrimitiveType_FullConnection) { + perchannel = false; + } + DoWeightQuant(weight, primitiveT_value, perchannel, depthwise); // do bias quant if (cnode->inputs().size() == 4) { auto bias = cnode->input(3); diff --git a/mindspore/lite/tools/converter/quantizer/post_training_quantizer.h b/mindspore/lite/tools/converter/quantizer/post_training_quantizer.h index 09780440e2e..587689c5c20 100644 --- a/mindspore/lite/tools/converter/quantizer/post_training_quantizer.h +++ b/mindspore/lite/tools/converter/quantizer/post_training_quantizer.h @@ -60,7 +60,7 @@ struct ConfigParam { class PostTrainingQuantizer : public Quantizer { public: PostTrainingQuantizer(FuncGraphPtr graph, std::string path, int bit_num, TypeId target_type = kNumberTypeInt8, - bool per_channel = false); + bool per_channel = true); STATUS DoQuantize(FuncGraphPtr funcGraph) 
override; @@ -96,7 +96,8 @@ class PostTrainingQuantizer : public Quantizer { STATUS DoQuantInput(double scale, int32_t zeropoint, struct MaxMin *max_min, std::shared_ptr); STATUS DoQuantOutput(double scale, int32_t zeropoint, struct MaxMin *max_min, std::shared_ptr); - STATUS DoWeightQuant(AnfNodePtr weight, std::shared_ptr primitiveT_value, bool depthwise); + STATUS DoWeightQuant(AnfNodePtr weight, std::shared_ptr primitiveT_value, bool perchannel, + bool depthwise); STATUS DoBiasQuant(AnfNodePtr bias, std::shared_ptr primitiveT_value); }; diff --git a/mindspore/lite/tools/converter/quantizer/quantize_util.cc b/mindspore/lite/tools/converter/quantizer/quantize_util.cc index f66b4bfcdfd..50bf9fa11f7 100644 --- a/mindspore/lite/tools/converter/quantizer/quantize_util.cc +++ b/mindspore/lite/tools/converter/quantizer/quantize_util.cc @@ -100,7 +100,7 @@ bool QuantStrategy::CanOpPostQuantized(AnfNodePtr &node) const { schema::PrimitiveType_Conv2D, schema::PrimitiveType_DepthwiseConv2D, schema::PrimitiveType_Add, schema::PrimitiveType_Pooling, schema::PrimitiveType_Concat, /*schema::PrimitiveType_SoftMax,*/ - schema::PrimitiveType_Reshape, /*schema::PrimitiveType_FullConnection,*/ + schema::PrimitiveType_Reshape, schema::PrimitiveType_FullConnection, schema::PrimitiveType_MatMul, schema::PrimitiveType_Activation}; return IsContain(uint8OpList, type);