From 5e51fa9fffee44f472a4c85f55481b715e5508e2 Mon Sep 17 00:00:00 2001 From: duzhixing <2823612538@qq.com> Date: Fri, 10 Feb 2023 17:59:10 +0800 Subject: [PATCH] fix matmul bug --- .../kernel/cpu/int8/matmul_base_int8.cc | 34 ++++++++++++++----- .../litert/kernel/cpu/int8/matmul_base_int8.h | 1 + .../config_level0/models_posttraining.cfg | 1 + ...coder_int8_202103.onnx_posttraining.config | 14 ++++++++ 4 files changed, 41 insertions(+), 9 deletions(-) create mode 100644 mindspore/lite/test/config_level0/quant/ml_asr_encoder_int8_202103.onnx_posttraining.config diff --git a/mindspore/lite/src/litert/kernel/cpu/int8/matmul_base_int8.cc b/mindspore/lite/src/litert/kernel/cpu/int8/matmul_base_int8.cc index ea6ff84f775..666f12f5e2f 100644 --- a/mindspore/lite/src/litert/kernel/cpu/int8/matmul_base_int8.cc +++ b/mindspore/lite/src/litert/kernel/cpu/int8/matmul_base_int8.cc @@ -179,18 +179,21 @@ void MatmulBaseInt8CPUKernel::FreeQuantParam() { free(quant_param_); quant_param_ = nullptr; } + + if (save_b_const_ != nullptr) { + free(save_b_const_); + save_b_const_ = nullptr; + } } int MatmulBaseInt8CPUKernel::MallocQuantParam() { auto weight_tensor = in_tensors_.at(1); auto weight_quant_params = weight_tensor->quant_params(); - auto w_shape = weight_tensor->shape(); - MS_CHECK_TRUE_MSG(weight_tensor->shape().size() >= DIMENSION_2D, lite::RET_ERROR, "weight dims should >=2"); - int col = param_->b_transpose_ ? w_shape[w_shape.size() - DIMENSION_2D] : w_shape[w_shape.size() - 1]; + MS_CHECK_TRUE_MSG(weight_quant_params.size() >= 1, lite::RET_ERROR, "weight quant params size should >= 1"); filter_per_channel_ = (weight_quant_params.size() > 1); + channel_num_ = weight_quant_params.size(); - channel_num_ = filter_per_channel_ ? col : 1; const int &init_size = channel_num_; quant_param_ = reinterpret_cast(malloc(sizeof(MatmulQuantParameter))); @@ -348,7 +351,8 @@ void MatmulBaseInt8CPUKernel::FreeTmpBuffer() { } int MatmulBaseInt8CPUKernel::TransferB() { - auto weight_data = reinterpret_cast(in_tensors_.at(1)->data()); + auto weight_data = (save_b_const_ == nullptr) ? reinterpret_cast(in_tensors_.at(1)->data()) + : reinterpret_cast(save_b_const_); CHECK_NULL_RETURN(weight_data); CHECK_NULL_RETURN(b_pack_func_); for (int i = 0; i < param_->batch; i++) { @@ -365,6 +369,10 @@ int MatmulBaseInt8CPUKernel::TransferB() { quant_param_->filter_zp_, bias_ptr_, current_sums, RowMajor, filter_per_channel_); } } + if (save_b_const_ != nullptr) { + free(save_b_const_); + save_b_const_ = nullptr; + } return RET_OK; } @@ -407,7 +415,7 @@ int MatmulBaseInt8CPUKernel::InitBias() { FreeTmpBuffer(); return RET_MEMORY_FAILED; } - bias_ptr_ = reinterpret_cast(bias_tensor->data()); + bias_ptr_ = reinterpret_cast(malloc(bias_tensor->ElementsNum() * sizeof(int))); if (bias_ptr_ == nullptr) { MS_LOG(ERROR) << "Memory allocation failed"; FreeTmpBuffer(); @@ -438,7 +446,15 @@ int MatmulBaseInt8CPUKernel::Prepare() { FreeQuantParam(); return ret; } - + if (!InferShapeDone()) { + if (param_->b_const_) { + auto weight_tensor = in_tensors_.at(1); + CHECK_NULL_RETURN(weight_tensor); + CHECK_NULL_RETURN(weight_tensor->data()); + save_b_const_ = reinterpret_cast(malloc(weight_tensor->ElementsNum() * sizeof(int8_t))); + (void)memcpy(save_b_const_, weight_tensor->data(), weight_tensor->ElementsNum() * sizeof(int8_t)); + } + } return RET_OK; } int MatmulBaseInt8CPUKernel::MatmulReSize() { @@ -462,7 +478,7 @@ int MatmulBaseInt8CPUKernel::ReSize() { return ret; } - if (param_->b_const_ == true) { + if (param_->b_const_) { if (TransferB() != RET_OK) { MS_LOG(ERROR) << "TransferB error"; return RET_ERROR; @@ -509,7 +525,7 @@ int MatmulBaseInt8CPUKernel::Run() { return RunArm64Sdot(); } #endif - if (param_->b_const_ == false) { + if (!param_->b_const_) { if (TransferB() != RET_OK) { MS_LOG(ERROR) << "TransferB error"; return RET_ERROR; diff --git a/mindspore/lite/src/litert/kernel/cpu/int8/matmul_base_int8.h b/mindspore/lite/src/litert/kernel/cpu/int8/matmul_base_int8.h index a0322dfe714..22402e5e043 100644 --- a/mindspore/lite/src/litert/kernel/cpu/int8/matmul_base_int8.h +++ b/mindspore/lite/src/litert/kernel/cpu/int8/matmul_base_int8.h @@ -83,6 +83,7 @@ class MatmulBaseInt8CPUKernel : public LiteKernel { int8_t *batch_weight_ptr_ = nullptr; int8_t *batch_b_ptr_ = nullptr; int8_t *batch_c_ptr_ = nullptr; + int8_t *save_b_const_ = nullptr; int *batch_sums_ = nullptr; int row_tile_ = C4NUM; int col_tile_ = C4NUM; diff --git a/mindspore/lite/test/config_level0/models_posttraining.cfg b/mindspore/lite/test/config_level0/models_posttraining.cfg index e6715f78481..640074bb95d 100644 --- a/mindspore/lite/test/config_level0/models_posttraining.cfg +++ b/mindspore/lite/test/config_level0/models_posttraining.cfg @@ -1,3 +1,4 @@ +ml_asr_encoder_int8_202103.onnx 17 43591176 ml_face_mnet 86 832744 ml_face_landmark_2 0.8 472112 mobilenet.tflite 0.4 26040 diff --git a/mindspore/lite/test/config_level0/quant/ml_asr_encoder_int8_202103.onnx_posttraining.config b/mindspore/lite/test/config_level0/quant/ml_asr_encoder_int8_202103.onnx_posttraining.config new file mode 100644 index 00000000000..42fdfaa6850 --- /dev/null +++ b/mindspore/lite/test/config_level0/quant/ml_asr_encoder_int8_202103.onnx_posttraining.config @@ -0,0 +1,14 @@ +[common_quant_param] +quant_type=FULL_QUANT +bit_num=8 + +[data_preprocess_param] +calibrate_path=featinput:/home/workspace/mindspore_dataset/mslite/quantTraining/ml_asr_encoder_int8_202103_calibration_data +calibrate_size=1 +input_type=BIN + + +[full_quant_param] +activation_quant_method=MAX_MIN +bias_correction=true +per_channel=true