From 90a53bb79b3700009d7ffd7476928c0b9adc79e6 Mon Sep 17 00:00:00 2001
From: fuzhiye
Date: Thu, 20 Aug 2020 09:50:42 +0800
Subject: [PATCH] remove useless comment

---
 .../src/runtime/kernel/arm/base/matrix.cc     |  8 +++--
 .../lite/src/runtime/kernel/arm/base/matrix.h |  2 +-
 .../kernel/arm/fp16/convolution_3x3_fp16.cc   | 10 ++----
 .../kernel/arm/fp16/convolution_fp16.cc       |  3 --
 .../kernel/arm/fp16/convolution_sw_fp16.cc    | 13 ++++---
 .../kernel/arm/fp16/convolution_sw_fp16.h     |  8 ++---
 .../arm/fp16/convolution_winograd_fp16.cc     | 32 +++++++++++------
 .../arm/fp16/convolution_winograd_fp16.h      |  2 +-
 .../runtime/kernel/arm/fp32/convolution.cc    | 11 ++----
 .../kernel/arm/fp32/convolution_3x3.cc        |  6 ----
 .../kernel/arm/fp32/convolution_depthwise.cc  |  6 +++-
 .../arm/fp32/convolution_slidewindow.cc       | 10 +++---
 .../kernel/arm/fp32/convolution_slidewindow.h |  8 ++---
 .../kernel/arm/fp32/convolution_winograd.cc   | 34 ++++++++++++-------
 .../kernel/arm/fp32/convolution_winograd.h    |  4 +--
 .../src/runtime/kernel/arm/fp32/softmax.cc    |  4 ++-
 .../src/runtime/kernel/arm/fp32/softmax.h     |  2 ++
 .../kernel/arm/int8/convolution_3x3_int8.cc   |  6 ----
 .../arm/int8/convolution_depthwise_int8.cc    | 17 +++++++---
 .../kernel/arm/int8/convolution_int8.cc       | 16 +++------
 .../arm/int8/deconvolution_depthwise_int8.cc  |  5 ++-
 .../runtime/kernel/arm/int8/pooling_int8.cc   |  2 +-
 .../runtime/kernel/arm/nnacl/int8/conv_int8.c | 16 +++++++--
 23 files changed, 123 insertions(+), 102 deletions(-)

diff --git a/mindspore/lite/src/runtime/kernel/arm/base/matrix.cc b/mindspore/lite/src/runtime/kernel/arm/base/matrix.cc
index 2c26d1fa884..f84cf92a689 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/matrix.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/matrix.cc
@@ -20,8 +20,12 @@ namespace mindspore::kernel {
 Matrix *TransformMatrixGenerator(int m, int k) {
   auto matrix = new Matrix;
-  auto aa = malloc(m * k * sizeof(float));
-  matrix->SetData(aa);
+  auto data = malloc(m * k * sizeof(float));
+  if (data == nullptr) {
+    MS_LOG(ERROR) << "Malloc matrix data failed.";
+    return nullptr;
+  }
+  matrix->SetData(data);
   matrix->SetNum(m, k);
   return matrix;
 }

diff --git a/mindspore/lite/src/runtime/kernel/arm/base/matrix.h b/mindspore/lite/src/runtime/kernel/arm/base/matrix.h
index f5265728aef..e3b94e7819d 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/matrix.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/matrix.h
@@ -57,7 +57,7 @@ class Matrix {
   int GetK() { return this->k_; }

 protected:
-  void *data_;
+  void *data_ = nullptr;
   std::vector<int> shape_;
   std::vector<int> stride_;
   int m_;

diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.cc
index 88fc1771603..45b4e1a3892 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.cc
@@ -57,7 +57,7 @@ int Convolution3x3FP16CPUKernel::InitWeightBias() {
   conv_param_->output_channel_ = output_channel;
   int iC8 = UP_DIV(input_channel, C8NUM);
   int oC8 = UP_DIV(output_channel, C8NUM);
-  // ===========================init weight========================== //
+
   size_t transformed_size = iC8 * C8NUM * oC8 * C8NUM * 36 * sizeof(float16_t);
   transformed_filter_addr_ = reinterpret_cast<float16_t *>(malloc(transformed_size));
   if (transformed_filter_addr_ == nullptr) {
@@ -72,7 +72,6 @@ int Convolution3x3FP16CPUKernel::InitWeightBias() {
   }
   ProcessFilterFp16(execute_weight_, transformed_filter_addr_, conv_param_);

-  // =============================init bias========================= //
   size_t new_bias_size = oC8 * C8NUM * sizeof(float16_t);
   bias_data_ = malloc(new_bias_size);
   if (bias_data_ == nullptr) {
@@ -97,7 +96,7 @@ int Convolution3x3FP16CPUKernel::InitTmpBuffer() {
   const int k_plane = 36;
   int oC8 = UP_DIV(conv_param_->output_channel_, C8NUM);
   MS_ASSERT(ctx_->allocator != nullptr);
-  /*=============================block_unit_buffer_============================*/
+
   size_t block_unit_buffer_size = thread_count_ * k_plane * C8NUM * sizeof(float16_t);
   block_unit_buffer_ = reinterpret_cast<float16_t *>(ctx_->allocator->Malloc(block_unit_buffer_size));
   if (block_unit_buffer_ == nullptr) {
@@ -105,7 +104,6 @@ int Convolution3x3FP16CPUKernel::InitTmpBuffer() {
     return RET_ERROR;
   }

-  /*=============================tmp_dst_buffer_============================*/
   size_t tmp_dst_buffer_size = thread_count_ * tile_num * k_plane * oC8 * C8NUM * sizeof(float16_t);
   tmp_dst_buffer_ = reinterpret_cast<float16_t *>(ctx_->allocator->Malloc(tmp_dst_buffer_size));
   if (tmp_dst_buffer_ == nullptr) {
@@ -113,7 +111,6 @@ int Convolution3x3FP16CPUKernel::InitTmpBuffer() {
     return RET_ERROR;
   }

-  /*=============================tmp_out_============================*/
   int new_out_plane = UP_DIV(conv_param_->output_h_, C4NUM) * UP_DIV(conv_param_->output_w_, C4NUM) * C4NUM * C4NUM;
   size_t tmp_out_size = oC8 * C8NUM * conv_param_->output_batch_ * new_out_plane * sizeof(float16_t);
   tmp_out_ = reinterpret_cast<float16_t *>(ctx_->allocator->Malloc(tmp_out_size));
@@ -155,7 +152,6 @@ int Convolution3x3FP16CPUKernel::ReSize() {
     return ret;
   }

-  FreeTmpBuffer();
   if (tile_buffer_ != nullptr) {
     free(tile_buffer_);
     tile_buffer_ = nullptr;
@@ -174,7 +170,6 @@ int Convolution3x3FP16CPUKernel::ReSize() {
   const int k_plane = 36;
   int iC8 = UP_DIV(conv_param_->input_channel_, C8NUM);

-  /*=============================nhwc4_input_============================*/
   size_t nhwc8_input_size =
     iC8 * C8NUM * conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * sizeof(float16_t);
   nhwc4_input_ = malloc(nhwc8_input_size);
@@ -184,7 +179,6 @@ int Convolution3x3FP16CPUKernel::ReSize() {
   }
   memset(nhwc4_input_, 0, nhwc8_input_size);

-  /*=============================tile_buffer_============================*/
   size_t tile_buffer_size = thread_count_ * tile_num * k_plane * iC8 * C8NUM * sizeof(float16_t);
   tile_buffer_ = reinterpret_cast<float16_t *>(malloc(tile_buffer_size));
   if (tile_buffer_ == nullptr) {

diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc
index 43e1d1db581..3cd96fff435 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc
@@ -96,7 +96,6 @@ int ConvolutionFP16CPUKernel::InitTmpBuffer() {
   int unit_size = kernel_plane * channel_block * C4NUM;
   int packed_input_size = output_tile_count * cal_num * unit_size;

-  /*=============================packed_input_============================*/
   packed_input_ = reinterpret_cast<float16_t *>(malloc(in_batch * packed_input_size * sizeof(float16_t)));
   if (packed_input_ == nullptr) {
     MS_LOG(ERROR) << "malloc packed_input_ failed.";
@@ -104,7 +103,6 @@ int ConvolutionFP16CPUKernel::InitTmpBuffer() {
   }
   memset(packed_input_, 0, in_batch * packed_input_size * sizeof(float16_t));

-  /*=============================nhwc4_input_============================*/
   size_t nhwc4_input_size = channel_block * C4NUM * conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ *
                            sizeof(float16_t);
   nhwc4_input_ = malloc(nhwc4_input_size);
@@ -114,7 +112,6 @@ int ConvolutionFP16CPUKernel::InitTmpBuffer() {
   }
   memset(nhwc4_input_, 0, nhwc4_input_size);

-  /*=============================tmp_output_block_============================*/
   tmp_output_block_ = reinterpret_cast<float16_t *>(malloc(cal_num * out_channel * sizeof(float16_t)));
   if (tmp_output_block_ == nullptr) {
     MS_LOG(ERROR) << "malloc tmp_output_block_ failed.";

diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_sw_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_sw_fp16.cc
index 1c3f28bee63..f4c756f54f8 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_sw_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_sw_fp16.cc
@@ -71,7 +71,6 @@ int ConvolutionSWFP16CPUKernel::InitWeightBias() {
   int kernel_plane = kernel_h * kernel_w;
   int pack_weight_size = oc4 * ic4 * C4NUM * C4NUM * kernel_plane;

-  // ========================init weight==================== //
   packed_weight_ = reinterpret_cast<float16_t *>(malloc(pack_weight_size * sizeof(float16_t)));
   if (packed_weight_ == nullptr) {
     MS_LOG(ERROR) << "malloc packed_weight_ failed.";
@@ -84,7 +83,6 @@ int ConvolutionSWFP16CPUKernel::InitWeightBias() {
     return ret;
   }

-  // =======================init bias====================== //
   bias_data_ = malloc(oc4 * C4NUM * sizeof(float16_t));
   if (bias_data_ == nullptr) {
     MS_LOG(ERROR) << "malloc bias_data_ failed.";
@@ -107,7 +105,6 @@ int ConvolutionSWFP16CPUKernel::InitTmpBuffer() {
   int out_channel = conv_param_->output_channel_;
   int oc4 = UP_DIV(out_channel, C4NUM);

-  /*=============================tmp_output_block_============================*/
   tmp_output_block_ = reinterpret_cast<float16_t *>(ctx_->allocator->Malloc(
     conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * oc4 * C4NUM * sizeof(float16_t)));
   if (tmp_output_block_ == nullptr) {
@@ -148,11 +145,14 @@ int ConvolutionSWFP16CPUKernel::ReSize() {
     return ret;
   }

-  FreeTmpBuffer();
   if (nhwc4_input_ != nullptr) {
     free(nhwc4_input_);
     nhwc4_input_ = nullptr;
   }
+  if (slidingWindow_param_ != nullptr) {
+    delete slidingWindow_param_;
+    slidingWindow_param_ = nullptr;
+  }

   ret = ConvolutionBaseCPUKernel::Init();
   if (ret != RET_OK) {
@@ -160,10 +160,9 @@ int ConvolutionSWFP16CPUKernel::ReSize() {
     return ret;
   }

-  /*=============================nhwc4_input_============================*/
   int ic4 = UP_DIV(conv_param_->input_channel_, C4NUM);
-  size_t nhwc4_input_size = ic4 * C4NUM * conv_param_->input_batch_ * conv_param_->input_h_ *
-                            conv_param_->input_w_ * sizeof(float16_t);
+  size_t nhwc4_input_size =
+    ic4 * C4NUM * conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * sizeof(float16_t);
   nhwc4_input_ = malloc(nhwc4_input_size);
   if (nhwc4_input_ == nullptr) {
     MS_LOG(ERROR) << "malloc nhwc4_input_ failed.";

diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_sw_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_sw_fp16.h
index 45133786ded..079c1fab109 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_sw_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_sw_fp16.h
@@ -37,6 +37,10 @@ class ConvolutionSWFP16CPUKernel : public ConvolutionBaseFP16CPUKernel {
       free(packed_weight_);
       packed_weight_ = nullptr;
     }
+    if (slidingWindow_param_ != nullptr) {
+      delete slidingWindow_param_;
+      slidingWindow_param_ = nullptr;
+    }
   }

   int Init() override;
@@ -54,10 +58,6 @@
      ctx_->allocator->Free(tmp_output_block_);
      tmp_output_block_ = nullptr;
    }
-    if (slidingWindow_param_ != nullptr) {
-      delete slidingWindow_param_;
-      slidingWindow_param_ = nullptr;
-    }
  }
  float16_t *packed_weight_ = nullptr;
  float16_t *tmp_output_block_ = nullptr;

diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc
index dd3ece9e355..41324bdd835 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc
@@ -35,8 +35,8 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Conv2D;

 namespace mindspore::kernel {
-void WinogradFilterTransformFp16(const float16_t *weight_data, Matrix *trans_weight, int kernel_unit, int input_unit,
-                                 ConvParameter *conv_param, int oc_block) {
+int WinogradFilterTransformFp16(const float16_t *weight_data, Matrix *trans_weight, int kernel_unit, int input_unit,
+                                ConvParameter *conv_param, int oc_block) {
   // original weight format : ohwi
   auto channel_in = conv_param->input_channel_;
   auto channel_out = conv_param->output_channel_;
@@ -44,7 +44,18 @@ void WinogradFilterTransformFp16(const float16_t *weight_data, Matrix *trans_wei

   // generate matrix_G && matrix_GT
   auto matrix_g = TransformMatrixGenerator(input_unit, kernel_unit);
+  if (matrix_g == nullptr) {
+    MS_LOG(ERROR) << "matrix_g is null.";
+    delete matrix_g;
+    return RET_ERROR;
+  }
   auto matrix_gt = TransformMatrixGenerator(kernel_unit, input_unit);
+  if (matrix_gt == nullptr) {
+    MS_LOG(ERROR) << "matrix_gt is null.";
+    delete matrix_g;
+    delete matrix_gt;
+    return RET_ERROR;
+  }
   ChooseMatrixG(matrix_g, matrix_gt);
   auto matrix_g_data = reinterpret_cast<float *>(matrix_g->GetData());
   auto matrix_gt_data = reinterpret_cast<float *>(matrix_gt->GetData());
@@ -72,7 +83,7 @@ void WinogradFilterTransformFp16(const float16_t *weight_data, Matrix *trans_wei
     free(matrix_gt_data_fp16);
     delete matrix_g;
     delete matrix_gt;
-    return;
+    return RET_ERROR;
   }
   for (int i = 0; i < channel_out; i++) {
     int out_c_block = i / oc_block;
@@ -107,6 +118,7 @@ void WinogradFilterTransformFp16(const float16_t *weight_data, Matrix *trans_wei
   free(matrix_gt_data_fp16);
   delete matrix_g;
   delete matrix_gt;
+  return RET_OK;
 }

 int ConvolutionWinogradFP16CPUKernel::InitWeightBias() {
@@ -132,7 +144,12 @@ int ConvolutionWinogradFP16CPUKernel::InitWeightBias() {
     MS_LOG(ERROR) << "Get Execute filter failed.";
     return ret;
   }
-  WinogradFilterTransformFp16(execute_weight_, trans_weight_, kernel_unit_, input_unit_, conv_param_, oc_block);
+
+  ret = WinogradFilterTransformFp16(execute_weight_, trans_weight_, kernel_unit_, input_unit_, conv_param_, oc_block);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "winograd filter transform failed.";
+    return ret;
+  }

   // init bias
   bias_data_ = malloc(oc_block_num * oc_block * sizeof(float16_t));
@@ -203,7 +220,6 @@ int ConvolutionWinogradFP16CPUKernel::InitTmpBuffer() {
   int output_w = conv_param_->output_w_;
   int oc8 = UP_DIV(channel_out, C8NUM);

-  /*=============================gemm_out_============================*/
   gemm_out_ = reinterpret_cast<float16_t *>(
     ctx_->allocator->Malloc(thread_count_ * cal_num * input_unit_ * input_unit_ * oc8 * C8NUM * sizeof(float16_t)));
   if (gemm_out_ == nullptr) {
@@ -211,7 +227,6 @@ int ConvolutionWinogradFP16CPUKernel::InitTmpBuffer() {
     return RET_ERROR;
   }

-  /*=============================tmp_out_data_============================*/
   int out_w_block = UP_DIV(output_w, output_unit_);
   int out_h_block = UP_DIV(output_h, output_unit_);
   tmp_out_data_ = reinterpret_cast<float16_t *>(
@@ -222,7 +237,6 @@ int ConvolutionWinogradFP16CPUKernel::InitTmpBuffer() {
     return RET_ERROR;
   }

-  /*=============================tmp_data_============================*/
   tmp_data_ = reinterpret_cast<float16_t *>(
     ctx_->allocator->Malloc(thread_count_ * C8NUM * input_unit_ * input_unit_ * sizeof(float16_t)));
   if (tmp_data_ == nullptr) {
@@ -279,7 +293,6 @@ int ConvolutionWinogradFP16CPUKernel::ReSize() {
     return ret;
   }

-  FreeTmpBuffer();
   if (nhwc4_input_ != nullptr) {
     free(nhwc4_input_);
     nhwc4_input_ = nullptr;
   }
@@ -302,7 +315,7 @@ int ConvolutionWinogradFP16CPUKernel::ReSize() {
   int cal_num = 16;
   int channel_in = conv_param_->input_channel_;
   int ic8 = UP_DIV(channel_in, C8NUM);
-  /*=============================nhwc4_input_============================*/
+
   size_t nhwc8_input_size =
     ic8 * C8NUM * conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * sizeof(float16_t);
   nhwc4_input_ = malloc(nhwc8_input_size);
@@ -312,7 +325,6 @@ int ConvolutionWinogradFP16CPUKernel::ReSize() {
   }
   memset(nhwc4_input_, 0, nhwc8_input_size);

-  /*=============================trans_input_============================*/
   size_t tile_buffer_size = thread_count_ * cal_num * input_unit_ * input_unit_ * ic8 * C8NUM * sizeof(float16_t);
   trans_input_ = reinterpret_cast<float16_t *>(malloc(tile_buffer_size));
   if (trans_input_ == nullptr) {

diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h
index a022afcee62..2ea67108eb3 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h
@@ -84,7 +84,7 @@ class ConvolutionWinogradFP16CPUKernel : public ConvolutionBaseFP16CPUKernel {
   OutputTransformUnitFp16Func output_trans_func_;
   TmpBufferAddressFp16 tmp_buffer_address_list_[4];
 };
-void WinogradFilterTransformFp16(const float16_t *weight_data, Matrix *trans_weight, int kernel_unit, int input_unit,
+int WinogradFilterTransformFp16(const float16_t *weight_data, Matrix *trans_weight, int kernel_unit, int input_unit,
                                 ConvParameter *conv_param, int oc_block);
 }  // namespace mindspore::kernel

diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc
index 26f2dda3846..fc9cead6825 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc
@@ -54,7 +54,6 @@ int ConvolutionCPUKernel::InitWeightBias() {
   // #endif
   int pack_weight_size = oc_block_num * oc_block * ic4 * C4NUM * kernel_plane;

-  // =====================init weight==========================//
   auto origin_weight = reinterpret_cast<float *>(filter_tensor->Data());
   packed_weight_ = reinterpret_cast<float *>(malloc(pack_weight_size * sizeof(float)));
   if (packed_weight_ == nullptr) {
@@ -64,7 +63,6 @@ int ConvolutionCPUKernel::InitWeightBias() {
   memset(packed_weight_, 0, pack_weight_size * sizeof(float));
   PackWeightFp32(origin_weight, conv_param_, packed_weight_, oc_block, oc_block_num);

-  // =======================init bias==========================//
   bias_data_ = reinterpret_cast<float *>(malloc(oc_block_num * oc_block * sizeof(float)));
   if (bias_data_ == nullptr) {
     MS_LOG(ERROR) << "malloc bias failed.";
@@ -84,7 +82,6 @@ int ConvolutionCPUKernel::InitTmpBuffer() {
   int out_channel = conv_param_->output_channel_;
   MS_ASSERT(ctx_->allocator != nullptr);

-  /*=============================tmp_output_block_============================*/
   tmp_output_block_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(TILE_NUM * out_channel * sizeof(float)));
   if (tmp_output_block_ == nullptr) {
     MS_LOG(ERROR) << "malloc tmp output block failed.";
@@ -125,7 +122,6 @@ int ConvolutionCPUKernel::ReSize() {
     return ret;
   }

-  FreeTmpBuffer();
   if (nhwc4_input_ != nullptr) {
     free(nhwc4_input_);
     nhwc4_input_ = nullptr;
   }
@@ -140,7 +136,6 @@ int ConvolutionCPUKernel::ReSize() {
     return RET_ERROR;
   }

-  /*=============================nhwc4_input_============================*/
   int ic4 = UP_DIV(conv_param_->input_channel_, C4NUM);
   size_t nhwc4_input_size =
     ic4 * C4NUM * conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * sizeof(float);
@@ -151,7 +146,6 @@ int ConvolutionCPUKernel::ReSize() {
   }
   memset(nhwc4_input_, 0, nhwc4_input_size);

-  /*=============================packed_input============================*/
   int output_count = conv_param_->output_h_ * conv_param_->output_w_;
   int output_tile_count = UP_DIV(output_count, TILE_NUM);
   int unit_size = conv_param_->kernel_h_ * conv_param_->kernel_w_ * ic4 * C4NUM;
@@ -192,7 +186,7 @@ int ConvolutionCPUKernel::Run() {
     MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
     return prepare_ret;
   }
-  // ============Init buffer using memory pool allocator=============//
+
   auto ret = InitTmpBuffer();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init tmp buffer failed.";
@@ -264,8 +258,7 @@ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::tensor::Ten

diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_3x3.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_3x3.cc
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_3x3.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_3x3.cc
@@ @@ int Convolution3x3CPUKernel::InitTmpBuffer() {
   MS_ASSERT(ctx_->allocator != nullptr);
-  /*=============================block_unit_buffer_============================*/
   size_t block_unit_buffer_size = thread_count_ * k_plane * C4NUM * sizeof(float);
   block_unit_buffer_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(block_unit_buffer_size));
   if (block_unit_buffer_ == nullptr) {
@@ -106,7 +105,6 @@ int Convolution3x3CPUKernel::InitTmpBuffer() {
     return RET_ERROR;
   }

-  /*=============================tmp_dst_buffer_============================*/
   size_t tmp_dst_buffer_size = thread_count_ * TILE_NUM * k_plane * oC4 * C4NUM * sizeof(float);
   tmp_dst_buffer_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(tmp_dst_buffer_size));
   if (tmp_dst_buffer_ == nullptr) {
@@ -114,7 +112,6 @@ int Convolution3x3CPUKernel::InitTmpBuffer() {
     return RET_ERROR;
   }

-  /*=============================nc4hw4_out_============================*/
   size_t nc4hw4_out_size =
     oC4 * C4NUM * conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * sizeof(float);
   nc4hw4_out_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(nc4hw4_out_size));
@@ -160,7 +157,6 @@ int Convolution3x3CPUKernel::ReSize() {
     return ret;
   }

-  FreeTmpBuffer();
   if (nhwc4_input_ != nullptr) {
     free(nhwc4_input_);
     nhwc4_input_ = nullptr;
   }
@@ -177,7 +173,6 @@ int Convolution3x3CPUKernel::ReSize() {
   }

   int iC4 = UP_DIV(conv_param_->input_channel_, C4NUM);
-  /*=============================nhwc4_input_============================*/
   size_t nhwc4_input_size =
     iC4 * C4NUM * conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * sizeof(float);
   nhwc4_input_ = malloc(nhwc4_input_size);
@@ -187,7 +182,6 @@ int Convolution3x3CPUKernel::ReSize() {
   }
   memset(nhwc4_input_, 0, nhwc4_input_size);

-  /*=============================tile_buffer_============================*/
   size_t tile_buffer_size = thread_count_ * TILE_NUM * C16NUM * iC4 * C4NUM * sizeof(float);
   tile_buffer_ = reinterpret_cast<float *>(malloc(tile_buffer_size));
   if (tile_buffer_ == nullptr) {

diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc
index 7ee96c37934..d9bcba5f67a 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc
@@ -123,7 +123,11 @@ int ConvolutionDepthwiseCPUKernel::ReSize() {
   ConvolutionBaseCPUKernel::Init();

   // init sliding window param
-  sliding_ = new SlidingWindowParam;
+  sliding_ = new (std::nothrow) SlidingWindowParam;
+  if (sliding_ == nullptr) {
+    MS_LOG(ERROR) << "new sliding window param failed.";
+    return RET_ERROR;
+  }
   InitSlidingParamConvDw(sliding_, conv_param_, C4NUM);

   auto ret = InitWeightBias();

diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow.cc
index 82282d4c3c0..1087e3f8837 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow.cc
@@ -43,7 +43,6 @@ int ConvolutionSWCPUKernel::InitWeightBias() {
   int oc_block_num = UP_DIV(output_channel, C4NUM);
   int pack_weight_size = oc_block_num * oc_block * ic4 * C4NUM * kernel_plane;

-  // ==================================init weight======================================//
   auto origin_weight = reinterpret_cast<float *>(in_tensors_.at(kWeightIndex)->Data());
   packed_weight_ = reinterpret_cast<float *>(malloc(pack_weight_size * sizeof(float)));
   if (packed_weight_ == nullptr) {
@@ -61,7 +60,6 @@ int ConvolutionSWCPUKernel::InitWeightBias() {
     }
   }

-  // ====================================init bias====================================== //
   bias_data_ = reinterpret_cast<float *>(malloc(oc_block_num * oc_block * sizeof(float)));
   if (bias_data_ == nullptr) {
     MS_LOG(ERROR) << "malloc bias failed.";
@@ -82,7 +80,6 @@ int ConvolutionSWCPUKernel::InitTmpBuffer() {
   int oc4 = UP_DIV(out_channel, C4NUM);
   MS_ASSERT(ctx_->allocator != nullptr);

-  /*=============================tmp_output_block_============================*/
   tmp_output_block_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(
     conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * oc4 * C4NUM * sizeof(float)));
   if (tmp_output_block_ == nullptr) {
@@ -119,18 +116,21 @@ int ConvolutionSWCPUKernel::ReSize() {
     return ret;
   }

-  FreeTmpBuffer();
   if (nhwc4_input_ != nullptr) {
     free(nhwc4_input_);
     nhwc4_input_ = nullptr;
   }
+  if (slidingWindow_param_ != nullptr) {
+    delete slidingWindow_param_;
+    slidingWindow_param_ = nullptr;
+  }

   ret = ConvolutionBaseCPUKernel::Init();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvolutionBase init failed.";
     return RET_ERROR;
   }
-  /*=============================nhwc4_input_============================*/
+
   int ic4 = UP_DIV(conv_param_->input_channel_, C4NUM);
   size_t nhwc4_input_size =
     ic4 * C4NUM * conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * sizeof(float);

diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow.h
index 199b2d5991b..a63de04b1c2 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow.h
@@ -37,6 +37,10 @@ class ConvolutionSWCPUKernel : public ConvolutionBaseCPUKernel {
      free(packed_weight_);
      packed_weight_ = nullptr;
    }
+    if (slidingWindow_param_ != nullptr) {
+      delete slidingWindow_param_;
+      slidingWindow_param_ = nullptr;
+    }
  }

  int Init() override;
@@ -53,10 +57,6 @@
      ctx_->allocator->Free(tmp_output_block_);
      tmp_output_block_ = nullptr;
    }
-    if (slidingWindow_param_ != nullptr) {
-      delete slidingWindow_param_;
-      slidingWindow_param_ = nullptr;
-    }
  }
  float *packed_weight_ = nullptr;
  float *tmp_output_block_ = nullptr;

diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.cc
index 828d00884b8..c9fbdf6ddc5 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.cc
@@ -28,16 +28,27 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Conv2D;

 namespace mindspore::kernel {
-void WinogradFilterTransform(const float *weight_data, Matrix *trans_weight, int kernel_unit, int input_unit,
-                             ConvParameter *conv_param, int oc_block) {
-  // =============original weight format : ohwi===============//
+int WinogradFilterTransform(const float *weight_data, Matrix *trans_weight, int kernel_unit, int input_unit,
+                            ConvParameter *conv_param, int oc_block) {
+  // original weight format : ohwi
   auto channel_in = conv_param->input_channel_;
   auto channel_out = conv_param->output_channel_;
   int input_unit_square = input_unit * input_unit;

-  // =============generate matrix_G && matrix_GT===============//
+  // generate matrix_G && matrix_GT
   auto matrix_g = TransformMatrixGenerator(input_unit, kernel_unit);
+  if (matrix_g == nullptr) {
+    MS_LOG(ERROR) << "matrix_g is null.";
+    delete matrix_g;
+    return RET_ERROR;
+  }
   auto matrix_gt = TransformMatrixGenerator(kernel_unit, input_unit);
+  if (matrix_gt == nullptr) {
+    MS_LOG(ERROR) << "matrix_gt is null.";
+    delete matrix_g;
+    delete matrix_gt;
+    return RET_ERROR;
+  }
   ChooseMatrixG(matrix_g, matrix_gt);
   auto matrix_g_data = reinterpret_cast<float *>(matrix_g->GetData());
   auto matrix_gt_data = reinterpret_cast<float *>(matrix_gt->GetData());
@@ -59,7 +70,7 @@ void WinogradFilterTransform(const float *weight_data, Matrix *trans_weight, int
     free(trans_out_data);
     delete matrix_g;
     delete matrix_gt;
-    return;
+    return RET_ERROR;
   }
   for (int i = 0; i < channel_out; i++) {
     int out_c_block = i / oc_block;
@@ -92,6 +103,7 @@ void WinogradFilterTransform(const float *weight_data, Matrix *trans_weight, int
   free(trans_out_data);
   delete matrix_g;
   delete matrix_gt;
+  return RET_OK;
 }

 int ConvolutionWinogradCPUKernel::InitWeightBias() {
@@ -118,7 +130,11 @@ int ConvolutionWinogradCPUKernel::InitWeightBias() {
     return RET_ERROR;
   }
   auto weight_data = reinterpret_cast<float *>(filter_tensor->Data());
-  WinogradFilterTransform(weight_data, trans_weight_, kernel_unit_, input_unit_, conv_param_, oc_block);
+  ret = WinogradFilterTransform(weight_data, trans_weight_, kernel_unit_, input_unit_, conv_param_, oc_block);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "winograd filter transform failed.";
+    return ret;
+  }

   // init bias
   size_t new_bias_size = oc4 * C4NUM * sizeof(float);
@@ -182,7 +198,6 @@ int ConvolutionWinogradCPUKernel::InitTmpBuffer() {
   int oc4 = UP_DIV(channel_out, C4NUM);
   MS_ASSERT(ctx_->allocator != nullptr);

-  /*=============================gemm_out_============================*/
   gemm_out_ = reinterpret_cast<float *>(
     ctx_->allocator->Malloc(thread_count_ * TILE_NUM * input_unit_ * input_unit_ * oc4 * C4NUM * sizeof(float)));
   if (gemm_out_ == nullptr) {
@@ -190,7 +205,6 @@ int ConvolutionWinogradCPUKernel::InitTmpBuffer() {
     return RET_ERROR;
   }

-  /*=============================tmp_out_data_============================*/
   int out_w_block = UP_DIV(output_w, output_unit_);
   int out_h_block = UP_DIV(output_h, output_unit_);
   tmp_out_data_ =
@@ -201,7 +215,6 @@ int ConvolutionWinogradCPUKernel::InitTmpBuffer() {
     return RET_ERROR;
   }

-  /*=============================tmp_data_============================*/
   tmp_data_ = reinterpret_cast<float *>(
     ctx_->allocator->Malloc(thread_count_ * C4NUM * input_unit_ * input_unit_ * sizeof(float)));
   if (tmp_data_ == nullptr) {
@@ -263,7 +276,6 @@ int ConvolutionWinogradCPUKernel::ReSize() {
     return ret;
   }

-  FreeTmpBuffer();
   if (nhwc4_input_ != nullptr) {
     free(nhwc4_input_);
     nhwc4_input_ = nullptr;
   }
@@ -284,7 +296,6 @@ int ConvolutionWinogradCPUKernel::ReSize() {
   conv_param_->input_unit_ = input_unit_;
   conv_param_->output_unit_ = output_unit_;

-  /*=============================nhwc4_input_============================*/
   int ic4 = UP_DIV(conv_param_->input_channel_, C4NUM);
   size_t nhwc4_input_size =
     ic4 * C4NUM * conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * sizeof(float);
@@ -295,7 +306,6 @@ int ConvolutionWinogradCPUKernel::ReSize() {
   }
   memset(nhwc4_input_, 0, nhwc4_input_size);

-  /*=============================trans_input_============================*/
   size_t tile_buffer_size = thread_count_ * TILE_NUM * input_unit_ * input_unit_ * ic4 * C4NUM * sizeof(float);
   trans_input_ = reinterpret_cast<float *>(malloc(tile_buffer_size));
   if (trans_input_ == nullptr) {

diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.h
index f1989f52921..97b5e149bbf 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.h
@@ -80,7 +80,7 @@ class ConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel {
   TmpBufferAddress tmp_buffer_address_list_[5];
   GEMM_FUNC_FP32 gemm_func_ = nullptr;
 };
-void WinogradFilterTransform(const float *weight_data, Matrix *trans_weight, int kernel_unit, int input_unit,
-                             ConvParameter *conv_param, int oc_block);
+int WinogradFilterTransform(const float *weight_data, Matrix *trans_weight, int kernel_unit, int input_unit,
+                            ConvParameter *conv_param, int oc_block);
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_WINOGRAD_H_

diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.cc
index f7bde07c1cf..f803f4dba1a 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.cc
@@ -61,6 +61,8 @@ int SoftmaxCPUKernel::ReSize() {
   for (int i = axis + 1; i < n_dim; i++) {
     in_plane_size *= in_shape[i];
   }
+  in_plane_size_ = in_plane_size;
+  out_plane_size_ = out_plane_size;
   if (sum_data_ != nullptr) {
     free(sum_data_);
   }
@@ -69,7 +71,6 @@ int SoftmaxCPUKernel::ReSize() {
     MS_LOG(ERROR) << "malloc data for softmax fail!";
     return RET_ERROR;
   }
-  memset(sum_data_, 0, out_plane_size * in_plane_size * sizeof(float));
   return RET_OK;
 }

@@ -79,6 +80,7 @@ int SoftmaxCPUKernel::Run() {
     MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
     return RET_ERROR;
   }
+  memset(sum_data_, 0, in_plane_size_ * out_plane_size_ * sizeof(float));
   auto input_ptr = reinterpret_cast<float *>(in_tensors_.at(kInputIndex)->Data());
   auto output_ptr = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->Data());
   Softmax(input_ptr, output_ptr, sum_data_, softmax_param_);

diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.h b/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.h
index 91b4c9d346c..ed7ea27fa00 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/softmax.h
@@ -40,6 +40,8 @@ class SoftmaxCPUKernel : public SoftmaxBaseCPUKernel {

 private:
   float *sum_data_ = nullptr;
+  int in_plane_size_;
+  int out_plane_size_;
 };
 }  // namespace mindspore::kernel

diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.cc
index db5a36b5e16..ee885217fea 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.cc
@@ -117,7 +117,6 @@ int Convolution3x3Int8CPUKernel::InitTmpBuffer() {
   int output_h = conv_param_->output_h_;
   MS_ASSERT(ctx_->allocator != nullptr);

-  /*=============================block_unit_buffer_============================*/
   size_t block_unit_buffer_size = thread_count_ * 4 * 4 * C8NUM * sizeof(int16_t);
   block_unit_buffer_ = reinterpret_cast<int16_t *>(ctx_->allocator->Malloc(block_unit_buffer_size));
   if (block_unit_buffer_ == nullptr) {
@@ -125,7 +124,6 @@ int Convolution3x3Int8CPUKernel::InitTmpBuffer() {
     return RET_ERROR;
   }

-  /*=============================tmp_dst_buffer_============================*/
   size_t tmp_dst_buffer_size = thread_count_ * TILE_NUM * 16 * oc4 * C4NUM * sizeof(int32_t);
   tmp_dst_buffer_ = reinterpret_cast<int32_t *>(ctx_->allocator->Malloc(tmp_dst_buffer_size));
   if (tmp_dst_buffer_ == nullptr) {
@@ -133,7 +131,6 @@ int Convolution3x3Int8CPUKernel::InitTmpBuffer() {
     return RET_ERROR;
   }

-  /*=============================tmp_out_============================*/
   size_t tmp_out_size = oc4 * C4NUM * output_batch * output_w * output_h * sizeof(uint8_t);
   tmp_out_ = reinterpret_cast<int8_t *>(ctx_->allocator->Malloc(tmp_out_size));
   if (tmp_out_ == nullptr) {
@@ -174,7 +171,6 @@ int Convolution3x3Int8CPUKernel::ReSize() {
     return ret;
   }

-  FreeTmpBuffer();
   if (input_data_ != nullptr) {
     free(input_data_);
     input_data_ = nullptr;
   }
@@ -190,7 +186,6 @@ int Convolution3x3Int8CPUKernel::ReSize() {
     return RET_ERROR;
   }

-  /*=============================input_data_============================*/
   int ic8 = UP_DIV(conv_param_->input_channel_, C8NUM);
   size_t c8_input_size =
     conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * ic8 * C8NUM * sizeof(int16_t);
@@ -201,7 +196,6 @@ int Convolution3x3Int8CPUKernel::ReSize() {
   }
   memset(input_data_, 0, c8_input_size);

-  /*=============================tile_buffer_============================*/
   size_t tile_buffer_size = thread_count_ * TILE_NUM * C16NUM * ic8 * C8NUM * sizeof(int16_t);
   tile_buffer_ = reinterpret_cast<int16_t *>(malloc(tile_buffer_size));
   if (tile_buffer_ == nullptr) {

diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc
index 7da4f720973..fe2bae21980 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc
@@ -35,22 +35,25 @@ void ConvolutionDepthwiseInt8CPUKernel::FreeTmpBuffer() {
   }

   if (packed_weight_ != nullptr) {
-    delete packed_weight_;
+    free(packed_weight_);
     packed_weight_ = nullptr;
   }
   if (packed_input_ != nullptr) {
-    delete packed_input_;
+    free(packed_input_);
     packed_input_ = nullptr;
   }
   if (need_align_) {
     if (packed_output_ != nullptr) {
-      delete packed_output_;
+      free(packed_output_);
       packed_output_ = nullptr;
     }
   }
 }

-ConvolutionDepthwiseInt8CPUKernel::~ConvolutionDepthwiseInt8CPUKernel() { FreeTmpBuffer(); }
+ConvolutionDepthwiseInt8CPUKernel::~ConvolutionDepthwiseInt8CPUKernel() {
+  FreeTmpBuffer();
+  FreeQuantParam();
+}

 int ConvolutionDepthwiseInt8CPUKernel::InitWeightBias() {
   // init weight, int8 -> int16
@@ -118,7 +121,11 @@ int ConvolutionDepthwiseInt8CPUKernel::ReSize() {
   ConvolutionBaseCPUKernel::Init();

   // init sliding window param
-  sliding = new SlidingWindowParam;
+  sliding = new (std::nothrow) SlidingWindowParam;
+  if (sliding == nullptr) {
+    MS_LOG(ERROR) << "new sliding window param failed.";
+    return RET_ERROR;
+  }
   InitSlidingParamConvDw(sliding, conv_param_, C4NUM);

   // init quant param

diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc
index 7e0f42a58af..54646c08f7d 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc
@@ -113,25 +113,24 @@ int ConvolutionInt8CPUKernel::InitWeightBias() {
   }
   free(weight_sum);

-  /*=============================input_sum_============================*/
   size_t input_sum_size;
   if (conv_quant_arg_->per_channel_ & FILTER_PER_CHANNEL) {
     input_sum_size = conv_param_->output_channel_ * tile_num_ * thread_count_ * sizeof(int32_t);
   } else {
     input_sum_size = tile_num_ * thread_count_ * sizeof(int32_t);
   }
-  input_sum_ = reinterpret_cast<int32_t *>(ctx_->allocator->Malloc(input_sum_size));
+  input_sum_ = reinterpret_cast<int32_t *>(malloc(input_sum_size));
   if (input_sum_ == nullptr) {
     MS_LOG(ERROR) << "malloc input_sum_ failed.";
     return RET_ERROR;
   }
-  memset(input_sum_, 0, tile_num_ * thread_count_ * sizeof(int32_t));
+  memset(input_sum_, 0, input_sum_size);
   return RET_OK;
 }

 int ConvolutionInt8CPUKernel::InitTmpBuffer() {
   MS_ASSERT(ctx_->allocator != nullptr);
-  /*=============================tmp_dst_============================*/
+
   size_t tmp_dst_size = thread_count_ * tile_num_ * conv_param_->output_channel_ * sizeof(int32_t);
   tmp_dst_ = reinterpret_cast<int32_t *>(ctx_->allocator->Malloc(tmp_dst_size));
   if (tmp_dst_ == nullptr) {
@@ -139,7 +138,6 @@ int ConvolutionInt8CPUKernel::InitTmpBuffer() {
     return RET_ERROR;
   }

-  /*=============================tmp_out_============================*/
   tmp_out_ =
     reinterpret_cast<int8_t *>(ctx_->allocator->Malloc(thread_count_ * tile_num_ * conv_param_->output_channel_));
   if (tmp_out_ == nullptr) {
@@ -202,7 +200,6 @@ int ConvolutionInt8CPUKernel::InitWeightBiasOpt() {
   }
   free(weight_sum);

-  /*=============================input_sum_============================*/
   size_t input_sum_size;
   if (conv_quant_arg_->per_channel_ & FILTER_PER_CHANNEL) {
     input_sum_size = conv_param_->output_channel_ * tile_num_ * thread_count_ * sizeof(int32_t);
@@ -214,13 +211,13 @@ int ConvolutionInt8CPUKernel::InitWeightBiasOpt() {
     MS_LOG(ERROR) << "malloc input_sum_ failed.";
     return RET_ERROR;
   }
-  memset(input_sum_, 0, tile_num_ * thread_count_ * sizeof(int32_t));
+  memset(input_sum_, 0, input_sum_size);
   return RET_OK;
 }

 int ConvolutionInt8CPUKernel::InitTmpBufferOpt() {
   MS_ASSERT(ctx_->allocator != nullptr);
-  /*=============================tmp_dst_============================*/
+
   size_t tmp_dst_size = thread_count_ * tile_num_ * conv_param_->output_channel_ * sizeof(int32_t);
   tmp_dst_ = reinterpret_cast<int32_t *>(ctx_->allocator->Malloc(tmp_dst_size));
   if (tmp_dst_ == nullptr) {
@@ -228,7 +225,6 @@ int ConvolutionInt8CPUKernel::InitTmpBufferOpt() {
     return RET_ERROR;
   }

-  /*=============================tmp_out_============================*/
   tmp_out_ =
     reinterpret_cast<int8_t *>(ctx_->allocator->Malloc(thread_count_ * tile_num_ * conv_param_->output_channel_));
   if (tmp_out_ == nullptr) {
@@ -287,7 +283,6 @@ int ConvolutionInt8CPUKernel::ReSize() {
     return ret;
   }

-  FreeTmpBuffer();
   if (nhwc4_input_ != nullptr) {
     free(nhwc4_input_);
     nhwc4_input_ = nullptr;
   }
@@ -312,7 +307,6 @@ int ConvolutionInt8CPUKernel::ReSize() {
   }
   memset(nhwc4_input_, 0, nhwc4_input_size);

-  /*=============================packed_input_============================*/
   int output_count = conv_param_->output_h_ * conv_param_->output_w_;
   int output_tile_count = UP_DIV(output_count, tile_num_);
   int kernel_plane = conv_param_->kernel_h_ * conv_param_->kernel_w_;

diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.cc
index d1f40743c77..059a790480f 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.cc
@@ -28,7 +28,10 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_DeDepthwiseConv2D;

 namespace mindspore::kernel {
-DeconvolutionDepthwiseInt8CPUKernel::~DeconvolutionDepthwiseInt8CPUKernel() { FreeTmpBuffer(); }
+DeconvolutionDepthwiseInt8CPUKernel::~DeconvolutionDepthwiseInt8CPUKernel() {
+  FreeTmpBuffer();
+  FreeQuantParam();
+}

 void DeconvolutionDepthwiseInt8CPUKernel::FreeTmpBuffer() {
   if (sliding != nullptr) {

diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/pooling_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/pooling_int8.cc
index 541a05f90d6..1b88ce45190 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/pooling_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/pooling_int8.cc
@@ -49,7 +49,7 @@ int PoolingInt8CPUKernel::ReSize() {
     MS_LOG(ERROR) << "PoolingBase Init failed.";
     return ret;
   }
-  SetQuantParam();
+  ret = SetQuantParam();

   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Set pooling quant param failed.";

diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/conv_int8.c b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/conv_int8.c
index 7971aab0661..5f59786f235 100644
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/conv_int8.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/conv_int8.c
@@ -262,6 +262,12 @@ void ConvInt8(int8_t *input_data, int8_t *packed_input, int8_t *packed_weight, c
   int kernel_plane = kernel_h * kernel_w;
   int unit_size = kernel_plane * ic4 * C4NUM;
   int packed_input_size = output_tile_count * tile_n * unit_size;
+  int input_sum_offset;
+  if (conv_param->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL) {
+    input_sum_offset = tile_n * out_channel;
+  } else {
+    input_sum_offset = tile_n;
+  }

   for (int b = 0; b < in_batch; b++) {
     int in_batch_offset = b * ic4 * C4NUM * in_h * in_w;
@@ -270,7 +276,7 @@ void ConvInt8(int8_t *input_data, int8_t *packed_input, int8_t *packed_weight, c
     for (int thread_id = task_id; thread_id < output_tile_count; thread_id += thread_count) {
       int start_index = thread_id * tile_n;
       int real_cal_num = (output_count - start_index) < tile_n ? (output_count - start_index) : tile_n;
-      int32_t *tmp_input_sum = input_sum + task_id * tile_n;
+      int32_t *tmp_input_sum = input_sum + task_id * input_sum_offset;
       int8_t *gemm_input = packed_input + thread_id * unit_size * tile_n + gemm_in_batch_offset;
       // clear tmp buffer before compute
       memset(gemm_input, (int8_t)input_zp, unit_size * tile_n);
@@ -317,6 +323,12 @@ void ConvInt8Opt(int8_t *input_data, int8_t *packed_input, int8_t *packed_weight
   int kernel_plane = kernel_h * kernel_w;
   int unit_size = kernel_plane * ic4 * C4NUM;
   int packed_input_size = output_tile_count * tile_n * unit_size;
+  int input_sum_offset;
+  if (conv_param->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL) {
+    input_sum_offset = tile_n * out_channel;
+  } else {
+    input_sum_offset = tile_n;
+  }

   for (int b = 0; b < in_batch; b++) {
     int in_batch_offset = b * ic4 * C4NUM * in_h * in_w;
@@ -325,7 +337,7 @@ void ConvInt8Opt(int8_t *input_data, int8_t *packed_input, int8_t *packed_weight
     for (int thread_id = task_id; thread_id < output_tile_count; thread_id += thread_count) {
       int start_index = thread_id * tile_n;
       int real_cal_num = (output_count - start_index) < tile_n ? (output_count - start_index) : tile_n;
-      int32_t *tmp_input_sum = input_sum + task_id * tile_n;
+      int32_t *tmp_input_sum = input_sum + task_id * input_sum_offset;
       int8_t *gemm_input = packed_input + thread_id * unit_size * tile_n + gemm_in_batch_offset;
       // clear tmp buffer before compute
       memset(gemm_input, (int8_t)input_zp, unit_size * tile_n);