diff --git a/.jenkins/check/config/whitelizard.txt b/.jenkins/check/config/whitelizard.txt index f37a31c6ece..3229323e2f7 100644 --- a/.jenkins/check/config/whitelizard.txt +++ b/.jenkins/check/config/whitelizard.txt @@ -169,9 +169,7 @@ mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/matmul_avx512_f mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/matmul_avx512_fp32.c:nnacl_gemm_avx512_4x64_kernel_nhwc_fp32 mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/matmul_avx512_fp32.c:nnacl_gemm_avx512_5x64_kernel_nhwc_fp32 mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/matmul_avx512_fp32.c:nnacl_gemm_avx512_6x64_kernel_nhwc_fp32 -<<<<<<< HEAD mindspore/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32_base.cc:mindspore::kernel::MatmulFp32BaseCPUKernel::Run -======= mindspore/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_partition.cc:mindspore::parallel::GetWeights mindspore/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_partition.cc:mindspore::parallel::PartitionNode ->>>>>>> Updating the redistribution cost in D-Rec cost model +mindspore/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/instance_norm_fp16.c:InstanceNormNC8HW8Fp16 diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/conv_depthwise_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/conv_depthwise_fp16.c index 71b47bb09d2..20cc7fdb48e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/conv_depthwise_fp16.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/conv_depthwise_fp16.c @@ -398,6 +398,8 @@ void ConvDw3x3Fp16(float16_t *output_data, float16_t *buffer, const float16_t *i void ConvDwFp16(float16_t *output_data, const float16_t *input_data, const float16_t *weight_data, const float16_t *bias_data, const ConvParameter *conv_param, int task_id) { NNACL_CHECK_ZERO_RETURN(conv_param->stride_w_); + NNACL_CHECK_ZERO_RETURN(conv_param->dilation_h_); + NNACL_CHECK_ZERO_RETURN(conv_param->thread_num_); int h_step = UP_DIV(conv_param->output_h_, conv_param->thread_num_); int h_start = h_step * task_id; int h_end = MSMIN(h_start + h_step, conv_param->output_h_); @@ -484,6 +486,8 @@ void DepthwiseBorderPixelFp16(float16_t *dst, const float16_t *src, const float1 void DepthwiseBorderFp16(float16_t *dst, const float16_t *src, const float16_t *weight, const float16_t *bias, int top, int bottom, int left, int right, const ConvParameter *conv_param, const SlidingWindowParam *sliding) { + NNACL_CHECK_ZERO_RETURN(conv_param->dilation_h_); + NNACL_CHECK_ZERO_RETURN(conv_param->dilation_w_); bool relu = conv_param->act_type_ == ActType_Relu; bool relu6 = conv_param->act_type_ == ActType_Relu6; float16_t *dst_h = dst + top * sliding->out_h_step_; @@ -644,6 +648,8 @@ void DeconvDepthwiseBorderPixelFp16(float16_t *dst, const float16_t *src, const void DeconvDepthwiseBorderFp16(float16_t *dst, const float16_t *src, const float16_t *weight, int top, int bottom, int left, int right, const ConvParameter *conv_param, const SlidingWindowParam *sliding) { + NNACL_CHECK_ZERO_RETURN(conv_param->dilation_h_); + NNACL_CHECK_ZERO_RETURN(conv_param->dilation_w_); const float16_t *src_h = src + top * sliding->out_h_step_; for (int ih = top; ih < bottom; ih++) { int oh = ih * conv_param->stride_h_ - conv_param->pad_u_; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/crop_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/crop_fp16.c index 7beeac172ca..3e180bbdacf 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/crop_fp16.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/crop_fp16.c @@ -21,6 +21,7 @@ #include "nnacl/crop_parameter.h" void Fp16Crop(const float16_t *input, float16_t *output, int task_id, const CropParameter *para) { + NNACL_CHECK_ZERO_RETURN(para->thread_count_); int input_dim = para->input_dim_; switch (input_dim) { case 1: diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/deconv_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/deconv_fp16.c index 1469850da81..e90a1d2b28c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/deconv_fp16.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/deconv_fp16.c @@ -43,6 +43,9 @@ int DeConvPostFp16(const float16_t *src, float16_t *tmp, const float16_t *bias, int dst_kh_stride = conv_param->dilation_h_ * conv_param->output_w_ * C8NUM; int dst_kw_stride = conv_param->dilation_w_ * C8NUM; + NNACL_CHECK_ZERO_RETURN_ERR(conv_param->dilation_h_); + NNACL_CHECK_ZERO_RETURN_ERR(conv_param->dilation_w_); + for (int c = 0; c < oc8; c += 8) { float16_t *dst_ptr = tmp + c * output_plane; const float16_t *src_ptr = src + c * in_plane16 * kernel_plane; @@ -88,10 +91,10 @@ int DeConvPostFp16(const float16_t *src, float16_t *tmp, const float16_t *bias, dst_kw_index[i] += src_kw_index[i]; } #endif - } /*kw*/ - } /*kh*/ - } /*iw*/ - } /*ih*/ + } // kw + } // kh + } // iw + } // ih /* add bias for current oh*ow*C8 * write to output data ptr in nhwc format */ @@ -105,6 +108,6 @@ int DeConvPostFp16(const float16_t *src, float16_t *tmp, const float16_t *bias, vst1q_f16(pack_tmp_data, data_v); pack_tmp_data += C8NUM; } - } /*oc8*/ + } // oc8 return NNACL_OK; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/deconv_winograd_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/deconv_winograd_fp16.c index ecf4a3a4d67..c48ee1b09c6 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/deconv_winograd_fp16.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/deconv_winograd_fp16.c @@ -344,9 +344,7 @@ int PackDeConvWgDataFp16(const float16_t *nhwc_weight, DeConvComputeUnit *unit, void DeconvWgFp16(const float16_t *nhwc_input_, float16_t *tile_in, float16_t *tile_out, int start_index, int calculate_count, const ConvParameter *conv_param, DeConvParam *deconv_param, int task_id) { - if (deconv_param->in_tile_w_count_ == 0) { - return; - } + NNACL_CHECK_ZERO_RETURN(deconv_param->in_tile_w_count_); /* pack tile input */ int tile_in_unit_stride = deconv_param->ic_up_ * DECONV_WINOGRAD_DEFAULT_TILE; float16x4_t zero = vdup_n_f16(0.0f); @@ -411,6 +409,7 @@ void DeconvWgFp16(const float16_t *nhwc_input_, float16_t *tile_in, float16_t *t void DeconvWgPostFp16(const float16_t *tile_out, float16_t *nc4hw4_output, const ConvParameter *conv_param, const DeConvParam *deconv_param, int calculate_count, int tile_index) { + NNACL_CHECK_ZERO_RETURN(deconv_param->in_tile_w_count_); /* merge */ int src_unit_stride = deconv_param->oc_up_ * DECONV_WINOGRAD_DEFAULT_TILE; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/instance_norm_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/instance_norm_fp16.c index 81c791407f8..280834ea932 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/instance_norm_fp16.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/instance_norm_fp16.c @@ -22,8 +22,10 @@ int InstanceNormFp16(const float16_t *src_data, float16_t *dst_data, const float const float16_t *beta_data, const InstanceNormParameter *param, size_t task_id) { NNACL_CHECK_NULL_RETURN_ERR(src_data); NNACL_CHECK_NULL_RETURN_ERR(dst_data); + NNACL_CHECK_ZERO_RETURN_ERR(param->op_parameter_.thread_num_); int channel = param->channel_; int hw_plane = param->inner_size_; + NNACL_CHECK_ZERO_RETURN_ERR(hw_plane); int channel_step = UP_DIV(channel, param->op_parameter_.thread_num_); int channel_begin = task_id * channel_step; int channel_end = MSMIN(channel_begin + channel_step, channel); @@ -86,8 +88,10 @@ int InstanceNormNC8HW8Fp16(const float16_t *src_data, float16_t *dst_data, const const float16_t *beta_data, const InstanceNormParameter *param, size_t task_id) { NNACL_CHECK_NULL_RETURN_ERR(src_data); NNACL_CHECK_NULL_RETURN_ERR(dst_data); + NNACL_CHECK_ZERO_RETURN_ERR(param->op_parameter_.thread_num_); int channel = param->channel_; int hw_plane = param->inner_size_; + NNACL_CHECK_ZERO_RETURN_ERR(hw_plane); int channel_step = UP_DIV(UP_DIV(channel, C8NUM), param->op_parameter_.thread_num_) * C8NUM; int channel_begin = (int)(task_id)*channel_step; int channel_end = MSMIN(channel_begin + channel_step, channel); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/layer_norm_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/layer_norm_fp16.c index 66a8b38533b..8ae3fb7b780 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/layer_norm_fp16.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/layer_norm_fp16.c @@ -72,6 +72,7 @@ int LayerNormFp16(const float16_t *src_data, const float16_t *gamma_data, const } NNACL_CHECK_ZERO_RETURN_ERR(param->params_inner_size_); NNACL_CHECK_ZERO_RETURN_ERR(param->params_outer_size_); + NNACL_CHECK_ZERO_RETURN_ERR(param->op_parameter_.thread_num_); int step = UP_DIV(param->norm_outer_size_, param->op_parameter_.thread_num_); int thread_end = MSMIN((task_id + 1) * step, param->norm_outer_size_); for (int i = task_id * step; i < thread_end; i++) { diff --git a/mindspore/lite/src/common/log_util.h b/mindspore/lite/src/common/log_util.h index 72eb5d7ae4c..6321a3da847 100644 --- a/mindspore/lite/src/common/log_util.h +++ b/mindspore/lite/src/common/log_util.h @@ -45,6 +45,14 @@ } \ } while (0) +#define CHECK_NULL_RETURN_VOID(ptr) \ + do { \ + if ((ptr) == nullptr) { \ + MS_LOG(ERROR) << #ptr << " must not be null!"; \ + return; \ + } \ + } while (0) + #define CHECK_LESS_RETURN(size1, size2) \ do { \ if ((size1) < (size2)) { \ @@ -55,6 +63,7 @@ #else #define CHECK_NULL_RETURN(ptr) +#define CHECK_NULL_RETURN_VOID(ptr) #define CHECK_LESS_RETURN(size1, size2) #endif #endif // MINDSPORE_LITE_SRC_COMMON_LOG_UTIL_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc index 532e21af69d..2bff58563ea 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc @@ -294,6 +294,7 @@ int Convolution1x1FP16CPUKernel::Run() { } if (RepackWeight() != RET_OK) { MS_LOG(ERROR) << "Repack weight failed."; + ctx_->allocator->Free(pack_input_); return RET_ERROR; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.cc index d46b6bd1052..4846164516a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.cc @@ -30,7 +30,7 @@ void ConvolutionDepthwise3x3Fp16CPUKernel::PackWeight() { auto weight_tensor = in_tensors_.at(kWeightIndex); int channel = weight_tensor->Batch(); void *origin_weight = (op_parameter_->is_train_session_) ? weight_tensor->data() : origin_weight_; - MS_ASSERT(origin_weight != nullptr); + CHECK_NULL_RETURN_VOID(origin_weight); PackWeightConvDw3x3Fp16(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), channel); } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc index 6b2e5bb0081..b0fa1ef2c67 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc @@ -26,7 +26,7 @@ namespace mindspore::kernel { void ConvolutionDepthwiseFp16CPUKernel::PackWeight() { auto weight_tensor = in_tensors_.at(kWeightIndex); void *origin_weight = (op_parameter_->is_train_session_) ? weight_tensor->data() : origin_weight_; - MS_ASSERT(origin_weight != nullptr); + CHECK_NULL_RETURN_VOID(origin_weight); PackNCHWToNHWCFp16(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), 1, weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch(), 0, 0); } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc index 8c9e39a61b9..2066dc66a7d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc @@ -57,7 +57,7 @@ int ConvolutionDepthwiseSWFp16CPUKernel::InitPackedInputOutput() { void ConvolutionDepthwiseSWFp16CPUKernel::PackWeight() { auto weight_tensor = in_tensors_.at(kWeightIndex); void *origin_weight = (op_parameter_->is_train_session_) ? weight_tensor->data() : origin_weight_; - MS_ASSERT(origin_weight != nullptr); + NNACL_CHECK_NULL_RETURN_VOID(origin_weight); PackNCHWFp16ToNC8HW8Fp16(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), 1, weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch()); } @@ -171,6 +171,7 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() { } if (RepackWeight() != RET_OK) { MS_LOG(ERROR) << "Repack weight failed."; + FreePackedInputOutput(); return RET_ERROR; } ret = ParallelLaunch(this->ms_context_, ConvDwSWFp16Run, this, conv_param_->thread_num_); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc index 32e4e2e948a..aca4f5a2d7a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc @@ -33,7 +33,7 @@ void ConvolutionFP16CPUKernel::PackWeight() { int out_channel = filter_tensor->Batch(); int kernel_plane = filter_tensor->Height() * filter_tensor->Width(); void *weight_origin = (op_parameter_->is_train_session_) ? filter_tensor->data() : origin_weight_; - MS_ASSERT(weight_origin != nullptr); + CHECK_NULL_RETURN_VOID(weight_origin); RowMajor2Col8MajorFp16(weight_origin, reinterpret_cast(packed_weight_), out_channel, in_channel * kernel_plane, false); } @@ -178,6 +178,7 @@ int ConvolutionFP16CPUKernel::Run() { } if (RepackWeight() != RET_OK) { MS_LOG(ERROR) << "Repack weight failed."; + FreeTmpBuffer(); return RET_ERROR; } ret = ParallelLaunch(this->ms_context_, ConvolutionFp16Impl, this, thread_count_); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc index 70c75e808d9..91728d2bcc8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc @@ -242,6 +242,7 @@ int ConvolutionWinogradFP16CPUKernel::Run() { } if (RepackWeight() != RET_OK) { MS_LOG(ERROR) << "Repack weight failed."; + FreeTmpBuffer(); return RET_ERROR; } ret = ParallelLaunch(this->ms_context_, ConvolutionWinogradFp16Impl, this, thread_count_); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc index 7c3f0e879fd..9edf2ce18c0 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc @@ -85,6 +85,8 @@ int DeconvolutionDepthwiseFp16CPUKernel::MallocWeightBiasData() { bias_data_ = malloc(C8NUM * OC8 * sizeof(float16_t)); if (bias_data_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; + free(packed_weight_); + packed_weight_ = nullptr; return RET_ERROR; } memset(bias_data_, 0, C8NUM * OC8 * sizeof(float16_t)); @@ -95,7 +97,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::MallocWeightBiasData() { void DeconvolutionDepthwiseFp16CPUKernel::PackWeight() { auto weight_tensor = in_tensors_.at(kWeightIndex); void *origin_weight = (op_parameter_->is_train_session_) ? weight_tensor->data() : origin_weight_; - MS_ASSERT(origin_weight != nullptr); + NNACL_CHECK_NULL_RETURN_VOID(origin_weight); PackNCHWFp16ToNC8HW8Fp16(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), 1, weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch()); } @@ -171,6 +173,13 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { return RET_ERROR; } + auto input_tensor = in_tensors_.at(kInputIndex); + auto output_tensor = out_tensors_.at(kOutputIndex); + auto *input_ptr = reinterpret_cast(input_tensor->data()); + auto *output_ptr = reinterpret_cast(output_tensor->data()); + CHECK_NULL_RETURN(input_ptr); + CHECK_NULL_RETURN(output_ptr); + auto ret = InitPackedInputOutput(); if (ret != 0) { MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitPackedInputOutput failed."; @@ -179,16 +188,10 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { } if (RepackWeight() != RET_OK) { MS_LOG(ERROR) << "Repack weight failed."; + FreePackedInputOutput(); return RET_ERROR; } - auto input_tensor = in_tensors_.at(kInputIndex); - auto output_tensor = out_tensors_.at(kOutputIndex); - auto *input_ptr = reinterpret_cast(input_tensor->data()); - auto *output_ptr = reinterpret_cast(output_tensor->data()); - CHECK_NULL_RETURN(input_ptr); - CHECK_NULL_RETURN(output_ptr); - if (need_align_) { PackNHWCToNHWC8Fp16(input_ptr, packed_input_, conv_param_->input_batch_, conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc index 2340b3819f3..032bb4c3dd8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc @@ -60,7 +60,7 @@ void DeConvolutionFp16CPUKernel::PackWeight() { auto kernel_h = weight_tensor->Height(); auto kernel_w = weight_tensor->Width(); void *origin_weight = (op_parameter_->is_train_session_) ? weight_tensor->data() : origin_weight_; - MS_ASSERT(origin_weight != nullptr); + CHECK_NULL_RETURN_VOID(origin_weight); PackNHWCFp16ToC8HWN8Fp16(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), input_channel, kernel_w * kernel_h, output_channel); } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/exp_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/exp_fp16.cc index 0367a377ec3..5417fb315cd 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/exp_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/exp_fp16.cc @@ -24,6 +24,8 @@ using mindspore::schema::PrimitiveType_ExpFusion; namespace mindspore::kernel { int ExpFp16CPUKernel::DoExcute(int task_id) { + CHECK_NULL_RETURN(input_addr_); + CHECK_NULL_RETURN(output_addr_); ExpFusionFp16(reinterpret_cast(input_addr_), reinterpret_cast(output_addr_), param_, task_id); return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.cc index 360033f1316..97bdde24e36 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.cc @@ -29,34 +29,15 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_Gather; namespace mindspore::kernel { -namespace { -constexpr int kSecondInput = 2; -} -GatherFp16CPUKernel::~GatherFp16CPUKernel() { - if (input_data_) { - ms_context_->allocator->Free(input_data_); - input_data_ = nullptr; - } -} - int GatherFp16CPUKernel::Prepare() { CHECK_LESS_RETURN(in_tensors_.size(), 3); CHECK_LESS_RETURN(out_tensors_.size(), 1); - auto input_tensor = in_tensors_.at(0); - CHECK_NULL_RETURN(input_tensor); - if (input_tensor->data_type() == kNumberTypeFloat32 && input_tensor->data() != nullptr) { - const_input_ = true; - input_data_ = - reinterpret_cast(ms_context_->allocator->Malloc(input_tensor->ElementsNum() * sizeof(float16_t))); - if (input_data_ == nullptr) { - MS_LOG(ERROR) << "Malloc failed"; - return RET_ERROR; - } - Float32ToFloat16(reinterpret_cast(input_tensor->data()), input_data_, input_tensor->ElementsNum()); - } - CHECK_NULL_RETURN(in_tensors_.at(kSecondInput)->data()); - (reinterpret_cast(op_parameter_))->axis_ = - *(reinterpret_cast(in_tensors_.at(kSecondInput)->data())); + CHECK_NULL_RETURN(in_tensors_[FIRST_INPUT]); + CHECK_NULL_RETURN(in_tensors_[SECOND_INPUT]); + CHECK_NULL_RETURN(in_tensors_[THIRD_INPUT]); + CHECK_NULL_RETURN(out_tensors_[kOutputIndex]); + CHECK_NULL_RETURN(in_tensors_[THIRD_INPUT]->data()); + (reinterpret_cast(op_parameter_))->axis_ = *(static_cast(in_tensors_[THIRD_INPUT]->data())); if (!InferShapeDone()) { return RET_OK; } @@ -89,9 +70,7 @@ int GatherFp16CPUKernel::DoGather(int task_id) { } auto thread_stride = stride * task_id; int8_t *int8_in = nullptr; - if (input_tensor->data_type() == kNumberTypeFloat32) { - int8_in = reinterpret_cast(input_data_); - } else if (input_tensor->data_type() == kNumberTypeFloat16) { + if (input_tensor->data_type() == kNumberTypeFloat16) { int8_in = reinterpret_cast(input_tensor->data()); } else { MS_LOG(ERROR) << "input data type error"; @@ -121,10 +100,6 @@ void GatherFp16CPUKernel::FreeIndicesData() { ms_context_->allocator->Free(indices_data_); indices_data_ = nullptr; } - if (!const_input_ && input_data_) { - ms_context_->allocator->Free(input_data_); - input_data_ = nullptr; - } } int GatherFp16CPUKernel::Run() { @@ -136,20 +111,6 @@ int GatherFp16CPUKernel::Run() { MS_LOG(ERROR) << "AssignIndicesData failed, error_code[" << ret << "]"; return ret; } - if (!const_input_) { - auto input_tensor = in_tensors_.at(0); - CHECK_NULL_RETURN(input_tensor->data()); - if (input_tensor->data_type() == kNumberTypeFloat32) { - input_data_ = - reinterpret_cast(ms_context_->allocator->Malloc(input_tensor->ElementsNum() * sizeof(float16_t))); - if (input_data_ == nullptr) { - MS_LOG(ERROR) << "Malloc data failed"; - FreeIndicesData(); - return RET_ERROR; - } - Float32ToFloat16(reinterpret_cast(input_tensor->data()), input_data_, input_tensor->ElementsNum()); - } - } ret = ParallelLaunch(this->ms_context_, GatherRunFp16, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "Gather function error error_code[" << ret << "]"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.h index e543bf6ee70..135afd68fc6 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.h @@ -30,7 +30,7 @@ class GatherFp16CPUKernel : public InnerKernel { GatherFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx) : InnerKernel(parameter, inputs, outputs, ctx) {} - ~GatherFp16CPUKernel() override; + ~GatherFp16CPUKernel() = default; int Prepare() override; int ReSize() override; @@ -41,8 +41,6 @@ class GatherFp16CPUKernel : public InnerKernel { int *indices_data_ = nullptr; int AssignIndicesData(bool isIndicesInt32, int indices_num, const lite::Tensor *indices_tensor); void FreeIndicesData(); - float16_t *input_data_ = nullptr; - bool const_input_ = false; bool is_indices_int32_ = false; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/group_convolution_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/group_convolution_fp16.cc index ee5abdd5e95..94f62882cb4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/group_convolution_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/group_convolution_fp16.cc @@ -146,8 +146,13 @@ int GroupConvolutionFP16CPUKernel::Prepare() { MS_LOG(ERROR) << "GetSingleConv for fp16 group conv failed."; return lite::RET_ERROR; } - group_convs_.emplace_back(new (std::nothrow) ConvolutionDelegateFP16CPUKernel( - reinterpret_cast(new_conv_param), new_inputs, new_outputs, ctx_)); + auto kernel = new (std::nothrow) + ConvolutionDelegateFP16CPUKernel(reinterpret_cast(new_conv_param), new_inputs, new_outputs, ctx_); + if (kernel == nullptr) { + MS_LOG(ERROR) << "Create kernel failed."; + return lite::RET_ERROR; + } + group_convs_.push_back(kernel); } return GroupConvolutionBaseCPUKernel::Prepare(); } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/gru_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/gru_fp16.cc index cbde9ccdf83..acf77ca2c46 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/gru_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/gru_fp16.cc @@ -21,6 +21,7 @@ #include "nnacl/fp16/gru_fp16.h" #include "nnacl/fp16/cast_fp16.h" #include "nnacl/fp16/lstm_fp16.h" +#include "nnacl/errorcode.h" using mindspore::kernel::KERNEL_ARCH; using mindspore::lite::KernelRegistrar; @@ -68,9 +69,11 @@ int GruFp16CPUKernel::InitParam() { auto weight_g = in_tensors_.at(1); MS_ASSERT(weight_g != nullptr); std::vector w_shape = weight_g->shape(); + NNACL_CHECK_ZERO_RETURN_ERR(gate_num); gru_param_->hidden_size_ = w_shape.at(1) / gate_num; - weight_batch_ = gru_param_->bidirectional_ ? 2 * gate_num : gate_num; - gru_param_->output_step_ = gru_param_->bidirectional_ ? 2 * gru_param_->batch_ * gru_param_->hidden_size_ + constexpr int twice = 2; + weight_batch_ = gru_param_->bidirectional_ ? twice * gate_num : gate_num; + gru_param_->output_step_ = gru_param_->bidirectional_ ? twice * gru_param_->batch_ * gru_param_->hidden_size_ : gru_param_->batch_ * gru_param_->hidden_size_; gru_param_->input_row_align_ = UP_ROUND(gru_param_->seq_len_ * gru_param_->batch_, C16NUM); @@ -189,8 +192,8 @@ int GruFp16CPUKernel::InitStateWeightBias() { } int GruFp16CPUKernel::Prepare() { - CHECK_LESS_RETURN(in_tensors_.size(), 5); - CHECK_LESS_RETURN(out_tensors_.size(), 2); + CHECK_LESS_RETURN(in_tensors_.size(), C5NUM); + CHECK_LESS_RETURN(out_tensors_.size(), C2NUM); if (!InferShapeDone()) { return RET_OK; } @@ -270,7 +273,7 @@ int GruFp16CPUKernel::Run() { CHECK_NULL_RETURN(hidden_state->data()); memcpy(output_hidden_state->data(), hidden_state->data(), hidden_state->ElementsNum() * sizeof(float16_t)); int check_seq_len = gru_param_->seq_len_; - if (in_tensors_.size() == 6) { + if (in_tensors_.size() == C6NUM) { MS_ASSERT(in_tensors_.at(5) != nullptr); int *seq_len = reinterpret_cast(in_tensors_.at(5)->data()); MS_ASSERT(seq_len != nullptr); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/lstm_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/lstm_fp16.cc index 626dbb41dc5..eed4fc0cc84 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/lstm_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/lstm_fp16.cc @@ -22,6 +22,7 @@ #include "include/errorcode.h" #include "nnacl/fp16/lstm_fp16.h" #include "nnacl/fp16/cast_fp16.h" +#include "nnacl/errorcode.h" using mindspore::kernel::KERNEL_ARCH; using mindspore::lite::KernelRegistrar; @@ -73,9 +74,11 @@ int LstmFp16CPUKernel::InitParam() { auto weight_i = in_tensors_.at(1); std::vector w_shape = weight_i->shape(); + NNACL_CHECK_ZERO_RETURN_ERR(gate_num); lstm_param_->hidden_size_ = w_shape.at(1) / gate_num; - lstm_param_->output_step_ = lstm_param_->bidirectional_ ? 2 * lstm_param_->batch_ * lstm_param_->hidden_size_ + constexpr int twice = 2; + lstm_param_->output_step_ = lstm_param_->bidirectional_ ? twice * lstm_param_->batch_ * lstm_param_->hidden_size_ : lstm_param_->batch_ * lstm_param_->hidden_size_; weight_batch_ = lstm_param_->bidirectional_ ? 2 * gate_num : gate_num; lstm_param_->input_row_align_ = UP_ROUND(lstm_param_->seq_len_ * lstm_param_->batch_, C16NUM); diff --git a/mindspore/lite/src/runtime/kernel/arm/string/hashtable_lookup.cc b/mindspore/lite/src/runtime/kernel/arm/string/hashtable_lookup.cc index 0e67fdfd479..2261151a2b7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/string/hashtable_lookup.cc +++ b/mindspore/lite/src/runtime/kernel/arm/string/hashtable_lookup.cc @@ -26,6 +26,8 @@ using mindspore::schema::PrimitiveType_HashtableLookup; namespace mindspore::kernel { int HashtableLookupCPUKernel::Prepare() { + CHECK_LESS_RETURN(in_tensors_.size(), C3NUM); + CHECK_LESS_RETURN(out_tensors_.size(), C2NUM); if (!InferShapeDone()) { return RET_OK; } @@ -39,11 +41,16 @@ static int CmpKeyFunc(const void *lhs, const void *rhs) { } int HashtableLookupCPUKernel::Run() { - auto input_tensor = in_tensors_.at(0); - auto keys_tensor = in_tensors_.at(1); - auto values_tensor = in_tensors_.at(2); - auto output_tensor = out_tensors_.at(0); - auto hits_tensor = out_tensors_.at(1); + auto input_tensor = in_tensors_[FIRST_INPUT]; + auto keys_tensor = in_tensors_[SECOND_INPUT]; + auto values_tensor = in_tensors_[THIRD_INPUT]; + auto output_tensor = out_tensors_[FIRST_INPUT]; + auto hits_tensor = out_tensors_[SECOND_INPUT]; + CHECK_NULL_RETURN(input_tensor); + CHECK_NULL_RETURN(keys_tensor); + CHECK_NULL_RETURN(values_tensor); + CHECK_NULL_RETURN(output_tensor); + CHECK_NULL_RETURN(hits_tensor); int rows = GetStringCount(values_tensor); if (rows < 0) { diff --git a/mindspore/lite/src/runtime/kernel/arm/string/predict.cc b/mindspore/lite/src/runtime/kernel/arm/string/predict.cc index da97f13f46d..ba248ad9b5d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/string/predict.cc +++ b/mindspore/lite/src/runtime/kernel/arm/string/predict.cc @@ -31,6 +31,8 @@ constexpr int LABEL_INDEX = 2; constexpr int WEIGHT_INDEX = 3; } // namespace int PredictCPUKernel::Prepare() { + CHECK_LESS_RETURN(in_tensors_.size(), C4NUM); + CHECK_LESS_RETURN(out_tensors_.size(), C2NUM); if (!InferShapeDone()) { return RET_OK; } @@ -41,16 +43,19 @@ int PredictCPUKernel::ReSize() { return RET_OK; } std::vector PredictCPUKernel::GetLabelInfo() { std::vector label_info_vec; - auto input_tensor = in_tensors_.at(INPUT_INDEX); - auto keys_tensor = in_tensors_.at(KEY_INDEX); - auto labels_tensor = in_tensors_.at(LABEL_INDEX); - auto weights_tensor = in_tensors_.at(WEIGHT_INDEX); + auto input_tensor = in_tensors_[INPUT_INDEX]; + auto keys_tensor = in_tensors_[KEY_INDEX]; + auto labels_tensor = in_tensors_[LABEL_INDEX]; + auto weights_tensor = in_tensors_[WEIGHT_INDEX]; + if (input_tensor == nullptr || keys_tensor == nullptr || labels_tensor == nullptr || weights_tensor == nullptr) { + return label_info_vec; + } - int32_t *input = reinterpret_cast(input_tensor->MutableData()); - int32_t *key_begin = reinterpret_cast(keys_tensor->MutableData()); + int32_t *input = reinterpret_cast(input_tensor->data()); + int32_t *key_begin = reinterpret_cast(keys_tensor->data()); int32_t *key_end = key_begin + keys_tensor->ElementsNum(); - int32_t *labels = reinterpret_cast(labels_tensor->MutableData()); - float *weights = reinterpret_cast(weights_tensor->MutableData()); + int32_t *labels = reinterpret_cast(labels_tensor->data()); + float *weights = reinterpret_cast(weights_tensor->data()); int32_t input_elements_num = input_tensor->ElementsNum(); int32_t items = labels_tensor->shape().at(1); @@ -82,10 +87,12 @@ int PredictCPUKernel::Run() { std::vector label_info_vec = GetLabelInfo(); std::sort(label_info_vec.begin(), label_info_vec.end(), LabelInfoCmp); - auto output_label_tensor = out_tensors_.at(0); - auto output_weight_tensor = out_tensors_.at(1); - auto output_label = reinterpret_cast(output_label_tensor->MutableData()); - auto output_weight = reinterpret_cast(output_weight_tensor->MutableData()); + auto output_label_tensor = out_tensors_[FIRST_INPUT]; + auto output_weight_tensor = out_tensors_[SECOND_INPUT]; + CHECK_NULL_RETURN(output_label_tensor); + CHECK_NULL_RETURN(output_weight_tensor); + auto output_label = reinterpret_cast(output_label_tensor->data()); + auto output_weight = reinterpret_cast(output_weight_tensor->data()); auto param = reinterpret_cast(op_parameter_); for (int i = 0; i < output_label_tensor->ElementsNum(); i++) { if (static_cast(i) >= label_info_vec.size() || label_info_vec[i].weight < param->weight_threshold) {