!19483 [MS][LITE][Develop] optimize common convolution kernel

Merge pull request !19483 from sunsuodong/optimize_kernel_b
This commit is contained in:
i-robot 2021-07-06 12:58:33 +00:00 committed by Gitee
commit ef08a78112
2 changed files with 0 additions and 2 deletions

View File

@@ -42,7 +42,6 @@ void ConvFp16(float16_t *input_data, float16_t *packed_input, float16_t *packed_
float16_t *col_major_gemm_input = col_major_input + task_id * deep * tile_n;
size_t packed_input_size = deep * tile_n * sizeof(float16_t);
memset(gemm_input, 0, packed_input_size);
memset(col_major_gemm_input, 0, packed_input_size);
Im2ColPackUnitFp16(input_data + in_batch_offset, conv_param, gemm_input, real_cal_num, start_index);
int out_offset = thread_id * tile_n * out_channel + out_batch_offset;

View File

@@ -75,7 +75,6 @@ void ConvFp32(const float *input_data, float *packed_input, const float *packed_
float *col_major_gemm_input = col_major_input + task_id * deep * cal_num;
size_t packed_input_size = deep * cal_num * sizeof(float);
memset(gemm_input, 0, packed_input_size);
memset(col_major_gemm_input, 0, packed_input_size);
Im2ColPackUnitFp32(input_data + in_batch_offset, conv_param, gemm_input, real_cal_num, start_index);
int out_offset = thread_id * cal_num * out_channel + out_batch_offset;