From 3fa86aa040883ad4310906f0725466a723a8b3f9 Mon Sep 17 00:00:00 2001 From: ling Date: Tue, 19 Apr 2022 15:07:16 +0800 Subject: [PATCH] [MSLITE] vs build bug --- .../experimental/conv_fp32_nchwx_avx512.c | 19 +++++++++---------- .../plugin/device/cpu/kernel/nnacl/op_base.h | 1 - 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/experimental/conv_fp32_nchwx_avx512.c b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/experimental/conv_fp32_nchwx_avx512.c index a2c91372513..cfe5640d306 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/experimental/conv_fp32_nchwx_avx512.c +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/experimental/conv_fp32_nchwx_avx512.c @@ -21,7 +21,7 @@ static const int UNIT_LEN = 512; // register length -static const int UNIT_NR = 512 / sizeof(float); +#define UNIT_NR 128 // (512 / sizeof(float)) static const int TILE_ROW; int conv2d_prepare_fp32_nchwx_avx512(struct KernelBase *self) { KConv2d *conv = (KConv2d *)self; @@ -36,7 +36,7 @@ int conv2d_prepare_fp32_nchwx_avx512(struct KernelBase *self) { int kw = weight->shape_[kNCHW_W]; size_t lineLen = cin * kw * kh; - conv->packedWeight = malloc(lineLen * UP_ROUND_DIV(cout, UNIT_NR) * sizeof(float)); // allocate packed weight buf + conv->packedWeight = malloc(lineLen * UP_DIV(cout, UNIT_NR) * sizeof(float)); // allocate packed weight buf float *rpos[16] = {0}; float *data = (float *)weight->data_; @@ -154,12 +154,10 @@ int conv2d_compute_fp32_nchwx_avx512(struct KernelBase *self) { #ifdef VECTORIZE_OPTIMIZE // use AVX2 instruction to optimize gemm #else - int InputRegNr = 16; - int WeightRegNr = 16; - int OutputRegNr = 16; - float intputReg[InputRegNr][UNIT_NR]; - float weightReg[WeightRegNr][UNIT_NR]; - float outputReg[OutputRegNr][UNIT_NR]; + + float intputReg[C16NUM][UNIT_NR]; + float weightReg[C16NUM][UNIT_NR]; + float outputReg[C16NUM][UNIT_NR]; memset(outputReg, 0, sizeof(outputReg)); int lpos = 0; int rpos = 0; @@ -192,7 +190,7 @@ int conv2d_compute_fp32_nchwx_avx512(struct KernelBase *self) { tilePos += n; } // flush outputReg to output tensor memory - memcpy(out->data_ + outOffset, outputReg, m * k * sizeof(float)); + memcpy((float *)out->data_ + outOffset, outputReg, m * k * sizeof(float)); outOffset += m * k * sizeof(float); y += m; } @@ -201,6 +199,7 @@ int conv2d_compute_fp32_nchwx_avx512(struct KernelBase *self) { #endif return 0; } + int conv2d_infershape_fp32_nchwx_avx512(struct KernelBase *self) { return Conv2dInferShape((const struct TensorC *const *)self->in, self->insize, self->out, self->outsize, self->param); } @@ -220,7 +219,7 @@ int conv2d_resize_fp32_nchwx_avx512(struct KernelBase *self, TensorC *inputs[], self->inferShape(self); out->format_ = Format_NC16HW16; - out->shape_[1] = UP_ROUND_DIV(out->shape_[1], C16NUM); + out->shape_[1] = UP_DIV(out->shape_[1], C16NUM); out->shape_[4] = 16; out->shape_size_ = 5; diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/op_base.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/op_base.h index d81cb296d55..78f679d0027 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/op_base.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/op_base.h @@ -57,7 +57,6 @@ #define UP_DIV(x, y) (((x) + (y) - (1)) / (y)) #define UP_ROUND(x, y) (((x) + (y) - (1)) / (y) * (y)) -#define UP_ROUND_DIV(x, y) (x % y == 0 ? (x / y) : (x / y) + 1) #define DOWN_DIV(x, y) ((x) / (y)) #define DOWN_ROUND(x, y) ((x) / (y) * (y))