From 3fa86aa040883ad4310906f0725466a723a8b3f9 Mon Sep 17 00:00:00 2001
From: ling <lingqiaomin@huawei.com>
Date: Tue, 19 Apr 2022 15:07:16 +0800
Subject: [PATCH] [MSLITE] vs build bug

---
 .../experimental/conv_fp32_nchwx_avx512.c     | 19 +++++++++----------
 .../plugin/device/cpu/kernel/nnacl/op_base.h  |  1 -
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/experimental/conv_fp32_nchwx_avx512.c b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/experimental/conv_fp32_nchwx_avx512.c
index a2c91372513..cfe5640d306 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/experimental/conv_fp32_nchwx_avx512.c
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/experimental/conv_fp32_nchwx_avx512.c
@@ -21,7 +21,7 @@
 
 static const int UNIT_LEN = 512;  // register length
 
-static const int UNIT_NR = 512 / sizeof(float);
+#define UNIT_NR 128  // (512 / sizeof(float))
 static const int TILE_ROW;
 int conv2d_prepare_fp32_nchwx_avx512(struct KernelBase *self) {
   KConv2d *conv = (KConv2d *)self;
@@ -36,7 +36,7 @@ int conv2d_prepare_fp32_nchwx_avx512(struct KernelBase *self) {
   int kw = weight->shape_[kNCHW_W];
 
   size_t lineLen = cin * kw * kh;
-  conv->packedWeight = malloc(lineLen * UP_ROUND_DIV(cout, UNIT_NR) * sizeof(float));  // allocate packed weight buf
+  conv->packedWeight = malloc(lineLen * UP_DIV(cout, UNIT_NR) * sizeof(float));  // allocate packed weight buf
 
   float *rpos[16] = {0};
   float *data = (float *)weight->data_;
@@ -154,12 +154,10 @@ int conv2d_compute_fp32_nchwx_avx512(struct KernelBase *self) {
 #ifdef VECTORIZE_OPTIMIZE
 // use AVX2 instruction to optimize gemm
 #else
-  int InputRegNr = 16;
-  int WeightRegNr = 16;
-  int OutputRegNr = 16;
-  float intputReg[InputRegNr][UNIT_NR];
-  float weightReg[WeightRegNr][UNIT_NR];
-  float outputReg[OutputRegNr][UNIT_NR];
+
+  float intputReg[C16NUM][UNIT_NR];
+  float weightReg[C16NUM][UNIT_NR];
+  float outputReg[C16NUM][UNIT_NR];
   memset(outputReg, 0, sizeof(outputReg));
   int lpos = 0;
   int rpos = 0;
@@ -192,7 +190,7 @@ int conv2d_compute_fp32_nchwx_avx512(struct KernelBase *self) {
         tilePos += n;
       }
       // flush outputReg to output tensor memory
-      memcpy(out->data_ + outOffset, outputReg, m * k * sizeof(float));
+      memcpy((float *)out->data_ + outOffset, outputReg, m * k * sizeof(float));
       outOffset += m * k * sizeof(float);
       y += m;
     }
@@ -201,6 +199,7 @@ int conv2d_compute_fp32_nchwx_avx512(struct KernelBase *self) {
 #endif
   return 0;
 }
+
 int conv2d_infershape_fp32_nchwx_avx512(struct KernelBase *self) {
   return Conv2dInferShape((const struct TensorC *const *)self->in, self->insize, self->out, self->outsize, self->param);
 }
@@ -220,7 +219,7 @@ int conv2d_resize_fp32_nchwx_avx512(struct KernelBase *self, TensorC *inputs[],
 
   self->inferShape(self);
   out->format_ = Format_NC16HW16;
-  out->shape_[1] = UP_ROUND_DIV(out->shape_[1], C16NUM);
+  out->shape_[1] = UP_DIV(out->shape_[1], C16NUM);
   out->shape_[4] = 16;
   out->shape_size_ = 5;
 
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/op_base.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/op_base.h
index d81cb296d55..78f679d0027 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/op_base.h
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/op_base.h
@@ -57,7 +57,6 @@
 
 #define UP_DIV(x, y) (((x) + (y) - (1)) / (y))
 #define UP_ROUND(x, y) (((x) + (y) - (1)) / (y) * (y))
-#define UP_ROUND_DIV(x, y) (x % y == 0 ? (x / y) : (x / y) + 1)
 #define DOWN_DIV(x, y) ((x) / (y))
 #define DOWN_ROUND(x, y) ((x) / (y) * (y))