!11127 [MSLITE] nnacl: concat cast expanddims

From: @ling_qiao_min
Reviewed-by: @zhang_xue_tong,@hangangqiang
Signed-off-by: @zhang_xue_tong
This commit is contained in:
mindspore-ci-bot 2021-01-12 09:58:53 +08:00 committed by Gitee
commit 6899c46ffd
82 changed files with 325 additions and 582 deletions

View File

@ -13,66 +13,66 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_NNACL_CAST_BASE_H_
#define MINDSPORE_LITE_NNACL_CAST_BASE_H_
#include "nnacl/fp32/cast_fp32.h"
#include "nnacl/fp32/common_func_fp32.h"
#include "nnacl/op_base.h"
#include "nnacl/nnacl_common.h"
void BoolToFloat32(const bool *input, float *output, int number) {
#ifdef __cplusplus
extern "C" {
#endif
// Casts a bool buffer to float32 (false -> 0.0f, true -> 1.0f).
// static inline: a plain C99 `inline` definition in a header emits no symbol
// of its own, which can fail to link at -O0; `static` gives each TU its copy
// (consistent with DoSqueeze elsewhere in this commit).
static inline void BoolToFloat32(const bool *input, float *output, int number) {
  for (int i = 0; i < number; ++i) {
    output[i] = (float)input[i];
  }
}
void Uint8ToFloat32(const uint8_t *input, float *output, int number) {
// Casts a uint8 buffer to float32.
// static inline so the header-local definition links in every TU.
static inline void Uint8ToFloat32(const uint8_t *input, float *output, int number) {
  for (int i = 0; i < number; ++i) {
    output[i] = (float)input[i];
  }
}
// Re-centers uint8 [0,255] onto int8 [-128,127] by subtracting the 128 offset
// (common zero-point shift between asymmetric and symmetric quantization).
// static inline: a non-inline definition in a header would produce duplicate
// symbols when included from multiple translation units.
static inline void Uint8ToInt8(const uint8_t *input, int8_t *output, int number) {
  for (int i = 0; i < number; ++i) {
    output[i] = (int8_t)(input[i] - 128);
  }
}
// Re-centers int8 [-128,127] onto uint8 [0,255] by adding the 128 offset
// (inverse of Uint8ToInt8).
// static inline: avoids duplicate symbols when this header is multiply included.
static inline void Int8ToUint8(const int8_t *input, uint8_t *output, int number) {
  for (int i = 0; i < number; ++i) {
    output[i] = (uint8_t)(input[i] + 128);
  }
}
void Int32ToFloat32(const int32_t *input, float *output, int number) {
// Casts an int32 buffer to float32.
// static inline so the header-local definition links in every TU.
static inline void Int32ToFloat32(const int32_t *input, float *output, int number) {
  for (int i = 0; i < number; ++i) {
    output[i] = (float)input[i];
  }
}
void Fp16ToFloat32(const uint16_t *input, float *output, int number) {
// Expands a buffer of raw fp16 bit patterns (stored as uint16_t) to float32
// via ShortToFloat32 (declared in nnacl/nnacl_common.h).
// static inline so the header-local definition links in every TU.
static inline void Fp16ToFloat32(const uint16_t *input, float *output, int number) {
  for (int i = 0; i < number; ++i) {
    output[i] = ShortToFloat32(input[i]);
  }
}
void Float32ToFp16(const float *input, uint16_t *output, int number) {
// Compresses a float32 buffer to raw fp16 bit patterns (stored as uint16_t)
// via Float32ToShort (declared in nnacl/nnacl_common.h).
// static inline so the header-local definition links in every TU.
static inline void Float32ToFp16(const float *input, uint16_t *output, int number) {
  for (int i = 0; i < number; ++i) {
    output[i] = Float32ToShort(input[i]);
  }
}
void Float32ToInt32(const float *input, int32_t *output, int number) {
// Casts a float32 buffer to int32 (C truncation toward zero).
// static inline so the header-local definition links in every TU.
static inline void Float32ToInt32(const float *input, int32_t *output, int number) {
  for (int i = 0; i < number; ++i) {
    output[i] = (int32_t)input[i];
  }
}
void Float32ToInt64(const float *input, int64_t *output, int number) {
// Casts a float32 buffer to int64 (C truncation toward zero).
// static inline so the header-local definition links in every TU.
static inline void Float32ToInt64(const float *input, int64_t *output, int number) {
  for (int i = 0; i < number; ++i) {
    output[i] = (int64_t)input[i];
  }
}
void Int32ToInt64(const int32_t *input, int64_t *output, int number) {
// Widens an int32 buffer to int64.
// static inline so the header-local definition links in every TU.
static inline void Int32ToInt64(const int32_t *input, int64_t *output, int number) {
  for (int i = 0; i < number; ++i) {
    output[i] = (int64_t)input[i];
  }
}
#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_LITE_NNACL_CAST_BASE_H_

View File

@ -14,17 +14,16 @@
* limitations under the License.
*/
#include "nnacl/fp32/concat_fp32.h"
#include <string.h>
#include "nnacl/base/concat_base.h"
void Concat(const void **input, int input_num, int axis, const int **inputs_output_shape, size_t shape_size,
void *output, int task_id, int thread_num) {
void Concat(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output,
int task_id, int thread_num, int data_size) {
int before_axis_size = 1;
for (int i = 0; i < axis; ++i) {
before_axis_size *= inputs_output_shape[0][i];
}
// sizeof float/int32
int after_axis_size = 4;
int after_axis_size = data_size;
for (size_t i = axis + 1; i < shape_size; ++i) {
after_axis_size *= inputs_output_shape[0][i];
}

View File

@ -14,23 +14,19 @@
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_NNACL_EXPANDDIMS_H_
#define MINDSPORE_LITE_NNACL_EXPANDDIMS_H_
#ifndef MINDSPORE_LITE_NNACL_FP32_CONCAT_BASE_H_
#define MINDSPORE_LITE_NNACL_FP32_CONCAT_BASE_H_
#include <string.h>
#include "nnacl/op_base.h"
typedef struct ExpandDimsParameter {
// Primitive parameter
OpParameter op_parameter_;
int dim_;
} ExpandDimsParameter;
#ifdef __cplusplus
extern "C" {
#endif
int ExpandDims(const void *input_ptr, void *output_ptr, size_t data_size);
void Concat(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output,
int task_id, int thread_num, int data_size);
#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_LITE_NNACL_EXPANDDIMS_H_
#endif // MINDSPORE_LITE_NNACL_FP32_CONCAT_BASE_H_

View File

@ -14,18 +14,23 @@
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_NNACL_FP32_CONCAT_H_
#define MINDSPORE_LITE_NNACL_FP32_CONCAT_H_
#ifndef MINDSPORE_LITE_NNACL_EXPAND_DIMS_BASE_H_
#define MINDSPORE_LITE_NNACL_EXPAND_DIMS_BASE_H_
#include "nnacl/op_base.h"
#include "nnacl/errorcode.h"
#ifdef __cplusplus
extern "C" {
#endif
void Concat(const void **input, int input_num, int axis, const int **inputs_output_shape, size_t shape_size,
void *output, int task_id, int thread_num);
// ExpandDims only changes tensor metadata, so the kernel is a flat copy of
// data_size bytes from input to output.
// Returns NNACL_ERR on NULL pointers (consistent with DoSqueeze), NNACL_OK
// otherwise. static inline so the header-local definition links in every TU.
static inline int ExpandDims(const void *input_ptr, void *output_ptr, size_t data_size) {
  if (input_ptr == NULL || output_ptr == NULL) {
    return NNACL_ERR;
  }
  memcpy(output_ptr, input_ptr, data_size);
  return NNACL_OK;
}
#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_LITE_NNACL_FP32_CONCAT_H_
#endif // MINDSPORE_LITE_NNACL_EXPAND_DIMS_BASE_H_

View File

@ -14,9 +14,9 @@
* limitations under the License.
*/
#include "nnacl/fp32/fill_fp32.h"
#include "nnacl/base/fill_base.h"
int Fill(float *output, int size, float data) {
int FillFp32(float *output, int size, float data) {
for (int i = 0; i < size; ++i) {
output[i] = data;
}

View File

@ -13,17 +13,20 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_NNACL_FILL_BASE_H_
#define MINDSPORE_LITE_NNACL_FILL_BASE_H_
#ifndef MINDSPORE_LITE_NNACL_RESHAHPE_H_
#define MINDSPORE_LITE_NNACL_RESHAHPE_H_
#include "nnacl/op_base.h"
#include "nnacl/errorcode.h"
#include "nnacl/fill_parameter.h"
#ifdef __cplusplus
extern "C" {
#endif
void Reshape(const void *input_ptr, void *output_ptr, size_t data_size);
int FillFp32(float *output, int size, float data);
int FillInt32(int *output, int size, int data);
#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_LITE_NNACL_RESHAHPE_H_
#endif // MINDSPORE_LITE_NNACL_FILL_BASE_H_

View File

@ -14,20 +14,9 @@
* limitations under the License.
*/
#include "nnacl/fp32/gather_fp32.h"
#include <string.h>
#include "nnacl/errorcode.h"
inline int Stride(const int *shape, int rank, int index) {
int i, stride = 1;
for (i = index + 1; i < rank; ++i) {
stride *= shape[i];
}
return stride;
}
int Gather(const float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size,
float *output) {
#include "nnacl/base/gather_base.h"
int GatherFp32(const float *input, int outer_size, int inner_size, int limit, const int *indices,
int indices_element_size, float *output) {
for (int m = 0; m < outer_size; ++m) {
const float *inputm = input + inner_size * m * limit;
float *outputm = output + inner_size * m * indices_element_size;

View File

@ -14,20 +14,22 @@
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_NNACL_GATHER_H_
#define MINDSPORE_LITE_NNACL_GATHER_H_
#ifndef MINDSPORE_LITE_NNACL_GATHER_BASE_H_
#define MINDSPORE_LITE_NNACL_GATHER_BASE_H_
#include <string.h>
#include "nnacl/op_base.h"
#include "nnacl/errorcode.h"
#ifdef __cplusplus
extern "C" {
#endif
int Gather(const float *input, int outer_size, int inner_size, int limit, const int *indices, int indices_element_size,
float *output);
int GatherFp32(const float *input, int outer_size, int inner_size, int limit, const int *indices,
int indices_element_size, float *output);
int GatherInt32(const int32_t *input, int outer_size, int inner_size, int limit, const int *indices,
int indices_element_size, int32_t *output);
#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_LITE_NNACL_GATHER_H_
#endif // MINDSPORE_LITE_NNACL_GATHER_BASE_H_

View File

@ -14,18 +14,22 @@
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_NNACL_FP16_CONCAT_FP16_H_
#define MINDSPORE_LITE_NNACL_FP16_CONCAT_FP16_H_
#ifndef MINDSPORE_LITE_NNACL_RESHAHPE_BASE_H_
#define MINDSPORE_LITE_NNACL_RESHAHPE_BASE_H_
#include <string.h>
#include "nnacl/op_base.h"
#ifdef __cplusplus
extern "C" {
#endif
void ConcatFp16(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output,
int dtype_len);
// Reshape only changes tensor metadata, so the kernel is a flat copy of
// data_size bytes from input to output.
// static inline so the header-local definition links in every TU.
static inline void Reshape(const void *input_ptr, void *output_ptr, size_t data_size) {
  memcpy(output_ptr, input_ptr, data_size);
}
#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_LITE_NNACL_FP16_CONCAT_FP16_H_
#endif // MINDSPORE_LITE_NNACL_RESHAHPE_BASE_H_

View File

@ -23,7 +23,7 @@
extern "C" {
#endif
inline int DoSqueeze(const void *input_ptr, void *output_ptr, size_t data_size) {
static inline int DoSqueeze(const void *input_ptr, void *output_ptr, size_t data_size) {
if (input_ptr == NULL || output_ptr == NULL) {
return NNACL_ERR;
}

View File

@ -0,0 +1,36 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_PARAMETER_H_
#define MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_PARAMETER_H_
#include "nnacl/op_base.h"
#define BROADCAST_TO_SHAPE_MAX_SIZE 4
// Op parameter for BroadcastTo: the target shape to broadcast the input to.
typedef struct BroadcastToParameter {
  OpParameter op_parameter_;                // common nnacl op parameter header
  int shape_[BROADCAST_TO_SHAPE_MAX_SIZE];  // target output shape
  size_t shape_size_;                       // number of valid entries in shape_
} BroadcastToParameter;
// Resolved input/output shapes used when executing a broadcast.
typedef struct BroadcastShapeInfo {
  int input_shape_[BROADCAST_TO_SHAPE_MAX_SIZE];   // source tensor shape
  int input_shape_size_;                           // valid dims in input_shape_
  int output_shape_[BROADCAST_TO_SHAPE_MAX_SIZE];  // broadcast result shape
  int output_shape_size_;                          // valid dims in output_shape_
} BroadcastShapeInfo;
#endif // MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_PARAMETER_H_

View File

@ -13,8 +13,15 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_NNACL_CAST_PARAMETER_H_
#define MINDSPORE_LITE_NNACL_CAST_PARAMETER_H_
#include "nnacl/reshape.h"
#include <string.h>
#include "nnacl/op_base.h"
void Reshape(const void *input_ptr, void *output_ptr, size_t data_size) { memcpy(output_ptr, input_ptr, data_size); }
// Op parameter for Cast: source and destination data-type identifiers.
typedef struct CastParameter {
  OpParameter op_parameter_;  // common nnacl op parameter header
  int dst_type_;              // destination data type id
  int src_type_;              // source data type id
} CastParameter;
#endif // MINDSPORE_LITE_NNACL_CAST_PARAMETER_H_

View File

@ -13,14 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_NNACL_FILL_H_
#define MINDSPORE_LITE_NNACL_FILL_H_
#ifndef MINDSPORE_LITE_NNACL_FILL_PARAMETER_H_
#define MINDSPORE_LITE_NNACL_FILL_PARAMETER_H_
#ifdef ENABLE_NEON
#include <arm_neon.h>
#endif
#include "nnacl/op_base.h"
#include "nnacl/errorcode.h"
#define FILL_DIMS_MAX_SIZE 4
@ -31,14 +27,4 @@ typedef struct FillParameter {
int num_dims_;
} FillParameter;
#ifdef __cplusplus
extern "C" {
#endif
int Fill(float *output, int size, float data);
int FillInt32(int *output, int size, int data);
#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_LITE_NNACL_FILL_H_
#endif // MINDSPORE_LITE_NNACL_FILL_PARAMETER_H_

View File

@ -13,7 +13,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <assert.h>
#include <math.h>
#include "nnacl/fp16/arithmetic_self_fp16.h"

View File

@ -1,54 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "nnacl/fp16/cast_fp16.h"
void BoolToFloat16(const bool *input, float16_t *output, int number) {
for (int i = 0; i < number; ++i) {
output[i] = (float16_t)input[i];
}
}
void Uint8ToFloat16(const uint8_t *input, float16_t *output, int number) {
for (int i = 0; i < number; ++i) {
output[i] = (float16_t)input[i];
}
}
void Float16ToInt32(const float16_t *input, int32_t *output, int number) {
for (int i = 0; i < number; ++i) {
output[i] = (int32_t)input[i];
}
}
void Float16ToInt64(const float16_t *input, int64_t *output, int number) {
for (int i = 0; i < number; ++i) {
output[i] = (int64_t)input[i];
}
}
#ifndef ENABLE_ARM64
void Float32ToFloat16(const float *input, float16_t *output, int number) {
for (int i = 0; i < number; ++i) {
output[i] = (float16_t)input[i];
}
}
void Float16ToFloat32(const float16_t *input, float *output, int number) {
for (int i = 0; i < number; ++i) {
output[i] = (float)input[i];
}
}
#endif

View File

@ -18,16 +18,47 @@
#include <arm_neon.h>
#include "nnacl/op_base.h"
#include "nnacl/fp32/cast_fp32.h"
#ifdef __cplusplus
extern "C" {
#endif
void BoolToFloat16(const bool *input, float16_t *output, int number);
void Uint8ToFloat16(const uint8_t *input, float16_t *output, int number);
void Float16ToInt32(const float16_t *input, int32_t *output, int number);
void Float16ToInt64(const float16_t *input, int64_t *output, int number);
void Float32ToFloat16(const float *input, float16_t *output, int number);
void Float16ToFloat32(const float16_t *input, float *output, int number);
// Casts a bool buffer to float16 (false -> 0, true -> 1).
// static inline: a plain C99 `inline` header definition emits no symbol and
// can fail to link at -O0; `static` gives each TU its own copy.
static inline void BoolToFloat16(const bool *input, float16_t *output, int number) {
  for (int i = 0; i < number; ++i) {
    output[i] = (float16_t)input[i];
  }
}
// Casts a uint8 buffer to float16.
// static inline so the header-local definition links in every TU.
static inline void Uint8ToFloat16(const uint8_t *input, float16_t *output, int number) {
  for (int i = 0; i < number; ++i) {
    output[i] = (float16_t)input[i];
  }
}
// Casts a float16 buffer to int32 (C truncation toward zero).
// static inline so the header-local definition links in every TU.
static inline void Float16ToInt32(const float16_t *input, int32_t *output, int number) {
  for (int i = 0; i < number; ++i) {
    output[i] = (int32_t)input[i];
  }
}
// Casts a float16 buffer to int64 (C truncation toward zero).
// static inline so the header-local definition links in every TU.
static inline void Float16ToInt64(const float16_t *input, int64_t *output, int number) {
  for (int i = 0; i < number; ++i) {
    output[i] = (int64_t)input[i];
  }
}
// Narrows a float32 buffer to float16 using the compiler's conversion.
// static inline so the header-local definition links in every TU.
static inline void Float32ToFloat16(const float *input, float16_t *output, int number) {
  for (int i = 0; i < number; ++i) {
    output[i] = (float16_t)input[i];
  }
}
// Widens a float16 buffer to float32 (always exact).
// static inline so the header-local definition links in every TU.
static inline void Float16ToFloat32(const float16_t *input, float *output, int number) {
  for (int i = 0; i < number; ++i) {
    output[i] = (float)input[i];
  }
}
#ifdef __cplusplus
}
#endif

View File

@ -1,44 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "nnacl/fp16/concat_fp16.h"
#include <string.h>
void ConcatFp16(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output,
int dtype_len) {
int before_axis_size = 1;
for (int i = 0; i < axis; ++i) {
before_axis_size *= inputs_output_shape[0][i];
}
// sizeof float16,int32
int after_axis_size = dtype_len;
for (size_t i = axis + 1; i < shape_size; ++i) {
after_axis_size *= inputs_output_shape[0][i];
}
int axis_offset = 0;
uint8_t *dst_base = (output);
size_t output_stride = after_axis_size * inputs_output_shape[input_num][axis];
for (int i = 0; i < input_num; ++i) {
uint8_t *src_base = (input[i]);
size_t input_stride = after_axis_size * inputs_output_shape[i][axis];
for (int j = 0; j < before_axis_size; ++j) {
uint8_t *src = src_base + j * input_stride;
uint8_t *dst = dst_base + j * output_stride + axis_offset * after_axis_size;
memcpy(dst, src, input_stride);
}
axis_offset += inputs_output_shape[i][axis];
}
}

View File

@ -1003,12 +1003,6 @@ int ElementMinimumInt(const int *input0, const int *input1, int *output, const i
return NNACL_OK;
}
int BroadcastMaximum(const float *in0, const float *in1, float *tile_in0, float *tile_in1, float *out, int size,
ArithmeticParameter *param) {
TileDimensionsFp32(in0, in1, tile_in0, tile_in1, param);
return ElementMaximum(tile_in0, tile_in1, out, size);
}
int ElementMinimum(const float *in0, const float *in1, float *out, int size) {
int index = 0;
#ifdef ENABLE_NEON
@ -1027,65 +1021,6 @@ int ElementMinimum(const float *in0, const float *in1, float *out, int size) {
#undef ACCURACY_DATA
#ifdef ENABLE_NNACL_INFER_SHAPE
int ArithmeticInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *in_format, int *out_format,
int *in_datatype, int *out_datatype, OpParameter *param) {
*out_format = in_format[0];
*out_datatype = in_datatype[0];
const ArithmeticParameter *arithmetic_parameter = (const ArithmeticParameter *)param;
int ndim0 = dim_size[0];
int ndim1 = dim_size[1];
int *in_shape0 = in_shape[0];
int *in_shape1 = in_shape[1];
if (ndim0 < ndim1) {
arithmetic_parameter->ndim_ = ndim1;
int fill_dim_num = ndim1 - ndim0;
int j = 0;
for (int i = 0; i < ndim1; ++i) {
if (i < fill_dim_num) {
arithmetic_parameter->in_shape0_[i] = 1;
} else {
arithmetic_parameter->in_shape0_[i] = in_shape0[j++];
}
arithmetic_parameter->in_shape1_[i] = in_shape1[i];
}
} else if (ndim0 > ndim1) {
arithmetic_parameter->ndim_ = ndim0;
int fill_dim_num = ndim0 - ndim1;
int j = 0;
for (int i = 0; i < ndim0; ++i) {
if (i < fill_dim_num) {
arithmetic_parameter->in_shape1_[i] = 1;
} else {
arithmetic_parameter->in_shape1_[i] = in_shape1[j++];
}
arithmetic_parameter->in_shape0_[i] = in_shape0[i];
}
} else {
arithmetic_parameter->ndim_ = ndim0;
for (int i = 0; i < ndim0; ++i) {
arithmetic_parameter->in_shape0_[i] = in_shape0[i];
arithmetic_parameter->in_shape1_[i] = in_shape1[i];
}
}
int j = 0;
for (size_t i = 0; i < arithmetic_parameter->ndim_; ++i) {
if (arithmetic_parameter->in_shape0_[i] != arithmetic_parameter->in_shape1_[i]) {
if (arithmetic_parameter->in_shape0_[i] == 1) {
out_shape[j++] = arithmetic_parameter->in_shape1_[i];
} else if (arithmetic_parameter->in_shape1_[i] == 1) {
out_shape[j++] = arithmetic_parameter->in_shape0_[i];
} else {
return NNACL_PARAM_INVALID;
}
} else {
out_shape[j++] = arithmetic_parameter->in_shape0_[i];
}
}
return NNACL_OK;
}
#endif
void TileOneDimensionFp32(const float *inData, float *outData, int dim, size_t ndim, const int *inShape,
const int *inStrides, const int *outStrides, const int *multiple) {
int srcDimSize = inShape[dim];

View File

@ -96,8 +96,6 @@ int ElementMaximum(const float *in0, const float *in1, float *out, int size);
int ElementMinimum(const float *in0, const float *in1, float *out, int size);
int ElementMaximumInt(const int *in0, const int *in1, int *out, int size);
int ElementMinimumInt(const int *input0, const int *input1, int *output, const int element_size);
int BroadcastMaximum(const float *in0, const float *in1, float *tile_input0, float *tile_input1, float *out, int size,
ArithmeticParameter *param);
/* floor div */
int ElementFloorDiv(const float *in0, const float *in1, float *out, int size);
@ -113,10 +111,6 @@ int ElementModInt(const int *in0, const int *in1, int *out, int size);
int ElementOptMod(const float *in0, const float *in1, float *out, int size, const ArithmeticParameter *param);
int ElementOptModInt(const int *in0, const int *in1, int *out, int size, const ArithmeticParameter *param);
#ifdef ENABLE_NNACL_INFER_SHAPE
int ArithmeticInferShape(int **in_shape, size_t *dim_size, int *out_shape, int *in_format, int *out_format,
int *in_datatype, int *out_datatype, OpParameter *param);
#endif
#ifdef __cplusplus
}
#endif

View File

@ -16,7 +16,6 @@
#include <string.h>
#include <math.h>
#include <assert.h>
#include "nnacl/fp32/arithmetic_self_fp32.h"
// abs:

View File

@ -13,28 +13,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_H_
#define MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_H_
#ifndef MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_FP32_H_
#define MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_FP32_H_
#ifdef ENABLE_NEON
#include <arm_neon.h>
#endif
#include "nnacl/op_base.h"
#define BROADCAST_TO_SHAPE_MAX_SIZE 4
typedef struct BroadcastToParameter {
OpParameter op_parameter_;
int shape_[BROADCAST_TO_SHAPE_MAX_SIZE];
size_t shape_size_;
} BroadcastToParameter;
typedef struct BroadcastShapeInfo {
int input_shape_[BROADCAST_TO_SHAPE_MAX_SIZE];
int input_shape_size_;
int output_shape_[BROADCAST_TO_SHAPE_MAX_SIZE];
int output_shape_size_;
} BroadcastShapeInfo;
#include "nnacl/broadcast_to_parameter.h"
#ifdef __cplusplus
extern "C" {
@ -44,4 +27,4 @@ int BroadcastTo(const float *input, BroadcastShapeInfo *shape_info, float *outpu
}
#endif
#endif // MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_H_
#endif // MINDSPORE_LITE_NNACL_FP32_BROADCAST_TO_FP32_H_

View File

@ -1,48 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_NNACL_CAST_H_
#define MINDSPORE_LITE_NNACL_CAST_H_
#ifdef ENABLE_NEON
#include <arm_neon.h>
#endif
#include "nnacl/op_base.h"
// For cast.
typedef struct CastParameter {
OpParameter op_parameter_;
int src_type_;
int dst_type_;
} CastParameter;
#ifdef __cplusplus
extern "C" {
#endif
void BoolToFloat32(const bool *input, float *output, int number);
void Uint8ToFloat32(const uint8_t *input, float *output, int number);
void Uint8ToInt8(const uint8_t *input, int8_t *output, int number);
void Int8ToUint8(const int8_t *input, uint8_t *output, int number);
void Int32ToFloat32(const int32_t *input, float *output, int number);
void Fp16ToFloat32(const uint16_t *input, float *output, int number);
void Float32ToFp16(const float *input, uint16_t *output, int number);
void Float32ToInt32(const float *input, int32_t *output, int number);
void Float32ToInt64(const float *input, int64_t *output, int number);
void Int32ToInt64(const int32_t *input, int64_t *output, int number);
#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_LITE_NNACL_CAST_H_

View File

@ -115,101 +115,3 @@ void WinogradTransRight(const float *S, const float *B, float *M, size_t w, size
}
}
#endif
typedef union float32_bits {
unsigned int u;
float f;
} float32_bits;
float ShortToFloat32(uint16_t src_value) {
const float32_bits magic = {113 << 23};
const unsigned int shifted_exp = 0x7c00 << 13;
float32_bits o;
o.u = (src_value & 0x7fff) << 13;
unsigned int exp = shifted_exp & o.u;
o.u += (127 - 15) << 23;
if (exp == shifted_exp) {
o.u += (128 - 16) << 23;
} else if (exp == 0) {
o.u += 1 << 23;
o.f -= magic.f;
}
o.u |= (src_value & 0x8000) << 16;
return o.f;
}
static const unsigned int FP32_BIT_SIZE = 32;
static const unsigned int FP32_EXPONENT_BIAS = 127;
static const unsigned int FP32_SIGNIFICAND = 23;
static const unsigned int FP32_EXPONENT_MAX = 255;
static const unsigned int FP16_BIT_SIZE = 16;
static const unsigned int FP16_EXPONENT_BIAS = 15;
static const unsigned int FP16_SIGNIFICAND = 10;
static const int FP16_EXPONENT_MAX = 30;
static const int FP16_EXPONENT_MIN = -10;
uint16_t Float32ToShort(float src_value) {
float *psrcValue = NULL;
psrcValue = &src_value;
unsigned int srcValueBit = (unsigned int)(*psrcValue);
unsigned int sign = srcValueBit >> (FP32_BIT_SIZE - 1);
unsigned int mantissa = srcValueBit & 0x007FFFFF;
// exponent
int exp = ((srcValueBit & 0x7F800000) >> FP32_SIGNIFICAND) + FP16_EXPONENT_BIAS - FP32_EXPONENT_BIAS;
uint16_t res;
if (exp > 0 && exp < FP16_EXPONENT_MAX) {
// use rte rounding mode, round the significand, combine sign, exponent and significand into a short.
res = (sign << (FP16_BIT_SIZE - 1)) | (exp << FP16_SIGNIFICAND) |
((mantissa + 0x00001000) >> (FP32_SIGNIFICAND - FP16_SIGNIFICAND));
} else if (srcValueBit == 0) {
res = 0;
} else {
if (exp <= 0) {
if (exp < FP16_EXPONENT_MIN) {
// value is less than min half float point
res = 0;
} else {
// normalized single, magnitude is less than min normal half float point.
mantissa = (mantissa | 0x00800000) >> (1 - exp);
// round to nearest
if ((mantissa & 0x00001000) > 0) {
mantissa = mantissa + 0x00002000;
}
// combine sign & mantissa (exp is zero to get denormalized number)
res = (sign << FP16_EXPONENT_BIAS) | (mantissa >> (FP32_SIGNIFICAND - FP16_SIGNIFICAND));
}
} else if (exp == (FP32_EXPONENT_MAX - FP32_EXPONENT_BIAS + FP16_EXPONENT_BIAS)) {
if (mantissa == 0) {
// input float is infinity, return infinity half
res = (sign << FP16_EXPONENT_BIAS) | 0x7C00;
} else {
// input float is NaN, return half NaN
res = (sign << FP16_EXPONENT_BIAS) | 0x7C00 | (mantissa >> (FP32_SIGNIFICAND - FP16_SIGNIFICAND));
}
} else {
// exp > 0, normalized single, round to nearest
if ((mantissa & 0x00001000) > 0) {
mantissa = mantissa + 0x00002000;
if ((mantissa & 0x00800000) > 0) {
mantissa = 0;
exp = exp + 1;
}
}
if (exp > FP16_EXPONENT_MAX) {
// exponent overflow - return infinity half
res = (sign << FP16_EXPONENT_BIAS) | 0x7C00;
} else {
// combine sign, exp and mantissa into normalized half
res = (sign << FP16_EXPONENT_BIAS) | (exp << FP16_SIGNIFICAND) |
(mantissa >> (FP32_SIGNIFICAND - FP16_SIGNIFICAND));
}
}
}
return res;
}

View File

@ -33,10 +33,6 @@ void PostConvFuncFp32C4(const float *c4_out_ptr, float *out_ptr, const float *bi
void WinogradTransLeft(const float *S, const float *B, float *M, size_t w, size_t h, size_t k, size_t length);
void WinogradTransRight(const float *S, const float *B, float *M, size_t w, size_t h, size_t k, size_t length);
float ShortToFloat32(uint16_t src_value);
uint16_t Float32ToShort(float src_value);
#if defined(ENABLE_ARM) || defined(ENABLE_SSE)
void ConvDwFp32Center(float *dst, const float *src, const float *weight, const float *bias, size_t height, size_t width,
size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, size_t in_sh_step,

View File

@ -13,12 +13,3 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "nnacl/fp32/expandDims_fp32.h"
#include <string.h>
#include "nnacl/errorcode.h"
int ExpandDims(const void *input_ptr, void *output_ptr, size_t data_size) {
memcpy(output_ptr, input_ptr, data_size);
return NNACL_OK;
}

View File

@ -1,29 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "nnacl/fp32/range_fp32.h"
void Range(float *output_ptr, float start, float delta, int nums) {
for (int i = 0; i < nums; ++i, start += delta) {
output_ptr[i] = start;
}
}
void RangeInt(int *output_ptr, int start, int delta, int nums) {
for (int i = 0; i < nums; ++i, start += delta) {
output_ptr[i] = start;
}
}

View File

@ -31,8 +31,18 @@ typedef struct RangeParameter {
#ifdef __cplusplus
extern "C" {
#endif
void Range(float *output_ptr, float start, float delta, int nums);
void RangeInt(int *output_ptr, int start, int delta, int nums);
// Fills output_ptr with `nums` evenly spaced floats:
// start, start + delta, start + 2*delta, ...
// static inline so the header-local definition links in every TU.
static inline void Range(float *output_ptr, float start, float delta, int nums) {
  for (int i = 0; i < nums; ++i, start += delta) {
    output_ptr[i] = start;
  }
}
// Fills output_ptr with `nums` evenly spaced ints:
// start, start + delta, start + 2*delta, ...
// static inline so the header-local definition links in every TU.
static inline void RangeInt(int *output_ptr, int start, int delta, int nums) {
  for (int i = 0; i < nums; ++i, start += delta) {
    output_ptr[i] = start;
  }
}
#ifdef __cplusplus
}
#endif

View File

@ -1,19 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "nnacl/fp32/rank_fp32.h"
void Rank(float *output, int rank) { output[0] = (float)(rank); }

View File

@ -21,7 +21,10 @@
#ifdef __cplusplus
extern "C" {
#endif
void Rank(float *output, int rank);
// Writes the tensor rank into output[0] as a float (Rank op produces a
// scalar float tensor). static inline so the header-local definition links
// in every TU; the redundant trailing `return;` is dropped.
static inline void Rank(float *output, int rank) { output[0] = (float)rank; }
#ifdef __cplusplus
}
#endif

View File

@ -15,7 +15,6 @@
*/
#include <math.h>
#include <assert.h>
#include "nnacl/int8/arithmetic_self_int8.h"
#ifdef ENABLE_NEON
#include <arm_neon.h>

View File

@ -15,3 +15,88 @@
*/
#include "nnacl/nnacl_common.h"
// Overlay of an IEEE-754 single's bit pattern and its float value,
// used for bit-level fp16 <-> fp32 conversion.
typedef union float32_bits {
  unsigned int u;
  float f;
} float32_bits;

// Converts one IEEE-754 half-precision value (passed as its raw uint16_t bit
// pattern) to float32. Normals are handled by shifting the exponent/mantissa
// into fp32 position and rebiasing; subnormals are renormalized via the
// "magic" constant trick; inf/NaN exponents are widened to the fp32 maximum.
float ShortToFloat32(uint16_t src_value) {
  const float32_bits magic = {113 << 23};
  const unsigned int inf_nan_exp = 0x7c00 << 13;  // fp16 all-ones exponent, fp32-aligned
  float32_bits result;
  result.u = (src_value & 0x7fff) << 13;  // exponent + mantissa into fp32 slots
  const unsigned int exponent = result.u & inf_nan_exp;
  result.u += (127 - 15) << 23;  // rebias exponent: fp16 bias 15 -> fp32 bias 127
  if (exponent == inf_nan_exp) {
    result.u += (128 - 16) << 23;  // inf/NaN: extend exponent to fp32 all-ones
  } else if (exponent == 0) {
    result.u += 1 << 23;  // subnormal: renormalize...
    result.f -= magic.f;  // ...then remove the implicit-bit bias
  }
  result.u |= (src_value & 0x8000) << 16;  // restore sign bit
  return result.f;
}
// Converts a float32 to its IEEE-754 half-precision bit pattern, handling
// zero, subnormals, rounding, overflow to infinity, and NaN propagation.
uint16_t Float32ToShort(float src_value) {
  // Reinterpret the float's raw bit pattern through a union (same idiom as
  // float32_bits above). The previous `(unsigned int)(*psrcValue)` converted
  // the numeric VALUE (e.g. 1.0f -> 1), not the bits, so the sign/exponent/
  // mantissa extraction below operated on garbage.
  union {
    float f;
    unsigned int u;
  } src_bits;
  src_bits.f = src_value;
  const unsigned int srcValueBit = src_bits.u;
  unsigned int sign = srcValueBit >> (FP32_BIT_SIZE - 1);
  unsigned int mantissa = srcValueBit & 0x007FFFFF;
  // exponent, rebiased from fp32 (bias 127) to fp16 (bias 15)
  int exp = ((srcValueBit & 0x7F800000) >> FP32_SIGNIFICAND) + FP16_EXPONENT_BIAS - FP32_EXPONENT_BIAS;
  uint16_t res;
  if (exp > 0 && exp < FP16_EXPONENT_MAX) {
    // use rte rounding mode, round the significand, combine sign, exponent and significand into a short.
    res = (sign << (FP16_BIT_SIZE - 1)) | (exp << FP16_SIGNIFICAND) |
          ((mantissa + 0x00001000) >> (FP32_SIGNIFICAND - FP16_SIGNIFICAND));
  } else if (srcValueBit == 0) {
    res = 0;
  } else {
    if (exp <= 0) {
      if (exp < FP16_EXPONENT_MIN) {
        // value is less than min half float point
        res = 0;
      } else {
        // normalized single, magnitude is less than min normal half float point.
        mantissa = (mantissa | 0x00800000) >> (1 - exp);
        // round to nearest
        if ((mantissa & 0x00001000) > 0) {
          mantissa = mantissa + 0x00002000;
        }
        // combine sign & mantissa (exp is zero to get denormalized number)
        res = (sign << FP16_EXPONENT_BIAS) | (mantissa >> (FP32_SIGNIFICAND - FP16_SIGNIFICAND));
      }
    } else if (exp == (FP32_EXPONENT_MAX - FP32_EXPONENT_BIAS + FP16_EXPONENT_BIAS)) {
      if (mantissa == 0) {
        // input float is infinity, return infinity half
        res = (sign << FP16_EXPONENT_BIAS) | 0x7C00;
      } else {
        // input float is NaN, return half NaN
        res = (sign << FP16_EXPONENT_BIAS) | 0x7C00 | (mantissa >> (FP32_SIGNIFICAND - FP16_SIGNIFICAND));
      }
    } else {
      // exp > 0, normalized single, round to nearest
      if ((mantissa & 0x00001000) > 0) {
        mantissa = mantissa + 0x00002000;
        if ((mantissa & 0x00800000) > 0) {
          mantissa = 0;
          exp = exp + 1;
        }
      }
      if (exp > FP16_EXPONENT_MAX) {
        // exponent overflow - return infinity half
        res = (sign << FP16_EXPONENT_BIAS) | 0x7C00;
      } else {
        // combine sign, exp and mantissa into normalized half
        res = (sign << FP16_EXPONENT_BIAS) | (exp << FP16_SIGNIFICAND) |
              (mantissa >> (FP32_SIGNIFICAND - FP16_SIGNIFICAND));
      }
    }
  }
  return res;
}

View File

@ -17,6 +17,8 @@
#ifndef MINDSPORE_LITE_NNACL_NNACL_COMMON_H_
#define MINDSPORE_LITE_NNACL_NNACL_COMMON_H_
#include "nnacl/op_base.h"
#ifdef __cplusplus
extern "C" {
#endif
@ -29,6 +31,18 @@ inline void ComputeStrides(const int *shape, int *strides, const int ndim) {
}
}
static const unsigned int FP32_BIT_SIZE = 32;
static const unsigned int FP32_EXPONENT_BIAS = 127;
static const unsigned int FP32_SIGNIFICAND = 23;
static const unsigned int FP32_EXPONENT_MAX = 255;
static const unsigned int FP16_BIT_SIZE = 16;
static const unsigned int FP16_EXPONENT_BIAS = 15;
static const unsigned int FP16_SIGNIFICAND = 10;
static const int FP16_EXPONENT_MAX = 30;
static const int FP16_EXPONENT_MIN = -10;
float ShortToFloat32(uint16_t src_value);
uint16_t Float32ToShort(float src_value);
#ifdef __cplusplus
}
#endif

View File

@ -22,7 +22,6 @@
namespace mindspore {
namespace lite {
#ifdef PRIMITIVE_WRITEABLE
int AssertOP::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
if (this->primitive_ == nullptr) {
this->primitive_ = new (std::nothrow) schema::PrimitiveT;
@ -67,6 +66,5 @@ Registry AssertRegistry(schema::PrimitiveType_Assert, AssertCreator);
#endif
int AssertOP::InferShape(std::vector<Tensor *> inputs_, std::vector<Tensor *> outputs_) { return RET_OK; }
} // namespace lite
} // namespace mindspore

View File

@ -23,7 +23,6 @@
namespace mindspore {
namespace lite {
#ifdef PRIMITIVE_WRITEABLE
int Merge::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
if (this->primitive_ == nullptr) {
this->primitive_ = new (std::nothrow) schema::PrimitiveT;
@ -99,6 +98,5 @@ int Merge::InferShape(std::vector<Tensor *> inputs_, std::vector<Tensor *> outpu
}
return RET_OK;
}
} // namespace lite
} // namespace mindspore

View File

@ -23,7 +23,6 @@
namespace mindspore {
namespace lite {
#ifdef PRIMITIVE_WRITEABLE
int Mod::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
if (this->primitive_ == nullptr) {
this->primitive_ = new (std::nothrow) schema::PrimitiveT;

View File

@ -20,7 +20,6 @@
namespace mindspore {
namespace lite {
OpParameter *PopulateAssertParameter(const mindspore::lite::PrimitiveC *primitive) {
OpParameter *assert_parameter = reinterpret_cast<OpParameter *>(malloc(sizeof(OpParameter)));
if (assert_parameter == nullptr) {

View File

@ -17,7 +17,7 @@
#include "src/ops/cast.h"
#include "src/ops/primitive_c.h"
#include "src/ops/populate/populate_register.h"
#include "nnacl/fp32/cast_fp32.h"
#include "nnacl/cast_parameter.h"
namespace mindspore {
namespace lite {
@ -29,9 +29,11 @@ OpParameter *PopulateCastParameter(const mindspore::lite::PrimitiveC *primitive)
}
memset(cast_param, 0, sizeof(CastParameter));
cast_param->op_parameter_.type_ = primitive->Type();
auto param = reinterpret_cast<mindspore::lite::Cast *>(const_cast<mindspore::lite::PrimitiveC *>(primitive));
cast_param->src_type_ = param->GetSrcT();
cast_param->dst_type_ = param->GetDstT();
return reinterpret_cast<OpParameter *>(cast_param);
}

View File

@ -14,24 +14,19 @@
* limitations under the License.
*/
#include "src/ops/expand_dims.h"
#include "src/ops/primitive_c.h"
#include "src/ops/populate/populate_register.h"
#include "nnacl/fp32/expandDims_fp32.h"
namespace mindspore {
namespace lite {
OpParameter *PopulateExpandDimsParameter(const mindspore::lite::PrimitiveC *primitive) {
auto param = reinterpret_cast<mindspore::lite::ExpandDims *>(const_cast<mindspore::lite::PrimitiveC *>(primitive));
ExpandDimsParameter *expand_dims_param = reinterpret_cast<ExpandDimsParameter *>(malloc(sizeof(ExpandDimsParameter)));
OpParameter *expand_dims_param = reinterpret_cast<OpParameter *>(malloc(sizeof(OpParameter)));
if (expand_dims_param == nullptr) {
MS_LOG(ERROR) << "malloc ExpandDimsParameter failed.";
return nullptr;
}
memset(expand_dims_param, 0, sizeof(ExpandDimsParameter));
expand_dims_param->op_parameter_.type_ = primitive->Type();
expand_dims_param->dim_ = param->GetDim();
memset(expand_dims_param, 0, sizeof(OpParameter));
return reinterpret_cast<OpParameter *>(expand_dims_param);
}

View File

@ -17,7 +17,7 @@
#include "src/ops/fill.h"
#include "src/ops/primitive_c.h"
#include "src/ops/populate/populate_register.h"
#include "nnacl/fp32/fill_fp32.h"
#include "nnacl/fill_parameter.h"
namespace mindspore {
namespace lite {

View File

@ -20,9 +20,7 @@
namespace mindspore {
namespace lite {
OpParameter *PopulateLayerNormParameter(const mindspore::lite::PrimitiveC *primitive);
} // namespace lite
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_OPS_POPULATE_STRIDED_LAYER_NORM_POPULATE_H_

View File

@ -19,7 +19,6 @@
namespace mindspore {
namespace lite {
OpParameter *PopulateMergeParameter(const mindspore::lite::PrimitiveC *primitive) {
OpParameter *merge_parameter = reinterpret_cast<OpParameter *>(malloc(sizeof(OpParameter)));
if (merge_parameter == nullptr) {

View File

@ -15,6 +15,7 @@
*/
#include "src/ops/populate/strided_slice_populate.h"
#include <limits>
#include "src/ops/strided_slice.h"
#include "src/ops/primitive_c.h"
#include "src/ops/populate/populate_register.h"

View File

@ -36,6 +36,5 @@ OpParameter *PopulateTensorListSetItemParameter(const mindspore::lite::Primitive
}
Registry TensorListSetItemParameterRegistry(schema::PrimitiveType_TensorListSetItem,
PopulateTensorListSetItemParameter);
} // namespace lite
} // namespace mindspore

View File

@ -28,6 +28,5 @@ PrimitiveC *ReciprocalCreator(const schema::Primitive *primitive) {
}
Registry ReciprocalRegistry(schema::PrimitiveType_Reciprocal, ReciprocalCreator);
#endif
} // namespace lite
} // namespace mindspore

View File

@ -39,7 +39,6 @@ class Reciprocal : public ArithmeticSelf {
}
#endif
};
} // namespace lite
} // namespace mindspore

View File

@ -15,6 +15,7 @@
*/
#include "src/ops/space_to_batch_nd.h"
#include <limits>
#include "src/common/common.h"
#ifndef PRIMITIVE_WRITEABLE

View File

@ -15,6 +15,7 @@
*/
#include "src/ops/space_to_depth.h"
#include <limits>
#include "src/common/common.h"
#ifndef PRIMITIVE_WRITEABLE

View File

@ -15,6 +15,7 @@
*/
#include "src/ops/tile.h"
#include <limits>
#include <algorithm>
#ifndef PRIMITIVE_WRITEABLE

View File

@ -14,11 +14,8 @@
* limitations under the License.
*/
#include "src/runtime/kernel/arm/fp16/cast_fp16.h"
#include <vector>
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "nnacl/fp16/cast_fp16.h"
#include "nnacl/op_base.h"
#include "src/runtime/runtime_api.h"
#include "include/errorcode.h"

View File

@ -18,6 +18,9 @@
#include <vector>
#include "src/lite_kernel.h"
#include "nnacl/op_base.h"
#include "nnacl/fp16/cast_fp16.h"
#include "nnacl/base/cast_base.h"
namespace mindspore::kernel {
class CastFp16CPUKernel : public LiteKernel {

View File

@ -113,8 +113,9 @@ int ConcatFp16CPUKernel::Run() {
fp16_output_ = reinterpret_cast<float16_t *>(out_tensors_.at(0)->MutableData());
}
int dtype_len = in_tensors_.at(0)->data_type() == kNumberTypeInt32 ? sizeof(int32_t) : sizeof(float16_t);
ConcatFp16(reinterpret_cast<void **>(fp16_inputs_.data()), input_num, concat_param_->axis_,
inputs_output_shape.data(), output_shape.size(), reinterpret_cast<void *>(fp16_output_), dtype_len);
Concat(reinterpret_cast<void **>(fp16_inputs_.data()), input_num, concat_param_->axis_, inputs_output_shape.data(),
output_shape.size(), reinterpret_cast<void *>(fp16_output_), 0, 1, dtype_len);
if (out_tensors_.at(0)->data_type() == kNumberTypeFloat32 || out_tensors_.at(0)->data_type() == kNumberTypeFloat) {
Float16ToFloat32(fp16_output_, reinterpret_cast<float *>(output_addr), out_tensors_.at(0)->ElementsNum());

View File

@ -20,7 +20,7 @@
#include <vector>
#include "include/context.h"
#include "include/errorcode.h"
#include "nnacl/fp16/concat_fp16.h"
#include "nnacl/base/concat_base.h"
#include "nnacl/concat_parameter.h"
#include "nnacl/fp16/cast_fp16.h"
#include "src/lite_kernel.h"

View File

@ -15,9 +15,6 @@
*/
#include "src/runtime/kernel/arm/fp16/reshape_fp16.h"
#include <vector>
#include "nnacl/fp16/cast_fp16.h"
#include "nnacl/reshape.h"
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"

View File

@ -18,8 +18,9 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_RESHAPE_H_
#include <vector>
#include "nnacl/fp16/cast_fp16.h"
#include "nnacl/base/reshape_base.h"
#include "src/lite_kernel.h"
#include "include/context.h"
#include "src/runtime/kernel/arm/fp32/reshape_fp32.h"

View File

@ -14,14 +14,9 @@
* limitations under the License.
*/
#include "src/runtime/kernel/arm/fp32/cast_fp32.h"
#include <vector>
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "src/tensor.h"
#include "nnacl/fp32/cast_fp32.h"
#include "nnacl/op_base.h"
#include "src/runtime/runtime_api.h"
#include "include/errorcode.h"
using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;

View File

@ -17,7 +17,11 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CAST_H_
#include <vector>
#include "include/errorcode.h"
#include "src/lite_kernel.h"
#include "src/tensor.h"
#include "nnacl/op_base.h"
#include "nnacl/base/cast_base.h"
namespace mindspore::kernel {
class CastCPUKernel : public LiteKernel {

View File

@ -40,8 +40,8 @@ int ConcatCPUKernel::ReSize() {
int ConcatCPUKernel::DoConcat(int task_id) {
auto input_num = in_tensors_.size();
std::vector<const void *> inputs_addr(input_num, nullptr);
std::vector<const int *> inputs_output_shape(input_num + 1, nullptr);
std::vector<void *> inputs_addr(input_num, nullptr);
std::vector<int *> inputs_output_shape(input_num + 1, nullptr);
std::vector<std::vector<int>> shapes;
for (size_t i = 0; i < input_num; ++i) {
@ -54,7 +54,7 @@ int ConcatCPUKernel::DoConcat(int task_id) {
auto output_addr = out_tensors_.at(0)->MutableData();
Concat(inputs_addr.data(), input_num, concat_param_->axis_, inputs_output_shape.data(), output_shape.size(),
output_addr, task_id, op_parameter_->thread_num_);
output_addr, task_id, op_parameter_->thread_num_, sizeof(float));
return RET_OK;
}

View File

@ -18,7 +18,7 @@
#include <vector>
#include "src/lite_kernel.h"
#include "nnacl/fp32/concat_fp32.h"
#include "nnacl/base/concat_base.h"
#include "nnacl/concat_parameter.h"
#include "include/errorcode.h"
#include "src/runtime/runtime_api.h"

View File

@ -15,10 +15,8 @@
*/
#include "src/runtime/kernel/arm/fp32/expandDims_fp32.h"
#include <vector>
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "src/runtime/runtime_api.h"
using mindspore::kernel::KERNEL_ARCH::kCPU;

View File

@ -18,8 +18,9 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_EXPANDDIMS_H_
#include <vector>
#include "include/errorcode.h"
#include "src/lite_kernel.h"
#include "nnacl/fp32/expandDims_fp32.h"
#include "nnacl/base/expand_dims_base.h"
#include "schema/model_generated.h"
#include "include/context.h"

View File

@ -51,7 +51,7 @@ int FillCPUKernel::DoFill(int task_id) {
auto input_tensor = in_tensors_.at(0);
int ret = RET_OK;
if (input_tensor->data_type() == kNumberTypeFloat32 || input_tensor->data_type() == kNumberTypeFloat) {
ret = Fill(out_ptr_ + offset, size, src_data_);
ret = FillFp32(out_ptr_ + offset, size, src_data_);
} else if (input_tensor->data_type() == kNumberTypeInt32 || input_tensor->data_type() == kNumberTypeInt) {
ret = FillInt32(int32_out_ptr_ + offset, size, int32_src_data_);
} else {

View File

@ -18,9 +18,8 @@
#include <vector>
#include "src/lite_kernel.h"
#include "include/context.h"
#include "nnacl/fp32/fill_fp32.h"
#include "nnacl/base/fill_base.h"
using mindspore::lite::InnerContext;

View File

@ -16,7 +16,7 @@
#include "src/runtime/kernel/arm/fp32/gatherNd_fp32.h"
#include <string.h>
#include <vector>
#include <limits>
#include "schema/model_generated.h"
#include "include/errorcode.h"
#include "src/kernel_registry.h"
@ -29,7 +29,6 @@ using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_GatherNd;
namespace mindspore::kernel {
GatherNdCPUKernel::~GatherNdCPUKernel() {
if (in_offset_ != nullptr) {
free(in_offset_);

View File

@ -17,10 +17,10 @@
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GATHERND_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GATHERND_H_
#include <string.h>
#include <vector>
#include "nnacl/fp32/gatherNd_fp32.h"
#include "src/lite_kernel.h"
#include "include/context.h"
#include "nnacl/op_base.h"

View File

@ -13,14 +13,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/arm/fp32/gather_fp32.h"
#include <vector>
#include "nnacl/gather_parameter.h"
#include "nnacl/fp32/gather_fp32.h"
#include <limits>
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "src/runtime/runtime_api.h"
#include "include/errorcode.h"
using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
@ -29,7 +27,6 @@ using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Gather;
namespace mindspore::kernel {
int GatherCPUKernel::Init() {
if (!InferShapeDone()) {
return RET_OK;
@ -76,7 +73,7 @@ int GatherCPUKernel::DoGather(int task_id) {
} else {
input_ptr += thread_stride * limit;
output_ptr += thread_stride * indices_element_size;
error_code = Gather(input_ptr, count, inner_size, limit, indices_data_, indices_element_size, output_ptr);
error_code = GatherFp32(input_ptr, count, inner_size, limit, indices_data_, indices_element_size, output_ptr);
}
return error_code;
}

View File

@ -18,8 +18,10 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GATHER_H_
#include <vector>
#include "nnacl/gather_parameter.h"
#include "include/errorcode.h"
#include "src/lite_kernel.h"
#include "nnacl/gather_parameter.h"
#include "nnacl/base/gather_base.h"
namespace mindspore::kernel {
class GatherCPUKernel : public LiteKernel {

View File

@ -15,8 +15,6 @@
*/
#include "src/runtime/kernel/arm/fp32/reshape_fp32.h"
#include <vector>
#include "nnacl/reshape.h"
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"

View File

@ -20,6 +20,7 @@
#include <vector>
#include "src/lite_kernel.h"
#include "include/context.h"
#include "nnacl/base/reshape_base.h"
using mindspore::lite::InnerContext;
@ -35,8 +36,6 @@ class ReshapeCPUKernel : public LiteKernel {
int Init() override;
int ReSize() override;
int Run() override;
private:
};
} // namespace mindspore::kernel

View File

@ -15,6 +15,7 @@
*/
#include "src/runtime/kernel/arm/fp32/space_to_depth_fp32.h"
#include <limits>
#include <vector>
#include "schema/model_generated.h"
#include "src/kernel_registry.h"

View File

@ -16,7 +16,7 @@
#include "src/runtime/kernel/arm/int8/gatherNd_int8.h"
#include <string.h>
#include <vector>
#include <limits>
#include "schema/model_generated.h"
#include "include/errorcode.h"
#include "src/kernel_registry.h"
@ -30,7 +30,6 @@ using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_GatherNd;
namespace mindspore::kernel {
GatherNdInt8CPUKernel::~GatherNdInt8CPUKernel() {
if (in_offset_ != nullptr) {
free(in_offset_);

View File

@ -16,7 +16,6 @@
#include "src/runtime/kernel/arm/int8/pooling_int8.h"
#include "nnacl/int8/pooling_int8.h"
#include "nnacl/fp32/cast_fp32.h"
#include "include/errorcode.h"
#include "src/runtime/runtime_api.h"
#include "src/kernel_registry.h"

View File

@ -35,7 +35,7 @@ int CastNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const
return RET_ERROR;
}
op_->set_input_x(*npu_inputs[0]);
op_->set_attr_dst_dtype(lite::ConverterToNPUDataType(static_cast<TypeId>(cast_parameter_->dst_type_)));
op_->set_attr_dst_dtype(lite::ConverterToNPUDataType(static_cast<TypeId>(outputs[0]->data_type())));
op_->set_attr_src_dtype(lite::ConverterToNPUDataType(static_cast<TypeId>(inputs[0]->data_type())));
return RET_OK;
}

View File

@ -19,16 +19,14 @@
#include <vector>
#include "src/runtime/kernel/npu/npu_kernel.h"
#include "include/graph/op/all_ops.h"
#include "nnacl/fp32/cast_fp32.h"
namespace mindspore::kernel {
class CastNPUKernel : public NPUKernel {
public:
CastNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: NPUKernel(parameter, inputs, outputs, ctx, primitive) {
cast_parameter_ = reinterpret_cast<CastParameter *>(parameter);
}
: NPUKernel(parameter, inputs, outputs, ctx, primitive) {}
~CastNPUKernel() override;
int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
@ -39,7 +37,6 @@ class CastNPUKernel : public NPUKernel {
private:
hiai::op::CastT *op_ = nullptr;
CastParameter *cast_parameter_;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_CAST_NPU_H_

View File

@ -20,7 +20,7 @@
#include <vector>
#include <string>
#include "src/runtime/kernel/opencl/opencl_kernel.h"
#include "nnacl/fp32/cast_fp32.h"
#include "nnacl/cast_parameter.h"
namespace mindspore::kernel {

View File

@ -18,7 +18,7 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_FILL_H_
#include <vector>
#include "mindspore/lite/nnacl/fp32/fill_fp32.h"
#include "mindspore/lite/nnacl/base/fill_base.h"
#include "mindspore/lite/nnacl/shape.h"
#include "src/runtime/kernel/opencl/opencl_kernel.h"

View File

@ -20,7 +20,6 @@
namespace mindspore {
namespace lite {
/// \brief TrainModel Defines a class that allows to import and export a mindsport trainable model
struct TrainModel : public lite::LiteModel {
/// \brief Static method to create a TrainModel object

View File

@ -27,7 +27,6 @@
namespace mindspore {
namespace lite {
STATUS SubgraphNodePass::GetSubgraphAllTensorIndices(const std::unique_ptr<SubGraphT> &subgraph,
schema::MetaGraphT *graph, std::set<uint32_t> *tensors_indices) {
for (auto &node_idx : subgraph->nodeIndices) {

View File

@ -26,7 +26,6 @@
namespace mindspore {
namespace lite {
bool SubgraphTensorPass::IsUsing(schema::MetaGraphT *graph, const uint32_t &tensor_idx) {
for (const auto &node : graph->nodes) {
if (IsContain<uint32_t>(node->inputIndex, tensor_idx)) {

View File

@ -19,7 +19,6 @@
namespace mindspore {
namespace lite {
STATUS CaffePoolingParser::ParsePads(const caffe::PoolingParameter &poolingParam, schema::PoolingT *attr) {
if (poolingParam.has_pad_h() && poolingParam.has_pad_w()) {
if (poolingParam.has_pad()) {

View File

@ -109,7 +109,6 @@ int CropperFlags::Init(int argc, const char **argv) {
}
return RET_OK;
}
} // namespace cropper
} // namespace lite
} // namespace mindspore

View File

@ -24,7 +24,6 @@
namespace mindspore {
namespace lite {
namespace cropper {
class CropperFlags : public virtual mindspore::lite::FlagParser {
public:
CropperFlags();

View File

@ -47,7 +47,6 @@ int ValidFile(std::ifstream &in_file, const char *file_path) {
}
return RET_OK;
}
} // namespace cropper
} // namespace lite
} // namespace mindspore