diff --git a/mindspore/lite/internal/CMakeLists.txt b/mindspore/lite/internal/CMakeLists.txt
index bf1d374309e..3f9449d1efb 100644
--- a/mindspore/lite/internal/CMakeLists.txt
+++ b/mindspore/lite/internal/CMakeLists.txt
@@ -10,6 +10,7 @@ file(GLOB KERNEL_SRC
     ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/fp32/arithmetic_self.c
     ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/fp32/arithmetic.c
     ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/fp32/matmul.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/fp32/reduce.c
     ${CMAKE_CURRENT_SOURCE_DIR}/src/kernel/fp32/*.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/src/kernel/common/*.cc
 )
diff --git a/mindspore/lite/internal/src/kernel/fp32/arithmetic.cc b/mindspore/lite/internal/src/kernel/fp32/arithmetic.cc
new file mode 100644
index 00000000000..a5f2dd10dce
--- /dev/null
+++ b/mindspore/lite/internal/src/kernel/fp32/arithmetic.cc
@@ -0,0 +1,238 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "internal/src/kernel/fp32/arithmetic.h"
+#include "internal/src/lite_log.h"
+#include "internal/include/errorcode.h"
+#include "internal/include/model.h"
+#include "internal/include/ms_tensor.h"
+#include "internal/include/lite_utils.h"
+#include "src/runtime/allocator.h"
+#include "nnacl/arithmetic_common.h"
+#include "nnacl/fp32/arithmetic.h"
+#include "schema/ops_generated.h"
+
+typedef int (*ArithmeticRun)(float *input0, float *input1, float *output, int element_size);
+typedef int (*ArithmeticOptRun)(float *input0, float *input1, float *output, int element_size,
+                                ArithmeticParameter *param);
+
+int BroadcastRun(float *input0, float *input1, float *output, int dim, int out_count, int break_pos,
+                 ArithmeticRun arithmetic_run, ArithmeticParameter *params) {
+  if (dim > break_pos) {
+    return arithmetic_run(input0, input1, output, out_count);
+  }
+  for (int i = 0; i < params->out_shape_[dim]; ++i) {
+    int pos0_ = params->in_shape0_[dim] == 1 ? 0 : i;
+    int pos1_ = params->in_shape1_[dim] == 1 ? 0 : i;
+    int error_code =
+      BroadcastRun(input0 + pos0_ * params->in_strides0_[dim], input1 + pos1_ * params->in_strides1_[dim],
+                   output + i * params->out_strides_[dim], dim + 1, out_count, break_pos, arithmetic_run, params);
+    if (error_code != RET_OK) {
+      return error_code;
+    }
+  }
+  return RET_OK;
+}
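BroadcastRun walks the broadcast dimensions from the outside in and bottoms out in a single flat elementwise call once every remaining dimension matches (`dim > break_pos`). A minimal standalone sketch of the same stride-based recursion; the names and the `main` driver here are ours for illustration, not code from the patch:

```cpp
#include <cstdio>
#include <vector>

// Row-major strides: stride[i] = product of all dimensions to the right of i.
static void ComputeStridesToy(const std::vector<int> &shape, std::vector<int> *strides) {
  strides->assign(shape.size(), 1);
  for (int i = static_cast<int>(shape.size()) - 2; i >= 0; --i) {
    (*strides)[i] = (*strides)[i + 1] * shape[i + 1];
  }
}

static void BroadcastMulToy(const float *a, const float *b, float *out, int dim, int inner_count, int break_pos,
                            const std::vector<int> &shape_a, const std::vector<int> &shape_b,
                            const std::vector<int> &out_shape, const std::vector<int> &stride_a,
                            const std::vector<int> &stride_b, const std::vector<int> &out_stride) {
  if (dim > break_pos) {  // remaining dims all match: finish with one flat loop
    for (int i = 0; i < inner_count; ++i) out[i] = a[i] * b[i];
    return;
  }
  for (int i = 0; i < out_shape[dim]; ++i) {
    int pos_a = shape_a[dim] == 1 ? 0 : i;  // a size-1 dim is read repeatedly at index 0
    int pos_b = shape_b[dim] == 1 ? 0 : i;
    BroadcastMulToy(a + pos_a * stride_a[dim], b + pos_b * stride_b[dim], out + i * out_stride[dim], dim + 1,
                    inner_count, break_pos, shape_a, shape_b, out_shape, stride_a, stride_b, out_stride);
  }
}

int main() {
  // {2, 3} * {1, 3}: dim 0 differs (break_pos = 0), dim 1 matches (inner_count = 3).
  std::vector<int> sa = {2, 3}, sb = {1, 3}, so = {2, 3}, ta, tb, to;
  ComputeStridesToy(sa, &ta);
  ComputeStridesToy(sb, &tb);
  ComputeStridesToy(so, &to);
  float a[6] = {1, 2, 3, 4, 5, 6}, b[3] = {10, 20, 30}, out[6];
  BroadcastMulToy(a, b, out, 0, 3, 0, sa, sb, so, ta, tb, to);
  for (float v : out) printf("%g ", v);  // 10 40 90 40 100 180
  printf("\n");
  return 0;
}
```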
+
+int CalBroadCasting(const TensorPtrVector &in_tensors, int *outside, int *break_pos, ArithmeticParameter *params) {
+  params->broadcasting_ = false;
+  for (int i = 0; i < params->ndim_; i++) {
+    if (params->in_shape0_[i] != params->in_shape1_[i]) {
+      if (params->in_shape0_[i] == 1) {
+        params->out_shape_[i] = params->in_shape1_[i];
+      } else if (params->in_shape1_[i] == 1) {
+        params->out_shape_[i] = params->in_shape0_[i];
+      } else {
+        LITE_ERROR_LOG("shapes of input tensors can not be broadcasted");
+        return RET_INPUT_TENSOR_ERROR;
+      }
+      params->broadcasting_ = true;
+    } else {
+      params->out_shape_[i] = params->in_shape0_[i];
+    }
+  }
+  if (params->broadcasting_) {
+    *outside = 1;
+    for (int i = static_cast<int>(params->ndim_) - 1; i >= 0; --i) {
+      if (params->in_shape0_[i] != params->in_shape1_[i]) {
+        *break_pos = i;
+        break;
+      }
+      (*outside) *= params->out_shape_[i];
+    }
+    ComputeStrides(params->in_shape0_, params->in_strides0_, params->ndim_);
+    ComputeStrides(params->in_shape1_, params->in_strides1_, params->ndim_);
+    ComputeStrides(params->out_shape_, params->out_strides_, params->ndim_);
+  }
+  return RET_OK;
+}
+
+int RunArithmetic(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, ArithmeticRun arithmetic_run,
+                  ArithmeticOptRun arithmetic_opt_run, int outside, int break_pos, ArithmeticParameter *params) {
+  int error_code = RET_OK;
+  int count = out_tensors[0]->ElementsNum();
+  float *input0_data = reinterpret_cast<float *>(in_tensors[0]->data_);
+  float *input1_data = reinterpret_cast<float *>(in_tensors[1]->data_);
+  float *output_data = reinterpret_cast<float *>(out_tensors[0]->data_);
+  if (params->broadcasting_) {
+    error_code = BroadcastRun(input0_data, input1_data, output_data, 0, outside, break_pos, arithmetic_run, params);
+  } else if (arithmetic_opt_run != NULL) {
+    error_code = arithmetic_opt_run(input0_data, input1_data, output_data, count, params);
+  } else {
+    error_code = arithmetic_run(input0_data, input1_data, output_data, count);
+  }
+  if (error_code != RET_OK) {
+    return error_code;
+  }
+  return RET_OK;
+}
+
+int DoArithmeticInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param) {
+  if (in_tensors.size() != 2 || in_tensors[0]->data_ == NULL || in_tensors[1]->data_ == NULL) {
+    LITE_ERROR_LOG("input tensors num not correct or input data is NULL!")
+    return RET_INPUT_TENSOR_ERROR;
+  }
+  if (out_tensors.size() != 1) {
+    LITE_ERROR_LOG("output tensors num not correct!")
+    return RET_ERROR;
+  }
+  ShapeVector in_shape0 = in_tensors[0]->shape_;
+  ShapeVector in_shape1 = in_tensors[1]->shape_;
+  int ndim0 = in_shape0.size();
+  int ndim1 = in_shape1.size();
+  ArithmeticParameter *arithmeticParameter = (ArithmeticParameter *)param;
+  if (ndim0 < ndim1) {
+    arithmeticParameter->ndim_ = ndim1;
+    int fill_dim_num = ndim1 - ndim0;
+    int j = 0;
+    for (int i = 0; i < ndim1; i++) {
+      if (i < fill_dim_num) {
+        arithmeticParameter->in_shape0_[i] = 1;
+      } else {
+        arithmeticParameter->in_shape0_[i] = in_shape0[j++];
+      }
+      arithmeticParameter->in_shape1_[i] = in_shape1[i];
+    }
+  } else if (ndim0 > ndim1) {
+    arithmeticParameter->ndim_ = ndim0;
+    int fill_dim_num = ndim0 - ndim1;
+    int j = 0;
+    for (int i = 0; i < ndim0; i++) {
+      if (i < fill_dim_num) {
+        arithmeticParameter->in_shape1_[i] = 1;
+      } else {
+        arithmeticParameter->in_shape1_[i] = in_shape1[j++];
+      }
+      arithmeticParameter->in_shape0_[i] = in_shape0[i];
+    }
+  } else {
+    arithmeticParameter->ndim_ = ndim0;
+    for (int i = 0; i < ndim0; i++) {
+      arithmeticParameter->in_shape0_[i] = in_shape0[i];
+      arithmeticParameter->in_shape1_[i] = in_shape1[i];
+    }
+  }
+  ShapeVector out_shape;
+  for (int i = 0; i < arithmeticParameter->ndim_; i++) {
+    if (arithmeticParameter->in_shape0_[i] != arithmeticParameter->in_shape1_[i]) {
+      if (arithmeticParameter->in_shape0_[i] == 1) {
+        out_shape.push_back(arithmeticParameter->in_shape1_[i]);
+      } else if (arithmeticParameter->in_shape1_[i] == 1) {
+        out_shape.push_back(arithmeticParameter->in_shape0_[i]);
+      } else {
+        LITE_ERROR_LOG("shapes of input tensors can not be broadcasted!")
+        return RET_INPUT_TENSOR_ERROR;
+      }
+    } else {
+      out_shape.push_back(arithmeticParameter->in_shape0_[i]);
+    }
+  }
+  out_tensors[0]->shape_ = out_shape;
+  out_tensors[0]->data_type_ = in_tensors[0]->data_type_;
+  out_tensors[0]->format_ = in_tensors[0]->format_;
+  return RET_OK;
+}
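Before computing the broadcast output shape, DoArithmeticInferShape left-pads the lower-rank operand's shape with 1s (the `fill_dim_num` loops above). A standalone sketch of that alignment step, using plain int vectors in place of the fixed-size `in_shape0_`/`in_shape1_` arrays:

```cpp
#include <cstdio>
#include <vector>

// Left-pad a shape with leading 1s until it has target_ndim dimensions,
// mirroring the fill_dim_num logic in DoArithmeticInferShape.
static std::vector<int> AlignRank(const std::vector<int> &shape, size_t target_ndim) {
  std::vector<int> aligned(target_ndim - shape.size(), 1);  // leading 1s
  aligned.insert(aligned.end(), shape.begin(), shape.end());
  return aligned;
}

int main() {
  std::vector<int> s0 = {5, 8};        // rank 2
  std::vector<int> s1 = {3, 4, 5, 8};  // rank 4
  std::vector<int> a0 = AlignRank(s0, s1.size());
  for (int d : a0) printf("%d ", d);  // 1 1 5 8 -> broadcasts against {3, 4, 5, 8}
  printf("\n");
  return 0;
}
```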
+
+int ChooseKernel(const int kernel_type, ArithmeticRun *arithmetic_run, ArithmeticParameter *params) {
+  if (kernel_type == KernelType::Mul) {
+    if (params->activation_type_ == mindspore::schema::ActivationType_RELU) {
+      *arithmetic_run = ElementMulRelu;
+    } else if (params->activation_type_ == mindspore::schema::ActivationType_RELU6) {
+      *arithmetic_run = ElementMulRelu6;
+    } else {
+      *arithmetic_run = ElementMul;
+    }
+  } else {
+    LITE_ERROR_LOG("unsupported operator type");
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+int ChooseOptKernel(const int kernel_type, ArithmeticOptRun *arithmetic_opt_run, ArithmeticParameter *params) {
+  if (kernel_type == KernelType::Mul) {
+    if (params->activation_type_ == mindspore::schema::ActivationType_RELU) {
+      *arithmetic_opt_run = ElementOptMulRelu;
+    } else if (params->activation_type_ == mindspore::schema::ActivationType_RELU6) {
+      *arithmetic_opt_run = ElementOptMulRelu6;
+    } else {
+      *arithmetic_opt_run = ElementOptMul;
+    }
+  } else {
+    LITE_INFO_LOG("kernel does not have an opt version");
+  }
+  return RET_OK;
+}
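ChooseKernel and ChooseOptKernel only fill in a plain C function pointer; the caller later invokes it without knowing which variant was picked. A toy stand-in matching the `ArithmeticRun` typedef shows the dispatch pattern (`ElementMulToy` is ours; the real `ElementMul` and its ReLU variants live in nnacl):

```cpp
#include <cstdio>

typedef int (*ArithmeticRun)(float *input0, float *input1, float *output, int element_size);

// Toy elementwise multiply with the same signature the kernel table expects.
static int ElementMulToy(float *input0, float *input1, float *output, int element_size) {
  for (int i = 0; i < element_size; ++i) output[i] = input0[i] * input1[i];
  return 0;  // RET_OK
}

int main() {
  ArithmeticRun run = ElementMulToy;  // what ChooseKernel does for KernelType::Mul
  float a[4] = {1, 2, 3, 4}, b[4] = {2, 2, 2, 2}, out[4];
  if (run(a, b, out, 4) == 0) {
    for (float v : out) printf("%g ", v);  // 2 4 6 8
    printf("\n");
  }
  return 0;
}
```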
+
+int DoArithmetic(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
+                 mindspore::lite::Allocator *allocator) {
+  if (in_tensors.size() != 2 || in_tensors[0]->data_ == NULL || in_tensors[1]->data_ == NULL) {
+    LITE_ERROR_LOG("input tensors num not correct or input data is NULL!")
+    return RET_INPUT_TENSOR_ERROR;
+  }
+  if (out_tensors.size() != 1 || out_tensors[0]->data_ == NULL) {
+    LITE_ERROR_LOG("output tensors num not correct or output data is NULL!")
+    return RET_ERROR;
+  }
+  if (allocator == NULL) {
+    LITE_ERROR_LOG("allocator is NULL!")
+    return RET_ERROR;
+  }
+  ArithmeticParameter *params = reinterpret_cast<ArithmeticParameter *>(node->primitive_);
+
+  ArithmeticRun arithmetic_run = NULL;
+  int kernel_type = params->op_parameter_.type_;
+  int status = ChooseKernel(kernel_type, &arithmetic_run, params);
+  if (status != RET_OK) {
+    return status;
+  }
+  int outside = 0;
+  int break_pos = 0;
+  // when one of the inputs has only one element
+  params->in_elements_num0_ = in_tensors[0]->ElementsNum();
+  params->in_elements_num1_ = in_tensors[1]->ElementsNum();
+  params->out_elements_num_ = out_tensors[0]->ElementsNum();
+  ArithmeticOptRun arithmetic_opt_run = NULL;
+  if (params->in_elements_num0_ == 1 || params->in_elements_num1_ == 1) {
+    params->broadcasting_ = false;
+    ChooseOptKernel(kernel_type, &arithmetic_opt_run, params);
+  } else {
+    int ret = CalBroadCasting(in_tensors, &outside, &break_pos, params);
+    if (ret != RET_OK) {
+      return ret;
+    }
+  }
+  return RunArithmetic(in_tensors, out_tensors, arithmetic_run, arithmetic_opt_run, outside, break_pos, params);
+}
diff --git a/mindspore/lite/internal/src/kernel/fp32/arithmetic.h b/mindspore/lite/internal/src/kernel/fp32/arithmetic.h
new file mode 100644
index 00000000000..42cad0df659
--- /dev/null
+++ b/mindspore/lite/internal/src/kernel/fp32/arithmetic.h
@@ -0,0 +1,29 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef INTERNAL_SRC_KERNEL_FP32_ARITHMETIC_H_
+#define INTERNAL_SRC_KERNEL_FP32_ARITHMETIC_H_
+
+#include "internal/include/model.h"
+#include "internal/include/lite_utils.h"
+#include "src/runtime/allocator.h"
+#include "nnacl/arithmetic_common.h"
+
+int DoArithmeticInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param);
+
+int DoArithmetic(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
+                 mindspore::lite::Allocator *allocator);
+
+#endif  // INTERNAL_SRC_KERNEL_FP32_ARITHMETIC_H_
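When either operand has exactly one element, DoArithmetic disables broadcasting and hands the scalar to the opt kernel instead of running the stride recursion. A toy model of that fast path (the real `ElementOptMul` decides which side is the scalar from `ArithmeticParameter`'s element counts; here that is a plain bool for brevity):

```cpp
#include <cstdio>

// Sketch of the single-element fast path: no stride bookkeeping, just one
// scalar applied against a flat array. Toy re-implementation, not nnacl code.
static int ElementOptMulToy(const float *in0, const float *in1, float *out, int n, bool in0_is_scalar) {
  if (in0_is_scalar) {
    for (int i = 0; i < n; ++i) out[i] = in0[0] * in1[i];
  } else {
    for (int i = 0; i < n; ++i) out[i] = in0[i] * in1[0];
  }
  return 0;  // RET_OK
}

int main() {
  float scalar = 2.0f, v[4] = {1, 2, 3, 4}, out[4];
  ElementOptMulToy(&scalar, v, out, 4, true);
  for (float f : out) printf("%g ", f);  // 2 4 6 8
  printf("\n");
  return 0;
}
```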
diff --git a/mindspore/lite/internal/src/kernel/fp32/bias_add.cc b/mindspore/lite/internal/src/kernel/fp32/bias_add.cc
new file mode 100644
index 00000000000..2cd63f40f3b
--- /dev/null
+++ b/mindspore/lite/internal/src/kernel/fp32/bias_add.cc
@@ -0,0 +1,82 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "internal/src/kernel/fp32/bias_add.h"
+#include "internal/include/model.h"
+#include "internal/include/ms_tensor.h"
+#include "internal/include/lite_utils.h"
+#include "src/runtime/allocator.h"
+#include "internal/src/lite_log.h"
+#include "internal/include/errorcode.h"
+#include "nnacl/arithmetic_common.h"
+#include "nnacl/fp32/arithmetic.h"
+
+int DoBiasAddInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param) {
+  if (in_tensors.size() != 2 || in_tensors[0]->data_ == NULL || in_tensors[1]->data_ == NULL) {
+    LITE_ERROR_LOG("input tensors num not correct or input data is NULL!")
+    return RET_INPUT_TENSOR_ERROR;
+  }
+  if (out_tensors.size() != 1) {
+    LITE_ERROR_LOG("output tensors num not correct!")
+    return RET_ERROR;
+  }
+  out_tensors[0]->shape_ = in_tensors[0]->shape_;
+  out_tensors[0]->data_type_ = in_tensors[0]->data_type_;
+  out_tensors[0]->format_ = in_tensors[0]->format_;
+  return RET_OK;
+}
+
+int DoBiasAdd(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
+              mindspore::lite::Allocator *allocator) {
+  if (in_tensors.size() != 2 || in_tensors[0]->data_ == NULL || in_tensors[1]->data_ == NULL) {
+    LITE_ERROR_LOG("input tensors num not correct or input data is NULL!")
+    return RET_INPUT_TENSOR_ERROR;
+  }
+  if (out_tensors.size() != 1 || out_tensors[0]->data_ == NULL) {
+    LITE_ERROR_LOG("output tensors num not correct or output data is NULL!")
+    return RET_ERROR;
+  }
+  if (allocator == NULL) {
+    LITE_ERROR_LOG("allocator is NULL!")
+    return RET_ERROR;
+  }
+  ArithmeticParameter *params = reinterpret_cast<ArithmeticParameter *>(node->primitive_);
+
+  ShapeVector dims = in_tensors[0]->shape_;
+  params->ndim_ = dims.size();
+  for (size_t i = 0; i < params->ndim_; i++) {
+    params->in_shape0_[i] = dims[i];
+    params->in_shape1_[i] = 1;
+    params->out_shape_[i] = dims[i];
+  }
+  params->in_shape1_[params->ndim_ - 1] = dims[params->ndim_ - 1];
+
+  float *in = reinterpret_cast<float *>(in_tensors[0]->data_);
+  float *bias = reinterpret_cast<float *>(in_tensors[1]->data_);
+  float *out = reinterpret_cast<float *>(out_tensors[0]->data_);
+  size_t data_size = in_tensors[0]->ElementsNum();
+  float *tile_in = reinterpret_cast<float *>(allocator->Malloc(data_size * sizeof(float)));
+  float *tile_bias = reinterpret_cast<float *>(allocator->Malloc(data_size * sizeof(float)));
+  if (tile_in == NULL || tile_bias == NULL) {
+    LITE_ERROR_LOG("Memory allocation failed!")
+    allocator->Free(tile_in);
+    allocator->Free(tile_bias);
+    return RET_ERROR;
+  }
+  BroadcastAdd(in, bias, tile_in, tile_bias, out, data_size, params);
+  allocator->Free(tile_in);
+  allocator->Free(tile_bias);
+  return RET_OK;
+}
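DoBiasAdd models the bias as shape {1, ..., 1, C} against an input whose last dimension is C, so every inner row of the input receives the same bias vector. The net effect, as a flat standalone loop (the kernel itself routes through nnacl's `BroadcastAdd` with the two scratch buffers it allocates above):

```cpp
#include <cstdio>

int main() {
  // Input shape {2, 3}, bias shape {3}: each row of the input gets the bias.
  const int outer = 2, c = 3;
  float in[outer * c] = {1, 2, 3, 4, 5, 6};
  float bias[c] = {10, 20, 30};
  float out[outer * c];
  for (int o = 0; o < outer; ++o) {
    for (int i = 0; i < c; ++i) out[o * c + i] = in[o * c + i] + bias[i];
  }
  for (float v : out) printf("%g ", v);  // 11 22 33 14 25 36
  printf("\n");
  return 0;
}
```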
diff --git a/mindspore/lite/internal/src/kernel/fp32/bias_add.h b/mindspore/lite/internal/src/kernel/fp32/bias_add.h
new file mode 100644
index 00000000000..c368b2c3887
--- /dev/null
+++ b/mindspore/lite/internal/src/kernel/fp32/bias_add.h
@@ -0,0 +1,28 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef INTERNAL_SRC_KERNEL_FP32_BIAS_ADD_H_
+#define INTERNAL_SRC_KERNEL_FP32_BIAS_ADD_H_
+
+#include "internal/include/model.h"
+#include "internal/include/lite_utils.h"
+#include "src/runtime/allocator.h"
+
+int DoBiasAddInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param);
+
+int DoBiasAdd(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
+              mindspore::lite::Allocator *allocator);
+
+#endif  // INTERNAL_SRC_KERNEL_FP32_BIAS_ADD_H_
diff --git a/mindspore/lite/internal/src/kernel/fp32/reduce.cc b/mindspore/lite/internal/src/kernel/fp32/reduce.cc
new file mode 100644
index 00000000000..77062814dd3
--- /dev/null
+++ b/mindspore/lite/internal/src/kernel/fp32/reduce.cc
@@ -0,0 +1,236 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/src/kernel/fp32/reduce.h"
+#include <vector>
+#include "internal/include/model.h"
+#include "internal/include/lite_utils.h"
+#include "src/runtime/allocator.h"
+#include "internal/src/lite_log.h"
+#include "internal/include/errorcode.h"
+#include "nnacl/reduce_parameter.h"
+#include "nnacl/fp32/reduce.h"
+#include "schema/ops_generated.h"
+
+typedef int (*Reducer)(const int outer_size, const int inner_size, const int axis_size, const float *src_data,
+                       float *dst_data, const int tid, const int thread_num);
+
+int MallocTmpBuffer(std::vector<float *> *data_buffers, const ShapeVector &shape, const int *axes, const int num_axes,
+                    mindspore::lite::Allocator *allocator) {
+  for (int i = 0; i < data_buffers->size(); ++i) {
+    if (data_buffers->at(i) != NULL) {
+      allocator->Free(data_buffers->at(i));
+      data_buffers->at(i) = NULL;
+    }
+  }
+  data_buffers->clear();
+
+  ShapeVector input_shape = shape;
+  const int rank = input_shape.size();
+  for (auto i = 0; i < num_axes - 1; i++) {
+    int axis = axes[i];
+    size_t size = 1;
+    for (int j = 0; j < rank; j++) {
+      if (axis != j) {
+        size *= input_shape[j];
+      }
+    }
+    float *buffer = reinterpret_cast<float *>(allocator->Malloc(size * sizeof(float)));
+    if (buffer == NULL) {
+      LITE_ERROR_LOG("Memory allocation failed!")
+      return RET_ERROR;
+    }
+    data_buffers->emplace_back(buffer);
+    input_shape[axis] = 1;
+  }
+  return RET_OK;
+}
+
+int FreeTmpBuffer(std::vector<float *> *data_buffers, mindspore::lite::Allocator *allocator) {
+  for (int i = 0; i < data_buffers->size(); ++i) {
+    allocator->Free(data_buffers->at(i));
+  }
+  data_buffers->clear();
+  return RET_OK;
+}
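MallocTmpBuffer allocates one scratch buffer per intermediate reduction step: collapsing axis k shrinks the tensor by a factor of shape[k], and the final step writes straight into the output tensor, so only num_axes - 1 buffers are needed. A standalone illustration of that sizing:

```cpp
#include <cstdio>
#include <vector>

int main() {
  // Same bookkeeping as MallocTmpBuffer: after each step, the reduced axis
  // collapses to 1, so the i-th intermediate needs prod(shape) / shape[axes[i]] floats.
  std::vector<int> shape = {2, 4, 5, 8};
  std::vector<int> axes = {1, 3};  // reduce over dim 1, then dim 3
  for (size_t i = 0; i + 1 < axes.size(); ++i) {  // last step writes to the output
    size_t size = 1;
    for (size_t j = 0; j < shape.size(); ++j) {
      if (static_cast<int>(j) != axes[i]) size *= shape[j];
    }
    printf("buffer %zu: %zu floats\n", i, size);  // buffer 0: 2 * 5 * 8 = 80
    shape[axes[i]] = 1;
  }
  return 0;
}
```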
+
+int RunReduce(Reducer reducer, std::vector<float *> data_buffers, float *in_data, float *out_data, Int32Vector axes,
+              ShapeVector shape) {
+  int rank = shape.size();
+  float *dst_data = NULL;
+  float *src_data = in_data;
+  ShapeVector tmp_shape = shape;
+  for (size_t i = 0; i < axes.size(); ++i) {
+    if (i != axes.size() - 1) {
+      dst_data = data_buffers[i];
+    } else {
+      dst_data = out_data;
+    }
+    int axis = axes[i];
+    int outer_size = 1;
+    for (int j = 0; j < axis; j++) {
+      outer_size *= tmp_shape[j];
+    }
+    int inner_size = 1;
+    for (int k = axis + 1; k < rank; k++) {
+      inner_size *= tmp_shape[k];
+    }
+    int axis_size = tmp_shape[axis];
+    int error_code = reducer(outer_size, inner_size, axis_size, src_data, dst_data, 0, 1);
+    if (error_code != RET_OK) {
+      LITE_ERROR_LOG("Reduce run error!")
+      return RET_ERROR;
+    }
+    tmp_shape[axis] = 1;
+    src_data = dst_data;
+  }
+  return RET_OK;
+}
+
+int DoReduceInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param) {
+  if (in_tensors.size() != 1 || in_tensors[0]->data_ == NULL) {
+    LITE_ERROR_LOG("input tensors num not correct or input data is NULL!")
+    return RET_INPUT_TENSOR_ERROR;
+  }
+  if (out_tensors.size() != 1) {
+    LITE_ERROR_LOG("output tensors num not correct!")
+    return RET_ERROR;
+  }
+
+  ReduceParameter *reduceParameter = reinterpret_cast<ReduceParameter *>(param);
+  bool keep_dims = reduceParameter->keep_dims_;
+  int num_axes = reduceParameter->num_axes_;
+  ShapeVector in_shape = in_tensors[0]->shape_;
+  int rank = in_shape.size();
+  Int32Vector out_shape;
+  Int32Vector axes;
+  for (int i = 0; i < num_axes; ++i) {
+    if (reduceParameter->axes_[i] < -rank || reduceParameter->axes_[i] >= rank) {
+      LITE_ERROR_LOG("reduce got invalid axis!")
+      return RET_ERROR;
+    }
+    if (reduceParameter->axes_[i] < 0) {
+      axes.push_back(reduceParameter->axes_[i] + rank);
+    } else {
+      axes.push_back(reduceParameter->axes_[i]);
+    }
+  }
+  if (reduceParameter->reduce_to_end_) {
+    if (num_axes != 1) {
+      LITE_ERROR_LOG("when reduce_to_end is set, the number of axes should be 1!")
+      return RET_ERROR;
+    }
+    int begin_axis = axes[0];
+    num_axes = rank - begin_axis;
+    for (auto i = begin_axis + 1; i < rank; ++i) {
+      axes.push_back(i);
+    }
+  }
+
+  if (num_axes == 0) {
+    axes.resize(rank);
+    for (size_t i = 0; i < rank; i++) {
+      axes[i] = i;
+      if (keep_dims) {
+        out_shape.push_back(1);
+      }
+    }
+    reduceParameter->num_axes_ = axes.size();
+    for (int i = 0; i < axes.size(); ++i) {
+      reduceParameter->axes_[i] = axes[i];
+    }
+    out_tensors[0]->shape_ = out_shape;
+    out_tensors[0]->data_type_ = in_tensors[0]->data_type_;
+    out_tensors[0]->format_ = in_tensors[0]->format_;
+    return RET_OK;
+  }
+  // reduce on the selected axes
+  for (size_t i = 0; i < rank; i++) {
+    bool reduce_axis = false;
+    for (size_t idx = 0; idx < num_axes; ++idx) {
+      if (axes[idx] == i) {
+        reduce_axis = true;
+        break;
+      }
+    }
+    if (reduce_axis) {
+      if (keep_dims) {
+        out_shape.push_back(1);
+      }
+    } else {
+      out_shape.push_back(in_shape[i]);
+    }
+  }
+  reduceParameter->num_axes_ = axes.size();
+  for (int i = 0; i < axes.size(); ++i) {
+    reduceParameter->axes_[i] = axes[i];
+  }
+  out_tensors[0]->shape_ = out_shape;
+  out_tensors[0]->data_type_ = in_tensors[0]->data_type_;
+  out_tensors[0]->format_ = in_tensors[0]->format_;
+  return RET_OK;
+}
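RunReduce flattens each step into an (outer_size, axis_size, inner_size) view of the tensor and hands it to the nnacl reducer. A standalone sum over axis 1 of a {2, 3, 2} tensor, using the same decomposition (`ReduceSumToy` is ours; the real `ReduceSum` also takes tid/thread_num for threading):

```cpp
#include <cstdio>

// Reduce the middle "axis" dimension of a tensor viewed as
// [outer_size][axis_size][inner_size], summing over axis_size.
static void ReduceSumToy(int outer_size, int inner_size, int axis_size, const float *src, float *dst) {
  for (int o = 0; o < outer_size; ++o) {
    for (int i = 0; i < inner_size; ++i) {
      float sum = 0.0f;
      for (int a = 0; a < axis_size; ++a) {
        sum += src[(o * axis_size + a) * inner_size + i];
      }
      dst[o * inner_size + i] = sum;
    }
  }
}

int main() {
  // shape {2, 3, 2}, axis = 1: outer_size = 2, axis_size = 3, inner_size = 2
  float src[12] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
  float dst[4];
  ReduceSumToy(2, 2, 3, src, dst);
  for (float v : dst) printf("%g ", v);  // 9 12 27 30
  printf("\n");
  return 0;
}
```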
+
+int DoReduce(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
+             mindspore::lite::Allocator *allocator) {
+  if (in_tensors.size() != 1 || in_tensors[0]->data_ == NULL) {
+    LITE_ERROR_LOG("input tensors num not correct or input data is NULL!")
+    return RET_INPUT_TENSOR_ERROR;
+  }
+  if (out_tensors.size() != 1 || out_tensors[0]->data_ == NULL) {
+    LITE_ERROR_LOG("output tensors num not correct or output data is NULL!")
+    return RET_ERROR;
+  }
+  if (allocator == NULL) {
+    LITE_ERROR_LOG("allocator is NULL!")
+    return RET_ERROR;
+  }
+
+  ReduceParameter *params = reinterpret_cast<ReduceParameter *>(node->primitive_);
+  Reducer reducer = NULL;
+  if (params->mode_ == mindspore::schema::ReduceMode::ReduceMode_ReduceSum) {
+    reducer = ReduceSum;
+  } else if (params->mode_ == mindspore::schema::ReduceMode::ReduceMode_ReduceMean) {
+    reducer = ReduceMean;
+  } else {
+    LITE_ERROR_LOG("unsupported reduce mode!")
+    return RET_ERROR;
+  }
+
+  std::vector<float *> data_buffers;
+  int status = MallocTmpBuffer(&data_buffers, in_tensors[0]->shape_, params->axes_, params->num_axes_, allocator);
+  if (status != RET_OK) {
+    FreeTmpBuffer(&data_buffers, allocator);
+    return status;
+  }
+
+  Int32Vector axes;
+  for (int i = 0; i < params->num_axes_; ++i) {
+    axes.push_back(params->axes_[i]);
+  }
+  status = RunReduce(reducer, data_buffers, reinterpret_cast<float *>(in_tensors[0]->data_),
+                     reinterpret_cast<float *>(out_tensors[0]->data_), axes, in_tensors[0]->shape_);
+  if (status != RET_OK) {
+    FreeTmpBuffer(&data_buffers, allocator);
+    return status;
+  }
+
+  status = FreeTmpBuffer(&data_buffers, allocator);
+  if (status != RET_OK) {
+    return status;
+  }
+  return RET_OK;
+}
diff --git a/mindspore/lite/internal/src/kernel/fp32/reduce.h b/mindspore/lite/internal/src/kernel/fp32/reduce.h
new file mode 100644
index 00000000000..2372b9fb7e7
--- /dev/null
+++ b/mindspore/lite/internal/src/kernel/fp32/reduce.h
@@ -0,0 +1,29 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef INTERNAL_SRC_KERNEL_FP32_REDUCE_H_
+#define INTERNAL_SRC_KERNEL_FP32_REDUCE_H_
+
+#include "internal/include/model.h"
+#include "internal/include/ms_tensor.h"
+#include "internal/include/lite_utils.h"
+#include "src/runtime/allocator.h"
+
+int DoReduceInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param);
+
+int DoReduce(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
+             mindspore::lite::Allocator *allocator);
+
+#endif  // INTERNAL_SRC_KERNEL_FP32_REDUCE_H_
diff --git a/mindspore/lite/test/ut/internal/infer_test.cc b/mindspore/lite/test/ut/internal/infer_test.cc
index c13f1438933..b8a5416bad5 100644
--- a/mindspore/lite/test/ut/internal/infer_test.cc
+++ b/mindspore/lite/test/ut/internal/infer_test.cc
@@ -33,6 +33,7 @@ class InferTest : public mindspore::CommonTest {
 TEST_F(InferTest, TestSession) {
   Model model;
   Node node;
+  node.name_ = String("node");
   model.nodes_.push_back(&node);
   node.node_type_ = NodeType::NodeType_CNode;
 
@@ -64,7 +65,7 @@ TEST_F(InferTest, TestSession) {
   TensorPtrVector outvec = session.GetOutputs();
   ASSERT_EQ(outvec.size(), 1);
   for (int i = 0; i < kOutSize; ++i) {
-    std::cout << *(reinterpret_cast<float *>(outvec.at(0)->data_)+ i) << " ";
+    std::cout << *(reinterpret_cast<float *>(outvec.at(0)->data_) + i) << " ";
   }
   std::cout << "\n";
   CompareOutputData(reinterpret_cast<float *>(outvec.at(0)->data_), expect_out, kOutSize, 0.000001);