forked from mindspore-Ecosystem/mindspore
!12210 add softmax and reducer coder
From: @zhujingxuan Reviewed-by: Signed-off-by:
commit d14ac1c964
@@ -1,80 +1,84 @@
 #### classify all .h .c .cc files to FILE_SET
 set(CODER_SRC
     ${MICRO_DIR}/coder/coder.cc
     ${MICRO_DIR}/coder/coder_context.cc
     ${MICRO_DIR}/coder/coder_graph.cc
     ${MICRO_DIR}/coder/debug.cc
     ${MICRO_DIR}/coder/session_coder.cc
     )

 set(CODER_ALLOC_SRC
     ${MICRO_DIR}/coder/allocator/allocator.cc
     ${MICRO_DIR}/coder/allocator/memory_manager.cc
     )

 set(CODER_GENERATOR_SRC
     ${MICRO_DIR}/coder/generator/generator.cc
     ${MICRO_DIR}/coder/generator/inference/inference_generator.cc
     ${MICRO_DIR}/coder/generator/utils/generator_utils.cc
     )

 set(CODER_OPCODERS_SRC
     ${MICRO_DIR}/coder/opcoders/file_collector.cc
     ${MICRO_DIR}/coder/opcoders/op_coder.cc
     ${MICRO_DIR}/coder/opcoders/op_coder_builder.cc
     ${MICRO_DIR}/coder/opcoders/op_coder_register.cc
     #### serializer
     ${MICRO_DIR}/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.cc
     ${MICRO_DIR}/coder/opcoders/serializers/nnacl_serializer/nnacl_int8_serializer.cc
     #### base coder
     ${MICRO_DIR}/coder/opcoders/base/conv2d_base_coder.cc
     ${MICRO_DIR}/coder/opcoders/base/dtype_cast_coder.cc
     ${MICRO_DIR}/coder/opcoders/base/full_connection_base_coder.cc
     ${MICRO_DIR}/coder/opcoders/base/quant_dtype_cast_coder.cc
     ${MICRO_DIR}/coder/opcoders/base/reduce_base_coder.cc
     ${MICRO_DIR}/coder/opcoders/base/softmax_base_coder.cc
     #### cmsis int8 coder
     ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/add_int8_coder.cc
     ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/conv2d_base_coder.cc
     ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/conv2d_int8_coder.cc
     ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/dwconv_int8_coder.cc
     ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/fullconnection_int8_coder.cc
     ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/mul_int8_coder.cc
     ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/pooling_int8_coder.cc
     ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/reshape_int8_coder.cc
     ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/softmax_int8_coder.cc
     #### nnacl fp32 coder
     ${MICRO_DIR}/coder/opcoders/nnacl/fp32/activation_fp32_coder.cc
     ${MICRO_DIR}/coder/opcoders/nnacl/fp32/addn_fp32_coder.cc
     ${MICRO_DIR}/coder/opcoders/nnacl/fp32/arithmetic_fp32_coder.cc
     ${MICRO_DIR}/coder/opcoders/nnacl/fp32/arithmetic_self_fp32_coder.cc
     ${MICRO_DIR}/coder/opcoders/nnacl/fp32/assign_add_fp32_coder.cc
     ${MICRO_DIR}/coder/opcoders/nnacl/fp32/batchnorm_fp32_coder.cc
     ${MICRO_DIR}/coder/opcoders/nnacl/fp32/concat_fp32_coder.cc
     ${MICRO_DIR}/coder/opcoders/nnacl/fp32/expand_dims_fp32_coder.cc
     ${MICRO_DIR}/coder/opcoders/nnacl/fp32/gather_fp32_coder.cc
     ${MICRO_DIR}/coder/opcoders/nnacl/fp32/nchw2nhwc_fp32_coder.cc
     ${MICRO_DIR}/coder/opcoders/nnacl/fp32/nhwc2nchw_fp32_coder.cc
     ${MICRO_DIR}/coder/opcoders/nnacl/fp32/pad_fp32_coder.cc
     ${MICRO_DIR}/coder/opcoders/nnacl/fp32/pooling_fp32_coder.cc
     ${MICRO_DIR}/coder/opcoders/nnacl/fp32/power_fp32_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/reduce_fp32_coder.cc
     ${MICRO_DIR}/coder/opcoders/nnacl/fp32/reshape_fp32_coder.cc
     ${MICRO_DIR}/coder/opcoders/nnacl/fp32/scale_fp32_coder.cc
     ${MICRO_DIR}/coder/opcoders/nnacl/fp32/slice_fp32_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/softmax_fp32_coder.cc
     ${MICRO_DIR}/coder/opcoders/nnacl/fp32/squeeze_dims_fp32_coder.cc
     ${MICRO_DIR}/coder/opcoders/nnacl/fp32/tile_fp32_coder.cc
     ${MICRO_DIR}/coder/opcoders/nnacl/fp32/transpose_fp32_coder.cc
     #### nnacl int8 coder
     ${MICRO_DIR}/coder/opcoders/nnacl/int8/concat_int8_coder.cc
     ${MICRO_DIR}/coder/opcoders/nnacl/int8/pooling_int8_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/int8/reduce_int8_coder.cc
     ${MICRO_DIR}/coder/opcoders/nnacl/int8/reshape_int8_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/int8/softmax_int8_coder.cc
     )

 set(CODER_UTILS_SRC
     ${MICRO_DIR}/coder/utils/coder_utils.cc
     ${MICRO_DIR}/coder/utils/dir_utils.cc
     ${MICRO_DIR}/coder/utils/print_utils.cc
     )

 set(PRIMITIVE_OP_SRC
     ${LITE_DIR}/src/ops/batch_norm.cc
micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.cc
@@ -0,0 +1,121 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.h"
#include <string>
#include "micro/coder/log.h"
#include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
#include "micro/coder/opcoders/file_collector.h"

using mindspore::schema::PrimitiveType_Reduce;

namespace mindspore::lite::micro::nnacl {
int ReduceFP32Coder::Prepare(CoderContext *const context) {
  MS_CHECK_RET_CODE(ReduceBaseCoder::Init(), "init failed");
  MS_CHECK_RET_CODE(ReSize(), "resize failed");
  MS_CHECK_RET_CODE(MallocTmpBuffer(), "malloc buffer failed");
  return RET_OK;
}

int ReduceFP32Coder::MallocTmpBuffer() {
  data_buffers_.clear();
  for (auto size : buffer_sizes_) {
    auto *buffer = static_cast<float *>(allocator_->Malloc(kNumberTypeFloat, size * sizeof(float), kWorkspace));
    MS_CHECK_PTR(buffer);
    data_buffers_.emplace_back(buffer);
  }
  return RET_OK;
}

int ReduceFP32Coder::ReSize() {
  if (input_tensors_.at(0)->data_type() == kNumberTypeFloat32) {
    data_type_ = kDataTypeFloat;
  } else {
    data_type_ = kDataTypeInt;
  }
  return ReduceBaseCoder::ReSize();
}

int ReduceFP32Coder::DoCode(CoderContext *const context) {
  // collect the kernel .h/.c files the generated code depends on
  if (mode_ == static_cast<int>(schema::ReduceMode_ReduceSum)) {
    Collect(context, {"runtime/kernel/fp32/reduce_sum.h"}, {"reduce_sum.c"});
  } else if (mode_ == static_cast<int>(schema::ReduceMode_ReduceMean)) {
    Collect(context, {"runtime/kernel/fp32/reduce_mean.h"}, {"reduce_mean.c"});
  } else {
    Collect(context, {"runtime/kernel/fp32/reduce.h"}, {"reduce.c"});
  }

  NNaclFp32Serializer code;
  // call the op function
  std::string reduce;
  std::string int_reduce;  // note: only the ReduceProd branch below sets an integer kernel name
  switch (mode_) {
    case static_cast<int>(schema::ReduceMode_ReduceSum): {
      reduce = "ReduceSum";
      break;
    }
    case static_cast<int>(schema::ReduceMode_ReduceMean): {
      reduce = "ReduceMean";
      break;
    }
    case static_cast<int>(schema::ReduceMode_ReduceMax): {
      reduce = "ReduceMax";
      break;
    }
    case static_cast<int>(schema::ReduceMode_ReduceMin): {
      reduce = "ReduceMin";
      break;
    }
    case static_cast<int>(schema::ReduceMode_ReduceProd): {
      reduce = "ReduceProd";
      int_reduce = "IntReduceProd";
      break;
    }
    case static_cast<int>(schema::ReduceMode_ReduceSumSquare): {
      reduce = "ReduceSumSquare";
      break;
    }
    default:
      MS_LOG(ERROR) << "Reduce unsupported reduce mode: " << mode_;
      return RET_ERROR;
  }

  // every pass but the last writes a workspace buffer; the last pass writes the output tensor
  std::string src_addr = allocator_->GetRuntimeAddr(input_tensor_);
  std::string dst_addr;
  for (int i = 0; i < num_axes_; ++i) {
    if (i != num_axes_ - 1) {
      dst_addr = allocator_->GetRuntimeAddr(data_buffers_.at(i));
    } else {
      dst_addr = allocator_->GetRuntimeAddr(output_tensor_);
    }
    outer_size_ = outer_sizes_.at(i);
    inner_size_ = inner_sizes_.at(i);
    axis_size_ = axis_sizes_.at(i);
    if (data_type_ == kDataTypeFloat) {
      code.CodeFunction(reduce, outer_size_, inner_size_, axis_size_, src_addr, dst_addr, 0, thread_num_);
    } else {
      code.CodeFunction(int_reduce, outer_size_, inner_size_, axis_size_, src_addr, dst_addr, 0, thread_num_);
    }
    src_addr = dst_addr;
  }
  context->AppendCode(code.str());
  return RET_OK;
}

REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Reduce, CPUOpCoderCreator<ReduceFP32Coder>)

}  // namespace mindspore::lite::micro::nnacl
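To make the generated output concrete, here is roughly what DoCode above expands to for a float ReduceSum over axes {1, 2} of a 2x3x4 input. This is a sketch: the g_* symbol names are illustrative stand-ins for whatever MemoryAllocator::GetRuntimeAddr returns, and the local ReduceSum reference implementation only assumes the signature visible in the CodeFunction call above.

#include <cstdio>

// Reference semantics assumed for nnacl's fp32 ReduceSum, inferred from the
// call site above: dst[o * inner + i] = sum over a of src[(o * axis + a) * inner + i].
static int ReduceSum(int outer_size, int inner_size, int axis_size, const float *src_data, float *dst_data, int tid,
                     int thread_num) {
  (void)tid;
  (void)thread_num;  // the sketch ignores threading
  for (int o = 0; o < outer_size; ++o) {
    for (int i = 0; i < inner_size; ++i) {
      float sum = 0.0f;
      for (int a = 0; a < axis_size; ++a) {
        sum += src_data[(o * axis_size + a) * inner_size + i];
      }
      dst_data[o * inner_size + i] = sum;
    }
  }
  return 0;
}

// Roughly what DoCode emits for ReduceSum over axes {1, 2} of a 2x3x4 input:
// the first pass fills a workspace buffer, the last pass fills the output tensor.
static float g_input0[2 * 3 * 4];
static float g_buffer0[2 * 4];
static float g_output0[2];

int main() {
  for (int i = 0; i < 24; ++i) g_input0[i] = 1.0f;
  ReduceSum(2, 4, 3, g_input0, g_buffer0, 0, 1);   // reduce axis 1: outer=2, inner=4, axis=3
  ReduceSum(2, 1, 4, g_buffer0, g_output0, 0, 1);  // reduce last axis: outer=2, inner=1, axis=4
  printf("%f %f\n", g_output0[0], g_output0[1]);   // 12.0 12.0: sum of 3*4 ones per batch
}

Reassigning src_addr = dst_addr after each pass is what chains an N-axis reduction through one workspace buffer per intermediate result instead of materializing the whole tensor each time.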
micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.h
@@ -0,0 +1,45 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_REDUCE_FP32_CODER_H_
#define MINDSPORE_LITE_MICRO_CODER_OPCODERS_REDUCE_FP32_CODER_H_

#include <string>
#include <vector>
#include "micro/coder/opcoders/base/reduce_base_coder.h"
#include "micro/coder/opcoders/op_coder.h"

namespace mindspore::lite::micro::nnacl {
class ReduceFP32Coder : public ReduceBaseCoder {
 public:
  ReduceFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                  const Model::Node *node, size_t node_index, Target target)
      : ReduceBaseCoder(in_tensors, out_tensors, node, node_index, target) {}

  ~ReduceFP32Coder() override = default;

  int Prepare(CoderContext *const context) override;

  int DoCode(CoderContext *const context) override;

 private:
  int ReSize() override;
  int MallocTmpBuffer();
  LiteDataType data_type_{kDataTypeFloat};
  std::vector<float *> data_buffers_;
};
}  // namespace mindspore::lite::micro::nnacl
#endif  // MINDSPORE_LITE_MICRO_CODER_OPCODERS_REDUCE_FP32_CODER_H_
micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.cc
@@ -0,0 +1,63 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.h"
#include <string>
#include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
#include "schema/inner/ops_generated.h"
#include "micro/coder/opcoders/file_collector.h"

using mindspore::schema::PrimitiveType_SoftMax;

namespace mindspore::lite::micro::nnacl {

int SoftMaxFP32Coder::Prepare(CoderContext *const context) {
  MS_CHECK_RET_CODE(SoftmaxBaseCoder::Init(), "softmax base init failed");
  // malloc tmp buffer: one float per (outer plane, inner plane) pair for the exp sums
  int n_dim = softmax_param_->n_dim_;
  int32_t axis = softmax_param_->axis_;
  if (axis == -1) {
    softmax_param_->axis_ += n_dim;
    axis = softmax_param_->axis_;
  }
  auto in_shape = input_tensor_->shape();
  int out_plane_size = 1;
  for (int i = 0; i < axis; ++i) {
    out_plane_size *= in_shape.at(i);
  }
  int in_plane_size = 1;
  for (int i = axis + 1; i < n_dim; i++) {
    in_plane_size *= in_shape.at(i);
  }
  sum_data_size_ = out_plane_size * in_plane_size * sizeof(float);
  sum_data_ = static_cast<float *>(allocator_->Malloc(kNumberTypeFloat, sum_data_size_, kWorkspace));
  MS_CHECK_PTR(sum_data_);
  return RET_OK;
}

int SoftMaxFP32Coder::DoCode(CoderContext *const context) {
  Collect(context, {"nnacl/fp32/softmax.h"}, {"softmax.c"});
  NNaclFp32Serializer code;
  code.CodeStruct("softmax_parameter", *softmax_param_);
  // zero the sum workspace, then emit the Softmax kernel call
  code.CodeFunction("memset", sum_data_, "0", sum_data_size_);
  code.CodeFunction("Softmax", input_tensor_, output_tensor_, sum_data_, "&softmax_parameter");
  context->AppendCode(code.str());
  return RET_OK;
}

REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_SoftMax, CPUOpCoderCreator<SoftMaxFP32Coder>)

}  // namespace mindspore::lite::micro::nnacl
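As a worked example of the workspace sizing in Prepare above, the following sketch mirrors the member-variable arithmetic (it is illustrative, not the coder itself):

#include <cstddef>

// Mirrors the sum-buffer sizing in SoftMaxFP32Coder::Prepare: one float per
// (outer plane, inner plane) pair, where the planes multiply the dimensions
// before and after the softmax axis.
size_t SoftmaxSumBytes(const int *shape, int n_dim, int axis) {
  size_t out_plane_size = 1;
  size_t in_plane_size = 1;
  for (int i = 0; i < axis; ++i) out_plane_size *= shape[i];
  for (int i = axis + 1; i < n_dim; ++i) in_plane_size *= shape[i];
  return out_plane_size * in_plane_size * sizeof(float);
}

// For shape {2, 3, 4} with axis = 1: 2 * 4 * sizeof(float) = 32 bytes.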
micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.h
@@ -0,0 +1,41 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_MICRO_CODER_SOFTMAX_CODER_H_
#define MINDSPORE_LITE_MICRO_CODER_SOFTMAX_CODER_H_

#include <vector>
#include "micro/coder/opcoders/base/softmax_base_coder.h"
namespace mindspore::lite::micro::nnacl {

class SoftMaxFP32Coder final : public SoftmaxBaseCoder {
 public:
  SoftMaxFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                   const Model::Node *node, size_t node_index, Target target)
      : SoftmaxBaseCoder(in_tensors, out_tensors, node, node_index, target) {}

  int Prepare(CoderContext *const context) override;

  int DoCode(CoderContext *const context) override;

  ~SoftMaxFP32Coder() override = default;

 private:
  float *sum_data_{nullptr};
  size_t sum_data_size_{0};
};

}  // namespace mindspore::lite::micro::nnacl
#endif  // MINDSPORE_LITE_MICRO_CODER_SOFTMAX_CODER_H_
micro/coder/opcoders/nnacl/int8/reduce_int8_coder.cc
@@ -0,0 +1,235 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "micro/coder/opcoders/nnacl/int8/reduce_int8_coder.h"
#include <cmath>
#include <vector>
#include <string>
#include "micro/coder/opcoders/file_collector.h"
#include "micro/coder/log.h"
#include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_int8_serializer.h"

using mindspore::schema::PrimitiveType_Reduce;
namespace mindspore::lite::micro::nnacl {
int ReduceInt8Coder::CalculateQuantArgs() {
  QuantArg input_quant = input_tensor_->quant_params().at(0);
  QuantArg output_quant = output_tensor_->quant_params().at(0);
  quant_arg_.in_scale_ = input_quant.scale;
  quant_arg_.in_zp_ = input_quant.zeroPoint;
  quant_arg_.out_scale_ = output_quant.scale;
  quant_arg_.out_zp_ = output_quant.zeroPoint;
  // decompose in_scale / out_scale into a fixed-point multiplier and shift
  const double input_output_multiplier = quant_arg_.in_scale_ / quant_arg_.out_scale_;
  int shift;
  QuantizeMultiplierSmallerThanOne(input_output_multiplier, &quant_arg_.in_out_multiplier_, &shift);
  quant_arg_.in_out_left_shift_ = shift < 0 ? -shift : 0;
  quant_arg_.in_out_right_shift_ = shift > 0 ? shift : 0;
  MS_CHECK_TRUE(num_axes_ < MAX_SHAPE_SIZE, "the number of axes should be less than MAX_SHAPE_SIZE");
  if (mode_ == static_cast<int>(schema::ReduceMode_ReduceMean)) {
    for (int i = 0; i < num_axes_; ++i) {
      auto axis = axes_[i];
      std::vector<int> in_shape = input_tensor_->shape();
      if (static_cast<int>(in_shape.size()) - 1 < axis) {
        MS_LOG(ERROR) << "input tensor shape is invalid";
        return RET_ERROR;
      }
      // ReduceMean divides by the axis length, so quantize 1 / axis_size
      double reciprocal = 1.0 / in_shape.at(axis);
      auto *qm = new (std::nothrow) QuantMulArg;
      MS_CHECK_PTR(qm);
      QuantizeMultiplierSmallerThanOne(reciprocal, &qm->multiplier_, &shift);
      qm->left_shift_ = shift < 0 ? -shift : 0;
      qm->right_shift_ = shift > 0 ? shift : 0;
      mean_multipliers_.push_back(qm);
    }
  }

  if (mode_ == static_cast<int>(schema::ReduceMode_ReduceProd)) {
    for (int i = 0; i < num_axes_; ++i) {
      int axis = axes_[i];
      std::vector<int> in_shape = input_tensors_.at(kInputIndex)->shape();
      if (static_cast<int>(in_shape.size()) - 1 < axis) {
        MS_LOG(ERROR) << "input tensor shape is invalid";
        return RET_ERROR;
      }
      int axis_size = in_shape.at(axis);
      double prod_multiplier = std::pow(quant_arg_.in_scale_, axis_size - 1);
      auto *qm = new (std::nothrow) QuantMulArg;
      MS_CHECK_PTR(qm);
      QuantizeMultiplierSmallerThanOne(prod_multiplier, &qm->multiplier_, &shift);
      qm->left_shift_ = shift < 0 ? -shift : 0;
      qm->right_shift_ = shift > 0 ? shift : 0;
      prod_multipliers_.push_back(qm);  // GetQuantArgs reads prod_multipliers_ for this mode
    }
  }

  if (mode_ == static_cast<int>(schema::ReduceMode_ReduceSumSquare)) {
    for (int i = 0; i < num_axes_ - 1; ++i) {
      auto *qm = new (std::nothrow) QuantMulArg;
      MS_CHECK_PTR(qm);
      double sum_square_multiplier = quant_arg_.in_scale_;
      QuantizeMultiplierSmallerThanOne(sum_square_multiplier, &qm->multiplier_, &shift);
      qm->left_shift_ = shift < 0 ? -shift : 0;
      qm->right_shift_ = shift > 0 ? shift : 0;
      sum_square_multipliers_.push_back(qm);
    }
    // the last axis also rescales into the output quantization
    auto *qm = new (std::nothrow) QuantMulArg;
    MS_CHECK_PTR(qm);
    double sum_square_multiplier = quant_arg_.in_scale_ * (quant_arg_.in_scale_ / quant_arg_.out_scale_);
    QuantizeMultiplierSmallerThanOne(sum_square_multiplier, &qm->multiplier_, &shift);
    qm->left_shift_ = shift < 0 ? -shift : 0;
    qm->right_shift_ = shift > 0 ? shift : 0;
    sum_square_multipliers_.push_back(qm);
  }

  return RET_OK;
}

int ReduceInt8Coder::MallocTmpBuffer() {
  data_buffers_.clear();
  if (num_axes_ != static_cast<int>(buffer_sizes_.size())) {
    MS_LOG(ERROR) << "num_axes_ size is invalid";
    return RET_ERROR;
  }
  for (auto buffer_size : buffer_sizes_) {
    auto *buffer =
      static_cast<int32_t *>(allocator_->Malloc(kNumberTypeInt32, buffer_size * sizeof(int32_t), kWorkspace));
    MS_CHECK_PTR(buffer);
    data_buffers_.emplace_back(buffer);
  }
  return RET_OK;
}

void ReduceInt8Coder::GetQuantArgs(size_t index) {
  if (index > static_cast<size_t>(num_axes_)) {
    MS_LOG(ERROR) << "index is invalid, beyond num_axes_";
    return;
  }
  if (mode_ == static_cast<int>(schema::ReduceMode_ReduceMean)) {
    quant_arg_.mean_multiplier_ = mean_multipliers_.at(index)->multiplier_;
    quant_arg_.mean_left_shift_ = mean_multipliers_.at(index)->left_shift_;
    quant_arg_.mean_right_shift_ = mean_multipliers_.at(index)->right_shift_;
  }
  if (mode_ == static_cast<int>(schema::ReduceMode_ReduceProd)) {
    quant_arg_.prod_multiplier_ = prod_multipliers_.at(index)->multiplier_;
    quant_arg_.prod_left_shift_ = prod_multipliers_.at(index)->left_shift_;
    quant_arg_.prod_right_shift_ = prod_multipliers_.at(index)->right_shift_;
  }

  if (mode_ == static_cast<int>(schema::ReduceMode_ReduceSumSquare)) {
    quant_arg_.sum_square_multiplier_ = sum_square_multipliers_.at(index)->multiplier_;
    quant_arg_.sum_square_left_shift_ = sum_square_multipliers_.at(index)->left_shift_;
    quant_arg_.sum_square_right_shift_ = sum_square_multipliers_.at(index)->right_shift_;
  }
}

int ReduceInt8Coder::Prepare(CoderContext *const context) {
  MS_CHECK_RET_CODE(ReduceBaseCoder::Init(), "Init failed");
  std::vector<int> in_shape = input_tensor_->shape();
  if (!in_shape.empty()) {
    this->valid_shape_ = true;
    MS_CHECK_RET_CODE(CalculateQuantArgs(), "CalculateQuantArgs failed");
  } else {
    this->valid_shape_ = false;
  }
  switch (mode_) {
    case static_cast<int>(schema::ReduceMode_ReduceMean): {
      reducer_ = "ReduceMeanInt8";
      last_reducer_ = "ReduceMeanLastAxis";
      break;
    }
    case static_cast<int>(schema::ReduceMode_ReduceSum): {
      reducer_ = "ReduceSumInt8";
      last_reducer_ = "ReduceSumLastAxis";
      break;
    }
    case static_cast<int>(schema::ReduceMode_ReduceMax): {
      reducer_ = "ReduceMaxInt8";
      last_reducer_ = "ReduceMaxLastAxis";
      break;
    }
    case static_cast<int>(schema::ReduceMode_ReduceMin): {
      reducer_ = "ReduceMinInt8";
      last_reducer_ = "ReduceMinLastAxis";
      break;
    }
    case static_cast<int>(schema::ReduceMode_ReduceProd): {
      reducer_ = "ReduceProdInt8";
      last_reducer_ = "ReduceProdLastAxis";
      break;
    }
    case static_cast<int>(schema::ReduceMode_ReduceSumSquare): {
      reducer_ = "ReduceSumSquareInt8";
      last_reducer_ = "ReduceSumSquareLastAxis";
      break;
    }
    default:
      MS_LOG(ERROR) << "Reduce unsupported reduce mode: " << mode_;
      return RET_ERROR;
  }
  MS_CHECK_RET_CODE(ReduceBaseCoder::ReSize(), "ReSize failed");
  if (!this->valid_shape_) {
    MS_CHECK_RET_CODE(CalculateQuantArgs(), "CalculateQuantArgs failed");
  }
  MS_CHECK_RET_CODE(MallocTmpBuffer(), "MallocTmpBuffer failed");
  begin_src_data_ = static_cast<int32_t *>(
    allocator_->Malloc(kNumberTypeInt32, sizeof(int32_t) * input_tensor_->ElementsNum(), kWorkspace));
  MS_CHECK_PTR(begin_src_data_);
  return RET_OK;
}

int ReduceInt8Coder::DoCode(CoderContext *const context) {
  MS_LOG(DEBUG) << "*****Reduce code start*****";
  int task_id = 0;
  NNaclInt8Serializer code;
  Collect(context, {"nnacl/int8/reduce_int8.h"}, {"reduce_int8.c", "fixed_point.c"});
  std::string src_addr = allocator_->GetRuntimeAddr(input_tensor_);
  std::string dst_addr;
  std::string begin_src_data_src = allocator_->GetRuntimeAddr(begin_src_data_);

  // widen the int8 input into the int32 working buffer once, up front
  code << "int *begin_data = (int *)(" << begin_src_data_src << ");\n";
  code << "int8_t *ori_data = (int8_t *)(" << src_addr << ");\n";
  code << "for (int i = 0; i < " << input_tensor_->ElementsNum() << "; ++i) {\n"
       << "  begin_data[i] = (int)ori_data[i];\n"
       << "}\n";
  for (int i = 0; i < num_axes_; ++i) {
    GetQuantArgs(i);
    std::string quant_arg_i = "quant_arg_" + std::to_string(i);
    std::string ptr_quan_arg_i = "&" + quant_arg_i;
    code.CodeStruct(quant_arg_i, quant_arg_);
    if (i != num_axes_ - 1) {
      is_last_axis = false;
      dst_addr = allocator_->GetRuntimeAddr(data_buffers_.at(i));
    } else {
      is_last_axis = true;
      dst_addr = allocator_->GetRuntimeAddr(output_tensor_);
    }
    outer_size_ = outer_sizes_.at(i);
    inner_size_ = inner_sizes_.at(i);
    axis_size_ = axis_sizes_.at(i);
    if (!is_last_axis) {
      code.CodeFunction(reducer_, outer_size_, inner_size_, axis_size_, begin_src_data_src, dst_addr, ptr_quan_arg_i,
                        task_id, thread_num_);
    } else {
      // the last pass consumes the int32 chain built above, not the raw int8 input
      code.CodeFunction(last_reducer_, outer_size_, inner_size_, axis_size_, begin_src_data_src, dst_addr,
                        ptr_quan_arg_i, task_id, thread_num_);
    }
    begin_src_data_src = dst_addr;
  }
  context->AppendCode(code.str());
  return RET_OK;
}

REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt8, PrimitiveType_Reduce, CPUOpCoderCreator<ReduceInt8Coder>)

}  // namespace mindspore::lite::micro::nnacl
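CalculateQuantArgs above leans on QuantizeMultiplierSmallerThanOne, which lives in nnacl. The following is a minimal sketch of the gemmlowp-style decomposition it is assumed to perform, so the left/right shift handling above has context; it is an illustrative re-implementation, not the repo's function:

#include <cmath>
#include <cstdint>

// Split a real multiplier in (0, 1] into an int32 mantissa and an exponent so
// that value ~= multiplier * 2^-31 * 2^-shift. CalculateQuantArgs then turns a
// negative shift into left_shift_ and a positive one into right_shift_.
void QuantizeMultiplierSketch(double value, int32_t *multiplier, int *shift) {
  if (value <= 0.0) {
    *multiplier = 0;
    *shift = 0;
    return;
  }
  int exponent = 0;
  const double q = std::frexp(value, &exponent);  // value = q * 2^exponent, q in [0.5, 1)
  int64_t q31 = static_cast<int64_t>(std::round(q * (1ll << 31)));
  if (q31 == (1ll << 31)) {  // rounding overflow: 1.0 * 2^31 does not fit in int32
    q31 /= 2;
    ++exponent;
  }
  *multiplier = static_cast<int32_t>(q31);
  *shift = -exponent;  // a positive shift means a right shift for values below 1
}

For example, value = 0.25 yields multiplier = 2^30 and shift = 1, since 2^30 * 2^-31 * 2^-1 = 0.25.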
micro/coder/opcoders/nnacl/int8/reduce_int8_coder.h
@@ -0,0 +1,55 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MICRO_LITE_MICRO_CODER_OPCODERS_INT8_REDUCE_INT8_CODER_H_
#define MICRO_LITE_MICRO_CODER_OPCODERS_INT8_REDUCE_INT8_CODER_H_

#include <string>
#include <vector>
#include "micro/coder/opcoders/op_coder.h"
#include "nnacl/int8/quantize.h"
#include "nnacl/int8/reduce_int8.h"
#include "micro/coder/opcoders/base/reduce_base_coder.h"
namespace mindspore::lite::micro::nnacl {
class ReduceInt8Coder : public ReduceBaseCoder {
 public:
  ReduceInt8Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                  const Model::Node *node, size_t node_index, Target target)
      : ReduceBaseCoder(in_tensors, out_tensors, node, node_index, target) {}

  ~ReduceInt8Coder() override = default;
  int Prepare(CoderContext *const context) override;
  int DoCode(CoderContext *const context) override;

 private:
  int MallocTmpBuffer();
  int CalculateQuantArgs();
  void GetQuantArgs(size_t index);

 private:
  ReduceQuantArg quant_arg_{0};
  int32_t *begin_src_data_{nullptr};
  std::vector<int32_t *> data_buffers_;
  bool valid_shape_{false};
  bool is_last_axis{false};
  std::string reducer_;
  std::string last_reducer_;
  std::vector<QuantMulArg *> mean_multipliers_;
  std::vector<QuantMulArg *> prod_multipliers_;
  std::vector<QuantMulArg *> sum_square_multipliers_;
};
}  // namespace mindspore::lite::micro::nnacl
#endif  // MICRO_LITE_MICRO_CODER_OPCODERS_INT8_REDUCE_INT8_CODER_H_
micro/coder/opcoders/nnacl/int8/softmax_int8_coder.cc
@@ -0,0 +1,105 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "micro/coder/opcoders/nnacl/int8/softmax_int8_coder.h"
#include <vector>
#include <string>
#include <memory>
#include <limits>
#include "schema/inner/ops_generated.h"
#include "nnacl/softmax_parameter.h"
#include "micro/coder/log.h"
#include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_int8_serializer.h"
#include "micro/coder/opcoders/file_collector.h"

using mindspore::schema::PrimitiveType_SoftMax;

namespace mindspore::lite::micro::nnacl {
int SoftMaxInt8Coder::Prepare(CoderContext *const context) {
  MS_CHECK_RET_CODE(SoftmaxBaseCoder::Init(), "softmax base init failed");
  std::vector<QuantArg> in_quant_args = input_tensor_->quant_params();
  quant_params_.in_quant_args_.scale_ = in_quant_args.at(0).scale;
  quant_params_.in_quant_args_.zp_ = -in_quant_args.at(0).zeroPoint;

  std::vector<QuantArg> out_quant_args = output_tensor_->quant_params();
  quant_params_.out_quant_arg_.scale_ = out_quant_args.at(0).scale;
  quant_params_.out_quant_arg_.zp_ = out_quant_args.at(0).zeroPoint;
  quant_params_.output_activation_min_ = std::numeric_limits<int8_t>::min();
  quant_params_.output_activation_max_ = std::numeric_limits<int8_t>::max();

  const double input_real_multiplier =
    MSMIN(quant_params_.in_quant_args_.scale_ * (1 << (unsigned int)(31 - 5)), (1ll << 31) - 1.0);
  int right_shift = 0;
  QuantizeMultiplierSmallerThanOne(input_real_multiplier, &quant_params_.output_multiplier_, &right_shift);
  quant_params_.shift_left_ = right_shift < 0 ? -right_shift : 0;
  quant_params_.shift_right_ = right_shift > 0 ? right_shift : 0;
  // malloc tmp buffers for the exponent table and the per-plane sums
  exp_data_size_ = softmax_param_->element_size_ * sizeof(int);
  exp_data_ = static_cast<int *>(allocator_->Malloc(kNumberTypeInt32, exp_data_size_, kWorkspace));
  MS_CHECK_PTR(exp_data_);
  int inner_size = 1;
  MS_CHECK_TRUE(softmax_param_->n_dim_ < 5, "n_dim should be less than the max length of input_shape");
  for (int i = softmax_param_->axis_ + 1; i < softmax_param_->n_dim_; i++) {
    inner_size *= softmax_param_->input_shape_[i];
  }
  sum_data_size_ = inner_size * sizeof(int);
  sum_data_ = static_cast<int *>(allocator_->Malloc(kNumberTypeInt32, sum_data_size_, kWorkspace));
  MS_CHECK_PTR(sum_data_);
  MS_CHECK_RET_CODE(ReSize(), "resize failed");
  return RET_OK;
}

int SoftMaxInt8Coder::DoCode(CoderContext *const context) {
  int outer_size = 1;
  int inner_size = 1;
  for (int i = 0; i < softmax_param_->axis_; i++) {
    outer_size *= softmax_param_->input_shape_[i];
  }
  MS_CHECK_TRUE(softmax_param_->n_dim_ < 5, "n_dim should be less than the max length of input_shape");
  for (int i = softmax_param_->axis_; i < softmax_param_->n_dim_; i++) {
    inner_size *= softmax_param_->input_shape_[i];
  }

  Collect(context, {"nnacl/int8/softmax_int8.h"}, {"softmax_int8.c", "fixed_point.c"});

  NNaclInt8Serializer code;
  code.precision(kPrecision);

  code.CodeStruct("quant_args", quant_params_);
  code.CodeStruct("softmax_parameter", *softmax_param_);

  code.CodeFunction("memset", exp_data_, 0, exp_data_size_);
  code.CodeFunction("memset", sum_data_, 0, sum_data_size_);

  if (thread_num_ > 1) {
    code.CodeBaseStruct("SoftmaxInt8Args", "args", input_tensor_, output_tensor_, outer_size, inner_size, exp_data_,
                        sum_data_, thread_num_s_, "quant_args", "(SoftmaxParameter *)&softmax_parameter");
    code.CodeFunction("ParallelLaunch", "THREAD_POOL_DEFAULT", "SoftmaxInt8Run", "&args", "thread_num");
  } else {
    int task_id = 0;
    MS_CHECK_TRUE(thread_num_ > 0, "thread_num_ <= 0");
    int stride = UP_DIV(outer_size, thread_num_);
    int count = MSMIN(stride, outer_size - stride * task_id);
    code.CodeFunction("SoftmaxInt8", input_tensor_, output_tensor_, count, exp_data_, sum_data_, "quant_args",
                      "(SoftmaxParameter *)&softmax_parameter");
  }
  context->AppendCode(code.str());

  return RET_OK;
}

REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt8, PrimitiveType_SoftMax, CPUOpCoderCreator<SoftMaxInt8Coder>)
}  // namespace mindspore::lite::micro::nnacl
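For the single-threaded branch above, the slicing arithmetic is small enough to check by hand; UP_DIV and MSMIN are assumed to be the usual nnacl round-up-divide and minimum macros:

// Worked example of the count computation in SoftMaxInt8Coder::DoCode.
constexpr int UpDiv(int a, int b) { return (a + b - 1) / b; }
constexpr int Min(int a, int b) { return a < b ? a : b; }

// With outer_size = 6, thread_num_ = 2 and task_id = 0, the generated call
// covers stride = 3 outer planes starting at plane 0.
static_assert(UpDiv(6, 2) == 3, "stride");
static_assert(Min(3, 6 - 3 * 0) == 3, "count for task 0");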
micro/coder/opcoders/nnacl/int8/softmax_int8_coder.h
@@ -0,0 +1,46 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_MICRO_CODER_SOFTMAX_INT8_CODER_H_
#define MINDSPORE_LITE_MICRO_CODER_SOFTMAX_INT8_CODER_H_

#include <string>
#include <memory>
#include <vector>
#include "micro/coder/opcoders/base/softmax_base_coder.h"

namespace mindspore::lite::micro::nnacl {

class SoftMaxInt8Coder final : public SoftmaxBaseCoder {
 public:
  SoftMaxInt8Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                   const Model::Node *node, size_t node_index, Target target)
      : SoftmaxBaseCoder(in_tensors, out_tensors, node, node_index, target) {}

  ~SoftMaxInt8Coder() override = default;

  int Prepare(CoderContext *const context) override;

  int DoCode(CoderContext *const context) override;

 private:
  int *sum_data_{nullptr};
  int *exp_data_{nullptr};
  size_t exp_data_size_{0};
  size_t sum_data_size_{0};
};

}  // namespace mindspore::lite::micro::nnacl
#endif  // MINDSPORE_LITE_MICRO_CODER_SOFTMAX_INT8_CODER_H_