From 68ebf04d0520e59aa6aac31eb7b8e89e665e0292 Mon Sep 17 00:00:00 2001
From: zhujingxuan
Date: Sun, 7 Feb 2021 11:31:22 +0800
Subject: [PATCH] add softmax and reduce coders

---
 mindspore/lite/micro/cmake/file_list.cmake    | 138 +++++-----
 .../opcoders/nnacl/fp32/reduce_fp32_coder.cc  | 121 +++++++
 .../opcoders/nnacl/fp32/reduce_fp32_coder.h   |  45 ++++
 .../opcoders/nnacl/fp32/softmax_fp32_coder.cc |  63 +++++
 .../opcoders/nnacl/fp32/softmax_fp32_coder.h  |  41 +++
 .../opcoders/nnacl/int8/reduce_int8_coder.cc  | 235 ++++++++++++++++++
 .../opcoders/nnacl/int8/reduce_int8_coder.h   |  55 ++++
 .../opcoders/nnacl/int8/softmax_int8_coder.cc | 105 ++++++++
 .../opcoders/nnacl/int8/softmax_int8_coder.h  |  46 ++++
 9 files changed, 782 insertions(+), 67 deletions(-)
 create mode 100644 mindspore/lite/micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.cc
 create mode 100644 mindspore/lite/micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.h
 create mode 100644 mindspore/lite/micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.cc
 create mode 100644 mindspore/lite/micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.h
 create mode 100644 mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.cc
 create mode 100644 mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.h
 create mode 100644 mindspore/lite/micro/coder/opcoders/nnacl/int8/softmax_int8_coder.cc
 create mode 100644 mindspore/lite/micro/coder/opcoders/nnacl/int8/softmax_int8_coder.h

diff --git a/mindspore/lite/micro/cmake/file_list.cmake b/mindspore/lite/micro/cmake/file_list.cmake
index 39a5024403c..fe0dfda9c61 100644
--- a/mindspore/lite/micro/cmake/file_list.cmake
+++ b/mindspore/lite/micro/cmake/file_list.cmake
@@ -1,80 +1,84 @@
 #### classify all .h .c .cc files to FILE_SET
 set(CODER_SRC
-        ${MICRO_DIR}/coder/coder.cc
-        ${MICRO_DIR}/coder/coder_context.cc
-        ${MICRO_DIR}/coder/coder_graph.cc
-        ${MICRO_DIR}/coder/debug.cc
-        ${MICRO_DIR}/coder/session_coder.cc
-        )
+    ${MICRO_DIR}/coder/coder.cc
+    ${MICRO_DIR}/coder/coder_context.cc
+    ${MICRO_DIR}/coder/coder_graph.cc
+    ${MICRO_DIR}/coder/debug.cc
+    ${MICRO_DIR}/coder/session_coder.cc
+    )
 
 set(CODER_ALLOC_SRC
-        ${MICRO_DIR}/coder/allocator/allocator.cc
-        ${MICRO_DIR}/coder/allocator/memory_manager.cc
-        )
+    ${MICRO_DIR}/coder/allocator/allocator.cc
+    ${MICRO_DIR}/coder/allocator/memory_manager.cc
+    )
 
 set(CODER_GENERATOR_SRC
-        ${MICRO_DIR}/coder/generator/generator.cc
-        ${MICRO_DIR}/coder/generator/inference/inference_generator.cc
-        ${MICRO_DIR}/coder/generator/utils/generator_utils.cc
-        )
+    ${MICRO_DIR}/coder/generator/generator.cc
+    ${MICRO_DIR}/coder/generator/inference/inference_generator.cc
+    ${MICRO_DIR}/coder/generator/utils/generator_utils.cc
+    )
 
 set(CODER_OPCODERS_SRC
-        ${MICRO_DIR}/coder/opcoders/file_collector.cc
-        ${MICRO_DIR}/coder/opcoders/op_coder.cc
-        ${MICRO_DIR}/coder/opcoders/op_coder_builder.cc
-        ${MICRO_DIR}/coder/opcoders/op_coder_register.cc
-        #### serializer
-        ${MICRO_DIR}/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.cc
-        ${MICRO_DIR}/coder/opcoders/serializers/nnacl_serializer/nnacl_int8_serializer.cc
-        #### base coder
-        ${MICRO_DIR}/coder/opcoders/base/conv2d_base_coder.cc
-        ${MICRO_DIR}/coder/opcoders/base/dtype_cast_coder.cc
-        ${MICRO_DIR}/coder/opcoders/base/full_connection_base_coder.cc
-        ${MICRO_DIR}/coder/opcoders/base/quant_dtype_cast_coder.cc
-        ${MICRO_DIR}/coder/opcoders/base/reduce_base_coder.cc
-        ${MICRO_DIR}/coder/opcoders/base/softmax_base_coder.cc
-        #### cmsis int8 coder
-        ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/add_int8_coder.cc
-        ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/conv2d_base_coder.cc
-        ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/conv2d_int8_coder.cc
-        ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/dwconv_int8_coder.cc
-        ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/fullconnection_int8_coder.cc
-        ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/mul_int8_coder.cc
-        ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/pooling_int8_coder.cc
-        ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/reshape_int8_coder.cc
-        ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/softmax_int8_coder.cc
-        #### nnacl fp32 coder
-        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/activation_fp32_coder.cc
-        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/addn_fp32_coder.cc
-        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/arithmetic_fp32_coder.cc
-        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/arithmetic_self_fp32_coder.cc
-        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/assign_add_fp32_coder.cc
-        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/batchnorm_fp32_coder.cc
-        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/concat_fp32_coder.cc
-        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/expand_dims_fp32_coder.cc
-        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/gather_fp32_coder.cc
-        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/nchw2nhwc_fp32_coder.cc
-        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/nhwc2nchw_fp32_coder.cc
-        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/pad_fp32_coder.cc
-        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/pooling_fp32_coder.cc
-        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/power_fp32_coder.cc
-        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/reshape_fp32_coder.cc
-        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/scale_fp32_coder.cc
-        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/slice_fp32_coder.cc
-        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/squeeze_dims_fp32_coder.cc
-        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/tile_fp32_coder.cc
-        ${MICRO_DIR}/coder/opcoders/nnacl/fp32/transpose_fp32_coder.cc
-        #### nnacl int8 coder
-        ${MICRO_DIR}/coder/opcoders/nnacl/int8/concat_int8_coder.cc
-        ${MICRO_DIR}/coder/opcoders/nnacl/int8/pooling_int8_coder.cc
-        ${MICRO_DIR}/coder/opcoders/nnacl/int8/reshape_int8_coder.cc
-        )
+    ${MICRO_DIR}/coder/opcoders/file_collector.cc
+    ${MICRO_DIR}/coder/opcoders/op_coder.cc
+    ${MICRO_DIR}/coder/opcoders/op_coder_builder.cc
+    ${MICRO_DIR}/coder/opcoders/op_coder_register.cc
+    #### serializer
+    ${MICRO_DIR}/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.cc
+    ${MICRO_DIR}/coder/opcoders/serializers/nnacl_serializer/nnacl_int8_serializer.cc
+    #### base coder
+    ${MICRO_DIR}/coder/opcoders/base/conv2d_base_coder.cc
+    ${MICRO_DIR}/coder/opcoders/base/dtype_cast_coder.cc
+    ${MICRO_DIR}/coder/opcoders/base/full_connection_base_coder.cc
+    ${MICRO_DIR}/coder/opcoders/base/quant_dtype_cast_coder.cc
+    ${MICRO_DIR}/coder/opcoders/base/reduce_base_coder.cc
+    ${MICRO_DIR}/coder/opcoders/base/softmax_base_coder.cc
+    #### cmsis int8 coder
+    ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/add_int8_coder.cc
+    ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/conv2d_base_coder.cc
+    ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/conv2d_int8_coder.cc
+    ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/dwconv_int8_coder.cc
+    ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/fullconnection_int8_coder.cc
+    ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/mul_int8_coder.cc
+    ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/pooling_int8_coder.cc
+    ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/reshape_int8_coder.cc
+    ${MICRO_DIR}/coder/opcoders/cmsis-nn/int8/softmax_int8_coder.cc
+    #### nnacl fp32 coder
+    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/activation_fp32_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/addn_fp32_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/arithmetic_fp32_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/arithmetic_self_fp32_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/assign_add_fp32_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/batchnorm_fp32_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/concat_fp32_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/expand_dims_fp32_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/gather_fp32_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/nchw2nhwc_fp32_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/nhwc2nchw_fp32_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/pad_fp32_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/pooling_fp32_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/power_fp32_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/reduce_fp32_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/reshape_fp32_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/scale_fp32_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/slice_fp32_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/softmax_fp32_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/squeeze_dims_fp32_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/tile_fp32_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/fp32/transpose_fp32_coder.cc
+    #### nnacl int8 coder
+    ${MICRO_DIR}/coder/opcoders/nnacl/int8/concat_int8_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/int8/pooling_int8_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/int8/reduce_int8_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/int8/reshape_int8_coder.cc
+    ${MICRO_DIR}/coder/opcoders/nnacl/int8/softmax_int8_coder.cc
+    )
 
 set(CODER_UTILS_SRC
-        ${MICRO_DIR}/coder/utils/coder_utils.cc
-        ${MICRO_DIR}/coder/utils/dir_utils.cc
-        ${MICRO_DIR}/coder/utils/print_utils.cc
-        )
+    ${MICRO_DIR}/coder/utils/coder_utils.cc
+    ${MICRO_DIR}/coder/utils/dir_utils.cc
+    ${MICRO_DIR}/coder/utils/print_utils.cc
+    )
 
 set(PRIMITIVE_OP_SRC
     ${LITE_DIR}/src/ops/batch_norm.cc
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.cc
new file mode 100644
index 00000000000..6d82e764dce
--- /dev/null
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.cc
@@ -0,0 +1,121 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.h"
+#include <string>
+#include "micro/coder/log.h"
+#include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
+#include "micro/coder/opcoders/file_collector.h"
+
+using mindspore::schema::PrimitiveType_Reduce;
+
+namespace mindspore::lite::micro::nnacl {
+int ReduceFP32Coder::Prepare(CoderContext *const context) {
+  MS_CHECK_RET_CODE(ReduceBaseCoder::Init(), "init failed");
+  MS_CHECK_RET_CODE(ReSize(), "resize failed");
+  MS_CHECK_RET_CODE(MallocTmpBuffer(), "malloc buffer failed");
+  return RET_OK;
+}
+
+int ReduceFP32Coder::MallocTmpBuffer() {
+  data_buffers_.clear();
+  for (auto size : buffer_sizes_) {
+    auto *buffer = static_cast<float *>(allocator_->Malloc(kNumberTypeFloat, size * sizeof(float), kWorkspace));
+    MS_CHECK_PTR(buffer);
+    data_buffers_.emplace_back(buffer);
+  }
+  return RET_OK;
+}
+
+int ReduceFP32Coder::ReSize() {
+  if (input_tensors_.at(0)->data_type() == kNumberTypeFloat32) {
+    data_type_ = kDataTypeFloat;
+  } else {
+    data_type_ = kDataTypeInt;
+  }
+  return ReduceBaseCoder::ReSize();
+}
+
+int ReduceFP32Coder::DoCode(CoderContext *const context) {
+  // generate code .h .c
+  if (mode_ == static_cast<int>(schema::ReduceMode_ReduceSum)) {
+    Collect(context, {"runtime/kernel/fp32/reduce_sum.h"}, {"reduce_sum.c"});
+  } else if (mode_ == static_cast<int>(schema::ReduceMode_ReduceMean)) {
+    Collect(context, {"runtime/kernel/fp32/reduce_mean.h"}, {"reduce_mean.c"});
+  } else {
+    Collect(context, {"runtime/kernel/fp32/reduce.h"}, {"reduce.c"});
+  }
+
+  NNaclFp32Serializer code;
+  // call the op function
+  std::string reduce;
+  std::string int_reduce;
+  switch (mode_) {
+    case static_cast<int>(schema::ReduceMode_ReduceSum): {
+      reduce = "ReduceSum";
+      break;
+    }
+    case static_cast<int>(schema::ReduceMode_ReduceMean): {
+      reduce = "ReduceMean";
+      break;
+    }
+    case static_cast<int>(schema::ReduceMode_ReduceMax): {
+      reduce = "ReduceMax";
+      break;
+    }
+    case static_cast<int>(schema::ReduceMode_ReduceMin): {
+      reduce = "ReduceMin";
+      break;
+    }
+    case static_cast<int>(schema::ReduceMode_ReduceProd): {
+      reduce = "ReduceProd";
+      int_reduce = "IntReduceProd";
+      break;
+    }
+    case static_cast<int>(schema::ReduceMode_ReduceSumSquare): {
+      reduce = "ReduceSumSquare";
+      break;
+    }
+    default:
+      MS_LOG(ERROR) << "Reduce unsupported reduce mode: " << mode_;
+      return RET_ERROR;
+  }
+
+  std::string src_addr = allocator_->GetRuntimeAddr(input_tensor_);
+  std::string dst_addr;
+  for (int i = 0; i < num_axes_; ++i) {
+    if (i != num_axes_ - 1) {
+      dst_addr = allocator_->GetRuntimeAddr(data_buffers_.at(i));
+    } else {
+      dst_addr = allocator_->GetRuntimeAddr(output_tensor_);
+    }
+    outer_size_ = outer_sizes_.at(i);
+    inner_size_ = inner_sizes_.at(i);
+    axis_size_ = axis_sizes_.at(i);
+    if (data_type_ == kDataTypeFloat) {
+      code.CodeFunction(reduce, outer_size_, inner_size_, axis_size_, src_addr, dst_addr, 0, thread_num_);
+    } else {
+      code.CodeFunction(int_reduce, outer_size_, inner_size_, axis_size_, src_addr, dst_addr, 0, thread_num_);
+    }
+    src_addr = dst_addr;
+  }
+  context->AppendCode(code.str());
+  return RET_OK;
+}
+
+REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_Reduce, CPUOpCoderCreator<ReduceFP32Coder>)
+
+} // namespace mindspore::lite::micro::nnacl
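For reference, ReduceFP32Coder::DoCode lowers an N-axis reduction into a chain of single-axis calls: each stage reads the previous stage's workspace buffer (note the trailing `src_addr = dst_addr;`) and only the last stage writes the output tensor. The sketch below mirrors that data flow in plain C++; it is illustrative only, since the real nnacl kernels also carry a tid/thread_num pair and the outer/inner/axis sizes come from ReduceBaseCoder::ReSize().

    #include <cstdio>
    #include <vector>

    // Single-threaded stand-in for nnacl's ReduceSum; reduces the middle
    // "axis" dimension of an (outer, axis, inner) view of the data.
    static void ReduceSumSketch(int outer, int inner, int axis, const float *src, float *dst) {
      for (int o = 0; o < outer; ++o) {
        for (int i = 0; i < inner; ++i) {
          float sum = 0.0f;
          for (int a = 0; a < axis; ++a) {
            sum += src[(o * axis + a) * inner + i];
          }
          dst[o * inner + i] = sum;
        }
      }
    }

    int main() {
      // Hypothetical 2x3x4 input reduced over axes {1, 2}, chained the way
      // DoCode chains src -> data_buffers_[0] -> output.
      std::vector<float> src(2 * 3 * 4, 1.0f), tmp(2 * 4), dst(2);
      ReduceSumSketch(/*outer=*/2, /*inner=*/4, /*axis=*/3, src.data(), tmp.data());  // reduce axis 1
      ReduceSumSketch(/*outer=*/2, /*inner=*/1, /*axis=*/4, tmp.data(), dst.data());  // reduce axis 2
      printf("%.0f %.0f\n", dst[0], dst[1]);  // 12 12
      return 0;
    }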
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.h b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.h
new file mode 100644
index 00000000000..b7379f4fce8
--- /dev/null
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/reduce_fp32_coder.h
@@ -0,0 +1,45 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_REDUCE_FP32_CODER_H_
+#define MINDSPORE_LITE_MICRO_CODER_OPCODERS_REDUCE_FP32_CODER_H_
+
+#include <string>
+#include <vector>
+#include "micro/coder/opcoders/base/reduce_base_coder.h"
+#include "micro/coder/opcoders/op_coder.h"
+
+namespace mindspore::lite::micro::nnacl {
+class ReduceFP32Coder : public ReduceBaseCoder {
+ public:
+  ReduceFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
+                  const Model::Node *node, size_t node_index, Target target)
+      : ReduceBaseCoder(in_tensors, out_tensors, node, node_index, target) {}
+
+  ~ReduceFP32Coder() override = default;
+
+  int Prepare(CoderContext *const context) override;
+
+  int DoCode(CoderContext *const context) override;
+
+ private:
+  int ReSize() override;
+  int MallocTmpBuffer();
+  LiteDataType data_type_{kDataTypeFloat};
+  std::vector<float *> data_buffers_;
+};
+} // namespace mindspore::lite::micro::nnacl
+#endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_REDUCE_FP32_CODER_H_
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.cc
new file mode 100644
index 00000000000..edd4f21f034
--- /dev/null
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.cc
@@ -0,0 +1,63 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.h"
+#include <string>
+#include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_fp32_serializer.h"
+#include "schema/inner/ops_generated.h"
+#include "micro/coder/opcoders/file_collector.h"
+
+using mindspore::schema::PrimitiveType_SoftMax;
+
+namespace mindspore::lite::micro::nnacl {
+
+int SoftMaxFP32Coder::Prepare(CoderContext *const context) {
+  SoftmaxBaseCoder::Init();
+  // malloc tmp buffer
+  int n_dim = softmax_param_->n_dim_;
+  int32_t axis = softmax_param_->axis_;
+  if (axis == -1) {
+    softmax_param_->axis_ += n_dim;
+    axis = softmax_param_->axis_;
+  }
+  auto in_shape = input_tensor_->shape();
+  int out_plane_size = 1;
+  for (int i = 0; i < axis; ++i) {
+    out_plane_size *= in_shape.at(i);
+  }
+  int in_plane_size = 1;
+  for (int i = axis + 1; i < n_dim; i++) {
+    in_plane_size *= in_shape.at(i);
+  }
+  sum_data_size_ = out_plane_size * in_plane_size * sizeof(float);
+  sum_data_ = static_cast<float *>(allocator_->Malloc(kNumberTypeFloat, sum_data_size_, kWorkspace));
+  return RET_OK;
+}
+
+int SoftMaxFP32Coder::DoCode(CoderContext *const context) {
+  Collect(context, {"nnacl/fp32/softmax.h"}, {"softmax.c"});
+  NNaclFp32Serializer code;
+  code.CodeStruct("softmax_parameter", *softmax_param_);
+  code.CodeFunction("memset", sum_data_, "0", sum_data_size_);
+  code.CodeFunction("Softmax", input_tensor_, output_tensor_, sum_data_, "&softmax_parameter");
+  context->AppendCode(code.str());
+
+  return RET_OK;
+}
+
+REG_OPERATOR_CODER(kAllTargets, kNumberTypeFloat32, PrimitiveType_SoftMax, CPUOpCoderCreator<SoftMaxFP32Coder>)
+
+} // namespace mindspore::lite::micro::nnacl
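SoftMaxFP32Coder::Prepare sizes the sum workspace as out_plane_size * in_plane_size floats, i.e. one accumulator for every element off the softmax axis. A minimal check of that arithmetic (the shape and axis below are made up for illustration):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int> shape = {2, 3, 4, 5};  // hypothetical input shape
      int n_dim = static_cast<int>(shape.size());
      int axis = -1;
      if (axis == -1) axis += n_dim;  // same normalization Prepare() applies
      int out_plane_size = 1, in_plane_size = 1;
      for (int i = 0; i < axis; ++i) out_plane_size *= shape[i];
      for (int i = axis + 1; i < n_dim; ++i) in_plane_size *= shape[i];
      size_t sum_data_size = out_plane_size * in_plane_size * sizeof(float);
      printf("workspace: %zu bytes\n", sum_data_size);  // 24 * 1 * 4 = 96
      return 0;
    }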
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.h b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.h
new file mode 100644
index 00000000000..92414d67373
--- /dev/null
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.h
@@ -0,0 +1,41 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_LITE_MICRO_CODER_SOFTMAX_CODER_H_
+#define MINDSPORE_LITE_MICRO_CODER_SOFTMAX_CODER_H_
+
+#include <vector>
+#include "micro/coder/opcoders/base/softmax_base_coder.h"
+namespace mindspore::lite::micro::nnacl {
+
+class SoftMaxFP32Coder final : public SoftmaxBaseCoder {
+ public:
+  SoftMaxFP32Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
+                   const Model::Node *node, size_t node_index, Target target)
+      : SoftmaxBaseCoder(in_tensors, out_tensors, node, node_index, target) {}
+
+  int Prepare(CoderContext *const context) override;
+
+  int DoCode(CoderContext *const context) override;
+
+  ~SoftMaxFP32Coder() override = default;
+
+ private:
+  float *sum_data_{nullptr};
+  size_t sum_data_size_{0};
+};
+
+} // namespace mindspore::lite::micro::nnacl
+#endif // MINDSPORE_LITE_MICRO_CODER_SOFTMAX_CODER_H_
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.cc
new file mode 100644
index 00000000000..218b4fdf45c
--- /dev/null
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.cc
@@ -0,0 +1,235 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "micro/coder/opcoders/nnacl/int8/reduce_int8_coder.h"
+#include <cmath>
+#include <string>
+#include "micro/coder/opcoders/file_collector.h"
+#include "micro/coder/log.h"
+#include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_int8_serializer.h"
+
+using mindspore::schema::PrimitiveType_Reduce;
+namespace mindspore::lite::micro::nnacl {
+int ReduceInt8Coder::CalculateQuantArgs() {
+  QuantArg input_quant = input_tensor_->quant_params().at(0);
+  QuantArg output_quant = output_tensor_->quant_params().at(0);
+  quant_arg_.in_scale_ = input_quant.scale;
+  quant_arg_.in_zp_ = input_quant.zeroPoint;
+  quant_arg_.out_scale_ = output_quant.scale;
+  quant_arg_.out_zp_ = output_quant.zeroPoint;
+  const double input_output_multiplier = quant_arg_.in_scale_ / quant_arg_.out_scale_;
+  int shift;
+  QuantizeMultiplierSmallerThanOne(input_output_multiplier, &quant_arg_.in_out_multiplier_, &shift);
+  quant_arg_.in_out_left_shift_ = shift < 0 ? -shift : 0;
+  quant_arg_.in_out_right_shift_ = shift > 0 ? shift : 0;
+  MS_CHECK_TRUE(num_axes_ < MAX_SHAPE_SIZE, "the number of axes should be less than the max num");
+  if (mode_ == static_cast<int>(schema::ReduceMode_ReduceMean)) {
+    for (int i = 0; i < num_axes_; ++i) {
+      auto axis = axes_[i];
+      std::vector<int> in_shape = input_tensor_->shape();
+      if (static_cast<int>(in_shape.size()) - 1 < axis) {
+        MS_LOG(ERROR) << "input tensor shape is invalid";
+        return RET_ERROR;
+      }
+      double reciprocal = 1.0 / in_shape.at(axis);
+      auto *qm = new (std::nothrow) QuantMulArg;
+      MS_CHECK_PTR(qm);
+      QuantizeMultiplierSmallerThanOne(reciprocal, &qm->multiplier_, &shift);
+      qm->left_shift_ = shift < 0 ? -shift : 0;
+      qm->right_shift_ = shift > 0 ? shift : 0;
+      mean_multipliers_.push_back(qm);
+    }
+  }
+
+  if (mode_ == static_cast<int>(schema::ReduceMode_ReduceProd)) {
+    for (int i = 0; i < num_axes_; ++i) {
+      int axis = axes_[i];
+      std::vector<int> in_shape = input_tensors_.at(kInputIndex)->shape();
+      if (static_cast<int>(in_shape.size()) - 1 < axis) {
+        MS_LOG(ERROR) << "input tensor shape is invalid";
+        return RET_ERROR;
+      }
+      int axis_size = in_shape.at(axis);
+      double prod_multiplier = std::pow(quant_arg_.in_scale_, axis_size - 1);
+      auto *qm = new (std::nothrow) QuantMulArg;
+      MS_CHECK_PTR(qm);
+      QuantizeMultiplierSmallerThanOne(prod_multiplier, &qm->multiplier_, &shift);
+      qm->left_shift_ = shift < 0 ? -shift : 0;
+      qm->right_shift_ = shift > 0 ? shift : 0;
+      prod_multipliers_.push_back(qm);
+    }
+  }
+
+  if (mode_ == static_cast<int>(schema::ReduceMode_ReduceSumSquare)) {
+    for (int i = 0; i < num_axes_ - 1; ++i) {
+      auto *qm = new (std::nothrow) QuantMulArg;
+      MS_CHECK_PTR(qm);
+      double sum_square_multiplier = quant_arg_.in_scale_;
+      QuantizeMultiplierSmallerThanOne(sum_square_multiplier, &qm->multiplier_, &shift);
+      qm->left_shift_ = shift < 0 ? -shift : 0;
+      qm->right_shift_ = shift > 0 ? shift : 0;
+      sum_square_multipliers_.push_back(qm);
+    }
+    // for the last axis
+    auto *qm = new (std::nothrow) QuantMulArg;
+    MS_CHECK_PTR(qm);
+    double sum_square_multiplier = quant_arg_.in_scale_ * (quant_arg_.in_scale_ / quant_arg_.out_scale_);
+    QuantizeMultiplierSmallerThanOne(sum_square_multiplier, &qm->multiplier_, &shift);
+    qm->left_shift_ = shift < 0 ? -shift : 0;
+    qm->right_shift_ = shift > 0 ? shift : 0;
+    sum_square_multipliers_.push_back(qm);
+  }
+
+  return RET_OK;
+}
+
+int ReduceInt8Coder::MallocTmpBuffer() {
+  data_buffers_.clear();
+  if (num_axes_ != static_cast<int>(buffer_sizes_.size())) {
+    MS_LOG(ERROR) << "num_axes_ size is invalid";
+    return RET_ERROR;
+  }
+  for (auto buffer_size : buffer_sizes_) {
+    auto *buffer =
+      static_cast<int32_t *>(allocator_->Malloc(kNumberTypeInt32, buffer_size * sizeof(int32_t), kWorkspace));
+    MS_CHECK_PTR(buffer);
+    data_buffers_.emplace_back(buffer);
+  }
+  return RET_OK;
+}
+
+void ReduceInt8Coder::GetQuantArgs(size_t index) {
+  if (index > static_cast<size_t>(num_axes_)) {
+    MS_LOG(ERROR) << "index is invalid, beyond num_axes_";
+    return;
+  }
+  if (mode_ == static_cast<int>(schema::ReduceMode_ReduceMean)) {
+    quant_arg_.mean_multiplier_ = mean_multipliers_.at(index)->multiplier_;
+    quant_arg_.mean_left_shift_ = mean_multipliers_.at(index)->left_shift_;
+    quant_arg_.mean_right_shift_ = mean_multipliers_.at(index)->right_shift_;
+  }
+  if (mode_ == static_cast<int>(schema::ReduceMode_ReduceProd)) {
+    quant_arg_.prod_multiplier_ = prod_multipliers_.at(index)->multiplier_;
+    quant_arg_.prod_left_shift_ = prod_multipliers_.at(index)->left_shift_;
+    quant_arg_.prod_right_shift_ = prod_multipliers_.at(index)->right_shift_;
+  }
+
+  if (mode_ == static_cast<int>(schema::ReduceMode_ReduceSumSquare)) {
+    quant_arg_.sum_square_multiplier_ = sum_square_multipliers_.at(index)->multiplier_;
+    quant_arg_.sum_square_left_shift_ = sum_square_multipliers_.at(index)->left_shift_;
+    quant_arg_.sum_square_right_shift_ = sum_square_multipliers_.at(index)->right_shift_;
+  }
+}
+
+int ReduceInt8Coder::Prepare(CoderContext *const context) {
+  MS_CHECK_RET_CODE(ReduceBaseCoder::Init(), "Init failed");
+  std::vector<int> in_shape = input_tensor_->shape();
+  if (!in_shape.empty()) {
+    this->valid_shape_ = true;
+    MS_CHECK_RET_CODE(CalculateQuantArgs(), "CalculateQuantArgs failed");
+  } else {
+    this->valid_shape_ = false;
+  }
+  switch (mode_) {
+    case static_cast<int>(schema::ReduceMode_ReduceMean): {
+      reducer_ = "ReduceMeanInt8";
+      last_reducer_ = "ReduceMeanLastAxis";
+      break;
+    }
+    case static_cast<int>(schema::ReduceMode_ReduceSum): {
+      reducer_ = "ReduceSumInt8";
+      last_reducer_ = "ReduceSumLastAxis";
+      break;
+    }
+    case static_cast<int>(schema::ReduceMode_ReduceMax): {
+      reducer_ = "ReduceMaxInt8";
+      last_reducer_ = "ReduceMaxLastAxis";
+      break;
+    }
+    case static_cast<int>(schema::ReduceMode_ReduceMin): {
+      reducer_ = "ReduceMinInt8";
+      last_reducer_ = "ReduceMinLastAxis";
+      break;
+    }
+    case static_cast<int>(schema::ReduceMode_ReduceProd): {
+      reducer_ = "ReduceProdInt8";
+      last_reducer_ = "ReduceProdLastAxis";
+      break;
+    }
+    case static_cast<int>(schema::ReduceMode_ReduceSumSquare): {
+      reducer_ = "ReduceSumSquareInt8";
+      last_reducer_ = "ReduceSumSquareLastAxis";
+      break;
+    }
+    default:
+      MS_LOG(ERROR) << "Reduce unsupported reduce mode: " << mode_;
+      return RET_ERROR;
+  }
+  MS_CHECK_RET_CODE(ReduceBaseCoder::ReSize(), "ReSize failed");
+  if (!this->valid_shape_) {
+    MS_CHECK_RET_CODE(CalculateQuantArgs(), "CalculateQuantArgs failed");
+  }
+  MS_CHECK_RET_CODE(MallocTmpBuffer(), "MallocTmpBuffer failed");
+  begin_src_data_ = static_cast<int32_t *>(
+    allocator_->Malloc(kNumberTypeInt32, sizeof(int32_t) * input_tensor_->ElementsNum(), kWorkspace));
+  MS_CHECK_PTR(begin_src_data_);
+  return RET_OK;
+}
+
+int ReduceInt8Coder::DoCode(CoderContext *const context) {
+  MS_LOG(DEBUG) << "*****Reduce code start*****";
+  int task_id = 0;
+  NNaclInt8Serializer code;
+  Collect(context, {"nnacl/int8/reduce_int8.h"}, {"reduce_int8.c", "fixed_point.c"});
+  std::string src_addr = allocator_->GetRuntimeAddr(input_tensor_);
+  std::string dst_addr;
+  std::string begin_src_data_src = allocator_->GetRuntimeAddr(begin_src_data_);
+
+  code << "int *begin_data = (int *)(" << begin_src_data_src << ");\n";
+  code << "int8_t *ori_data = (int8_t *)(" << src_addr << ");\n";
+  code << "for (int i = 0; i < " << input_tensor_->ElementsNum() << "; ++i) {\n"
+       << "  begin_data[i] = (int)ori_data[i];\n"
+       << "}\n";
+  for (int i = 0; i < num_axes_; ++i) {
+    GetQuantArgs(i);
+    std::string quant_arg_i = "quant_arg_" + std::to_string(i);
+    std::string ptr_quan_arg_i = "&" + quant_arg_i;
+    code.CodeStruct(quant_arg_i, quant_arg_);
+    if (i != num_axes_ - 1) {
+      is_last_axis = false;
+      dst_addr = allocator_->GetRuntimeAddr(data_buffers_.at(i));
+    } else {
+      is_last_axis = true;
+      dst_addr = allocator_->GetRuntimeAddr(output_tensor_);
+    }
+    outer_size_ = outer_sizes_.at(i);
+    inner_size_ = inner_sizes_.at(i);
+    axis_size_ = axis_sizes_.at(i);
+    if (!is_last_axis) {
+      code.CodeFunction(reducer_, outer_size_, inner_size_, axis_size_, begin_src_data_src, dst_addr, ptr_quan_arg_i,
+                        task_id, thread_num_);
+    } else {
+      code.CodeFunction(last_reducer_, outer_size_, inner_size_, axis_size_, begin_src_data_src, dst_addr,
+                        ptr_quan_arg_i, task_id, thread_num_);
+    }
+    begin_src_data_src = dst_addr;
+  }
+  context->AppendCode(code.str());
+  return RET_OK;
+}
+
+REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt8, PrimitiveType_Reduce, CPUOpCoderCreator<ReduceInt8Coder>)
+
+} // namespace mindspore::lite::micro::nnacl
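CalculateQuantArgs turns every real-valued scale (the in/out ratio, the per-axis mean reciprocals, the prod and sum-square scale powers) into a Q31 multiplier plus a shift via QuantizeMultiplierSmallerThanOne, then splits the shift into the left/right pair the kernels expect. The decomposition is essentially frexp-based; the sketch below shows the idea, though the nnacl version (in fixed_point.c, which DoCode also Collects) may differ in rounding details:

    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    // Hedged stand-in for QuantizeMultiplierSmallerThanOne: express
    // real_multiplier as multiplier * 2^shift with a Q31 fixed-point multiplier.
    static void QuantizeMultiplierSketch(double real_multiplier, int32_t *multiplier, int *shift) {
      if (real_multiplier == 0.0) {
        *multiplier = 0;
        *shift = 0;
        return;
      }
      double q = std::frexp(real_multiplier, shift);  // real = q * 2^shift, q in [0.5, 1)
      auto q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
      if (q_fixed == (1ll << 31)) {  // rounding overflowed: fold back into the shift
        q_fixed /= 2;
        ++(*shift);
      }
      *multiplier = static_cast<int32_t>(q_fixed);
    }

    int main() {
      int32_t multiplier = 0;
      int shift = 0;
      QuantizeMultiplierSketch(1.0 / 3, &multiplier, &shift);  // e.g. a ReduceMean reciprocal
      // The same split CalculateQuantArgs applies to each QuantMulArg:
      int left_shift = shift < 0 ? -shift : 0;
      int right_shift = shift > 0 ? shift : 0;
      printf("multiplier=%d left=%d right=%d\n", multiplier, left_shift, right_shift);
      return 0;
    }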
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.h b/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.h
new file mode 100644
index 00000000000..ff961ce176c
--- /dev/null
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.h
@@ -0,0 +1,55 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_INT8_REDUCE_INT8_CODER_H_
+#define MINDSPORE_LITE_MICRO_CODER_OPCODERS_INT8_REDUCE_INT8_CODER_H_
+
+#include <string>
+#include <vector>
+#include "micro/coder/opcoders/op_coder.h"
+#include "nnacl/int8/quantize.h"
+#include "nnacl/int8/reduce_int8.h"
+#include "micro/coder/opcoders/base/reduce_base_coder.h"
+namespace mindspore::lite::micro::nnacl {
+class ReduceInt8Coder : public ReduceBaseCoder {
+ public:
+  ReduceInt8Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
+                  const Model::Node *node, size_t node_index, Target target)
+      : ReduceBaseCoder(in_tensors, out_tensors, node, node_index, target) {}
+
+  ~ReduceInt8Coder() override = default;
+  int Prepare(CoderContext *const context) override;
+  int DoCode(CoderContext *const context) override;
+
+ private:
+  int MallocTmpBuffer();
+  int CalculateQuantArgs();
+  void GetQuantArgs(size_t index);
+
+ private:
+  ReduceQuantArg quant_arg_{0};
+  int32_t *begin_src_data_{nullptr};
+  std::vector<int32_t *> data_buffers_;
+  bool valid_shape_{false};
+  bool is_last_axis{false};
+  std::string reducer_;
+  std::string last_reducer_;
+  std::vector<QuantMulArg *> mean_multipliers_;
+  std::vector<QuantMulArg *> prod_multipliers_;
+  std::vector<QuantMulArg *> sum_square_multipliers_;
+};
+} // namespace mindspore::lite::micro::nnacl
+#endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_INT8_REDUCE_INT8_CODER_H_
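One detail worth keeping in mind when reading the generated int8 reduce code: the input is widened into the begin_src_data_ int32 workspace up front, the intermediate stages stay in 32 bits (data_buffers_ holds int32_t), and only the last-axis kernel writes the int8 output tensor. Stripped of the codegen plumbing, the emitted widening loop is simply:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      // Hypothetical int8 tensor contents; the generated code reads them
      // through an "int8_t *ori_data" pointer instead.
      std::vector<int8_t> ori_data = {1, -2, 3, -4};
      std::vector<int32_t> begin_data(ori_data.size());
      for (size_t i = 0; i < ori_data.size(); ++i) {
        begin_data[i] = static_cast<int32_t>(ori_data[i]);  // (int)ori_data[i] in the emitted C
      }
      printf("%d %d %d %d\n", begin_data[0], begin_data[1], begin_data[2], begin_data[3]);
      return 0;
    }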
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/softmax_int8_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/int8/softmax_int8_coder.cc
new file mode 100644
index 00000000000..ff4a07740bf
--- /dev/null
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/softmax_int8_coder.cc
@@ -0,0 +1,105 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "micro/coder/opcoders/nnacl/int8/softmax_int8_coder.h"
+#include <algorithm>
+#include <limits>
+#include <string>
+#include <vector>
+#include "schema/inner/ops_generated.h"
+#include "nnacl/softmax_parameter.h"
+#include "micro/coder/log.h"
+#include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_int8_serializer.h"
+#include "micro/coder/opcoders/file_collector.h"
+
+using mindspore::schema::PrimitiveType_SoftMax;
+
+namespace mindspore::lite::micro::nnacl {
+int SoftMaxInt8Coder::Prepare(CoderContext *const context) {
+  SoftmaxBaseCoder::Init();
+  std::vector<QuantArg> in_quant_args = input_tensor_->quant_params();
+  quant_params_.in_quant_args_.scale_ = in_quant_args.at(0).scale;
+  quant_params_.in_quant_args_.zp_ = -in_quant_args.at(0).zeroPoint;
+
+  std::vector<QuantArg> out_quant_args = output_tensor_->quant_params();
+  quant_params_.out_quant_arg_.scale_ = out_quant_args.at(0).scale;
+  quant_params_.out_quant_arg_.zp_ = out_quant_args.at(0).zeroPoint;
+  quant_params_.output_activation_min_ = std::numeric_limits<int8_t>::min();
+  quant_params_.output_activation_max_ = std::numeric_limits<int8_t>::max();
+
+  const double input_real_multiplier =
+    MSMIN(quant_params_.in_quant_args_.scale_ * (1 << (unsigned int)(31 - 5)), (1ll << 31) - 1.0);
+  int right_shift = 0;
+  QuantizeMultiplierSmallerThanOne(input_real_multiplier, &quant_params_.output_multiplier_, &right_shift);
+  quant_params_.shift_left_ = right_shift < 0 ? -right_shift : 0;
+  quant_params_.shift_right_ = right_shift > 0 ? right_shift : 0;
+  // malloc tmp buffer
+  exp_data_size_ = softmax_param_->element_size_ * sizeof(int);
+  exp_data_ = static_cast<int *>(allocator_->Malloc(kNumberTypeInt32, exp_data_size_, kWorkspace));
+  MS_CHECK_PTR(exp_data_);
+  int inner_size = 1;
+  MS_CHECK_TRUE(softmax_param_->n_dim_ < 5, "n_dim should be less than the max size of input_shape_");
+  for (int i = softmax_param_->axis_ + 1; i < softmax_param_->n_dim_; i++) {
+    inner_size *= softmax_param_->input_shape_[i];
+  }
+  sum_data_size_ = inner_size * sizeof(int);
+  sum_data_ = static_cast<int *>(allocator_->Malloc(kNumberTypeInt32, sum_data_size_, kWorkspace));
+  MS_CHECK_PTR(sum_data_);
+  ReSize();
+  return RET_OK;
+}
+
+int SoftMaxInt8Coder::DoCode(CoderContext *const context) {
+  int outter_size = 1;
+  int inner_size = 1;
+  for (int i = 0; i < softmax_param_->axis_; i++) {
+    outter_size *= softmax_param_->input_shape_[i];
+  }
+  MS_CHECK_TRUE(softmax_param_->n_dim_ < 5, "n_dim should be less than the max size of input_shape_");
+  for (int i = softmax_param_->axis_; i < softmax_param_->n_dim_; i++) {
+    inner_size *= softmax_param_->input_shape_[i];
+  }
+
+  Collect(context, {"nnacl/int8/softmax_int8.h"}, {"softmax_int8.c", "fixed_point.c"});
+
+  NNaclInt8Serializer code;
+  code.precision(kPrecision);
+
+  code.CodeStruct("quant_args", quant_params_);
+  code.CodeStruct("softmax_parameter", *softmax_param_);
+
+  code.CodeFunction("memset", exp_data_, 0, exp_data_size_);
+  code.CodeFunction("memset", sum_data_, 0, sum_data_size_);
+
+  if (thread_num_ > 1) {
+    code.CodeBaseStruct("SoftmaxInt8Args", "args", input_tensor_, output_tensor_, outter_size, inner_size, exp_data_,
+                        sum_data_, thread_num_s_, "quant_args", "(SoftmaxParameter *)&softmax_parameter");
+    code.CodeFunction("ParallelLaunch", "THREAD_POOL_DEFAULT", "SoftmaxInt8Run", "&args", "thread_num");
+  } else {
+    int task_id = 0;
+    MS_CHECK_TRUE(thread_num_ > 0, "thread_num_ <= 0");
+    int stride = UP_DIV(outter_size, thread_num_);
+    int count = MSMIN(stride, outter_size - stride * task_id);
+    code.CodeFunction("SoftmaxInt8", input_tensor_, output_tensor_, count, exp_data_, sum_data_, "quant_args",
+                      "(SoftmaxParameter *)&softmax_parameter");
+  }
+  context->AppendCode(code.str());
+
+  return RET_OK;
+}
+
+REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt8, PrimitiveType_SoftMax, CPUOpCoderCreator<SoftMaxInt8Coder>)
+} // namespace mindspore::lite::micro::nnacl
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/softmax_int8_coder.h b/mindspore/lite/micro/coder/opcoders/nnacl/int8/softmax_int8_coder.h
new file mode 100644
index 00000000000..41ef1c7de88
--- /dev/null
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/softmax_int8_coder.h
@@ -0,0 +1,46 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_LITE_MICRO_CODER_SOFTMAX_INT8_CODER_H_
+#define MINDSPORE_LITE_MICRO_CODER_SOFTMAX_INT8_CODER_H_
+
+#include <cstddef>
+#include <string>
+#include <vector>
+#include "micro/coder/opcoders/base/softmax_base_coder.h"
+
+namespace mindspore::lite::micro::nnacl {
+
+class SoftMaxInt8Coder final : public SoftmaxBaseCoder {
+ public:
+  SoftMaxInt8Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
+                   const Model::Node *node, size_t node_index, Target target)
+      : SoftmaxBaseCoder(in_tensors, out_tensors, node, node_index, target) {}
+
+  ~SoftMaxInt8Coder() override = default;
+
+  int Prepare(CoderContext *const context) override;
+
+  int DoCode(CoderContext *const context) override;
+
+ private:
+  int *sum_data_{nullptr};
+  int *exp_data_{nullptr};
+  size_t exp_data_size_{0};
+  size_t sum_data_size_{0};
+};
+
+} // namespace mindspore::lite::micro::nnacl
+#endif // MINDSPORE_LITE_MICRO_CODER_SOFTMAX_INT8_CODER_H_
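A closing note on the work partitioning in SoftMaxInt8Coder::DoCode: stride = UP_DIV(outter_size, thread_num_) and count = MSMIN(stride, outter_size - stride * task_id) is the usual nnacl row split, and in the single-threaded branch it degenerates to the whole outer plane. With the macros written out (UP_DIV and MSMIN come from nnacl's op_base.h; the sizes below are made up):

    #include <cstdio>

    #define UP_DIV(x, y) (((x) + (y) - (1)) / (y))
    #define MSMIN(x, y) ((x) < (y) ? (x) : (y))

    int main() {
      int outter_size = 7;  // hypothetical product of dims before the softmax axis
      int thread_num = 3;
      int stride = UP_DIV(outter_size, thread_num);  // 3 rows per task
      for (int task_id = 0; task_id < thread_num; ++task_id) {
        int count = MSMIN(stride, outter_size - stride * task_id);
        printf("task %d handles %d rows\n", task_id, count);  // 3, 3, 1
      }
      return 0;
    }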