From 3499218aa16bceee06fa1a8945dc56aebe9d42af Mon Sep 17 00:00:00 2001 From: zhaozhenlong Date: Mon, 19 Apr 2021 11:11:39 +0800 Subject: [PATCH] add op softplus cumsum --- .../cpu/nnacl/cumsum_parameter.h | 29 ++ .../cpu/nnacl/fp32/activation_fp32.c | 9 + .../cpu/nnacl/fp32/activation_fp32.h | 1 + .../cpu/nnacl/fp32/cumsum_fp32.c | 245 +++++++++++ .../cpu/nnacl/fp32/cumsum_fp32.h | 32 ++ .../cpu/nnacl/infer/cumsum_infer.c | 40 ++ .../cpu/nnacl/infer/cumsum_infer.h | 31 ++ .../cpu/nnacl/infer/infer_register.h | 3 +- mindspore/core/ops/cumsum.cc | 59 +++ mindspore/core/ops/cumsum.h | 46 +++ mindspore/core/ops/op_utils.h | 2 + mindspore/lite/schema/ops.fbs | 6 + mindspore/lite/src/ops/ops_def.cc | 6 + mindspore/lite/src/ops/ops_func_declare.h | 2 + mindspore/lite/src/ops/ops_utils.cc | 6 + .../lite/src/ops/populate/cumsum_populate.cc | 41 ++ .../kernel/arm/fp32/activation_fp32.cc | 6 +- .../runtime/kernel/arm/fp32/cumsum_fp32.cc | 152 +++++++ .../src/runtime/kernel/arm/fp32/cumsum_fp32.h | 48 +++ .../test/ut/nnacl/infer/cumsum_infer_test.cc | 63 +++ .../kernel/arm/fp32/activation_fp32_test.cc | 46 ++- .../runtime/kernel/arm/fp32/cumsum_tests.cc | 384 ++++++++++++++++++ 22 files changed, 1254 insertions(+), 3 deletions(-) create mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/cumsum_parameter.h create mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/cumsum_fp32.c create mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/cumsum_fp32.h create mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/cumsum_infer.c create mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/cumsum_infer.h create mode 100644 mindspore/core/ops/cumsum.cc create mode 100644 mindspore/core/ops/cumsum.h create mode 100644 mindspore/lite/src/ops/populate/cumsum_populate.cc create mode 100644 mindspore/lite/src/runtime/kernel/arm/fp32/cumsum_fp32.cc create mode 100644 mindspore/lite/src/runtime/kernel/arm/fp32/cumsum_fp32.h 
create mode 100644 mindspore/lite/test/ut/nnacl/infer/cumsum_infer_test.cc create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/cumsum_tests.cc diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/cumsum_parameter.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/cumsum_parameter.h new file mode 100644 index 00000000000..767979f7239 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/cumsum_parameter.h @@ -0,0 +1,29 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_NNACL_CUMSUM_PARAMETER_H_ +#define MINDSPORE_NNACL_CUMSUM_PARAMETER_H_ + +#include "nnacl/op_base.h" + +typedef struct CumSumParameter { + OpParameter op_parameter_; + bool reverse_; + bool exclusive_; + int axis_; +} CumsumParameter; + +#endif // MINDSPORE_NNACL_CUMSUM_PARAMETER_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/activation_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/activation_fp32.c index 6df6935e22e..cb1be6d2209 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/activation_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/activation_fp32.c @@ -261,3 +261,12 @@ int Gelu(const float *src, int length, float *dst, bool approximate) { } return NNACL_OK; } + +int Softplus(const float *src, int length, float *dst) { + int i = 0; + for (; i < length; ++i) { + single_exp(src[i], dst + i); + dst[i] = log1p(dst[i]); + } + return NNACL_OK; +} diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/activation_fp32.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/activation_fp32.h index 3a3b4183d3c..f3e32687de0 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/activation_fp32.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/activation_fp32.h @@ -41,6 +41,7 @@ int Swish(const float *src, int length, float *dst); int HSwish(const float *src, int length, float *dst); int HardTanh(const float *src, int length, float *dst, float min_val, float max_val); int Gelu(const float *src, int length, float *dst, bool approximate); +int Softplus(const float *src, int length, float *dst); float TanhOpt(float src); #ifdef __cplusplus diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/cumsum_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/cumsum_fp32.c new file mode 100644 index 00000000000..55c2e8a8b74 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/cumsum_fp32.c 
@@ -0,0 +1,245 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnacl/fp32/cumsum_fp32.h"
+#include "nnacl/op_base.h"
+
+// Forward cumulative sum over axis_dim for float data indexed as [out_dim, axis_dim, inner_dim].
+// (a, b, c) -> (a, a+b, a+b+c) when exclusive == false; (a, b, c) -> (0, a, a+b) when exclusive == true.
+void Cumsum(const float *input, float *output, int out_dim, int axis_dim, int inner_dim, bool exclusive) {
+  // Seed the first slice along the axis: a copy of the input slice when not exclusive,
+  // all zeros when exclusive. Every later slice is accumulated from it in the loop below.
+  if (!exclusive) {
+    for (int i = 0; i < out_dim; ++i) {
+      const float *layer_input = input + i * axis_dim * inner_dim;
+      float *layer_output = output + i * axis_dim * inner_dim;
+      int j = 0;
+#if defined(ENABLE_NEON) || defined(ENABLE_SSE)
+      for (; j <= inner_dim - C4NUM; j += C4NUM) {
+        MS_FLOAT32X4 val = MS_LDQ_F32(layer_input + j);
+        MS_STQ_F32(layer_output + j, val);
+      }
+#endif
+      for (; j < inner_dim; ++j) {
+        *(layer_output + j) = *(layer_input + j);
+      }
+    }
+  } else {
+    for (int i = 0; i < out_dim; ++i) {
+      float *layer_output = output + i * axis_dim * inner_dim;
+      int j = 0;
+#if defined(ENABLE_NEON) || defined(ENABLE_SSE)
+      for (; j <= inner_dim - C4NUM; j += C4NUM) {
+        MS_FLOAT32X4 zero_val = MS_MOVQ_F32(0.0f);
+        MS_STQ_F32(layer_output + j, zero_val);
+      }
+#endif
+      for (; j < inner_dim; ++j) {
+        *(layer_output + j) = 0.0f;
+      }
+    }
+  }
+  int input_offset = exclusive ? 0 : 1;  // exclusive sums lag one slice behind the input
+  for (int i = 0; i < out_dim; ++i) {
+    const float *layer_input = input + i * axis_dim * inner_dim + inner_dim * input_offset;
+    float *layer_last_output = output + i * axis_dim * inner_dim;
+    float *layer_output = layer_last_output + inner_dim;
+
+    for (int j = 1; j < axis_dim; ++j) {
+      int k = 0;
+#if defined(ENABLE_NEON) || defined(ENABLE_SSE)
+      for (; k <= inner_dim - C4NUM; k += C4NUM) {
+        MS_FLOAT32X4 input_val = MS_LDQ_F32(layer_input + k);
+        MS_FLOAT32X4 last_output_val = MS_LDQ_F32(layer_last_output + k);
+        MS_FLOAT32X4 out_val = MS_ADDQ_F32(input_val, last_output_val);
+        MS_STQ_F32(layer_output + k, out_val);
+      }
+#endif
+      for (; k < inner_dim; ++k) {
+        // output(i, j, k) = input(i, j - 1 + input_offset, k) + output(i, j - 1, k)
+        *(layer_output + k) = *(layer_input + k) + *(layer_last_output + k);
+      }
+      layer_input += inner_dim;
+      layer_last_output += inner_dim;
+      layer_output += inner_dim;
+    }
+  }
+}
+
+// Reverse cumulative sum over axis_dim for float data (accumulates from the last slice towards the first).
+// (a, b, c) -> (c+b+a, c+b, c) when exclusive == false; (a, b, c) -> (c+b, c, 0) when exclusive == true.
+void CumsumReverse(const float *input, float *output, int out_dim, int axis_dim, int inner_dim, bool exclusive) {
+  if (!exclusive) {
+    for (int i = 0; i < out_dim; ++i) {
+      const float *layer_input = input + i * axis_dim * inner_dim + (axis_dim - 1) * inner_dim;
+      float *layer_output = output + i * axis_dim * inner_dim + (axis_dim - 1) * inner_dim;
+      int j = 0;
+#if defined(ENABLE_NEON) || defined(ENABLE_SSE)
+      for (; j <= inner_dim - C4NUM; j += C4NUM) {
+        MS_FLOAT32X4 val = MS_LDQ_F32(layer_input + j);
+        MS_STQ_F32(layer_output + j, val);
+      }
+#endif
+      for (; j < inner_dim; ++j) {
+        *(layer_output + j) = *(layer_input + j);
+      }
+    }
+  } else {
+    for (int i = 0; i < out_dim; ++i) {
+      float *layer_output = output + i * axis_dim * inner_dim + (axis_dim - 1) * inner_dim;
+      int j = 0;
+#if defined(ENABLE_NEON) || defined(ENABLE_SSE)
+      for (; j <= inner_dim - C4NUM; j += C4NUM) {
+        MS_FLOAT32X4 zero_val = MS_MOVQ_F32(0.0f);
+        MS_STQ_F32(layer_output + j, zero_val);
+      }
+#endif
+      for (; j < inner_dim; ++j) {
+        *(layer_output + j) = 0.0f;
+      }
+    }
+  }
+  int input_offset = exclusive ? 0 : 1;  // exclusive sums lag one slice behind the input
+  for (int i = 0; i < out_dim; ++i) {
+    const float *layer_input = input + (i + 1) * axis_dim * inner_dim - 1 - input_offset * inner_dim;
+    float *layer_last_output = output + (i + 1) * axis_dim * inner_dim - 1;
+    float *layer_output = layer_last_output - inner_dim;
+
+    for (int j = 1; j < axis_dim; ++j) {  // the three pointers address the LAST element of their slice
+      int k = 0;
+#if defined(ENABLE_NEON) || defined(ENABLE_SSE)
+      for (; k <= inner_dim - C4NUM; k += C4NUM) {
+        MS_FLOAT32X4 input_val = MS_LDQ_F32(layer_input - k - 3);
+        MS_FLOAT32X4 last_output_val = MS_LDQ_F32(layer_last_output - k - 3);
+        MS_FLOAT32X4 out_val = MS_ADDQ_F32(input_val, last_output_val);
+        MS_STQ_F32(layer_output - k - 3, out_val);
+      }
+#endif
+      for (; k < inner_dim; ++k) {
+        *(layer_output - k) = *(layer_input - k) + *(layer_last_output - k);
+      }
+      layer_input -= inner_dim;
+      layer_last_output -= inner_dim;
+      layer_output -= inner_dim;
+    }
+  }
+}
+
+// Forward cumulative sum over axis_dim for int data indexed as [out_dim, axis_dim, inner_dim].
+// (a, b, c) -> (a, a+b, a+b+c) when exclusive == false; (a, b, c) -> (0, a, a+b) when exclusive == true.
+void CumsumInt(const int *input, int *output, int out_dim, int axis_dim, int inner_dim, bool exclusive) {
+  // Seed the first slice along the axis: a copy of the input slice when not exclusive,
+  // all zeros when exclusive. Every later slice is accumulated from it in the loop below.
+  if (!exclusive) {
+    for (int i = 0; i < out_dim; ++i) {
+      const int *layer_input = input + i * axis_dim * inner_dim;
+      int *layer_output = output + i * axis_dim * inner_dim;
+      int j = 0;
+#if defined(ENABLE_NEON) || defined(ENABLE_SSE)
+      for (; j <= inner_dim - C4NUM; j += C4NUM) {
+        MS_INT32X4 val = MS_LDQ_EPI32(layer_input + j);
+        MS_STQ_EPI32(layer_output + j, val);
+      }
+#endif
+      for (; j < inner_dim; ++j) {
+        *(layer_output + j) = *(layer_input + j);
+      }
+    }
+  } else {
+    for (int i = 0; i < out_dim; ++i) {
+      int *layer_output = output + i * axis_dim * inner_dim;
+      int j = 0;
+#if defined(ENABLE_NEON) || defined(ENABLE_SSE)
+      for (; j <= inner_dim - C4NUM; j += C4NUM) {
+        MS_INT32X4 zero_val = MS_MOVQ_EPI32(0);
+        MS_STQ_EPI32(layer_output + j, zero_val);
+      }
+#endif
+      for (; j < inner_dim; ++j) {
+        *(layer_output + j) = 0;  // index by j: the SIMD loop above may already have advanced j
+      }
+    }
+  }
+  int input_offset = exclusive ? 0 : 1;  // exclusive sums lag one slice behind the input
+  for (int i = 0; i < out_dim; ++i) {
+    const int *layer_input = input + i * axis_dim * inner_dim + inner_dim * input_offset;
+    int *layer_last_output = output + i * axis_dim * inner_dim;
+    int *layer_output = layer_last_output + inner_dim;
+
+    for (int j = 1; j < axis_dim; ++j) {
+      int k = 0;
+#if defined(ENABLE_NEON) || defined(ENABLE_SSE)
+      for (; k <= inner_dim - C4NUM; k += C4NUM) {
+        MS_INT32X4 input_val = MS_LDQ_EPI32(layer_input + k);
+        MS_INT32X4 last_output_val = MS_LDQ_EPI32(layer_last_output + k);
+        MS_INT32X4 out_val = MS_ADDQ_EPI32(input_val, last_output_val);
+        MS_STQ_EPI32(layer_output + k, out_val);
+      }
+#endif
+      for (; k < inner_dim; ++k) {
+        *(layer_output + k) = *(layer_input + k) + *(layer_last_output + k);
+      }
+      layer_input += inner_dim;
+      layer_last_output += inner_dim;
+      layer_output += inner_dim;
+    }
+  }
+}
+
+// Reverse cumulative sum over axis_dim for int data (accumulates from the last slice towards the first).
+// (a, b, c) -> (c+b+a, c+b, c) when exclusive == false; (a, b, c) -> (c+b, c, 0) when exclusive == true.
+void CumsumReverseInt(const int *input, int *output, int out_dim, int axis_dim, int inner_dim, bool exclusive) {
+  if (!exclusive) {
+    for (int i = 0; i < out_dim; ++i) {
+      const int *layer_input = input + i * axis_dim * inner_dim + (axis_dim - 1) * inner_dim;
+      int *layer_output = output + i * axis_dim * inner_dim + (axis_dim - 1) * inner_dim;
+      for (int j = 0; j < inner_dim; ++j) {
+        *(layer_output++) = *(layer_input++);
+      }
+    }
+  } else {
+    for (int i = 0; i < out_dim; ++i) {
+      int *layer_output = output + i * axis_dim * inner_dim + (axis_dim - 1) * inner_dim;
+      for (int j = 0; j < inner_dim; ++j) {
+        *(layer_output++) = 0;
+      }
+    }
+  }
+  int input_offset = exclusive ? 0 : 1;  // exclusive sums lag one slice behind the input
+  for (int i = 0; i < out_dim; ++i) {
+    const int *layer_input = input + (i + 1) * axis_dim * inner_dim - 1 - input_offset * inner_dim;
+    int *layer_last_output = output + (i + 1) * axis_dim * inner_dim - 1;
+    int *layer_output = layer_last_output - inner_dim;
+
+    for (int j = 1; j < axis_dim; ++j) {  // the three pointers address the LAST element of their slice
+      int k = 0;
+#if defined(ENABLE_NEON) || defined(ENABLE_SSE)
+      for (; k <= inner_dim - C4NUM; k += C4NUM) {
+        MS_INT32X4 input_val = MS_LDQ_EPI32(layer_input - k - 3);
+        MS_INT32X4 last_output_val = MS_LDQ_EPI32(layer_last_output - k - 3);
+        MS_INT32X4 out_val = MS_ADDQ_EPI32(input_val, last_output_val);
+        MS_STQ_EPI32(layer_output - k - 3, out_val);
+      }
+#endif
+      for (; k < inner_dim; ++k) {
+        *(layer_output - k) = *(layer_input - k) + *(layer_last_output - k);
+      }
+      layer_input -= inner_dim;
+      layer_last_output -= inner_dim;
+      layer_output -= inner_dim;
+    }
+  }
+}
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/cumsum_fp32.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/cumsum_fp32.h
new file mode 100644
index 00000000000..0f1842b2616
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/cumsum_fp32.h
@@ -0,0 +1,32 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_NNACL_FP32_CUMSUM_H_ +#define MINDSPORE_NNACL_FP32_CUMSUM_H_ +#include "nnacl/op_base.h" +#include "nnacl/cumsum_parameter.h" + +#ifdef __cplusplus +extern "C" { +#endif +void Cumsum(const float *input, float *output, int out_dim, int axis_dim, int inner_dim, bool exclusive); +void CumsumReverse(const float *input, float *output, int out_dim, int axis_dim, int inner_dim, bool exclusive); +void CumsumInt(const int *input, int *output, int out_dim, int axis_dim, int inner_dim, bool exclusive); +void CumsumReverseInt(const int *input, int *output, int out_dim, int axis_dim, int inner_dim, bool exclusive); +#ifdef __cplusplus +} +#endif + +#endif // MINDSPORE_NNACL_FP32_CUMSUM_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/cumsum_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/cumsum_infer.c new file mode 100644 index 00000000000..ff4d1c61a8f --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/cumsum_infer.c @@ -0,0 +1,40 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nnacl/infer/cumsum_infer.h" +#include "nnacl/infer/infer_register.h" + +int CumsumInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter) { +#ifdef Debug + int check_ret = CheckAugmentNullOutputSize(inputs, inputs_size, outputs, outputs_size, parameter, 1); + if (check_ret != NNACL_OK) { + return check_ret; + } +#endif + + const TensorC *input = inputs[0]; + TensorC *output = outputs[0]; + SetDataTypeFormat(output, input); + if (!parameter->infer_flag_) { + return NNACL_INFER_INVALID; + } + + SetShapeTensor(output, input); + return NNACL_OK; +} + +REG_INFER(Cumsum, PrimType_CumSum, CumsumInferShape) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/cumsum_infer.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/cumsum_infer.h new file mode 100644 index 00000000000..7680f3e438b --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/cumsum_infer.h @@ -0,0 +1,31 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_NNACL_CUMSUM_INFER_H +#define MINDSPORE_NNACL_CUMSUM_INFER_H + +#include "nnacl/infer/common_infer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int CumsumInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, + OpParameter *parameter); + +#ifdef __cplusplus +} +#endif +#endif // MINDSPORE_NNACL_CUMSUM_INFER_H diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.h index 7000c15c370..c57a02dfa7a 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.h @@ -216,8 +216,9 @@ enum PrimType { PrimType_LogSoftmax = 189, PrimType_Call = 190, PrimType_Custom = 191, + PrimType_CumSum = 192, PrimType_MIN = PrimType_NONE, - PrimType_MAX = PrimType_Custom + 1 + PrimType_MAX = PrimType_CumSum + 1 }; void RegInfer(int prim_type, InferShape func); diff --git a/mindspore/core/ops/cumsum.cc b/mindspore/core/ops/cumsum.cc new file mode 100644 index 00000000000..8dc7db24ac9 --- /dev/null +++ b/mindspore/core/ops/cumsum.cc @@ -0,0 +1,59 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include "ops/cumsum.h" +#include "utils/check_convert_utils.h" +#include "abstract/primitive_infer_map.h" +#include "ops/op_utils.h" + +namespace mindspore { +namespace ops { +void CumSum::Init(const bool exclusive, const bool reverse) { + this->set_exclusive(exclusive); + this->set_reverse(reverse); +} + +void CumSum::set_exclusive(const bool exclusive) { this->AddAttr(kExclusive, MakeValue(exclusive)); } + +bool CumSum::get_exclusive() const { + auto value_ptr = this->GetAttr(kExclusive); + return GetValue(value_ptr); +} + +void CumSum::set_reverse(const bool reverse) { this->AddAttr(kReverse, MakeValue(reverse)); } + +bool CumSum::get_reverse() const { + auto value_ptr = this->GetAttr(kReverse); + return GetValue(value_ptr); +} +AbstractBasePtr CumSumInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive, + const std::vector &input_args) { + MS_EXCEPTION_IF_NULL(primitive); + auto prim_name = primitive->name(); + CheckAndConvertUtils::CheckInteger("input number", input_args.size(), kEqual, 2, prim_name); + for (const auto &item : input_args) { + MS_EXCEPTION_IF_NULL(item); + } + // infer shape + auto out_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape]; + // infer type + auto x_type = input_args[0]->BuildType()->cast()->element(); + return std::make_shared(x_type, out_shape); +} +REGISTER_PRIMITIVE_C(kNameCumSum, CumSum); +} // namespace ops +} // namespace mindspore diff --git a/mindspore/core/ops/cumsum.h b/mindspore/core/ops/cumsum.h new file mode 100644 index 00000000000..d458187e3d3 --- /dev/null +++ b/mindspore/core/ops/cumsum.h @@ -0,0 +1,46 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CORE_OPS_CUMSUM_H_ +#define MINDSPORE_CORE_OPS_CUMSUM_H_ +#include +#include + +#include "ops/primitive_c.h" +#include "abstract/abstract_value.h" +#include "utils/check_convert_utils.h" + +namespace mindspore { +namespace ops { +constexpr auto kNameCumSum = "CumSum"; +class CumSum : public PrimitiveC { + public: + CumSum() : PrimitiveC(kNameCumSum) {} + ~CumSum() = default; + MS_DECLARE_PARENT(CumSum, PrimitiveC); + void Init(const bool exclusive, const bool reverse); + void set_exclusive(const bool exclusive); + void set_reverse(const bool reverse); + bool get_exclusive() const; + bool get_reverse() const; +}; +AbstractBasePtr CumSumInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive, + const std::vector &input_args); +using PrimCumSum = std::shared_ptr; +} // namespace ops +} // namespace mindspore + +#endif // MINDSPORE_CORE_OPS_CUMSUM_H_ diff --git a/mindspore/core/ops/op_utils.h b/mindspore/core/ops/op_utils.h index 69efa1433f2..703e1a720da 100644 --- a/mindspore/core/ops/op_utils.h +++ b/mindspore/core/ops/op_utils.h @@ -232,6 +232,8 @@ constexpr auto kSpliceForwardIndexes = "forward_indexes"; constexpr auto kSpliceOutputDims = "output_dim"; constexpr auto kSideEffectIO = "side_effect_io"; constexpr auto kDeviceType = "device_type"; +constexpr auto kExclusive = "exclusive"; +constexpr auto kReverse = "reverse"; const std::set common_valid_types = {kInt8, kInt16, kInt32, kInt64, kUInt8, kUInt16, kUInt32, kUInt64, kFloat16, kFloat32, kFloat64}; diff --git a/mindspore/lite/schema/ops.fbs 
b/mindspore/lite/schema/ops.fbs index 62f1ad4141c..da0bd88273c 100644 --- a/mindspore/lite/schema/ops.fbs +++ b/mindspore/lite/schema/ops.fbs @@ -209,6 +209,7 @@ union PrimitiveType { LogSoftmax, Call, Custom, + CumSum, } table Abs { @@ -442,6 +443,11 @@ table Crop { offsets: [long]; } +table CumSum { + exclusive: bool = false; + reverse: bool = false; +} + table CustomExtractFeatures { } diff --git a/mindspore/lite/src/ops/ops_def.cc b/mindspore/lite/src/ops/ops_def.cc index 5b17ecef5bd..43888a54060 100644 --- a/mindspore/lite/src/ops/ops_def.cc +++ b/mindspore/lite/src/ops/ops_def.cc @@ -208,6 +208,7 @@ OP_TYPE(Splice) OP_TYPE(LogSoftmax) OP_TYPE(Call) OP_TYPE(Custom) +OP_TYPE(CumSum) OP_TYPE_DEF_END(PrimitiveType) OP_SCHEMA_DEF(Abs) @@ -1104,6 +1105,11 @@ OP_SCHEMA_DEF_END(LogSoftmax) OP_SCHEMA_DEF(Call) OP_SCHEMA_DEF_END(Call) +OP_SCHEMA_DEF(CumSum) +OP_ATTR(exclusive, bool) +OP_ATTR(reverse, bool) +OP_SCHEMA_DEF_END(CumSum) + OP_SCHEMA_DEF_ONLY(Custom) OP_ATTR_ONLY(type, string) OP_ATTR_ONLY(attr, [Attribute]) diff --git a/mindspore/lite/src/ops/ops_func_declare.h b/mindspore/lite/src/ops/ops_func_declare.h index f768e0b43f7..6ebeb025419 100644 --- a/mindspore/lite/src/ops/ops_func_declare.h +++ b/mindspore/lite/src/ops/ops_func_declare.h @@ -245,6 +245,7 @@ #include "ops/splice.h" #include "ops/log_softmax.h" #include "ops/call.h" +#include "ops/cumsum.h" #define FUNC_MSOP2SCHEMAOP_DECLARE(OP) \ namespace mindspore::lite::ops { \ @@ -459,5 +460,6 @@ FUNC_MSOP2SCHEMAOP_DECLARE(ResizeGrad); FUNC_MSOP2SCHEMAOP_DECLARE(Splice); FUNC_MSOP2SCHEMAOP_DECLARE(LogSoftmax); FUNC_MSOP2SCHEMAOP_DECLARE(Call); +FUNC_MSOP2SCHEMAOP_DECLARE(CumSum); #endif #endif // MINDSPORE_LITE_SRC_OPS_OPS_FUNC_DECLARE_H_ diff --git a/mindspore/lite/src/ops/ops_utils.cc b/mindspore/lite/src/ops/ops_utils.cc index eaddf862329..2d6ef9d2019 100644 --- a/mindspore/lite/src/ops/ops_utils.cc +++ b/mindspore/lite/src/ops/ops_utils.cc @@ -760,6 +760,11 @@ schema::PrimitiveT 
*CallPrimitiveCreator(const AnfNodePtr &node) { return ms_primc != nullptr ? ops::MSOp2SchemaOp(ms_primc.get()) : nullptr; } +schema::PrimitiveT *CumSumPrimitiveCreator(const AnfNodePtr &node) { + auto ms_primc = GetValueNode>(node); + return ms_primc != nullptr ? ops::MSOp2SchemaOp(ms_primc.get()) : nullptr; +} + RegistryMSOps g_absPrimitiveCreatorRegistry("Abs", AbsPrimitiveCreator); RegistryMSOps g_absGradPrimitiveCreatorRegistry("AbsGrad", AbsGradPrimitiveCreator); RegistryMSOps g_activationPrimitiveCreatorRegistry("Activation", ActivationPrimitiveCreator); @@ -975,6 +980,7 @@ RegistryMSOps g_erfPrimitiveCreatorRegistry("Erf", ErfPrimitiveCreator); RegistryMSOps g_SplicePrimitiveCreatorRegistry("Splice", SplicePrimitiveCreator); RegistryMSOps g_LogSoftmaxPrimitiveCreatorRegistry("LogSoftmax", LogSoftmaxPrimitiveCreator); RegistryMSOps g_CallPrimitiveCreatorRegistry("call", CallPrimitiveCreator); +RegistryMSOps g_CumSumPrimitiveCreatorRegistry("CumSum", CumSumPrimitiveCreator); schema::PrimitiveT *CustomPrimitiveCreator(const AnfNodePtr &node) { auto ms_primc = GetValueNode>(node); diff --git a/mindspore/lite/src/ops/populate/cumsum_populate.cc b/mindspore/lite/src/ops/populate/cumsum_populate.cc new file mode 100644 index 00000000000..b5a925829fa --- /dev/null +++ b/mindspore/lite/src/ops/populate/cumsum_populate.cc @@ -0,0 +1,41 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "src/ops/populate/populate_register.h" +#include "nnacl/cumsum_parameter.h" +using mindspore::schema::PrimitiveType_CumSum; + +namespace mindspore { +namespace lite { +namespace { +OpParameter *PopulateCumSumParameter(const void *prim) { + auto primitive = static_cast(prim); + auto cumsum_prim = primitive->value_as_CumSum(); + CumSumParameter *cumsum_param = reinterpret_cast(malloc(sizeof(CumSumParameter))); + if (cumsum_param == nullptr) { + MS_LOG(ERROR) << "malloc CumsumParameter failed."; + return nullptr; + } + memset(cumsum_param, 0, sizeof(CumSumParameter)); + cumsum_param->op_parameter_.type_ = primitive->value_type(); + cumsum_param->exclusive_ = cumsum_prim->exclusive(); + cumsum_param->reverse_ = cumsum_prim->reverse(); + return reinterpret_cast(cumsum_param); +} +} // namespace + +REG_POPULATE(PrimitiveType_CumSum, PopulateCumSumParameter, SCHEMA_CUR) +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/activation_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/activation_fp32.cc index 130ebb60f6b..286be906c8d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/activation_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/activation_fp32.cc @@ -29,6 +29,7 @@ using mindspore::schema::ActivationType_HSWISH; using mindspore::schema::ActivationType_LEAKY_RELU; using mindspore::schema::ActivationType_RELU; using mindspore::schema::ActivationType_RELU6; +using mindspore::schema::ActivationType_SOFTPLUS; using mindspore::schema::ActivationType_SWISH; using mindspore::schema::PrimitiveType_Activation; @@ -38,7 +39,8 @@ int ActivationCPUKernel::Init() { type_ != schema::ActivationType_LEAKY_RELU && type_ != schema::ActivationType_SIGMOID && type_ != schema::ActivationType_TANH && type_ != schema::ActivationType_HSWISH && type_ != schema::ActivationType_SWISH && type_ != schema::ActivationType_HSIGMOID && - type_ != schema::ActivationType_HARD_TANH && type_ != 
schema::ActivationType_GELU) { + type_ != schema::ActivationType_HARD_TANH && type_ != schema::ActivationType_GELU && + type_ != schema::ActivationType_SOFTPLUS) { MS_LOG(ERROR) << "Activation fp32 not support type: " << type_; return RET_ERROR; } @@ -80,6 +82,8 @@ int ActivationCPUKernel::DoActivation(int task_id) { ret = HardTanh(input_addr + stride * task_id, count, output_addr + stride * task_id, min_val_, max_val_); } else if (type_ == schema::ActivationType_GELU) { ret = Gelu(input_addr + stride * task_id, count, output_addr + stride * task_id, true); + } else if (type_ == schema::ActivationType_SOFTPLUS) { + ret = Softplus(input_addr + stride * task_id, count, output_addr + stride * task_id); } else { MS_LOG(ERROR) << "Activation type error"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/cumsum_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/cumsum_fp32.cc new file mode 100644 index 00000000000..6762d7b8b55 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/cumsum_fp32.cc @@ -0,0 +1,152 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#include "src/runtime/kernel/arm/fp32/cumsum_fp32.h"
+#include "nnacl/fp32/cumsum_fp32.h"
+#include "schema/model_generated.h"
+#include "src/kernel_registry.h"
+#include "src/runtime/runtime_api.h"
+
+using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_NULL_PTR;
+using mindspore::lite::RET_OK;
+using mindspore::schema::PrimitiveType_CumSum;
+
+namespace mindspore::kernel {
+namespace {
+// Task entry point handed to ParallelLaunch by CumSumCPUKernel::Run(): `cdata` is the kernel and
+// `task_id` selects its slice. Dispatches on the data type of the first input tensor.
+int CumsumLaunch(void *cdata, int task_id) {
+  if (cdata == nullptr) {
+    MS_LOG(ERROR) << "cdata is nullptr!";
+    return RET_NULL_PTR;
+  }
+  auto kernel = reinterpret_cast<CumSumCPUKernel *>(cdata);
+  auto input_tensor = kernel->in_tensors().at(0);
+  switch (input_tensor->data_type()) {
+    case kNumberTypeFloat32:
+      return kernel->DoCumsum(task_id);
+    case kNumberTypeInt32:
+      return kernel->DoCumsumInt(task_id);
+    default:
+      MS_LOG(ERROR) << "Cumsum support data type int32 or float32";
+      return RET_ERROR;
+  }
+}
+}  // namespace
+
+// Defers the shape-dependent setup to ReSize() until shape inference has been done.
+int CumSumCPUKernel::Init() {
+  if (!InferShapeDone()) {
+    return RET_OK;
+  }
+  return ReSize();
+}
+
+// Reads the axis from the second input tensor, normalizes and validates it, then caches the
+// extents before the axis (out_dim_), along it (axis_dim_) and after it (in_dim_), plus the
+// number of outer slices each task handles (unit_).
+int CumSumCPUKernel::ReSize() {
+  // at(1) below would throw on a node with a missing axis input; report it as an error instead.
+  if (in_tensors_.size() != 2) {
+    MS_LOG(ERROR) << "Cumsum expects 2 inputs (data, axis), got " << in_tensors_.size();
+    return RET_ERROR;
+  }
+  auto input_tensor = in_tensors_.at(0);
+  auto axis_tensor = in_tensors_.at(1);
+  int *axis_data = reinterpret_cast<int *>(axis_tensor->data_c());
+  if (axis_data == nullptr) {
+    MS_LOG(ERROR) << "Cumsum axis nullptr";
+    return RET_ERROR;
+  }
+  const auto &in_shape = input_tensor->shape();
+  const int rank = static_cast<int>(in_shape.size());
+  param_->axis_ = *axis_data;
+  if (param_->axis_ < 0) {
+    param_->axis_ += rank;
+  }
+  // An axis below -rank is still negative after normalization and has to be rejected as well.
+  if (param_->axis_ < 0 || param_->axis_ >= rank) {
+    MS_LOG(ERROR) << "axis " << param_->axis_ << " out of range for in tensor rank " << rank;
+    return RET_ERROR;
+  }
+  out_dim_ = 1;
+  for (int i = 0; i < param_->axis_; ++i) {
+    out_dim_ *= in_shape.at(i);
+  }
+  axis_dim_ = in_shape.at(param_->axis_);
+  in_dim_ = 1;
+  for (int i = param_->axis_ + 1; i < rank; ++i) {
+    in_dim_ *= in_shape.at(i);
+  }
+  unit_ = UP_DIV(out_dim_, op_parameter_->thread_num_);
+  return RET_OK;
+}
+
+// Float32 worker: accumulates the outer slices [task_id * unit_, task_id * unit_ + out_dim).
+int CumSumCPUKernel::DoCumsum(int task_id) {
+  auto input_tensor = in_tensors_.at(0);
+  MS_ASSERT(input_tensor != nullptr);
+  float *input_data = reinterpret_cast<float *>(input_tensor->data_c());
+  if (input_data == nullptr) {
+    MS_LOG(ERROR) << "input data nullptr";
+    return RET_ERROR;
+  }
+  auto output_tensor = out_tensors_.at(0);
+  MS_ASSERT(output_tensor != nullptr);
+  float *output_data = reinterpret_cast<float *>(output_tensor->data_c());
+  if (output_data == nullptr) {
+    MS_LOG(ERROR) << "output data nullptr";
+    return RET_ERROR;
+  }
+  int out_dim = MSMIN(out_dim_ - unit_ * task_id, unit_);
+  // With more tasks than outer slices the trailing tasks have nothing to do; return before
+  // forming pointers that would lie beyond the buffers.
+  if (out_dim <= 0) {
+    return RET_OK;
+  }
+  // Every outer slice spans axis_dim_ * in_dim_ elements.
+  int offset = task_id * unit_ * axis_dim_ * in_dim_;
+  float *input = input_data + offset;
+  float *output = output_data + offset;
+  if (!param_->reverse_) {
+    Cumsum(input, output, out_dim, axis_dim_, in_dim_, param_->exclusive_);
+  } else {
+    CumsumReverse(input, output, out_dim, axis_dim_, in_dim_, param_->exclusive_);
+  }
+  return RET_OK;
+}
+
+// Int32 worker: same slicing as DoCumsum, using the integer nnacl routines.
+int CumSumCPUKernel::DoCumsumInt(int task_id) {
+  auto input_tensor = in_tensors_.at(0);
+  MS_ASSERT(input_tensor != nullptr);
+  int *input_data = reinterpret_cast<int *>(input_tensor->data_c());
+  if (input_data == nullptr) {
+    MS_LOG(ERROR) << "input data nullptr";
+    return RET_ERROR;
+  }
+  auto output_tensor = out_tensors_.at(0);
+  MS_ASSERT(output_tensor != nullptr);
+  int *output_data = reinterpret_cast<int *>(output_tensor->data_c());
+  if (output_data == nullptr) {
+    MS_LOG(ERROR) << "output data nullptr";
+    return RET_ERROR;
+  }
+  int out_dim = MSMIN(out_dim_ - unit_ * task_id, unit_);
+  // With more tasks than outer slices the trailing tasks have nothing to do; return before
+  // forming pointers that would lie beyond the buffers.
+  if (out_dim <= 0) {
+    return RET_OK;
+  }
+  // Every outer slice spans axis_dim_ * in_dim_ elements.
+  int offset = task_id * unit_ * axis_dim_ * in_dim_;
+  int *input = input_data + offset;
+  int *output = output_data + offset;
+  if (!param_->reverse_) {
+    CumsumInt(input, output, out_dim, axis_dim_, in_dim_, param_->exclusive_);
+  } else {
+    CumsumReverseInt(input, output, out_dim, axis_dim_, in_dim_, param_->exclusive_);
+  }
+  return RET_OK;
+}
+
+// Runs CumsumLaunch once per task on the context's thread pool.
+int CumSumCPUKernel::Run() {
+  int ret = ParallelLaunch(static_cast<const lite::InnerContext *>(this->context_)->thread_pool_, CumsumLaunch, this,
+                           op_parameter_->thread_num_);
+
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Cumsum launch fail! ret: " << ret;
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_CumSum, LiteKernelCreator<CumSumCPUKernel>)
+REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_CumSum, LiteKernelCreator<CumSumCPUKernel>)
+}  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/cumsum_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/cumsum_fp32.h
new file mode 100644
index 00000000000..0e914e841d5
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/cumsum_fp32.h
@@ -0,0 +1,48 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CUMSUM_FP32_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CUMSUM_FP32_H_
+
+#include <vector>
+#include "include/errorcode.h"
+#include "nnacl/cumsum_parameter.h"
+#include "src/lite_kernel.h"
+
+namespace mindspore::kernel {
+// CPU kernel for the CumSum operator, registered for float32 and int32 inputs.
+// Input 0 is the data tensor, input 1 holds the axis as an int.
+class CumSumCPUKernel : public LiteKernel {
+ public:
+  CumSumCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
+                  const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
+      : LiteKernel(parameter, inputs, outputs, ctx), param_(reinterpret_cast<CumSumParameter *>(op_parameter_)) {}
+  ~CumSumCPUKernel() override = default;
+
+  // Calls ReSize() once shape inference is done; otherwise does nothing.
+  int Init() override;
+  // Reads and validates the axis and caches the extents used to split the work.
+  int ReSize() override;
+  // Launches one task per thread; each task ends up in DoCumsum or DoCumsumInt.
+  int Run() override;
+  // Per-task workers for float32 and int32 data. Public because the launch callback in the .cc calls them.
+  int DoCumsum(int task_id);
+  int DoCumsumInt(int task_id);
+
+ private:
+  int out_dim_ = 1;   // product of the dimensions before the axis
+  int axis_dim_ = 1;  // size of the axis dimension
+  int in_dim_ = 1;    // product of the dimensions after the axis
+  int unit_ = 1;      // number of outer slices handled by one task
+  // op_parameter_ viewed as CumSumParameter (axis_, exclusive_, reverse_).
+  CumSumParameter *param_ = nullptr;
+};
+}  // namespace mindspore::kernel
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CUMSUM_FP32_H_
diff --git a/mindspore/lite/test/ut/nnacl/infer/cumsum_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/cumsum_infer_test.cc
new file mode 100644
index 00000000000..ae61313f7b2
--- /dev/null
+++ b/mindspore/lite/test/ut/nnacl/infer/cumsum_infer_test.cc
@@ -0,0 +1,63 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vector>
+#include "common/common_test.h"
+#include "nnacl/infer/cumsum_infer.h"
+#include "nnacl/cumsum_parameter.h"
+
+namespace mindspore {
+
+class CumSumInferTest : public mindspore::CommonTest {
+ public:
+  CumSumInferTest() {}
+};
+
+// CumsumInferShape is expected to give the output the shape, data type and format of input 0.
+TEST_F(CumSumInferTest, Test0) {
+  // Value-initialized stack objects: nothing leaks when an ASSERT_* returns early, and no field
+  // is handed to CumsumInferShape in an indeterminate state.
+  TensorC input = {};
+  input.shape_size_ = 3;
+  input.shape_[0] = 4;
+  input.shape_[1] = 3;
+  input.shape_[2] = 2;
+  input.data_type_ = kNumberTypeInt32;
+  input.format_ = Format_NHWC;
+
+  TensorC axis = {};
+  axis.shape_size_ = 1;
+  axis.shape_[0] = 1;
+
+  TensorC output = {};
+  CumSumParameter parameter = {};
+  parameter.op_parameter_.infer_flag_ = true;
+
+  std::vector<const TensorC *> inputs = {&input, &axis};
+  std::vector<TensorC *> outputs = {&output};
+  int ret = CumsumInferShape(inputs.data(), inputs.size(), outputs.data(), outputs.size(),
+                             reinterpret_cast<OpParameter *>(&parameter));
+  ASSERT_EQ(ret, NNACL_OK);
+  ASSERT_EQ(output.shape_size_, 3);
+  ASSERT_EQ(output.shape_[0], 4);
+  ASSERT_EQ(output.shape_[1], 3);
+  ASSERT_EQ(output.shape_[2], 2);
+  ASSERT_EQ(output.data_type_, kNumberTypeInt32);
+  ASSERT_EQ(output.format_, Format_NHWC);
+}
+
+}  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/activation_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/activation_fp32_test.cc
index ab3ae8e667a..80450ec255e 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/activation_fp32_test.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/activation_fp32_test.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2021 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use
this file except in compliance with the License.
@@ -226,4 +226,52 @@ TEST_F(TestActivationFp32, HardTanh2) {
   output0_tensor.set_data(nullptr);
 }
 
+TEST_F(TestActivationFp32, Softplus) {
+  // Runs the fp32 Softplus activation on 14 values with 2 threads and compares against ln(1 + exp(x)).
+  std::vector<lite::Tensor *> inputs_tensor;
+  std::vector<lite::Tensor *> outputs_tensor;
+
+  // Value-initialized so the fields this test does not set are not left indeterminate.
+  ActivationParameter op_param = {};
+  op_param.op_parameter_.type_ = schema::PrimitiveType_Activation;
+  op_param.type_ = schema::ActivationType_SOFTPLUS;
+
+  std::vector<float> input = {1, 2, 3, 4, 5, -1, 6, 7, -10, -20, 20, 30, 14, 0};
+  std::vector<int> in_shape = {14};
+
+  lite::Tensor input0_tensor;
+  inputs_tensor.push_back(&input0_tensor);
+  input0_tensor.set_data(input.data());
+  input0_tensor.set_shape(in_shape);
+
+  std::vector<float> output(14);
+  std::vector<int> output_shape = {14};
+
+  lite::Tensor output0_tensor;
+  outputs_tensor.push_back(&output0_tensor);
+  output0_tensor.set_data(output.data());
+  output0_tensor.set_shape(output_shape);
+
+  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Activation};
+  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
+  ASSERT_NE(creator, nullptr);
+  lite::InnerContext ctx;
+  ctx.thread_num_ = 2;
+  ASSERT_EQ(lite::RET_OK, ctx.Init());
+  kernel::LiteKernel *kernel =
+    creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), &ctx, desc);
+  ASSERT_NE(kernel, nullptr);
+  auto ret = kernel->Run();
+  ASSERT_EQ(0, ret);
+  // softplus(-20) = ln(1 + exp(-20)) is about 2.1e-9 (it was listed as 2e-10).
+  std::vector<float> expect_output = {1.3132616,   2.1269281,   3.0485871,    4.0181499,    5.0067153,
+                                      0.31326169,  6.0024757,   7.0009117,    0.0000453989, 0.0000000021,
+                                      20.00000000, 30.00000000, 14.0000000,   0.69314718};
+  ASSERT_EQ(0, CompareOutputData(output.data(), expect_output.data(), 14, 0.00001));
+
+  // Detach the vector-owned buffers from the tensors before both go out of scope.
+  input0_tensor.set_data(nullptr);
+  output0_tensor.set_data(nullptr);
+}
+
 }  // namespace mindspore
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/cumsum_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/cumsum_tests.cc
new file mode 100644
index 00000000000..08502bc92ae
--- /dev/null
+++
b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/cumsum_tests.cc
@@ -0,0 +1,181 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstdlib>
+#include <iostream>
+#include <memory>
+#include <type_traits>
+#include <utility>
+#include <vector>
+#include "common/common_test.h"
+#include "nnacl/cumsum_parameter.h"
+#include "mindspore/lite/src/kernel_registry.h"
+
+namespace mindspore {
+namespace {
+// Passed as thread_num to keep the thread count lite::InnerContext is constructed with.
+constexpr int kContextDefaultThreads = 0;
+
+// Clears the data pointer of the given tensors on scope exit, so that they never outlive the
+// caller-owned buffers they were pointed at -- including when an ASSERT_* returns early.
+class DataDetacher {
+ public:
+  explicit DataDetacher(std::vector<lite::Tensor *> tensors) : tensors_(std::move(tensors)) {}
+  ~DataDetacher() {
+    for (auto *tensor : tensors_) {
+      tensor->set_data(nullptr);
+    }
+  }
+  DataDetacher(const DataDetacher &) = delete;
+  DataDetacher &operator=(const DataDetacher &) = delete;
+
+ private:
+  std::vector<lite::Tensor *> tensors_;
+};
+
+// Creates a CumSum kernel through the registry, runs it once over `input` (laid out as `shape`)
+// along `axis` and leaves the result in `output`. The kernel key uses the data type matching T.
+// Contains ASSERT_* checks, so call it through ASSERT_NO_FATAL_FAILURE.
+template <typename T, size_t N>
+void RunCumSum(const std::vector<int> &shape, T (&input)[N], int axis, bool exclusive, bool reverse, int thread_num,
+               T (&output)[N]) {
+  static_assert(std::is_same<T, float>::value || std::is_same<T, int>::value, "float or int32 only");
+  const auto data_type = std::is_same<T, float>::value ? kNumberTypeFloat32 : kNumberTypeInt32;
+  int axis_data[1] = {axis};
+
+  lite::Tensor in_tensor0(data_type, shape);
+  lite::Tensor in_tensor1(kNumberTypeInt32, {1});
+  lite::Tensor out_tensor0(data_type, shape);
+  auto ctx = std::make_shared<lite::InnerContext>();
+  std::unique_ptr<kernel::LiteKernel> cumsum_kernel;
+  // Declared last, hence destroyed first: the buffers are detached before the kernel, the context
+  // and the tensors are torn down, the same order the tests used when cleaning up by hand.
+  DataDetacher detacher({&in_tensor0, &in_tensor1, &out_tensor0});
+
+  in_tensor0.set_data(input);
+  in_tensor1.set_data(axis_data);
+  out_tensor0.set_data(output);
+  std::vector<lite::Tensor *> inputs = {&in_tensor0, &in_tensor1};
+  std::vector<lite::Tensor *> outputs = {&out_tensor0};
+
+  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, data_type, schema::PrimitiveType_CumSum};
+  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
+  ASSERT_NE(creator, nullptr);
+
+  if (thread_num != kContextDefaultThreads) {
+    ctx->thread_num_ = thread_num;
+  }
+  ASSERT_EQ(lite::RET_OK, ctx->Init());
+
+  // Zero-filled so no field is left indeterminate. The tests never released this block themselves
+  // while they did delete the kernel -- presumably the kernel takes it over; TODO confirm.
+  auto *parameter = reinterpret_cast<CumSumParameter *>(calloc(1, sizeof(CumSumParameter)));
+  ASSERT_NE(parameter, nullptr);
+  parameter->op_parameter_.type_ = schema::PrimitiveType_CumSum;
+  parameter->op_parameter_.infer_flag_ = true;
+  parameter->exclusive_ = exclusive;
+  parameter->reverse_ = reverse;
+
+  cumsum_kernel.reset(creator(inputs, outputs, reinterpret_cast<OpParameter *>(parameter), ctx.get(), desc));
+  ASSERT_NE(cumsum_kernel.get(), nullptr);
+  EXPECT_EQ(0, cumsum_kernel->Run());
+
+  for (size_t i = 0; i < N; ++i) {
+    std::cout << output[i] << " ";
+  }
+  std::cout << '\n';
+}
+
+// Compares a float result element-wise against the expected values.
+template <size_t N>
+void ExpectNear(const float (&expect)[N], const float (&actual)[N]) {
+  for (size_t i = 0; i < N; ++i) {
+    EXPECT_NEAR(expect[i], actual[i], 0.000001) << "at index " << i;
+  }
+}
+
+// Compares an int32 result element-wise against the expected values.
+template <size_t N>
+void ExpectEqual(const int (&expect)[N], const int (&actual)[N]) {
+  for (size_t i = 0; i < N; ++i) {
+    EXPECT_EQ(expect[i], actual[i]) << "at index " << i;
+  }
+}
+}  // namespace
+
+class TestCumsum : public mindspore::CommonTest {
+ public:
+  TestCumsum() {}
+};
+
+TEST_F(TestCumsum, TestThread1) {
+  float input[12] = {1, 1, 2, 2, 3, 3, 10, 10, 20, 20, 30, 30};
+  float output[12] = {0};
+  ASSERT_NO_FATAL_FAILURE(RunCumSum({2, 3, 2}, input, 1, false, false, 1, output));
+  const float expect[12] = {1, 1, 3, 3, 6, 6, 10, 10, 30, 30, 60, 60};
+  ExpectNear(expect, output);
+}
+
+TEST_F(TestCumsum, TestExclusive) {
+  float input[12] = {1, 1, 2, 2, 3, 3, 10, 10, 20, 20, 30, 30};
+  float output[12] = {0};
+  ASSERT_NO_FATAL_FAILURE(RunCumSum({2, 3, 2}, input, 1, true, false, kContextDefaultThreads, output));
+  const float expect[12] = {0, 0, 1, 1, 3, 3, 0, 0, 10, 10, 30, 30};
+  ExpectNear(expect, output);
+}
+
+TEST_F(TestCumsum, TestReverse) {
+  float input[12] = {1, 1, 2, 2, 3, 3, 10, 10, 20, 20, 30, 30};
+  float output[12] = {0};
+  ASSERT_NO_FATAL_FAILURE(RunCumSum({2, 3, 2}, input, 1, false, true, kContextDefaultThreads, output));
+  const float expect[12] = {6, 6, 5, 5, 3, 3, 60, 60, 50, 50, 30, 30};
+  ExpectNear(expect, output);
+}
+
+TEST_F(TestCumsum, TestReverseExclusive) {
+  float input[12] = {1, 1, 2, 2, 3, 3, 10, 10, 20, 20, 30, 30};
+  float output[12] = {0};
+  ASSERT_NO_FATAL_FAILURE(RunCumSum({2, 3, 2}, input, 1, true, true, kContextDefaultThreads, output));
+  const float expect[12] = {5, 5, 3, 3, 0, 0, 50, 50, 30, 30, 0, 0};
+  ExpectNear(expect, output);
+}
+
+TEST_F(TestCumsum, TestIntRank2) {
+  int input[6] = {1, 2, 3, 4, 5, 6};
+  int output[6] = {0};
+  ASSERT_NO_FATAL_FAILURE(RunCumSum({1, 6}, input, 1, false, false, 1, output));
+  const int expect[6] = {1, 3, 6, 10, 15, 21};
+  ExpectEqual(expect, output);
+}
+
+TEST_F(TestCumsum, TestIntRank2Thread2) {
+  int input[6] = {1, 2, 3, 4, 5, 6};
+  int output[6] = {0};
+  // The thread count is now set explicitly to match the test name instead of relying on the context default.
+  ASSERT_NO_FATAL_FAILURE(RunCumSum({1, 6}, input, 1, false, false, 2, output));
+  const int expect[6] = {1, 3, 6, 10, 15, 21};
+  ExpectEqual(expect, output);
+}
+
+TEST_F(TestCumsum, TestIntRank2Thread4) {
+  int input[6] = {1, 2, 3, 4, 5, 6};
+  int output[6] = {0};
+  ASSERT_NO_FATAL_FAILURE(RunCumSum({1, 6}, input, 1, false, false, 4, output));
+  const int expect[6] = {1, 3, 6, 10, 15, 21};
+  ExpectEqual(expect, output);
+}
+
+}  // namespace mindspore