From 8d1e65ad6b734d161307b5344b9545696dfd1bc8 Mon Sep 17 00:00:00 2001 From: wang_shaocong Date: Tue, 20 Oct 2020 15:44:40 +0800 Subject: [PATCH] Add implementation of transpose to adapt different dimensions. --- mindspore/lite/nnacl/transpose.c | 29 +++++++++++++- mindspore/lite/nnacl/transpose.h | 4 +- .../src/runtime/kernel/arm/fp32/transpose.cc | 38 ++++++++++++++++++- .../src/runtime/kernel/arm/fp32/transpose.h | 2 + mindspore/lite/test/models_onnx.cfg | 2 + .../kernel/arm/fp32/transpose_fp32_tests.cc | 6 +-- .../legacy_optimizer/graph/infershape_pass.cc | 2 +- 7 files changed, 74 insertions(+), 9 deletions(-) diff --git a/mindspore/lite/nnacl/transpose.c b/mindspore/lite/nnacl/transpose.c index fa7de54fbf8..95186d398f4 100644 --- a/mindspore/lite/nnacl/transpose.c +++ b/mindspore/lite/nnacl/transpose.c @@ -127,8 +127,30 @@ void TransposeDim5(float *in_data, float *out_data, int *strides, int *out_strid } } +void TransposeDims(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, + int h_start, int h_end, int dims, int *size, int *position) { + *(size + dims - 1) = 1; + for (int i = dims - 1; i > 0; --i) { + *(size + i - 1) = *(size + i) * output_shape[i]; + } + + for (size_t idx = 0; idx < (*size) * output_shape[0]; ++idx) { + int pos = idx; + int output_idx = 0; + int input_idx = 0; + for (int i = 0; i < dims; ++i) { + *(position + i) = pos / *(size + i); + int out_stride = i < dims - 1 ? 
out_strides[i] : 1; + output_idx += (*(position + i) * out_stride); + input_idx += (*(position + i) * strides[perm[i]]); + pos -= *(position + i) * (*(size + i)); + } + out_data[output_idx] = in_data[input_idx]; + } +} + int DoTranspose(float *in_data, float *out_data, int *input_shape, int *output_shape, - TransposeParameter *transpose_param, int h_start, int h_end) { + TransposeParameter *transpose_param, int h_start, int h_end, int *size, int *position) { if (in_data == NULL || out_data == NULL) { return NNACL_ERR; } @@ -138,7 +160,7 @@ int DoTranspose(float *in_data, float *out_data, int *input_shape, int *output_s int data_size = transpose_param->data_size_; int num_axes = transpose_param->num_axes_; - if (num_axes < 2 || num_axes > 5) { + if (num_axes < 2) { return NNACL_ERR; } @@ -163,6 +185,9 @@ int DoTranspose(float *in_data, float *out_data, int *input_shape, int *output_s TransposeDim4(in_data, out_data, strides, out_strides, perm, output_shape, h_start, h_end); } else if (num_axes == 5) { TransposeDim5(in_data, out_data, strides, out_strides, perm, output_shape, h_start, h_end); + } else { + TransposeDims(in_data, out_data, strides, out_strides, perm, output_shape, h_start, h_end, num_axes, size, + position); } return NNACL_OK; } diff --git a/mindspore/lite/nnacl/transpose.h b/mindspore/lite/nnacl/transpose.h index 823780d33eb..edb2c7e9218 100644 --- a/mindspore/lite/nnacl/transpose.h +++ b/mindspore/lite/nnacl/transpose.h @@ -33,7 +33,7 @@ typedef struct TransposeParameter { extern "C" { #endif int DoTranspose(float *in_data, float *out_data, int *input_shape, int *output_shape, - TransposeParameter *transpose_param, int h_start, int h_end); + TransposeParameter *transpose_param, int h_start, int h_end, int *size, int *position); void TransposeDim2(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, int h_start, int h_end); void TransposeDim3(float *in_data, float *out_data, int *strides, int *out_strides, int 
*perm, int *output_shape, @@ -42,6 +42,8 @@ void TransposeDim4(float *in_data, float *out_data, int *strides, int *out_strid int h_start, int h_end); void TransposeDim5(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, int h_start, int h_end); +void TransposeDims(float *in_data, float *out_data, int *strides, int *out_strides, int *perm, int *output_shape, + int h_start, int h_end, int dims, int *size, int *position); #ifdef __cplusplus } #endif diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose.cc index be9847f50c4..3eaa4fc31c2 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose.cc @@ -15,6 +15,7 @@ */ #include "src/runtime/kernel/arm/fp32/transpose.h" + #include <vector> #include "nnacl/transpose.h" #include "schema/model_generated.h" @@ -29,6 +30,10 @@ using mindspore::lite::RET_OP_EXECUTE_FAILURE; using mindspore::schema::PrimitiveType_Transpose; namespace mindspore::kernel { +namespace { +constexpr int maxDimSize = 5; +} // namespace + int TransposeCPUKernel::Init() { if (!InferShapeDone()) { return RET_OK; } @@ -90,8 +95,16 @@ int TransposeCPUKernel::TransposeParallel(int task_id) { } int thread_offset = task_id * thread_h_stride_; TransposeParameter *param = reinterpret_cast<TransposeParameter *>(this->op_parameter_); - auto ret = - DoTranspose(in_data_, out_data_, in_shape_, out_shape_, param, thread_offset, thread_offset + num_unit_thread); + + int *size = nullptr; + int *position = nullptr; + if (this->dim_size_ != nullptr && this->position_ != nullptr) { + size = this->dim_size_ + task_id * param->num_axes_; + position = this->position_ + task_id * param->num_axes_; + } + + auto ret = DoTranspose(in_data_, out_data_, in_shape_, out_shape_, param, thread_offset, + thread_offset + num_unit_thread, size, position); if (ret != RET_OK) { MS_LOG(ERROR) << "Transpose error task_id[" << task_id << "] 
error_code[" << ret << "]"; return RET_ERROR; @@ -120,8 +133,29 @@ int TransposeCPUKernel::Run() { } in_data_ = reinterpret_cast<float *>(in_tensor->MutableData()); out_data_ = reinterpret_cast<float *>(out_tensor->MutableData()); + int dims = out_tensor->shape().size(); + if (dims > maxDimSize) { + dim_size_ = reinterpret_cast<int *>(context_->allocator->Malloc(dims * thread_h_num_ * sizeof(int))); + if (dim_size_ == nullptr) { + MS_LOG(ERROR) << "Malloc data failed"; + return RET_ERROR; + } + position_ = reinterpret_cast<int *>(context_->allocator->Malloc(dims * thread_h_num_ * sizeof(int))); + if (position_ == nullptr) { + MS_LOG(ERROR) << "Malloc data failed"; + context_->allocator->Free(dim_size_); + dim_size_ = nullptr; + return RET_ERROR; + } + } auto ret = ParallelLaunch(this->context_->thread_pool_, TransposeRun, this, thread_h_num_); + if (dims > maxDimSize) { + context_->allocator->Free(dim_size_); + context_->allocator->Free(position_); + dim_size_ = nullptr; + position_ = nullptr; + } if (ret != RET_OK) { MS_LOG(ERROR) << "Tranpose error error_code[" << ret << "]"; return ret; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose.h b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose.h index 7025fab34a0..823ab97bf29 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose.h @@ -46,6 +46,8 @@ class TransposeCPUKernel : public LiteKernel { float *out_data_; int *in_shape_ = nullptr; int *out_shape_ = nullptr; + int *dim_size_ = nullptr; + int *position_ = nullptr; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/test/models_onnx.cfg b/mindspore/lite/test/models_onnx.cfg index b3796d968e9..ee3075f001c 100644 --- a/mindspore/lite/test/models_onnx.cfg +++ b/mindspore/lite/test/models_onnx.cfg @@ -6,3 +6,5 @@ gts_version-RFB-320_simplified.onnx mnist-8.onnx crnn_lite_lstm_v2.onnx:32,32,32,1 psenet_lite_mbv2.onnx:1,32,32,3 +super-resolution-10.onnx:1,224,224,1 +tinyyolov2-8.onnx:1,416,416,3 
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/transpose_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/transpose_fp32_tests.cc index 0e0e28f3f23..1749fcc4c0a 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/transpose_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/transpose_fp32_tests.cc @@ -64,7 +64,7 @@ TEST_F(TestTransposeFp32, TransposeFp32_axes4) { param->out_strides_[i] = out_strides[i]; } - auto ret = DoTranspose(in, out, input_shape, output_shape, param, 0, 3); + auto ret = DoTranspose(in, out, input_shape, output_shape, param, 0, 3, nullptr, nullptr); ASSERT_EQ(ret, 0); delete param; CompareOutputData(out, correct, 24, 0.000001); @@ -104,7 +104,7 @@ TEST_F(TestTransposeFp32, TransposeFp32_axes3) { param->out_strides_[i] = out_strides[i]; } - auto ret = DoTranspose(in, out, input_shape, output_shape, param, 0, 3); + auto ret = DoTranspose(in, out, input_shape, output_shape, param, 0, 3, nullptr, nullptr); ASSERT_EQ(ret, 0); delete param; CompareOutputData(out, correct, 24, 0.000001); @@ -145,7 +145,7 @@ TEST_F(TestTransposeFp32, TransposeFp32_axes2) { param->out_strides_[i] = out_strides[i]; } - auto ret = DoTranspose(in, out, input_shape, output_shape, param, 0, 6); + auto ret = DoTranspose(in, out, input_shape, output_shape, param, 0, 6, nullptr, nullptr); ASSERT_EQ(ret, 0); delete param; CompareOutputData(out, correct, 24, 0.000001); diff --git a/mindspore/lite/tools/converter/legacy_optimizer/graph/infershape_pass.cc b/mindspore/lite/tools/converter/legacy_optimizer/graph/infershape_pass.cc index 0ea1da47ce1..61f4f08e64c 100644 --- a/mindspore/lite/tools/converter/legacy_optimizer/graph/infershape_pass.cc +++ b/mindspore/lite/tools/converter/legacy_optimizer/graph/infershape_pass.cc @@ -92,7 +92,7 @@ STATUS InferShapePass::Run(MetaGraphT *graph) { auto input_tensor = graph->allTensors[idx].get(); for (auto &dim : input_tensor->dims) { if (dim == 0) { - MS_LOG(WARNING) << "One 
dimension of the input shape is 0, which would be set to 32 as a default value."; + MS_LOG(WARNING) << "One dimension of the input shape is 0, which would be set to -1 as a default value."; dim = DEFAULT_DIM_VALUE; } }