From 77c83d18cf004748b2c9349406ab2940b43c9bba Mon Sep 17 00:00:00 2001 From: shaoxiangdong Date: Fri, 30 Apr 2021 17:04:05 +0800 Subject: [PATCH] modify split cpu kernel --- .../kernel_compiler/cpu/split_cpu_kernel.cc | 71 +++++++++++++------ .../kernel_compiler/cpu/split_cpu_kernel.h | 10 ++- 2 files changed, 57 insertions(+), 24 deletions(-) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/split_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/split_cpu_kernel.cc index a86ac4a6469..26fa98ac5c0 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/split_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/split_cpu_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,8 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include #include "backend/kernel_compiler/cpu/split_cpu_kernel.h" #include "runtime/device/cpu/cpu_device_address.h" +#include "common/thread_pool.h" namespace mindspore { namespace kernel { @@ -29,20 +31,19 @@ void SplitCPUKernel::InitKernel(const CNodePtr &kernel_node) { template void SplitCPUKernel::Reshape() { - input_size_ = 1; - dims_current_after_axis_ = 1; - dims_after_axis_ = 1; - axis_step_ = input_shape_[axis_] / output_num_; + param_ = new SplitParameter(); + param_->num_split_ = output_num_; + param_->split_dim_ = axis_ >= 0 ? axis_ : input_shape_.size() + axis_; - for (int i = 0; i < SizeToInt(input_shape_.size()); i++) { - input_size_ *= input_shape_[i]; - if (i > axis_) { - dims_current_after_axis_ *= input_shape_[i]; - dims_after_axis_ *= input_shape_[i]; - } - if (i == axis_) { - dims_current_after_axis_ *= input_shape_[i]; - } + param_->strides_[input_shape_.size() - 1] = 1; + for (int i = input_shape_.size() - 2; i >= 0; i--) { + param_->strides_[i] = param_->strides_[i + 1] * input_shape_[i + 1]; + } + + param_->split_sizes_ = new int[sizeof(int) * param_->num_split_]; + int split_size = input_shape_[param_->split_dim_] / output_num_; + for (int i = 0; i < param_->num_split_; i++) { + param_->split_sizes_[i] = split_size; } } @@ -61,17 +62,42 @@ bool SplitCPUKernel::Launch(const std::vector &inputs, } template -void SplitCPUKernel::LaunchSplit(const T *input, T **output, size_t size) { +void SplitCPUKernel::LaunchSplit(T *input, T **output, size_t size) { + (void)std::transform(input_shape_.begin(), input_shape_.end(), std::back_inserter(input_shape_int_), + [](const int &value) { return static_cast(value); }); + auto max_thread_num = common::ThreadPool::GetInstance().GetSyncRunThreadNum(); + const float block_size = 128.0; + size_t thread_num = size < block_size * max_thread_num ? std::ceil(size / block_size) : max_thread_num; + + param_->split_count_ = size / (input_shape_[param_->split_dim_] * param_->strides_[param_->split_dim_]); + int num_unit = param_->split_count_ * param_->num_split_; + int thread_n_stride; + if (thread_num != 0) { + thread_n_stride = UP_DIV(num_unit, thread_num); + } + auto task = [&](size_t start, size_t end) { - for (size_t i = start; i < end; i++) { - int num = i % dims_current_after_axis_ / dims_after_axis_; - int block = num / axis_step_; - int block_pos = i / dims_current_after_axis_ * axis_step_ * dims_after_axis_ + - num % axis_step_ * dims_after_axis_ + i % dims_after_axis_; - output[block][block_pos] = input[i]; - } + int task_id = start / (size / thread_num); + int thread_offset = task_id * thread_n_stride; + int num_unit_thread = MSMIN(thread_n_stride, num_unit - task_id * thread_n_stride); + DoSplit(input, reinterpret_cast(output), &input_shape_int_[0], thread_offset, num_unit_thread, param_, + sizeof(T)); }; CPUKernelUtils::ParallelFor(task, size); + + return; +} + +template +void SplitCPUKernel::FreeTmpBuff() { + if (param_->split_sizes_ != nullptr) { + delete[] param_->split_sizes_; + param_->split_sizes_ = nullptr; + } + if (param_ != nullptr) { + delete param_; + param_ = nullptr; + } return; } @@ -86,6 +112,7 @@ void SplitCPUKernel::LaunchKernel(const std::vector &inputs, } size_t size = static_cast(inputs[0]->size / sizeof(T)); LaunchSplit(input, output, size); + FreeTmpBuff(); return; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/split_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/split_cpu_kernel.h index f4c74c3d6c8..b74def2b966 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/split_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/split_cpu_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include "backend/kernel_compiler/cpu/cpu_kernel.h" #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/nnacl/base/split_base.h" namespace mindspore { namespace kernel { @@ -42,7 +43,8 @@ class SplitCPUKernel : public CPUKernel { private: void CheckParam(const CNodePtr &kernel_node); void Reshape(); - void LaunchSplit(const T *input, T **output, size_t size); + void LaunchSplit(T *input, T **output, size_t size); + void FreeTmpBuff(); int64_t axis_; int64_t output_num_; int64_t axis_step_; @@ -53,7 +55,11 @@ class SplitCPUKernel : public CPUKernel { std::vector> output_shape_list_; std::vector input_shape_; + std::vector input_shape_int_; TypeId dtype_{kTypeUnknown}; + + protected: + SplitParameter *param_ = nullptr; }; MS_REG_CPU_KERNEL_T(