diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/batchtospace_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/batchtospace_gpu_kernel.h index 5f5a5633863..b9ff29cb8ff 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/batchtospace_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/batchtospace_gpu_kernel.h @@ -19,15 +19,13 @@ #include #include +#include #include "plugin/device/gpu/kernel/gpu_kernel.h" #include "plugin/device/gpu/kernel/gpu_kernel_factory.h" -#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/batchtospace_impl.cuh" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_class/batchtospace_helper.h" namespace mindspore { namespace kernel { -constexpr size_t SHAPE_SIZE = 4; -constexpr size_t CROPS_SHAPE_0 = 2; -constexpr size_t CROPS_SHAPE_1 = 2; template class BatchToSpaceGpuKernelMod : public NativeGpuKernelMod { public: @@ -36,139 +34,55 @@ class BatchToSpaceGpuKernelMod : public NativeGpuKernelMod { bool Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs, void *stream_ptr) override { - T *input = GetDeviceAddress(inputs, 0); - T *output = GetDeviceAddress(outputs, 0); - - size_t size = output_size_ / sizeof(T); - - CalBatchToSpace(size, input, in_, ih_, iw_, ic_, on_, oh_, ow_, oc_, crops_[0][0], crops_[0][1], crops_[1][0], - crops_[1][1], block_size_, output, reinterpret_cast(stream_ptr)); + std::vector input_addrs = ConvertPtrs(inputs); + std::vector work_addrs = ConvertPtrs(workspace); + std::vector output_addrs = ConvertPtrs(outputs); + int flag = helper_ptr_->Process(input_addrs, output_addrs, work_addrs, stream_ptr); + if (flag != 0) { + return false; + } return true; } bool Init(const CNodePtr &kernel_node) override { - kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); kernel_node_ = kernel_node; - (void)CheckParam(kernel_node); - input_size_ = sizeof(T); - for (size_t idx = 0; idx < input_shape_.size(); ++idx) { - input_size_ *= 
input_shape_[idx]; + kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node); + + helper_ptr_ = std::make_unique>(kernel_name_); + helper_ptr_->ResetResource(); + + std::vector> input_shapes; + std::vector> output_shapes; + auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); + auto output_shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0); + input_shapes.emplace_back(input_shape); + output_shapes.emplace_back(output_shape); + attr_.block_size = GetAttr(kernel_node, "block_size"); + attr_.crops = GetAttr>>(kernel_node, "crops"); + attr_.input_shape = input_shape; + int flag = helper_ptr_->CheckKernelParam(&attr_); + if (flag != 0) { + return false; } - in_ = input_shape_[0]; - ic_ = input_shape_[1]; - ih_ = input_shape_[2]; - iw_ = input_shape_[3]; - - on_ = in_ / (block_size_ * block_size_); - oc_ = ic_; - oh_ = ih_ * block_size_ - crops_[0][0] - crops_[0][1]; - ow_ = iw_ * block_size_ - crops_[1][0] - crops_[1][1]; - output_size_ = on_ * oc_ * oh_ * ow_ * sizeof(T); + flag = helper_ptr_->CalMemSize(input_shapes, output_shapes); + if (flag != 0) { + return false; + } InitSizeLists(); return true; } - void ResetResource() noexcept override { - in_ = 0; - ic_ = 0; - ih_ = 0; - iw_ = 0; - on_ = 0; - oc_ = 0; - oh_ = 0; - ow_ = 0; - kernel_name_ = "BatchToSpace"; - input_size_list_.clear(); - output_size_list_.clear(); - crops_.clear(); - input_shape_.clear(); - } protected: void InitSizeLists() override { - input_size_list_.push_back(input_size_); - output_size_list_.push_back(output_size_); - } - - void CheckParam(const CNodePtr &kernel_node) { - block_size_ = GetAttr(kernel_node, "block_size"); - if (block_size_ < 1) { - MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'block_size' cannot be less than 1, but got " - << block_size_; - } - size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node); - if (input_num != 1) { - MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got 
" << input_num; - } - size_t output_num = common::AnfAlgo::GetOutputTensorNum(kernel_node); - if (output_num != 1) { - MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num; - } - - // check input_shape - auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); - if (input_shape.size() != SHAPE_SIZE) { - MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input should be 4, but got " - << input_shape.size(); - } - if ((input_shape[0] % (block_size_ * block_size_)) != 0) { - MS_LOG(EXCEPTION) << "For '" << kernel_name_ - << "', input_shape[0] should be divisible by product of block_shape, but got input_shape[0]: " - << input_shape[0] << ", block_shape: " << block_size_; - } - for (size_t idx = 0; idx < SHAPE_SIZE; ++idx) { - if (input_shape[idx] < 1) { - MS_LOG(EXCEPTION) << "For '" << kernel_name_ - << "', the element of shape of input cannot be less than 1, but got " - << CONVERT_VECTOR_TO_STRING(input_shape); - } - } - input_shape_.assign(input_shape.begin(), input_shape.end()); - - // check crops - crops_ = (GetAttr>>(kernel_node, "crops")); - - if (crops_.size() != CROPS_SHAPE_0) { - MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the size of 'crops' should be " << CROPS_SHAPE_0 - << ", but got " << crops_.size(); - } - if (crops_[0].size() != CROPS_SHAPE_1 || crops_[1].size() != CROPS_SHAPE_1) { - MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the size of element of 'crops' should be " << CROPS_SHAPE_1 - << ", but got the size of crops[0]: " << crops_[0].size() - << ", the size of crops[1]: " << crops_[1].size(); - } else { - for (size_t idx_i = 0; idx_i < CROPS_SHAPE_0; ++idx_i) { - for (size_t idx_j = 0; idx_j < CROPS_SHAPE_1; ++idx_j) { - if (crops_[idx_i][idx_j] < 0) { - MS_LOG(EXCEPTION) << "For '" << kernel_name_ - << "', the element of 'crops' should be greater than or equal to 0, but got crops[" - << idx_i << "][" << idx_j << "]: " << 
crops_[idx_i][idx_j]; - } - } - auto tmp_shape = input_shape[idx_i + CROPS_SHAPE_1] * block_size_ - crops_[idx_i][0] - crops_[idx_i][1]; - if (tmp_shape <= 0) { - MS_LOG(EXCEPTION) << "For '" << kernel_name_ - << "', the element of shape of output should be greater than 0, but got " << tmp_shape; - } - } - } + input_size_list_ = helper_ptr_->GetInputSizeList(); + output_size_list_ = helper_ptr_->GetOutputSizeList(); } private: - std::vector> crops_; - std::vector input_shape_; - size_t block_size_; - size_t input_size_; - size_t output_size_; - size_t in_; - size_t ic_; - size_t ih_; - size_t iw_; - size_t on_; - size_t oc_; - size_t oh_; - size_t ow_; std::string kernel_name_; + std::unique_ptr> helper_ptr_ = nullptr; + cukernel::BatchToSpaceAttr attr_; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unique_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unique_gpu_kernel.h index ef29d98e35b..323034636e1 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unique_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/arrays/unique_gpu_kernel.h @@ -18,9 +18,10 @@ #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_UNIQUE_GPU_KERNEL_H_ #include +#include #include "plugin/device/gpu/kernel/gpu_kernel.h" #include "plugin/device/gpu/kernel/gpu_kernel_factory.h" -#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/unique_impl.cuh" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_class/unique_helper.h" namespace mindspore { namespace kernel { template @@ -34,32 +35,31 @@ class UniqueGpuKernelMod : public NativeGpuKernelMod { if (is_null_input_) { return true; } - T *input = GetDeviceAddress(inputs, 0); - S *input_index = GetDeviceAddress(workspace, 0); - S *sorted_index = GetDeviceAddress(workspace, 1); - T *output = GetDeviceAddress(outputs, 0); - S *index = GetDeviceAddress(outputs, 1); stream_ptr_ = stream_ptr; - post_output_size_ = CalUnique(input, num_elements_, 
input_index, sorted_index, output, index, - reinterpret_cast(stream_ptr)); + std::vector input_ptrs = ConvertPtrs(inputs); + std::vector work_ptrs = ConvertPtrs(workspace); + std::vector output_ptrs = ConvertPtrs(outputs); + if (helper_ptr_->Process(input_ptrs, output_ptrs, work_ptrs, stream_ptr) != 0) { + return false; + } return true; } bool Init(const CNodePtr &kernel_node) override { - auto kernel_name = common::AnfAlgo::GetCNodeName(kernel_node); kernel_node_ = kernel_node; + auto kernel_name = common::AnfAlgo::GetCNodeName(kernel_node); + helper_ptr_ = std::make_unique>(kernel_name); + helper_ptr_->ResetResource(); + std::vector> input_shapes; + std::vector> output_shapes; std::vector shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name, "input"); if (is_null_input_) { InitSizeLists(); return true; } - for (auto x : shape) { - num_elements_ *= x; - } - input_size_ = num_elements_ * sizeof(T); - output_size_ = input_size_; - workspace_size_ = num_elements_ * sizeof(S); + input_shapes.emplace_back(shape); + helper_ptr_->CalMemSize(input_shapes, output_shapes); InitSizeLists(); return true; } @@ -73,7 +73,7 @@ class UniqueGpuKernelMod : public NativeGpuKernelMod { for (size_t i = 0; i < output_num; ++i) { std::vector shape = common::AnfAlgo::GetOutputInferShape(kernel_node_.lock(), i); if (i == 0) { - shape[0] = post_output_size_; + shape[0] = helper_ptr_->GetOutSize(); } TypeId type_id = common::AnfAlgo::GetOutputInferDataType(kernel_node_.lock(), i); type_ids.emplace_back(type_id); @@ -83,11 +83,6 @@ class UniqueGpuKernelMod : public NativeGpuKernelMod { } void ResetResource() noexcept override { - input_size_ = 0; - output_size_ = 0; - workspace_size_ = 0; - num_elements_ = 1; - post_output_size_ = 0; is_null_input_ = false; stream_ptr_ = nullptr; input_size_list_.clear(); @@ -97,21 +92,15 @@ class UniqueGpuKernelMod : public NativeGpuKernelMod { protected: void InitSizeLists() override { - 
input_size_list_.push_back(input_size_); - output_size_list_.push_back(output_size_); - output_size_list_.push_back(num_elements_ * sizeof(S)); - workspace_size_list_.push_back(workspace_size_); - workspace_size_list_.push_back(workspace_size_); + input_size_list_ = helper_ptr_->GetInputSizeList(); + output_size_list_ = helper_ptr_->GetOutputSizeList(); + workspace_size_list_ = helper_ptr_->GetWorkSizeList(); } private: void *stream_ptr_; - size_t input_size_; - size_t output_size_; - size_t workspace_size_; - int num_elements_; - int post_output_size_; bool is_null_input_; + std::unique_ptr> helper_ptr_ = nullptr; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_class/batchtospace_helper.h b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_class/batchtospace_helper.h new file mode 100644 index 00000000000..de74a417a00 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_class/batchtospace_helper.h @@ -0,0 +1,161 @@ +/** + * Copyright 2019-2022 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_BATCHTOSPACE_HELPER_H_ +#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_BATCHTOSPACE_HELPER_H_ +#include +#include +#include "plugin/device/gpu/kernel/cuda_impl/cuda_class/helper_base.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/batchtospace_impl.cuh" + +namespace mindspore { +namespace cukernel { +constexpr size_t INPUT_NUM = 1; +constexpr size_t OUTPUT_NUM = 1; +constexpr size_t SHAPE_SIZE = 4; +constexpr size_t CROPS_SHAPE_0 = 2; +constexpr size_t CROPS_SHAPE_1 = 2; + +struct BatchToSpaceAttr : public GpuKernelAttrBase { + std::vector> crops; + std::vector input_shape; + size_t block_size; +}; + +template +class BatchToSpaceHelperGpuKernel : public GpuKernelHelperBase { + public: + explicit BatchToSpaceHelperGpuKernel(std::string &kernel_name) : GpuKernelHelperBase(kernel_name) {} + virtual ~BatchToSpaceHelperGpuKernel() = default; + int CalMemSize(const std::vector> &input_shapes, + const std::vector> &output_shapes) override { + int flag = CalShapesSizeInBytes(input_shapes, INPUT_NUM, kernel_name_, "input_shapes", &input_size_list_); + if (flag != 0) { + return flag; + } + flag = CalShapesSizeInBytes(output_shapes, OUTPUT_NUM, kernel_name_, "output_shapes", &output_size_list_); + if (flag != 0) { + return flag; + } + kernel_size_ = output_size_list_[0] / sizeof(T); + return 0; + } + + int Process(const std::vector &input_ptrs, const std::vector &output_ptrs, + const std::vector &work_ptrs, void *cuda_stream) override { + size_t in = attr_ptr_->input_shape[0]; + size_t ic = attr_ptr_->input_shape[1]; + size_t ih = attr_ptr_->input_shape[2]; + size_t iw = attr_ptr_->input_shape[3]; + + size_t on = in / (attr_ptr_->block_size * attr_ptr_->block_size); + size_t oc = ic; + size_t oh = ih * attr_ptr_->block_size - attr_ptr_->crops[0][0] - attr_ptr_->crops[0][1]; + size_t ow = iw * attr_ptr_->block_size - attr_ptr_->crops[1][0] - attr_ptr_->crops[1][1]; + 
+ T *input_ptr = nullptr; + T *output_ptr = nullptr; + int flag = GetDeviceAddress(input_ptrs, 0, kernel_name_, &input_ptr); + if (flag != 0) { + return flag; + } + + flag = GetDeviceAddress(output_ptrs, 0, kernel_name_, &output_ptr); + if (flag != 0) { + return flag; + } + + CalBatchToSpace(kernel_size_, input_ptr, in, ih, iw, ic, on, oh, ow, oc, attr_ptr_->crops[0][0], + attr_ptr_->crops[0][1], attr_ptr_->crops[1][0], attr_ptr_->crops[1][1], attr_ptr_->block_size, + output_ptr, reinterpret_cast(cuda_stream)); + + return 0; + } + + void ResetResource() override { + kernel_size_ = 0; + input_size_list_.clear(); + output_size_list_.clear(); + work_size_list_.clear(); + } + int CheckKernelParam(GpuKernelAttrBase *kernel_attr) override { + attr_ptr_ = dynamic_cast(kernel_attr); + if (attr_ptr_->block_size < 1) { + MS_LOG(ERROR) << "For '" << kernel_name_ << "', the 'block_size' cannot be less than 1, but got " + << attr_ptr_->block_size; + return -1; + } + + // check input_shape + if (attr_ptr_->input_shape.size() != SHAPE_SIZE) { + MS_LOG(ERROR) << "For '" << kernel_name_ << "', the dimension of input should be 4, but got " + << attr_ptr_->input_shape.size(); + return -1; + } + if ((attr_ptr_->input_shape[0] % (attr_ptr_->block_size * attr_ptr_->block_size)) != 0) { + MS_LOG(ERROR) << "For '" << kernel_name_ + << "', input_shape[0] should be divisible by product of block_shape, but got input_shape[0]: " + << attr_ptr_->input_shape[0] << ", block_shape: " << attr_ptr_->block_size; + return -1; + } + for (size_t idx = 0; idx < SHAPE_SIZE; ++idx) { + if (attr_ptr_->input_shape[idx] < 1) { + MS_LOG(ERROR) << "For '" << kernel_name_ << "', the element of shape of input cannot be less than 1, but got " + << ConvertVectorToString(attr_ptr_->input_shape); + return -1; + } + } + + // check crops + if (attr_ptr_->crops.size() != CROPS_SHAPE_0) { + MS_LOG(ERROR) << "For '" << kernel_name_ << "', the size of 'crops' should be " << CROPS_SHAPE_0 << ", but got " + << 
attr_ptr_->crops.size(); + return -1; + } + if (attr_ptr_->crops[0].size() != CROPS_SHAPE_1 || attr_ptr_->crops[1].size() != CROPS_SHAPE_1) { + MS_LOG(ERROR) << "For '" << kernel_name_ << "', the size of element of 'crops' should be " << CROPS_SHAPE_1 + << ", but got the size of crops[0]: " << attr_ptr_->crops[0].size() + << ", the size of crops[1]: " << attr_ptr_->crops[1].size(); + return -1; + } else { + for (size_t idx_i = 0; idx_i < CROPS_SHAPE_0; ++idx_i) { + for (size_t idx_j = 0; idx_j < CROPS_SHAPE_1; ++idx_j) { + if (attr_ptr_->crops[idx_i][idx_j] < 0) { + MS_LOG(ERROR) << "For '" << kernel_name_ + << "', the element of 'crops' should be greater than or equal to 0, but got crops[" << idx_i + << "][" << idx_j << "]: " << attr_ptr_->crops[idx_i][idx_j]; + return -1; + } + } + auto tmp_shape = attr_ptr_->input_shape[idx_i + CROPS_SHAPE_1] * attr_ptr_->block_size - + attr_ptr_->crops[idx_i][0] - attr_ptr_->crops[idx_i][1]; + if (tmp_shape <= 0) { + MS_LOG(ERROR) << "For '" << kernel_name_ + << "', the element of shape of output should be greater than 0, but got " << tmp_shape; + return -1; + } + } + } + return 0; + } + + private: + BatchToSpaceAttr *attr_ptr_; + size_t kernel_size_; +}; +} // namespace cukernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_BATCHTOSPACE_HELPER_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_class/cuda_class_common.h b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_class/cuda_class_common.h new file mode 100644 index 00000000000..ebd0eeeed83 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_class/cuda_class_common.h @@ -0,0 +1,89 @@ +/** + * Copyright 2022 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_COMMON_H_
+#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_COMMON_H_
+
+#include
+#include
+#include "mindspore/core/utils/log_adapter.h"
+namespace mindspore {
+namespace cukernel {
+// NOTE(review): error reporting uses a coarse int code (0 = success, -1 = failure); refine in a follow-up.
+
+inline std::string ConvertVectorToString(const std::vector &value) {
+  std::stringstream ss;
+  ss << "(";
+  for (auto it = value.begin(); it != value.end(); it++) {
+    if (it == value.begin()) {
+      ss << *it;
+    } else {
+      ss << ", " << *it;
+    }
+  }
+  ss << ")";
+  return ss.str();
+}
+
+template
+int CalShapesSizeInBytes(const std::vector> &shapes, const size_t shape_num,
+                         const std::string kernel_name, const std::string param_name,
+                         std::vector *shapes_size) {
+  if (shape_num != shapes.size()) {
+    MS_LOG(ERROR) << "For '" << kernel_name << "', the number of " << param_name << " should be equal to " << shape_num
+                  << ", but got " << shapes.size();
+    return -1;
+  }
+  int return_flag = 0;
+  for (size_t idx = 0; idx < shape_num; ++idx) {
+    size_t cur_size = sizeof(T);
+    if (shapes[idx].size() == 0) {
+      // Rank-0 (scalar) shape: its buffer size is just sizeof(T).
+      MS_LOG(WARNING) << "For '" << kernel_name << "', the shapes[" << idx << "] is ( )";
+      shapes_size->emplace_back(cur_size);
+      continue;
+    }
+    for (const auto &val : shapes[idx]) {
+      cur_size *= val;
+    }
+    if (cur_size == 0) {
+      MS_LOG(WARNING) << "For '" << kernel_name << "', the shape cannot contain zero, but got shapes[" << idx << "] is "
+                      << ConvertVectorToString(shapes[idx]);
+      return_flag = -1;
+    }
+    shapes_size->emplace_back(cur_size);
+  }
+ 
return return_flag; +} + +template +inline int GetDeviceAddress(const std::vector &addr_list, const size_t index, const std::string kernel_name, + T **out_ptr) { + if (index >= addr_list.size()) { + MS_LOG(ERROR) << "Address index(" << index << ") out of range(" << addr_list.size() << ")"; + return -1; + } + + if (addr_list[index] == nullptr) { + MS_LOG(ERROR) << "The device address is empty, address index: " << index << ", op name is: " << kernel_name; + return -1; + } + *out_ptr = reinterpret_cast(addr_list[index]); + return 0; +} +} // namespace cukernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_COMMON_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_class/helper_base.h b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_class/helper_base.h new file mode 100644 index 00000000000..efbdccb8053 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_class/helper_base.h @@ -0,0 +1,63 @@ +/** + * Copyright 2022 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_HELPER_BASE_H_ +#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_HELPER_BASE_H_ + +#include +#include +#include "mindspore/core/utils/log_adapter.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_class/cuda_class_common.h" +namespace mindspore { +namespace cukernel { +struct GpuKernelAttrBase { + virtual ~GpuKernelAttrBase() = default; +}; + +class GpuKernelHelperBase { + public: + explicit GpuKernelHelperBase(std::string &kernel_name) : kernel_name_(kernel_name) {} + virtual ~GpuKernelHelperBase() { + input_size_list_.clear(); + output_size_list_.clear(); + work_size_list_.clear(); + } + + virtual int CalMemSize(const std::vector> &input_shapes, + const std::vector> &output_shapes) = 0; + + virtual int Process(const std::vector &input_ptrs, const std::vector &output_ptrs, + const std::vector &work_ptrs, void *cuda_stream) = 0; + + virtual void ResetResource() { + MS_LOG(ERROR) << "kernel must override the `ResetResource()` method when dynamic shape"; + } + + std::vector GetInputSizeList() { return input_size_list_; } + std::vector GetOutputSizeList() { return output_size_list_; } + std::vector GetWorkSizeList() { return work_size_list_; } + + virtual int CheckKernelParam(GpuKernelAttrBase *kernel_attr) { return 0; } + + protected: + std::vector input_size_list_; + std::vector output_size_list_; + std::vector work_size_list_; + std::string kernel_name_; +}; +} // namespace cukernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_HELPER_BASE_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_class/unary_helper.h b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_class/unary_helper.h new file mode 100644 index 00000000000..d52d99a9025 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_class/unary_helper.h @@ -0,0 +1,147 @@ +/** + * Copyright 2019-2022 Huawei 
Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_UNARY_HELPER_H_ +#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_UNARY_HELPER_H_ +#include +#include +#include +#include "plugin/device/gpu/kernel/cuda_impl/cuda_class/helper_base.h" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/unary_op_impl.cuh" + +namespace mindspore { +namespace cukernel { +enum UnaryOptype { + UNARY_OP_EXP = 0, + UNARY_OP_EXPM1, + UNARY_OP_LOG, + UNARY_OP_LOG1P, + UNARY_OP_ERF, + UNARY_OP_ERFC, + UNARY_OP_NEG, + UNARY_OP_RECIPROCAL, + UNARY_OP_SQUARE, + UNARY_OP_SQRT, + UNARY_OP_RSQRT, + UNARY_OP_SIN, + UNARY_OP_COS, + UNARY_OP_ASIN, + UNARY_OP_ACOS, + UNARY_OP_ATAN, + UNARY_OP_ASINH, + UNARY_OP_ACOSH, + UNARY_OP_ABS, + UNARY_OP_FLOOR, + UNARY_OP_RINT, + UNARY_OP_ROUND, + UNARY_OP_SIGN, + UNARY_OP_REAL, + UNARY_OP_IMAG, + UNARY_OP_CONJ, + UNARY_OP_INVALID_TYPE = 255 +}; + +static const std::map kUnaryOpTypeMap = { + {"Exp", UNARY_OP_EXP}, {"Expm1", UNARY_OP_EXPM1}, + {"Log", UNARY_OP_LOG}, {"Log1p", UNARY_OP_LOG1P}, + {"Erf", UNARY_OP_ERF}, {"Erfc", UNARY_OP_ERFC}, + {"Neg", UNARY_OP_NEG}, {"Reciprocal", UNARY_OP_RECIPROCAL}, + {"Square", UNARY_OP_SQUARE}, {"Sqrt", UNARY_OP_SQRT}, + {"Rsqrt", UNARY_OP_RSQRT}, {"Sin", UNARY_OP_SIN}, + {"Cos", UNARY_OP_COS}, {"Asin", UNARY_OP_ASIN}, + {"ACos", UNARY_OP_ACOS}, {"Atan", UNARY_OP_ATAN}, + {"Asinh", UNARY_OP_ASINH}, 
{"Acosh", UNARY_OP_ACOSH}, + {"Abs", UNARY_OP_ABS}, {"Floor", UNARY_OP_FLOOR}, + {"Rint", UNARY_OP_RINT}, {"Round", UNARY_OP_ROUND}, + {"Real", UNARY_OP_REAL}, {"Imag", UNARY_OP_IMAG}, + {"Sign", UNARY_OP_SIGN}, {"Conj", UNARY_OP_CONJ}}; + +template +class UnaryHelperGpuKernel : public GpuKernelHelperBase { + public: + explicit UnaryHelperGpuKernel(std::string &kernel_name) : GpuKernelHelperBase(kernel_name) {} + virtual ~UnaryHelperGpuKernel() = default; + int CalMemSize(const std::vector> &input_shapes, + const std::vector> &output_shapes) override { + auto iter = kUnaryOpTypeMap.find(kernel_name_); + if (iter == kUnaryOpTypeMap.end()) { + MS_LOG(ERROR) << "For '" << kernel_name_ << ", only support these types: Exp, Expm1, Log, Log1p, Erf, Erfc," + << " Neg, Reciprocal, Square, Sqrt, Rsqrt, Sin, Cos, Asin, ACos, Atan, Asinh, Acosh, Abs, " + << "Floor, Rint, Round, Real, Imag, Sign, Conj currently, but got " << kernel_name_; + return -1; + } + + unary_op_type_ = iter->second; + int flag = CalShapesSizeInBytes(input_shapes, 1, kernel_name_, "input_shapes", &input_size_list_); + output_size_list_ = input_size_list_; + if (flag != 0) { + return flag; + } + return 0; + } + + int Process(const std::vector &input_ptrs, const std::vector &output_ptrs, + const std::vector &work_ptrs, void *cuda_stream) override { + static std::map> func_map = { + {UNARY_OP_EXP, Exponential}, {UNARY_OP_EXPM1, Expm1}, + {UNARY_OP_LOG, Logarithm}, {UNARY_OP_LOG1P, Log1p}, + {UNARY_OP_ERF, Erf}, {UNARY_OP_ERFC, Erfc}, + {UNARY_OP_NEG, Negative}, {UNARY_OP_RECIPROCAL, Reciprocal}, + {UNARY_OP_SQUARE, Square}, {UNARY_OP_SQRT, Sqrt}, + {UNARY_OP_RSQRT, Rsqrt}, {UNARY_OP_SIN, Sin}, + {UNARY_OP_COS, Cos}, {UNARY_OP_ASIN, Asin}, + {UNARY_OP_ACOS, ACos}, {UNARY_OP_ATAN, Atan}, + {UNARY_OP_ASINH, Asinh}, {UNARY_OP_ACOSH, Acosh}, + {UNARY_OP_ABS, Abs}, {UNARY_OP_FLOOR, Floor}, + {UNARY_OP_RINT, Rint}, {UNARY_OP_ROUND, Round}, + {UNARY_OP_SIGN, Sign}}; + + auto iter = func_map.find(unary_op_type_); + 
if (iter != func_map.end()) { + T *input_addr; + T *output_addr; + int flag = GetDeviceAddress(input_ptrs, 0, kernel_name_, &input_addr); + if (flag != 0) { + return flag; + } + flag = GetDeviceAddress(output_ptrs, 0, kernel_name_, &output_addr); + if (flag != 0) { + return flag; + } + iter->second(input_addr, output_addr, input_size_list_[0] / sizeof(T), + reinterpret_cast(cuda_stream)); + } else { + MS_LOG(ERROR) << "For '" << kernel_name_ << ", only support these types: Exp, Expm1, Log, Log1p, Erf, Erfc," + << " Neg, Reciprocal, Square, Sqrt, Rsqrt, Sin, Cos, Asin, ACos, Atan, Asinh, Acosh, Abs, " + << "Floor, Rint, Round, Real, Imag, Sign, Conj currently, but got " << unary_op_type_; + return -1; + } + + return 0; + } + + void ResetResource() override { + unary_op_type_ = UNARY_OP_INVALID_TYPE; + input_size_list_.clear(); + output_size_list_.clear(); + } + + private: + UnaryOptype unary_op_type_; +}; +} // namespace cukernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_UNARY_HELPER_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_class/unique_helper.h b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_class/unique_helper.h new file mode 100644 index 00000000000..ad84d2e4699 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_class/unique_helper.h @@ -0,0 +1,105 @@ +/** + * Copyright 2019-2022 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_UNIQUE_HELPER_H_
+#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_UNIQUE_HELPER_H_
+#include
+#include
+#include "plugin/device/gpu/kernel/cuda_impl/cuda_class/helper_base.h"
+#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/unique_impl.cuh"
+
+namespace mindspore {
+namespace cukernel {
+constexpr size_t kUniqueInputNum = 1;
+// NOTE(review): the other constants that used to live here (OUTPUT_NUM, WORK_NUM,
+// SHAPE_SIZE, CROPS_SHAPE_0, CROPS_SHAPE_1) were copied from batchtospace_helper.h;
+// they are unused in this header and redefine identifiers that batchtospace_helper.h
+// already declares in namespace cukernel, which breaks any translation unit that
+// includes both headers, so they have been removed.
+
+template
+class UniqueHelperGpuKernel : public GpuKernelHelperBase {
+ public:
+  explicit UniqueHelperGpuKernel(std::string &kernel_name) : GpuKernelHelperBase(kernel_name) {}
+  virtual ~UniqueHelperGpuKernel() = default;
+  int CalMemSize(const std::vector> &input_shapes,
+                 const std::vector> &output_shapes) override {
+    int flag = CalShapesSizeInBytes(input_shapes, kUniqueInputNum, kernel_name_, "input_shapes", &input_size_list_);
+    if (flag != 0) {
+      return flag;
+    }
+    num_elements_ = input_size_list_[0] / sizeof(T);
+    size_t workspace_size = num_elements_ * sizeof(S);
+    work_size_list_.emplace_back(workspace_size);
+    work_size_list_.emplace_back(workspace_size);
+    output_size_list_.emplace_back(input_size_list_[0]);
+    output_size_list_.emplace_back(num_elements_ * sizeof(S));
+    return 0;
+  }
+
+  int Process(const std::vector &input_ptrs, const std::vector &output_ptrs,
+              const std::vector &work_ptrs, void *cuda_stream) override {
+    T *t_input_ptr = nullptr;
+    S *s_input_index = nullptr;
+    S *s_sorted_index = nullptr;
+    T *t_output_ptr = nullptr;
+    S *s_output_index = nullptr;
+    int flag = GetDeviceAddress(input_ptrs, 0, kernel_name_, &t_input_ptr);
+    if (flag != 0) {
+      return flag;
+    }
+
+    flag = GetDeviceAddress(work_ptrs, 0, kernel_name_, &s_input_index);
+    if (flag != 
0) { + return flag; + } + + flag = GetDeviceAddress(work_ptrs, 1, kernel_name_, &s_sorted_index); + if (flag != 0) { + return flag; + } + flag = GetDeviceAddress(output_ptrs, 0, kernel_name_, &t_output_ptr); + if (flag != 0) { + return flag; + } + + flag = GetDeviceAddress(output_ptrs, 1, kernel_name_, &s_output_index); + if (flag != 0) { + return flag; + } + + post_output_size_ = CalUnique(t_input_ptr, num_elements_, s_input_index, s_sorted_index, t_output_ptr, + s_output_index, reinterpret_cast(cuda_stream)); + return 0; + } + + void ResetResource() override { + num_elements_ = 1; + post_output_size_ = 0; + input_size_list_.clear(); + output_size_list_.clear(); + work_size_list_.clear(); + } + + int GetOutSize() { return post_output_size_; } + + private: + int num_elements_; + int post_output_size_; +}; +} // namespace cukernel +} // namespace mindspore +#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_UNIQUE_HELPER_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/gpu_kernel.h index 1f37bd33bfd..11693da6a40 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/gpu_kernel.h @@ -113,6 +113,14 @@ class NativeGpuKernelMod : public GpuKernelMod { return reinterpret_cast(addr_list[index]->addr); } + std::vector ConvertPtrs(const std::vector &input_ptrs) { + std::vector out_ptrs; + for (auto &cur_addr : input_ptrs) { + out_ptrs.emplace_back(cur_addr->addr); + } + return out_ptrs; + } + template inline T *GetPossiblyNullDeviceAddress(const std::vector &addr_list, size_t index) { if (index >= addr_list.size()) { diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/unary_op_complex_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/unary_op_complex_gpu_kernel.h index 7027b825fd1..4a46d709a3e 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/math/unary_op_complex_gpu_kernel.h +++ 
b/mindspore/ccsrc/plugin/device/gpu/kernel/math/unary_op_complex_gpu_kernel.h @@ -39,21 +39,21 @@ class UnaryOpComplexGpuKernelMod : public NativeGpuKernelMod { S *output_addr = GetDeviceAddress(outputs, 0); switch (unary_op_type_) { - case UNARY_OP_REAL: { + case cukernel::UNARY_OP_REAL: { if constexpr (!std::is_same>::value && !std::is_same>::value) { Real(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast(stream_ptr)); } break; } - case UNARY_OP_IMAG: { + case cukernel::UNARY_OP_IMAG: { if constexpr (!std::is_same>::value && !std::is_same>::value) { Imag(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast(stream_ptr)); } break; } - case UNARY_OP_CONJ: { + case cukernel::UNARY_OP_CONJ: { if constexpr (std::is_same::value && !std::is_same::value) { Conj(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast(stream_ptr)); } @@ -112,8 +112,8 @@ class UnaryOpComplexGpuKernelMod : public NativeGpuKernelMod { private: void GetOpType(const CNodePtr &kernel_node) { std::string kernel_name = common::AnfAlgo::GetCNodeName(kernel_node); - static std::map kComplexSupportedTypeMap = { - {"Real", UNARY_OP_REAL}, {"Imag", UNARY_OP_IMAG}, {"Conj", UNARY_OP_CONJ}}; + static std::map kComplexSupportedTypeMap = { + {"Real", cukernel::UNARY_OP_REAL}, {"Imag", cukernel::UNARY_OP_IMAG}, {"Conj", cukernel::UNARY_OP_CONJ}}; auto iter = kComplexSupportedTypeMap.find(kernel_name); if (iter != kComplexSupportedTypeMap.end()) { unary_op_type_ = iter->second; @@ -128,7 +128,7 @@ class UnaryOpComplexGpuKernelMod : public NativeGpuKernelMod { size_t output_size_; size_t workspace_size_; bool is_null_input_; - UnaryOptype unary_op_type_; + cukernel::UnaryOptype unary_op_type_; }; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/math/unary_op_gpu_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/math/unary_op_gpu_kernel.h index 4b6cd6183f3..3f469c33169 100644 --- 
a/mindspore/ccsrc/plugin/device/gpu/kernel/math/unary_op_gpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/math/unary_op_gpu_kernel.h @@ -21,58 +21,13 @@ #include #include #include -#include +#include #include "plugin/device/gpu/kernel/gpu_kernel.h" #include "plugin/device/gpu/kernel/gpu_kernel_factory.h" -#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/unary_op_impl.cuh" +#include "plugin/device/gpu/kernel/cuda_impl/cuda_class/unary_helper.h" namespace mindspore { namespace kernel { -enum UnaryOptype { - UNARY_OP_EXP = 0, - UNARY_OP_EXPM1, - UNARY_OP_LOG, - UNARY_OP_LOG1P, - UNARY_OP_ERF, - UNARY_OP_ERFC, - UNARY_OP_NEG, - UNARY_OP_RECIPROCAL, - UNARY_OP_SQUARE, - UNARY_OP_SQRT, - UNARY_OP_RSQRT, - UNARY_OP_SIN, - UNARY_OP_COS, - UNARY_OP_ASIN, - UNARY_OP_ACOS, - UNARY_OP_ATAN, - UNARY_OP_ASINH, - UNARY_OP_ACOSH, - UNARY_OP_ABS, - UNARY_OP_FLOOR, - UNARY_OP_RINT, - UNARY_OP_ROUND, - UNARY_OP_SIGN, - UNARY_OP_REAL, - UNARY_OP_IMAG, - UNARY_OP_CONJ, - UNARY_OP_INVALID_TYPE = 255 -}; - -static const std::map kUnaryOpTypeMap = { - {"Exp", UNARY_OP_EXP}, {"Expm1", UNARY_OP_EXPM1}, - {"Log", UNARY_OP_LOG}, {"Log1p", UNARY_OP_LOG1P}, - {"Erf", UNARY_OP_ERF}, {"Erfc", UNARY_OP_ERFC}, - {"Neg", UNARY_OP_NEG}, {"Reciprocal", UNARY_OP_RECIPROCAL}, - {"Square", UNARY_OP_SQUARE}, {"Sqrt", UNARY_OP_SQRT}, - {"Rsqrt", UNARY_OP_RSQRT}, {"Sin", UNARY_OP_SIN}, - {"Cos", UNARY_OP_COS}, {"Asin", UNARY_OP_ASIN}, - {"ACos", UNARY_OP_ACOS}, {"Atan", UNARY_OP_ATAN}, - {"Asinh", UNARY_OP_ASINH}, {"Acosh", UNARY_OP_ACOSH}, - {"Abs", UNARY_OP_ABS}, {"Floor", UNARY_OP_FLOOR}, - {"Rint", UNARY_OP_RINT}, {"Round", UNARY_OP_ROUND}, - {"Real", UNARY_OP_REAL}, {"Imag", UNARY_OP_IMAG}, - {"Sign", UNARY_OP_SIGN}, {"Conj", UNARY_OP_CONJ}}; - template class UnaryOpGpuKernelMod : public NativeGpuKernelMod { public: @@ -84,72 +39,50 @@ class UnaryOpGpuKernelMod : public NativeGpuKernelMod { if (is_null_input_) { return true; } - - static std::map> func_map = { - {UNARY_OP_EXP, 
Exponential}, {UNARY_OP_EXPM1, Expm1}, - {UNARY_OP_LOG, Logarithm}, {UNARY_OP_LOG1P, Log1p}, - {UNARY_OP_ERF, Erf}, {UNARY_OP_ERFC, Erfc}, - {UNARY_OP_NEG, Negative}, {UNARY_OP_RECIPROCAL, Reciprocal}, - {UNARY_OP_SQUARE, Square}, {UNARY_OP_SQRT, Sqrt}, - {UNARY_OP_RSQRT, Rsqrt}, {UNARY_OP_SIN, Sin}, - {UNARY_OP_COS, Cos}, {UNARY_OP_ASIN, Asin}, - {UNARY_OP_ACOS, ACos}, {UNARY_OP_ATAN, Atan}, - {UNARY_OP_ASINH, Asinh}, {UNARY_OP_ACOSH, Acosh}, - {UNARY_OP_ABS, Abs}, {UNARY_OP_FLOOR, Floor}, - {UNARY_OP_RINT, Rint}, {UNARY_OP_ROUND, Round}, - {UNARY_OP_SIGN, Sign}}; - - auto iter = func_map.find(unary_op_type_); - if (iter != func_map.end()) { - T *input_addr = GetDeviceAddress(inputs, 0); - T *output_addr = GetDeviceAddress(outputs, 0); - iter->second(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast(stream_ptr)); - } else { - MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", only support these types: Exp, Expm1, Log, Log1p, Erf, Erfc," - << " Neg, Reciprocal, Square, Sqrt, Rsqrt, Sin, Cos, Asin, ACos, Atan, Asinh, Acosh, Abs, " - << "Floor, Rint, Round, Real, Imag, Sign, Conj currently, but got " << unary_op_type_; + std::vector input_addrs; + std::vector output_addrs; + std::vector work_addrs; + for (size_t idx = 0; idx < inputs.size(); ++idx) { + void *cur_ptr = reinterpret_cast(GetDeviceAddress(inputs, idx)); + input_addrs.emplace_back(cur_ptr); + } + for (size_t idx = 0; idx < outputs.size(); ++idx) { + void *cur_ptr = reinterpret_cast(GetDeviceAddress(outputs, idx)); + output_addrs.emplace_back(cur_ptr); + } + int flag = helper_ptr_->Process(input_addrs, output_addrs, work_addrs, stream_ptr); + if (flag != 0) { + return false; } - return true; } bool Init(const CNodePtr &kernel_node) override { - std::string kernel_name = common::AnfAlgo::GetCNodeName(kernel_node); kernel_node_ = kernel_node; - auto iter = kUnaryOpTypeMap.find(kernel_name); - if (iter == kUnaryOpTypeMap.end()) { - MS_LOG(EXCEPTION) << "For '" << kernel_name << ", only 
support these types: Exp, Expm1, Log, Log1p, Erf, Erfc," - << " Neg, Reciprocal, Square, Sqrt, Rsqrt, Sin, Cos, Asin, ACos, Atan, Asinh, Acosh, Abs, " - << "Floor, Rint, Round, Real, Imag, Sign, Conj currently, but got " << kernel_name; - } - unary_op_type_ = iter->second; - size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node); - if (input_num != 1) { - MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num; - } - size_t output_num = common::AnfAlgo::GetOutputTensorNum(kernel_node); - if (output_num != 1) { - MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num; - } + std::string kernel_name = common::AnfAlgo::GetCNodeName(kernel_node); + helper_ptr_ = std::make_unique>(kernel_name); + helper_ptr_->ResetResource(); + std::vector> input_shapes; + std::vector> output_shapes; auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0); + auto output_shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0); is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input"); if (is_null_input_) { - InitSizeLists(); + input_size_list_.emplace_back(0); + output_size_list_.emplace_back(0); return true; } - for (size_t i = 0; i < input_shape.size(); i++) { - input_size_ *= input_shape[i]; + input_shapes.emplace_back(input_shape); + output_shapes.emplace_back(output_shape); + int flag = helper_ptr_->CalMemSize(input_shapes, output_shapes); + if (flag != 0) { + return false; } - output_size_ = input_size_; InitSizeLists(); return true; } + void ResetResource() noexcept override { - unary_op_type_ = UNARY_OP_INVALID_TYPE; - input_size_ = sizeof(T); - output_size_ = sizeof(T); - workspace_size_ = 0; - is_null_input_ = false; input_size_list_.clear(); output_size_list_.clear(); workspace_size_list_.clear(); @@ -157,15 +90,13 @@ class UnaryOpGpuKernelMod : public NativeGpuKernelMod { protected: void InitSizeLists() override { - 
input_size_list_.push_back(input_size_); - output_size_list_.push_back(output_size_); + input_size_list_ = helper_ptr_->GetInputSizeList(); + output_size_list_ = helper_ptr_->GetOutputSizeList(); + workspace_size_list_ = helper_ptr_->GetWorkSizeList(); } private: - UnaryOptype unary_op_type_; - size_t input_size_; - size_t output_size_; - size_t workspace_size_; + std::unique_ptr> helper_ptr_ = nullptr; bool is_null_input_; }; } // namespace kernel