!31271 Reconstruct the GPU invoking mode

Merge pull request !31271 from zong_shuai/reconstruct_gpu_kernel
This commit is contained in:
i-robot 2022-03-21 15:05:28 +00:00 committed by Gitee
commit 27e417de6a
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
10 changed files with 667 additions and 260 deletions

View File

@ -19,15 +19,13 @@
#include <vector>
#include <string>
#include <memory>
#include "plugin/device/gpu/kernel/gpu_kernel.h"
#include "plugin/device/gpu/kernel/gpu_kernel_factory.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/batchtospace_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_class/batchtospace_helper.h"
namespace mindspore {
namespace kernel {
constexpr size_t SHAPE_SIZE = 4;
constexpr size_t CROPS_SHAPE_0 = 2;
constexpr size_t CROPS_SHAPE_1 = 2;
template <typename T>
class BatchToSpaceGpuKernelMod : public NativeGpuKernelMod {
public:
@ -36,139 +34,55 @@ class BatchToSpaceGpuKernelMod : public NativeGpuKernelMod {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
T *input = GetDeviceAddress<T>(inputs, 0);
T *output = GetDeviceAddress<T>(outputs, 0);
size_t size = output_size_ / sizeof(T);
CalBatchToSpace<T>(size, input, in_, ih_, iw_, ic_, on_, oh_, ow_, oc_, crops_[0][0], crops_[0][1], crops_[1][0],
crops_[1][1], block_size_, output, reinterpret_cast<cudaStream_t>(stream_ptr));
std::vector<void *> input_addrs = ConvertPtrs(inputs);
std::vector<void *> work_addrs = ConvertPtrs(workspace);
std::vector<void *> output_addrs = ConvertPtrs(outputs);
int flag = helper_ptr_->Process(input_addrs, output_addrs, work_addrs, stream_ptr);
if (flag != 0) {
return false;
}
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
(void)CheckParam(kernel_node);
input_size_ = sizeof(T);
for (size_t idx = 0; idx < input_shape_.size(); ++idx) {
input_size_ *= input_shape_[idx];
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
helper_ptr_ = std::make_unique<cukernel::BatchToSpaceHelperGpuKernel<T>>(kernel_name_);
helper_ptr_->ResetResource();
std::vector<std::vector<size_t>> input_shapes;
std::vector<std::vector<size_t>> output_shapes;
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
auto output_shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0);
input_shapes.emplace_back(input_shape);
output_shapes.emplace_back(output_shape);
attr_.block_size = GetAttr<int64_t>(kernel_node, "block_size");
attr_.crops = GetAttr<std::vector<std::vector<int64_t>>>(kernel_node, "crops");
attr_.input_shape = input_shape;
int flag = helper_ptr_->CheckKernelParam(&attr_);
if (flag != 0) {
return false;
}
in_ = input_shape_[0];
ic_ = input_shape_[1];
ih_ = input_shape_[2];
iw_ = input_shape_[3];
on_ = in_ / (block_size_ * block_size_);
oc_ = ic_;
oh_ = ih_ * block_size_ - crops_[0][0] - crops_[0][1];
ow_ = iw_ * block_size_ - crops_[1][0] - crops_[1][1];
output_size_ = on_ * oc_ * oh_ * ow_ * sizeof(T);
flag = helper_ptr_->CalMemSize(input_shapes, output_shapes);
if (flag != 0) {
return false;
}
InitSizeLists();
return true;
}
void ResetResource() noexcept override {
in_ = 0;
ic_ = 0;
ih_ = 0;
iw_ = 0;
on_ = 0;
oc_ = 0;
oh_ = 0;
ow_ = 0;
kernel_name_ = "BatchToSpace";
input_size_list_.clear();
output_size_list_.clear();
crops_.clear();
input_shape_.clear();
}
protected:
void InitSizeLists() override {
input_size_list_.push_back(input_size_);
output_size_list_.push_back(output_size_);
}
void CheckParam(const CNodePtr &kernel_node) {
block_size_ = GetAttr<int64_t>(kernel_node, "block_size");
if (block_size_ < 1) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'block_size' cannot be less than 1, but got "
<< block_size_;
}
size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num;
}
size_t output_num = common::AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num;
}
// check input_shape
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
if (input_shape.size() != SHAPE_SIZE) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input should be 4, but got "
<< input_shape.size();
}
if ((input_shape[0] % (block_size_ * block_size_)) != 0) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', input_shape[0] should be divisible by product of block_shape, but got input_shape[0]: "
<< input_shape[0] << ", block_shape: " << block_size_;
}
for (size_t idx = 0; idx < SHAPE_SIZE; ++idx) {
if (input_shape[idx] < 1) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', the element of shape of input cannot be less than 1, but got "
<< CONVERT_VECTOR_TO_STRING(input_shape);
}
}
input_shape_.assign(input_shape.begin(), input_shape.end());
// check crops
crops_ = (GetAttr<std::vector<std::vector<int64_t>>>(kernel_node, "crops"));
if (crops_.size() != CROPS_SHAPE_0) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the size of 'crops' should be " << CROPS_SHAPE_0
<< ", but got " << crops_.size();
}
if (crops_[0].size() != CROPS_SHAPE_1 || crops_[1].size() != CROPS_SHAPE_1) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the size of element of 'crops' should be " << CROPS_SHAPE_1
<< ", but got the size of crops[0]: " << crops_[0].size()
<< ", the size of crops[1]: " << crops_[1].size();
} else {
for (size_t idx_i = 0; idx_i < CROPS_SHAPE_0; ++idx_i) {
for (size_t idx_j = 0; idx_j < CROPS_SHAPE_1; ++idx_j) {
if (crops_[idx_i][idx_j] < 0) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', the element of 'crops' should be greater than or equal to 0, but got crops["
<< idx_i << "][" << idx_j << "]: " << crops_[idx_i][idx_j];
}
}
auto tmp_shape = input_shape[idx_i + CROPS_SHAPE_1] * block_size_ - crops_[idx_i][0] - crops_[idx_i][1];
if (tmp_shape <= 0) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_
<< "', the element of shape of output should be greater than 0, but got " << tmp_shape;
}
}
}
input_size_list_ = helper_ptr_->GetInputSizeList();
output_size_list_ = helper_ptr_->GetOutputSizeList();
}
private:
std::vector<std::vector<int64_t>> crops_;
std::vector<size_t> input_shape_;
size_t block_size_;
size_t input_size_;
size_t output_size_;
size_t in_;
size_t ic_;
size_t ih_;
size_t iw_;
size_t on_;
size_t oc_;
size_t oh_;
size_t ow_;
std::string kernel_name_;
std::unique_ptr<cukernel::BatchToSpaceHelperGpuKernel<T>> helper_ptr_ = nullptr;
cukernel::BatchToSpaceAttr attr_;
};
} // namespace kernel
} // namespace mindspore

View File

@ -18,9 +18,10 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_UNIQUE_GPU_KERNEL_H_
#include <vector>
#include <memory>
#include "plugin/device/gpu/kernel/gpu_kernel.h"
#include "plugin/device/gpu/kernel/gpu_kernel_factory.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/unique_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_class/unique_helper.h"
namespace mindspore {
namespace kernel {
template <typename T, typename S>
@ -34,32 +35,31 @@ class UniqueGpuKernelMod : public NativeGpuKernelMod {
if (is_null_input_) {
return true;
}
T *input = GetDeviceAddress<T>(inputs, 0);
S *input_index = GetDeviceAddress<S>(workspace, 0);
S *sorted_index = GetDeviceAddress<S>(workspace, 1);
T *output = GetDeviceAddress<T>(outputs, 0);
S *index = GetDeviceAddress<S>(outputs, 1);
stream_ptr_ = stream_ptr;
post_output_size_ = CalUnique(input, num_elements_, input_index, sorted_index, output, index,
reinterpret_cast<cudaStream_t>(stream_ptr));
std::vector<void *> input_ptrs = ConvertPtrs(inputs);
std::vector<void *> work_ptrs = ConvertPtrs(workspace);
std::vector<void *> output_ptrs = ConvertPtrs(outputs);
if (helper_ptr_->Process(input_ptrs, output_ptrs, work_ptrs, stream_ptr) != 0) {
return false;
}
return true;
}
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = common::AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
auto kernel_name = common::AnfAlgo::GetCNodeName(kernel_node);
helper_ptr_ = std::make_unique<cukernel::UniqueHelperGpuKernel<T, S>>(kernel_name);
helper_ptr_->ResetResource();
std::vector<std::vector<size_t>> input_shapes;
std::vector<std::vector<size_t>> output_shapes;
std::vector<size_t> shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name, "input");
if (is_null_input_) {
InitSizeLists();
return true;
}
for (auto x : shape) {
num_elements_ *= x;
}
input_size_ = num_elements_ * sizeof(T);
output_size_ = input_size_;
workspace_size_ = num_elements_ * sizeof(S);
input_shapes.emplace_back(shape);
helper_ptr_->CalMemSize(input_shapes, output_shapes);
InitSizeLists();
return true;
}
@ -73,7 +73,7 @@ class UniqueGpuKernelMod : public NativeGpuKernelMod {
for (size_t i = 0; i < output_num; ++i) {
std::vector<size_t> shape = common::AnfAlgo::GetOutputInferShape(kernel_node_.lock(), i);
if (i == 0) {
shape[0] = post_output_size_;
shape[0] = helper_ptr_->GetOutSize();
}
TypeId type_id = common::AnfAlgo::GetOutputInferDataType(kernel_node_.lock(), i);
type_ids.emplace_back(type_id);
@ -83,11 +83,6 @@ class UniqueGpuKernelMod : public NativeGpuKernelMod {
}
void ResetResource() noexcept override {
input_size_ = 0;
output_size_ = 0;
workspace_size_ = 0;
num_elements_ = 1;
post_output_size_ = 0;
is_null_input_ = false;
stream_ptr_ = nullptr;
input_size_list_.clear();
@ -97,21 +92,15 @@ class UniqueGpuKernelMod : public NativeGpuKernelMod {
protected:
void InitSizeLists() override {
input_size_list_.push_back(input_size_);
output_size_list_.push_back(output_size_);
output_size_list_.push_back(num_elements_ * sizeof(S));
workspace_size_list_.push_back(workspace_size_);
workspace_size_list_.push_back(workspace_size_);
input_size_list_ = helper_ptr_->GetInputSizeList();
output_size_list_ = helper_ptr_->GetOutputSizeList();
workspace_size_list_ = helper_ptr_->GetWorkSizeList();
}
private:
void *stream_ptr_;
size_t input_size_;
size_t output_size_;
size_t workspace_size_;
int num_elements_;
int post_output_size_;
bool is_null_input_;
std::unique_ptr<cukernel::UniqueHelperGpuKernel<T, S>> helper_ptr_ = nullptr;
};
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,161 @@
/**
* Copyright 2019-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_BATCHTOSPACE_HELPER_H_
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_BATCHTOSPACE_HELPER_H_
#include <string>
#include <vector>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_class/helper_base.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/batchtospace_impl.cuh"
namespace mindspore {
namespace cukernel {
// Number of shape entries CalMemSize expects in `input_shapes`.
constexpr size_t INPUT_NUM{1};
// Number of shape entries CalMemSize expects in `output_shapes`.
constexpr size_t OUTPUT_NUM{1};
// Required rank of the input shape (checked in CheckKernelParam).
constexpr size_t SHAPE_SIZE{4};
// Required outer size of the 'crops' attribute.
constexpr size_t CROPS_SHAPE_0{2};
// Required size of every inner vector of the 'crops' attribute.
constexpr size_t CROPS_SHAPE_1{2};
// Attribute bundle consumed by BatchToSpaceHelperGpuKernel::CheckKernelParam.
struct BatchToSpaceAttr : public GpuKernelAttrBase {
  // Crop amounts; validation requires a 2x2 layout with non-negative entries.
  std::vector<std::vector<int64_t>> crops;
  // Input tensor shape; validation requires exactly four dimensions.
  std::vector<size_t> input_shape;
  // Block size; zero-initialized so an unset value is rejected by the `block_size < 1` check
  // instead of being read as an indeterminate value.
  size_t block_size = 0;
};
template <typename T>
class BatchToSpaceHelperGpuKernel : public GpuKernelHelperBase {
public:
explicit BatchToSpaceHelperGpuKernel(std::string &kernel_name) : GpuKernelHelperBase(kernel_name) {}
virtual ~BatchToSpaceHelperGpuKernel() = default;
int CalMemSize(const std::vector<std::vector<size_t>> &input_shapes,
const std::vector<std::vector<size_t>> &output_shapes) override {
int flag = CalShapesSizeInBytes<T>(input_shapes, INPUT_NUM, kernel_name_, "input_shapes", &input_size_list_);
if (flag != 0) {
return flag;
}
flag = CalShapesSizeInBytes<T>(output_shapes, OUTPUT_NUM, kernel_name_, "output_shapes", &output_size_list_);
if (flag != 0) {
return flag;
}
kernel_size_ = output_size_list_[0] / sizeof(T);
return 0;
}
int Process(const std::vector<void *> &input_ptrs, const std::vector<void *> &output_ptrs,
const std::vector<void *> &work_ptrs, void *cuda_stream) override {
size_t in = attr_ptr_->input_shape[0];
size_t ic = attr_ptr_->input_shape[1];
size_t ih = attr_ptr_->input_shape[2];
size_t iw = attr_ptr_->input_shape[3];
size_t on = in / (attr_ptr_->block_size * attr_ptr_->block_size);
size_t oc = ic;
size_t oh = ih * attr_ptr_->block_size - attr_ptr_->crops[0][0] - attr_ptr_->crops[0][1];
size_t ow = iw * attr_ptr_->block_size - attr_ptr_->crops[1][0] - attr_ptr_->crops[1][1];
T *input_ptr = nullptr;
T *output_ptr = nullptr;
int flag = GetDeviceAddress<T>(input_ptrs, 0, kernel_name_, &input_ptr);
if (flag != 0) {
return flag;
}
flag = GetDeviceAddress<T>(output_ptrs, 0, kernel_name_, &output_ptr);
if (flag != 0) {
return flag;
}
CalBatchToSpace<T>(kernel_size_, input_ptr, in, ih, iw, ic, on, oh, ow, oc, attr_ptr_->crops[0][0],
attr_ptr_->crops[0][1], attr_ptr_->crops[1][0], attr_ptr_->crops[1][1], attr_ptr_->block_size,
output_ptr, reinterpret_cast<cudaStream_t>(cuda_stream));
return 0;
}
void ResetResource() override {
kernel_size_ = 0;
input_size_list_.clear();
output_size_list_.clear();
work_size_list_.clear();
}
int CheckKernelParam(GpuKernelAttrBase *kernel_attr) override {
attr_ptr_ = dynamic_cast<BatchToSpaceAttr *>(kernel_attr);
if (attr_ptr_->block_size < 1) {
MS_LOG(ERROR) << "For '" << kernel_name_ << "', the 'block_size' cannot be less than 1, but got "
<< attr_ptr_->block_size;
return -1;
}
// check input_shape
if (attr_ptr_->input_shape.size() != SHAPE_SIZE) {
MS_LOG(ERROR) << "For '" << kernel_name_ << "', the dimension of input should be 4, but got "
<< attr_ptr_->input_shape.size();
return -1;
}
if ((attr_ptr_->input_shape[0] % (attr_ptr_->block_size * attr_ptr_->block_size)) != 0) {
MS_LOG(ERROR) << "For '" << kernel_name_
<< "', input_shape[0] should be divisible by product of block_shape, but got input_shape[0]: "
<< attr_ptr_->input_shape[0] << ", block_shape: " << attr_ptr_->block_size;
return -1;
}
for (size_t idx = 0; idx < SHAPE_SIZE; ++idx) {
if (attr_ptr_->input_shape[idx] < 1) {
MS_LOG(ERROR) << "For '" << kernel_name_ << "', the element of shape of input cannot be less than 1, but got "
<< ConvertVectorToString(attr_ptr_->input_shape);
return -1;
}
}
// check crops
if (attr_ptr_->crops.size() != CROPS_SHAPE_0) {
MS_LOG(ERROR) << "For '" << kernel_name_ << "', the size of 'crops' should be " << CROPS_SHAPE_0 << ", but got "
<< attr_ptr_->crops.size();
return -1;
}
if (attr_ptr_->crops[0].size() != CROPS_SHAPE_1 || attr_ptr_->crops[1].size() != CROPS_SHAPE_1) {
MS_LOG(ERROR) << "For '" << kernel_name_ << "', the size of element of 'crops' should be " << CROPS_SHAPE_1
<< ", but got the size of crops[0]: " << attr_ptr_->crops[0].size()
<< ", the size of crops[1]: " << attr_ptr_->crops[1].size();
return -1;
} else {
for (size_t idx_i = 0; idx_i < CROPS_SHAPE_0; ++idx_i) {
for (size_t idx_j = 0; idx_j < CROPS_SHAPE_1; ++idx_j) {
if (attr_ptr_->crops[idx_i][idx_j] < 0) {
MS_LOG(ERROR) << "For '" << kernel_name_
<< "', the element of 'crops' should be greater than or equal to 0, but got crops[" << idx_i
<< "][" << idx_j << "]: " << attr_ptr_->crops[idx_i][idx_j];
return -1;
}
}
auto tmp_shape = attr_ptr_->input_shape[idx_i + CROPS_SHAPE_1] * attr_ptr_->block_size -
attr_ptr_->crops[idx_i][0] - attr_ptr_->crops[idx_i][1];
if (tmp_shape <= 0) {
MS_LOG(ERROR) << "For '" << kernel_name_
<< "', the element of shape of output should be greater than 0, but got " << tmp_shape;
return -1;
}
}
}
return 0;
}
private:
BatchToSpaceAttr *attr_ptr_;
size_t kernel_size_;
};
} // namespace cukernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_BATCHTOSPACE_HELPER_H_

View File

@ -0,0 +1,89 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_COMMON_H_
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_COMMON_H_
#include <string>
#include <vector>
#include "mindspore/core/utils/log_adapter.h"
namespace mindspore {
namespace cukernel {
// 1. Refine the error codes.
// Formats a vector as a parenthesized, comma-separated list, e.g. {1, 2, 3} -> "(1, 2, 3)".
// An empty vector is rendered as "()".
inline std::string ConvertVectorToString(const std::vector<size_t> &value) {
  std::stringstream buffer;
  buffer << "(";
  for (size_t pos = 0; pos < value.size(); ++pos) {
    // Every element except the first is preceded by a separator.
    if (pos != 0) {
      buffer << ", ";
    }
    buffer << value[pos];
  }
  buffer << ")";
  return buffer.str();
}
template <typename T>
int CalShapesSizeInBytes(const std::vector<std::vector<size_t>> &shapes, const size_t shape_num,
const std::string kernel_name, const std::string param_name,
std::vector<size_t> *shapes_size) {
if (shape_num != shapes.size()) {
MS_LOG(ERROR) << "For '" << kernel_name << "', the number of " << param_name << "should be equal to " << shape_num
<< ", but got " << shapes.size();
return -1;
}
size_t return_flag = 0;
for (size_t idx = 0; idx < shape_num; ++idx) {
size_t cur_size = sizeof(T);
if (shapes[idx].size() == 0) {
// 常数
MS_LOG(WARNING) << "For '" << kernel_name << "', the shapes[" << idx << "] is ( )";
shapes_size->emplace_back(cur_size);
continue;
}
for (const auto &val : shapes[idx]) {
cur_size *= val;
}
if (cur_size == 0) {
MS_LOG(WARNING) << "For '" << kernel_name << "', the shape cannot contain zero, but got shapes[" << idx << "] is "
<< ConvertVectorToString(shapes[idx]);
return_flag = -1;
}
shapes_size->emplace_back(cur_size);
}
return return_flag;
}
template <typename T>
inline int GetDeviceAddress(const std::vector<void *> &addr_list, const size_t index, const std::string kernel_name,
T **out_ptr) {
if (index >= addr_list.size()) {
MS_LOG(ERROR) << "Address index(" << index << ") out of range(" << addr_list.size() << ")";
return -1;
}
if (addr_list[index] == nullptr) {
MS_LOG(ERROR) << "The device address is empty, address index: " << index << ", op name is: " << kernel_name;
return -1;
}
*out_ptr = reinterpret_cast<T *>(addr_list[index]);
return 0;
}
} // namespace cukernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_COMMON_H_

View File

@ -0,0 +1,63 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_HELPER_BASE_H_
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_HELPER_BASE_H_
#include <string>
#include <vector>
#include "mindspore/core/utils/log_adapter.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_class/cuda_class_common.h"
namespace mindspore {
namespace cukernel {
// Common base for the attribute structs passed to GpuKernelHelperBase::CheckKernelParam.
// It carries no data; the virtual destructor makes the type polymorphic so that it can be
// destroyed through a base pointer.
struct GpuKernelAttrBase {
virtual ~GpuKernelAttrBase() = default;
};
// Abstract base for GPU kernel helpers. A helper records the byte sizes of its input, output and
// workspace buffers (CalMemSize) and runs the computation on raw device pointers (Process).
class GpuKernelHelperBase {
 public:
  // Stores a copy of `kernel_name`. A const reference is accepted so that const strings and
  // temporaries can be passed as well as the lvalues existing callers use.
  explicit GpuKernelHelperBase(const std::string &kernel_name) : kernel_name_(kernel_name) {}
  // The member vectors release their storage themselves; no manual clearing is required.
  virtual ~GpuKernelHelperBase() = default;

  // Derives the buffer byte sizes from the given shapes. Returns 0 on success.
  virtual int CalMemSize(const std::vector<std::vector<size_t>> &input_shapes,
                         const std::vector<std::vector<size_t>> &output_shapes) = 0;

  // Runs the kernel on the given raw pointers using `cuda_stream`. Returns 0 on success.
  virtual int Process(const std::vector<void *> &input_ptrs, const std::vector<void *> &output_ptrs,
                      const std::vector<void *> &work_ptrs, void *cuda_stream) = 0;

  // Default implementation only logs an error; helpers that hold resettable state override it.
  virtual void ResetResource() {
    MS_LOG(ERROR) << "kernel must override the `ResetResource()` method when dynamic shape";
  }

  // Accessors return copies of the recorded byte-size lists.
  std::vector<size_t> GetInputSizeList() const { return input_size_list_; }
  std::vector<size_t> GetOutputSizeList() const { return output_size_list_; }
  std::vector<size_t> GetWorkSizeList() const { return work_size_list_; }

  // Validates operator attributes. The default accepts anything and returns 0.
  virtual int CheckKernelParam(GpuKernelAttrBase * /*kernel_attr*/) { return 0; }

 protected:
  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> work_size_list_;
  std::string kernel_name_;
};
} // namespace cukernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_HELPER_BASE_H_

View File

@ -0,0 +1,147 @@
/**
* Copyright 2019-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_UNARY_HELPER_H_
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_UNARY_HELPER_H_
#include <string>
#include <vector>
#include <map>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_class/helper_base.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/unary_op_impl.cuh"
namespace mindspore {
namespace cukernel {
// Identifiers for the unary operators. Values are spelled out explicitly; they match the
// implicit numbering (0..25) plus the 255 sentinel for "no valid operator".
enum UnaryOptype {
  UNARY_OP_EXP = 0,
  UNARY_OP_EXPM1 = 1,
  UNARY_OP_LOG = 2,
  UNARY_OP_LOG1P = 3,
  UNARY_OP_ERF = 4,
  UNARY_OP_ERFC = 5,
  UNARY_OP_NEG = 6,
  UNARY_OP_RECIPROCAL = 7,
  UNARY_OP_SQUARE = 8,
  UNARY_OP_SQRT = 9,
  UNARY_OP_RSQRT = 10,
  UNARY_OP_SIN = 11,
  UNARY_OP_COS = 12,
  UNARY_OP_ASIN = 13,
  UNARY_OP_ACOS = 14,
  UNARY_OP_ATAN = 15,
  UNARY_OP_ASINH = 16,
  UNARY_OP_ACOSH = 17,
  UNARY_OP_ABS = 18,
  UNARY_OP_FLOOR = 19,
  UNARY_OP_RINT = 20,
  UNARY_OP_ROUND = 21,
  UNARY_OP_SIGN = 22,
  UNARY_OP_REAL = 23,
  UNARY_OP_IMAG = 24,
  UNARY_OP_CONJ = 25,
  UNARY_OP_INVALID_TYPE = 255
};
// Lookup table from operator name to its UnaryOptype identifier. Names are case-sensitive
// (note the spelling "ACos").
static const std::map<std::string, UnaryOptype> kUnaryOpTypeMap = {
  {"Exp", UNARY_OP_EXP},
  {"Expm1", UNARY_OP_EXPM1},
  {"Log", UNARY_OP_LOG},
  {"Log1p", UNARY_OP_LOG1P},
  {"Erf", UNARY_OP_ERF},
  {"Erfc", UNARY_OP_ERFC},
  {"Neg", UNARY_OP_NEG},
  {"Reciprocal", UNARY_OP_RECIPROCAL},
  {"Square", UNARY_OP_SQUARE},
  {"Sqrt", UNARY_OP_SQRT},
  {"Rsqrt", UNARY_OP_RSQRT},
  {"Sin", UNARY_OP_SIN},
  {"Cos", UNARY_OP_COS},
  {"Asin", UNARY_OP_ASIN},
  {"ACos", UNARY_OP_ACOS},
  {"Atan", UNARY_OP_ATAN},
  {"Asinh", UNARY_OP_ASINH},
  {"Acosh", UNARY_OP_ACOSH},
  {"Abs", UNARY_OP_ABS},
  {"Floor", UNARY_OP_FLOOR},
  {"Rint", UNARY_OP_RINT},
  {"Round", UNARY_OP_ROUND},
  {"Real", UNARY_OP_REAL},
  {"Imag", UNARY_OP_IMAG},
  {"Sign", UNARY_OP_SIGN},
  {"Conj", UNARY_OP_CONJ},
};
template <typename T>
class UnaryHelperGpuKernel : public GpuKernelHelperBase {
public:
explicit UnaryHelperGpuKernel(std::string &kernel_name) : GpuKernelHelperBase(kernel_name) {}
virtual ~UnaryHelperGpuKernel() = default;
int CalMemSize(const std::vector<std::vector<size_t>> &input_shapes,
const std::vector<std::vector<size_t>> &output_shapes) override {
auto iter = kUnaryOpTypeMap.find(kernel_name_);
if (iter == kUnaryOpTypeMap.end()) {
MS_LOG(ERROR) << "For '" << kernel_name_ << ", only support these types: Exp, Expm1, Log, Log1p, Erf, Erfc,"
<< " Neg, Reciprocal, Square, Sqrt, Rsqrt, Sin, Cos, Asin, ACos, Atan, Asinh, Acosh, Abs, "
<< "Floor, Rint, Round, Real, Imag, Sign, Conj currently, but got " << kernel_name_;
return -1;
}
unary_op_type_ = iter->second;
int flag = CalShapesSizeInBytes<T>(input_shapes, 1, kernel_name_, "input_shapes", &input_size_list_);
output_size_list_ = input_size_list_;
if (flag != 0) {
return flag;
}
return 0;
}
int Process(const std::vector<void *> &input_ptrs, const std::vector<void *> &output_ptrs,
const std::vector<void *> &work_ptrs, void *cuda_stream) override {
static std::map<UnaryOptype, std::function<void(const T *, T *, const size_t, cudaStream_t)>> func_map = {
{UNARY_OP_EXP, Exponential<T>}, {UNARY_OP_EXPM1, Expm1<T>},
{UNARY_OP_LOG, Logarithm<T>}, {UNARY_OP_LOG1P, Log1p<T>},
{UNARY_OP_ERF, Erf<T>}, {UNARY_OP_ERFC, Erfc<T>},
{UNARY_OP_NEG, Negative<T>}, {UNARY_OP_RECIPROCAL, Reciprocal<T>},
{UNARY_OP_SQUARE, Square<T>}, {UNARY_OP_SQRT, Sqrt<T>},
{UNARY_OP_RSQRT, Rsqrt<T>}, {UNARY_OP_SIN, Sin<T>},
{UNARY_OP_COS, Cos<T>}, {UNARY_OP_ASIN, Asin<T>},
{UNARY_OP_ACOS, ACos<T>}, {UNARY_OP_ATAN, Atan<T>},
{UNARY_OP_ASINH, Asinh<T>}, {UNARY_OP_ACOSH, Acosh<T>},
{UNARY_OP_ABS, Abs<T>}, {UNARY_OP_FLOOR, Floor<T>},
{UNARY_OP_RINT, Rint<T>}, {UNARY_OP_ROUND, Round<T>},
{UNARY_OP_SIGN, Sign<T>}};
auto iter = func_map.find(unary_op_type_);
if (iter != func_map.end()) {
T *input_addr;
T *output_addr;
int flag = GetDeviceAddress<T>(input_ptrs, 0, kernel_name_, &input_addr);
if (flag != 0) {
return flag;
}
flag = GetDeviceAddress<T>(output_ptrs, 0, kernel_name_, &output_addr);
if (flag != 0) {
return flag;
}
iter->second(input_addr, output_addr, input_size_list_[0] / sizeof(T),
reinterpret_cast<cudaStream_t>(cuda_stream));
} else {
MS_LOG(ERROR) << "For '" << kernel_name_ << ", only support these types: Exp, Expm1, Log, Log1p, Erf, Erfc,"
<< " Neg, Reciprocal, Square, Sqrt, Rsqrt, Sin, Cos, Asin, ACos, Atan, Asinh, Acosh, Abs, "
<< "Floor, Rint, Round, Real, Imag, Sign, Conj currently, but got " << unary_op_type_;
return -1;
}
return 0;
}
void ResetResource() override {
unary_op_type_ = UNARY_OP_INVALID_TYPE;
input_size_list_.clear();
output_size_list_.clear();
}
private:
UnaryOptype unary_op_type_;
};
} // namespace cukernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_UNARY_HELPER_H_

View File

@ -0,0 +1,105 @@
/**
* Copyright 2019-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_UNIQUE_HELPER_H_
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_UNIQUE_HELPER_H_
#include <string>
#include <vector>
#include "plugin/device/gpu/kernel/cuda_impl/cuda_class/helper_base.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/unique_impl.cuh"
namespace mindspore {
namespace cukernel {
// Helper for the Unique GPU kernel: records buffer sizes and runs CalUnique.
// The expected input count is a private class constant. The namespace-scope constants that
// used to precede this class duplicated the ones in batchtospace_helper.h inside the same
// namespace (a redefinition error when both headers meet in one translation unit) and did not
// describe this kernel, which registers two outputs and two workspaces.
template <typename T, typename S>
class UniqueHelperGpuKernel : public GpuKernelHelperBase {
 public:
  explicit UniqueHelperGpuKernel(std::string &kernel_name) : GpuKernelHelperBase(kernel_name) {}
  virtual ~UniqueHelperGpuKernel() = default;

  // Records buffer sizes from the single input shape:
  //   outputs    - [input byte size, element count * sizeof(S)]
  //   workspaces - two buffers of element count * sizeof(S) each
  // `output_shapes` is not consulted. Returns 0 on success, otherwise the flag from
  // CalShapesSizeInBytes.
  int CalMemSize(const std::vector<std::vector<size_t>> &input_shapes,
                 const std::vector<std::vector<size_t>> &output_shapes) override {
    int flag = CalShapesSizeInBytes<T>(input_shapes, kInputNum, kernel_name_, "input_shapes", &input_size_list_);
    if (flag != 0) {
      return flag;
    }
    const size_t element_count = input_size_list_[0] / sizeof(T);
    // CalUnique receives the count as int, so the member stays int; the byte sizes below are
    // computed from the size_t count directly instead of going through the int value.
    num_elements_ = static_cast<int>(element_count);
    const size_t index_bytes = element_count * sizeof(S);
    work_size_list_.emplace_back(index_bytes);
    work_size_list_.emplace_back(index_bytes);
    output_size_list_.emplace_back(input_size_list_[0]);
    output_size_list_.emplace_back(index_bytes);
    return 0;
  }

  // Runs CalUnique on `cuda_stream` and stores its return value for GetOutSize.
  // Uses input_ptrs[0], work_ptrs[0..1] and output_ptrs[0..1]; returns a non-zero value when any
  // of these addresses is missing and 0 otherwise.
  int Process(const std::vector<void *> &input_ptrs, const std::vector<void *> &output_ptrs,
              const std::vector<void *> &work_ptrs, void *cuda_stream) override {
    T *t_input_ptr = nullptr;
    S *s_input_index = nullptr;
    S *s_sorted_index = nullptr;
    T *t_output_ptr = nullptr;
    S *s_output_index = nullptr;
    int flag = GetDeviceAddress<T>(input_ptrs, 0, kernel_name_, &t_input_ptr);
    if (flag != 0) {
      return flag;
    }
    flag = GetDeviceAddress<S>(work_ptrs, 0, kernel_name_, &s_input_index);
    if (flag != 0) {
      return flag;
    }
    flag = GetDeviceAddress<S>(work_ptrs, 1, kernel_name_, &s_sorted_index);
    if (flag != 0) {
      return flag;
    }
    flag = GetDeviceAddress<T>(output_ptrs, 0, kernel_name_, &t_output_ptr);
    if (flag != 0) {
      return flag;
    }
    flag = GetDeviceAddress<S>(output_ptrs, 1, kernel_name_, &s_output_index);
    if (flag != 0) {
      return flag;
    }

    post_output_size_ = CalUnique(t_input_ptr, num_elements_, s_input_index, s_sorted_index, t_output_ptr,
                                  s_output_index, reinterpret_cast<cudaStream_t>(cuda_stream));
    return 0;
  }

  // Restores the initial counters and empties every size list.
  void ResetResource() override {
    num_elements_ = 1;
    post_output_size_ = 0;
    input_size_list_.clear();
    output_size_list_.clear();
    work_size_list_.clear();
  }

  // Value returned by the most recent CalUnique call (0 before the first Process).
  int GetOutSize() const { return post_output_size_; }

 private:
  // Number of shape entries CalMemSize requires in `input_shapes`.
  static constexpr size_t kInputNum = 1;
  // Initial values mirror ResetResource so a freshly constructed helper is in a defined state.
  int num_elements_ = 1;
  int post_output_size_ = 0;
};
} // namespace cukernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_CLASS_UNIQUE_HELPER_H_

View File

@ -113,6 +113,14 @@ class NativeGpuKernelMod : public GpuKernelMod {
return reinterpret_cast<T *>(addr_list[index]->addr);
}
std::vector<void *> ConvertPtrs(const std::vector<AddressPtr> &input_ptrs) {
std::vector<void *> out_ptrs;
for (auto &cur_addr : input_ptrs) {
out_ptrs.emplace_back(cur_addr->addr);
}
return out_ptrs;
}
template <typename T>
inline T *GetPossiblyNullDeviceAddress(const std::vector<AddressPtr> &addr_list, size_t index) {
if (index >= addr_list.size()) {

View File

@ -39,21 +39,21 @@ class UnaryOpComplexGpuKernelMod : public NativeGpuKernelMod {
S *output_addr = GetDeviceAddress<S>(outputs, 0);
switch (unary_op_type_) {
case UNARY_OP_REAL: {
case cukernel::UNARY_OP_REAL: {
if constexpr (!std::is_same<S, utils::Complex<float>>::value &&
!std::is_same<S, utils::Complex<double>>::value) {
Real(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast<cudaStream_t>(stream_ptr));
}
break;
}
case UNARY_OP_IMAG: {
case cukernel::UNARY_OP_IMAG: {
if constexpr (!std::is_same<S, utils::Complex<float>>::value &&
!std::is_same<S, utils::Complex<double>>::value) {
Imag(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast<cudaStream_t>(stream_ptr));
}
break;
}
case UNARY_OP_CONJ: {
case cukernel::UNARY_OP_CONJ: {
if constexpr (std::is_same<T, S>::value && !std::is_same<T, bool>::value) {
Conj(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast<cudaStream_t>(stream_ptr));
}
@ -112,8 +112,8 @@ class UnaryOpComplexGpuKernelMod : public NativeGpuKernelMod {
private:
void GetOpType(const CNodePtr &kernel_node) {
std::string kernel_name = common::AnfAlgo::GetCNodeName(kernel_node);
static std::map<std::string, UnaryOptype> kComplexSupportedTypeMap = {
{"Real", UNARY_OP_REAL}, {"Imag", UNARY_OP_IMAG}, {"Conj", UNARY_OP_CONJ}};
static std::map<std::string, cukernel::UnaryOptype> kComplexSupportedTypeMap = {
{"Real", cukernel::UNARY_OP_REAL}, {"Imag", cukernel::UNARY_OP_IMAG}, {"Conj", cukernel::UNARY_OP_CONJ}};
auto iter = kComplexSupportedTypeMap.find(kernel_name);
if (iter != kComplexSupportedTypeMap.end()) {
unary_op_type_ = iter->second;
@ -128,7 +128,7 @@ class UnaryOpComplexGpuKernelMod : public NativeGpuKernelMod {
size_t output_size_;
size_t workspace_size_;
bool is_null_input_;
UnaryOptype unary_op_type_;
cukernel::UnaryOptype unary_op_type_;
};
} // namespace kernel
} // namespace mindspore

View File

@ -21,58 +21,13 @@
#include <functional>
#include <vector>
#include <string>
#include <map>
#include <memory>
#include "plugin/device/gpu/kernel/gpu_kernel.h"
#include "plugin/device/gpu/kernel/gpu_kernel_factory.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/unary_op_impl.cuh"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_class/unary_helper.h"
namespace mindspore {
namespace kernel {
// Identifiers of the element-wise unary operations known to the unary GPU kernels.
// Enumerators are numbered consecutively from 0; UNARY_OP_INVALID_TYPE (255) marks an invalid/unset type.
enum UnaryOptype {
  // Exponential / logarithmic family.
  UNARY_OP_EXP = 0,
  UNARY_OP_EXPM1 = 1,
  UNARY_OP_LOG = 2,
  UNARY_OP_LOG1P = 3,
  // Error functions.
  UNARY_OP_ERF = 4,
  UNARY_OP_ERFC = 5,
  // Basic arithmetic.
  UNARY_OP_NEG = 6,
  UNARY_OP_RECIPROCAL = 7,
  UNARY_OP_SQUARE = 8,
  UNARY_OP_SQRT = 9,
  UNARY_OP_RSQRT = 10,
  // Trigonometric / hyperbolic family.
  UNARY_OP_SIN = 11,
  UNARY_OP_COS = 12,
  UNARY_OP_ASIN = 13,
  UNARY_OP_ACOS = 14,
  UNARY_OP_ATAN = 15,
  UNARY_OP_ASINH = 16,
  UNARY_OP_ACOSH = 17,
  // Magnitude, rounding and sign.
  UNARY_OP_ABS = 18,
  UNARY_OP_FLOOR = 19,
  UNARY_OP_RINT = 20,
  UNARY_OP_ROUND = 21,
  UNARY_OP_SIGN = 22,
  // Complex-number helpers.
  UNARY_OP_REAL = 23,
  UNARY_OP_IMAG = 24,
  UNARY_OP_CONJ = 25,
  // Sentinel value.
  UNARY_OP_INVALID_TYPE = 255
};
// Lookup table from operator (CNode) name to its UnaryOptype identifier.
// Keys are case-sensitive; note the spelling "ACos" with a capital C.
static const std::map<std::string, UnaryOptype> kUnaryOpTypeMap = {
  {"Exp", UNARY_OP_EXP},
  {"Expm1", UNARY_OP_EXPM1},
  {"Log", UNARY_OP_LOG},
  {"Log1p", UNARY_OP_LOG1P},
  {"Erf", UNARY_OP_ERF},
  {"Erfc", UNARY_OP_ERFC},
  {"Neg", UNARY_OP_NEG},
  {"Reciprocal", UNARY_OP_RECIPROCAL},
  {"Square", UNARY_OP_SQUARE},
  {"Sqrt", UNARY_OP_SQRT},
  {"Rsqrt", UNARY_OP_RSQRT},
  {"Sin", UNARY_OP_SIN},
  {"Cos", UNARY_OP_COS},
  {"Asin", UNARY_OP_ASIN},
  {"ACos", UNARY_OP_ACOS},
  {"Atan", UNARY_OP_ATAN},
  {"Asinh", UNARY_OP_ASINH},
  {"Acosh", UNARY_OP_ACOSH},
  {"Abs", UNARY_OP_ABS},
  {"Floor", UNARY_OP_FLOOR},
  {"Rint", UNARY_OP_RINT},
  {"Round", UNARY_OP_ROUND},
  {"Sign", UNARY_OP_SIGN},
  {"Real", UNARY_OP_REAL},
  {"Imag", UNARY_OP_IMAG},
  {"Conj", UNARY_OP_CONJ},
};
template <typename T>
class UnaryOpGpuKernelMod : public NativeGpuKernelMod {
public:
@ -84,72 +39,50 @@ class UnaryOpGpuKernelMod : public NativeGpuKernelMod {
if (is_null_input_) {
return true;
}
static std::map<UnaryOptype, std::function<void(const T *, T *, const size_t, cudaStream_t)>> func_map = {
{UNARY_OP_EXP, Exponential<T>}, {UNARY_OP_EXPM1, Expm1<T>},
{UNARY_OP_LOG, Logarithm<T>}, {UNARY_OP_LOG1P, Log1p<T>},
{UNARY_OP_ERF, Erf<T>}, {UNARY_OP_ERFC, Erfc<T>},
{UNARY_OP_NEG, Negative<T>}, {UNARY_OP_RECIPROCAL, Reciprocal<T>},
{UNARY_OP_SQUARE, Square<T>}, {UNARY_OP_SQRT, Sqrt<T>},
{UNARY_OP_RSQRT, Rsqrt<T>}, {UNARY_OP_SIN, Sin<T>},
{UNARY_OP_COS, Cos<T>}, {UNARY_OP_ASIN, Asin<T>},
{UNARY_OP_ACOS, ACos<T>}, {UNARY_OP_ATAN, Atan<T>},
{UNARY_OP_ASINH, Asinh<T>}, {UNARY_OP_ACOSH, Acosh<T>},
{UNARY_OP_ABS, Abs<T>}, {UNARY_OP_FLOOR, Floor<T>},
{UNARY_OP_RINT, Rint<T>}, {UNARY_OP_ROUND, Round<T>},
{UNARY_OP_SIGN, Sign<T>}};
auto iter = func_map.find(unary_op_type_);
if (iter != func_map.end()) {
T *input_addr = GetDeviceAddress<T>(inputs, 0);
T *output_addr = GetDeviceAddress<T>(outputs, 0);
iter->second(input_addr, output_addr, inputs[0]->size / sizeof(T), reinterpret_cast<cudaStream_t>(stream_ptr));
} else {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << ", only support these types: Exp, Expm1, Log, Log1p, Erf, Erfc,"
<< " Neg, Reciprocal, Square, Sqrt, Rsqrt, Sin, Cos, Asin, ACos, Atan, Asinh, Acosh, Abs, "
<< "Floor, Rint, Round, Real, Imag, Sign, Conj currently, but got " << unary_op_type_;
std::vector<void *> input_addrs;
std::vector<void *> output_addrs;
std::vector<void *> work_addrs;
for (size_t idx = 0; idx < inputs.size(); ++idx) {
void *cur_ptr = reinterpret_cast<void *>(GetDeviceAddress<T>(inputs, idx));
input_addrs.emplace_back(cur_ptr);
}
for (size_t idx = 0; idx < outputs.size(); ++idx) {
void *cur_ptr = reinterpret_cast<void *>(GetDeviceAddress<T>(outputs, idx));
output_addrs.emplace_back(cur_ptr);
}
int flag = helper_ptr_->Process(input_addrs, output_addrs, work_addrs, stream_ptr);
if (flag != 0) {
return false;
}
return true;
}
// Prepares this kernel mod for the graph node `kernel_node`.
// NOTE(review): this span comes from a rendered diff whose +/- markers were stripped, so statements of
// two implementations appear interleaved (e.g. `kernel_name` is declared twice and the braces do not
// balance as shown). It is not compilable as-is; the comments below describe each group of lines
// separately. Which group is the removed side and which the added side is an inference - confirm
// against the real file.
bool Init(const CNodePtr &kernel_node) override {
std::string kernel_name = common::AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
// Map-based variant (presumably the removed side): look the op type up by node name and report an
// unsupported name through MS_LOG(EXCEPTION).
auto iter = kUnaryOpTypeMap.find(kernel_name);
if (iter == kUnaryOpTypeMap.end()) {
MS_LOG(EXCEPTION) << "For '" << kernel_name << ", only support these types: Exp, Expm1, Log, Log1p, Erf, Erfc,"
<< " Neg, Reciprocal, Square, Sqrt, Rsqrt, Sin, Cos, Asin, ACos, Atan, Asinh, Acosh, Abs, "
<< "Floor, Rint, Round, Real, Imag, Sign, Conj currently, but got " << kernel_name;
}
unary_op_type_ = iter->second;
// The node must have exactly one input tensor and exactly one output tensor.
size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of inputs should be 1, but got " << input_num;
}
size_t output_num = common::AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "For '" << kernel_name << "', the number of outputs should be 1, but got " << output_num;
}
// Helper-based variant (presumably the added side): build the typed helper for this kernel name and
// reset its state before shapes are handed to it.
std::string kernel_name = common::AnfAlgo::GetCNodeName(kernel_node);
helper_ptr_ = std::make_unique<cukernel::UnaryHelperGpuKernel<T>>(kernel_name);
helper_ptr_->ResetResource();
std::vector<std::vector<size_t>> input_shapes;
std::vector<std::vector<size_t>> output_shapes;
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
auto output_shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0);
// When CHECK_SHAPE_NULL flags the input shape, Init returns true early. One variant calls
// InitSizeLists(); the other records a single zero-sized input and output entry.
is_null_input_ = CHECK_SHAPE_NULL(input_shape, kernel_name, "input");
if (is_null_input_) {
InitSizeLists();
input_size_list_.emplace_back(0);
output_size_list_.emplace_back(0);
return true;
}
// Map-based variant: multiply input_size_ by every dimension of the input shape.
for (size_t i = 0; i < input_shape.size(); i++) {
input_size_ *= input_shape[i];
// Helper-based variant: hand the input/output shapes to the helper's CalMemSize; a non-zero result
// makes Init return false.
input_shapes.emplace_back(input_shape);
output_shapes.emplace_back(output_shape);
int flag = helper_ptr_->CalMemSize(input_shapes, output_shapes);
if (flag != 0) {
return false;
}
output_size_ = input_size_;
InitSizeLists();
return true;
}
void ResetResource() noexcept override {
unary_op_type_ = UNARY_OP_INVALID_TYPE;
input_size_ = sizeof(T);
output_size_ = sizeof(T);
workspace_size_ = 0;
is_null_input_ = false;
input_size_list_.clear();
output_size_list_.clear();
workspace_size_list_.clear();
@ -157,15 +90,13 @@ class UnaryOpGpuKernelMod : public NativeGpuKernelMod {
protected:
// Populates input_size_list_, output_size_list_ and workspace_size_list_ for this kernel.
// NOTE(review): rendered-diff span with the +/- markers stripped. The two push_back lines (sizes taken
// from input_size_/output_size_) and the three assignments from helper_ptr_ look like two alternative
// bodies shown together - presumably the removed and the added implementation; confirm against the
// real file.
void InitSizeLists() override {
input_size_list_.push_back(input_size_);
output_size_list_.push_back(output_size_);
// Helper-based variant: copy all three size lists from the helper object.
input_size_list_ = helper_ptr_->GetInputSizeList();
output_size_list_ = helper_ptr_->GetOutputSizeList();
workspace_size_list_ = helper_ptr_->GetWorkSizeList();
}
private:
UnaryOptype unary_op_type_;
size_t input_size_;
size_t output_size_;
size_t workspace_size_;
std::unique_ptr<cukernel::UnaryHelperGpuKernel<T>> helper_ptr_ = nullptr;
bool is_null_input_;
};
} // namespace kernel