!31281 gpu dynamic shape

Merge pull request !31281 from changzherui/gpu_dynamic_shape
This commit is contained in:
i-robot 2022-03-23 06:12:50 +00:00 committed by Gitee
commit c1a9e313b0
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
8 changed files with 344 additions and 92 deletions

View File

@ -792,9 +792,19 @@ const std::set<std::string> DynamicShapeConstInputToAttrCPU = {
kCastOpName, kExpandDimsOpName, kEmbeddingLookupOpName, kReduceMinOpName, kReduceMeanOpName,
kReduceMaxOpName, kReduceAllOpName, kReduceAnyOpName, kConcatOpName, kReduceSumOpName};
const std::set<std::string> DynamicShapeConstInputToAttrGPU = {
kCastOpName, kExpandDimsOpName, kReshapeOpName, kEmbeddingLookupOpName, kTransposeOpName, kReduceSumOpName,
kReduceMinOpName, kReduceMeanOpName, kReduceMaxOpName, kReduceAllOpName, kReduceAnyOpName, kConcatOpName};
const std::set<std::string> DynamicShapeConstInputToAttrGPU = {kCastOpName,
kExpandDimsOpName,
kReshapeOpName,
kEmbeddingLookupOpName,
kTransposeOpName,
kReduceSumOpName,
kReduceMinOpName,
kReduceMeanOpName,
kReduceMaxOpName,
kReduceAllOpName,
kReduceAnyOpName,
kConcatOpName,
kConv2DBackpropFilterOpName};
// The map between kernel's output and input ref relationship.
// Key is the output index while the value is input index which will be used as the reference of output.

View File

@ -42,5 +42,104 @@ MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeUIn
StridedSliceGradGpuKernelMod, uchar)
MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool),
StridedSliceGradGpuKernelMod, bool)
MS_REG_GPU_KERNEL_TWO(StridedSliceGrad,
KernelAttr()
.AddInputAttr(kNumberTypeFloat64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeFloat64),
StridedSliceGradGpuKernelMod, double, int64_t)
MS_REG_GPU_KERNEL_TWO(StridedSliceGrad,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeFloat32),
StridedSliceGradGpuKernelMod, float, int64_t)
MS_REG_GPU_KERNEL_TWO(StridedSliceGrad,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeInt64),
StridedSliceGradGpuKernelMod, int64_t, int64_t)
MS_REG_GPU_KERNEL_TWO(StridedSliceGrad,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeInt32),
StridedSliceGradGpuKernelMod, int, int64_t)
MS_REG_GPU_KERNEL_TWO(StridedSliceGrad,
KernelAttr()
.AddInputAttr(kNumberTypeInt16)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeInt16),
StridedSliceGradGpuKernelMod, short, int64_t) // NOLINT
MS_REG_GPU_KERNEL_TWO(StridedSliceGrad,
KernelAttr()
.AddInputAttr(kNumberTypeInt8)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeInt8),
StridedSliceGradGpuKernelMod, int8_t, int64_t)
MS_REG_GPU_KERNEL_TWO(StridedSliceGrad,
KernelAttr()
.AddInputAttr(kNumberTypeUInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeUInt64),
StridedSliceGradGpuKernelMod, uint64_t, int64_t)
MS_REG_GPU_KERNEL_TWO(StridedSliceGrad,
KernelAttr()
.AddInputAttr(kNumberTypeUInt32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeUInt32),
StridedSliceGradGpuKernelMod, uint32_t, int64_t)
MS_REG_GPU_KERNEL_TWO(StridedSliceGrad,
KernelAttr()
.AddInputAttr(kNumberTypeUInt16)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeUInt16),
StridedSliceGradGpuKernelMod, uint16_t, int64_t)
MS_REG_GPU_KERNEL_TWO(StridedSliceGrad,
KernelAttr()
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeUInt8),
StridedSliceGradGpuKernelMod, uchar, int64_t)
MS_REG_GPU_KERNEL_TWO(StridedSliceGrad,
KernelAttr()
.AddInputAttr(kNumberTypeBool)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeBool),
StridedSliceGradGpuKernelMod, bool, int64_t)
} // namespace kernel
} // namespace mindspore

View File

@ -27,7 +27,8 @@
namespace mindspore {
namespace kernel {
template <typename T>
constexpr int DynamicInputNum = 5;
template <typename T, typename S = int64_t>
class StridedSliceGradGpuKernelMod : public NativeGpuKernelMod, public StridedSliceGpuCommon {
public:
StridedSliceGradGpuKernelMod() = default;
@ -49,8 +50,19 @@ class StridedSliceGradGpuKernelMod : public NativeGpuKernelMod, public StridedSl
bool Init(const CNodePtr &kernel_node) override {
auto kernel_name = common::AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
std::vector<int64_t> shapex = GetAttr<std::vector<int64_t>>(kernel_node, "shapex");
for (auto x : shapex) {
size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num == DynamicInputNum) {
is_dynamic_attr_ = true;
}
if (is_dynamic_attr_) {
GetDynamicAttrIntValue(kernel_node, kShapexIndex_, &shapex_);
GetDynamicAttrIntValue(kernel_node, kBeginIndex_, &begin_);
GetDynamicAttrIntValue(kernel_node, kEndIndex_, &end_);
GetDynamicAttrIntValue(kernel_node, kStrideIndex_, &strides_);
} else {
shapex_ = GetAttr<std::vector<int64_t>>(kernel_node, "shapex");
}
for (auto x : shapex_) {
input_shape_.push_back(static_cast<size_t>(x));
}
if (input_shape_.size() > MAX_DIMS) {
@ -58,10 +70,19 @@ class StridedSliceGradGpuKernelMod : public NativeGpuKernelMod, public StridedSl
<< ", but got " << input_shape_.size();
}
CollectInfo(kernel_node);
CollectInfo(kernel_node, is_dynamic_attr_);
InitSizeLists();
return true;
}
void ResetResource() noexcept override {
ResetSizeLists();
begin_.clear();
end_.clear();
strides_.clear();
input_shape_.clear();
output_shape_.clear();
is_dynamic_attr_ = false;
}
protected:
void InitSizeLists() override {
@ -77,6 +98,14 @@ class StridedSliceGradGpuKernelMod : public NativeGpuKernelMod, public StridedSl
}
output_size_list_.push_back(size1);
}
bool is_null_input_{false};
bool is_dynamic_attr_{false};
bool get_dynamic_attr_value_{false};
static constexpr size_t kShapexIndex_{1};
static constexpr size_t kBeginIndex_{2};
static constexpr size_t kEndIndex_{3};
static constexpr size_t kStrideIndex_{4};
std::vector<int64_t> shapex_;
};
} // namespace kernel
} // namespace mindspore

View File

@ -21,7 +21,7 @@ namespace kernel {
MS_REG_GPU_KERNEL_TWO(
UnsortedSegmentSum,
KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat64),
UnsortedSegmentSumGpuKernelMod, double, int)
UnsortedSegmentSumGpuKernelMod, double, int32_t)
MS_REG_GPU_KERNEL_TWO(
UnsortedSegmentSum,
@ -31,7 +31,7 @@ MS_REG_GPU_KERNEL_TWO(
MS_REG_GPU_KERNEL_TWO(
UnsortedSegmentSum,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32),
UnsortedSegmentSumGpuKernelMod, float, int)
UnsortedSegmentSumGpuKernelMod, float, int32_t)
MS_REG_GPU_KERNEL_TWO(
UnsortedSegmentSum,
@ -41,7 +41,7 @@ MS_REG_GPU_KERNEL_TWO(
MS_REG_GPU_KERNEL_TWO(
UnsortedSegmentSum,
KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat16),
UnsortedSegmentSumGpuKernelMod, half, int)
UnsortedSegmentSumGpuKernelMod, half, int32_t)
MS_REG_GPU_KERNEL_TWO(
UnsortedSegmentSum,
@ -51,29 +51,78 @@ MS_REG_GPU_KERNEL_TWO(
MS_REG_GPU_KERNEL_TWO(
UnsortedSegmentSum,
KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
UnsortedSegmentSumGpuKernelMod, int, int)
UnsortedSegmentSumGpuKernelMod, int32_t, int32_t)
MS_REG_GPU_KERNEL_TWO(
UnsortedSegmentSum,
KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt32),
UnsortedSegmentSumGpuKernelMod, int, int64_t)
// Re-registration with 3 inputs - dynamic shape mode - sets of Int64/Int32 num segments types
// Re-registration with 3 inputs - dynamic shape mode
// Int32
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
KernelAttr()
.AddInputAttr(kNumberTypeFloat64)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeFloat64),
UnsortedSegmentSumGpuKernelMod, double, int32_t)
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
KernelAttr()
.AddInputAttr(kNumberTypeFloat64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeFloat64),
UnsortedSegmentSumGpuKernelMod, double, int64_t)
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeFloat32),
UnsortedSegmentSumGpuKernelMod, float, int32_t)
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeFloat32),
UnsortedSegmentSumGpuKernelMod, float, int64_t)
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
KernelAttr()
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeFloat16),
UnsortedSegmentSumGpuKernelMod, half, int32_t)
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
KernelAttr()
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeFloat16),
UnsortedSegmentSumGpuKernelMod, half, int64_t)
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeInt32),
UnsortedSegmentSumGpuKernelMod, int32_t, int32_t)
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeInt32),
UnsortedSegmentSumGpuKernelMod, int32_t, int64_t)
// Int64
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
KernelAttr()
.AddInputAttr(kNumberTypeFloat64)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeFloat64),
UnsortedSegmentSumGpuKernelMod, double, int)
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
KernelAttr()
.AddInputAttr(kNumberTypeFloat64)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeFloat64),
UnsortedSegmentSumGpuKernelMod, double, int)
UnsortedSegmentSumGpuKernelMod, double, int32_t)
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
KernelAttr()
.AddInputAttr(kNumberTypeFloat64)
@ -81,29 +130,13 @@ MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeFloat64),
UnsortedSegmentSumGpuKernelMod, double, int64_t)
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
KernelAttr()
.AddInputAttr(kNumberTypeFloat64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeFloat64),
UnsortedSegmentSumGpuKernelMod, double, int64_t)
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeFloat32),
UnsortedSegmentSumGpuKernelMod, float, int)
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeFloat32),
UnsortedSegmentSumGpuKernelMod, float, int)
UnsortedSegmentSumGpuKernelMod, float, int32_t)
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
@ -111,29 +144,13 @@ MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeFloat32),
UnsortedSegmentSumGpuKernelMod, float, int64_t)
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeFloat32),
UnsortedSegmentSumGpuKernelMod, float, int64_t)
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
KernelAttr()
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeFloat16),
UnsortedSegmentSumGpuKernelMod, half, int)
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
KernelAttr()
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeFloat16),
UnsortedSegmentSumGpuKernelMod, half, int)
UnsortedSegmentSumGpuKernelMod, half, int32_t)
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
KernelAttr()
.AddInputAttr(kNumberTypeFloat16)
@ -141,42 +158,19 @@ MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeFloat16),
UnsortedSegmentSumGpuKernelMod, half, int64_t)
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
KernelAttr()
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeFloat16),
UnsortedSegmentSumGpuKernelMod, half, int64_t)
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeInt32),
UnsortedSegmentSumGpuKernelMod, int, int)
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeInt32),
UnsortedSegmentSumGpuKernelMod, int, int)
UnsortedSegmentSumGpuKernelMod, int32_t, int32_t)
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeInt32),
UnsortedSegmentSumGpuKernelMod, int, int64_t)
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeInt32),
UnsortedSegmentSumGpuKernelMod, int, int64_t)
UnsortedSegmentSumGpuKernelMod, int32_t, int64_t)
} // namespace kernel
} // namespace mindspore

View File

@ -26,5 +26,19 @@ MS_REG_GPU_KERNEL_ONE(
Conv2D,
KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
Conv2dFwdGpuKernelMod, half)
MS_REG_GPU_KERNEL_ONE(Conv2D,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeFloat32),
Conv2dFwdGpuKernelMod, float)
MS_REG_GPU_KERNEL_ONE(Conv2D,
KernelAttr()
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeFloat16),
Conv2dFwdGpuKernelMod, half)
} // namespace kernel
} // namespace mindspore

View File

@ -228,6 +228,41 @@ class ConvGradFilterBkwGpuKernelMod : public NativeGpuKernelMod {
"cudnnDestroyTensorDescriptor failed");
}
void ResetResource() noexcept override {
cudnn_handle_ = nullptr;
dw_desc_ = nullptr;
conv_desc_ = nullptr;
dy_desc_ = nullptr;
x_desc_ = nullptr;
padded_descriptor_ = nullptr;
algo_ = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0;
pad_mode_ = "";
data_format_ = kOpFormat_NCHW;
format_attr_ = kOpFormat_NCHW;
cudnn_data_type_ = CUDNN_DATA_FLOAT;
compute_format_ = CUDNN_TENSOR_NCHW;
old_height_ = 0;
old_width_ = 0;
pad_height_ = 0;
pad_width_ = 0;
pad_top_ = 0;
pad_left_ = 0;
n_ = 0;
c_ = 0;
group_ = 1;
is_null_input_ = false;
kernel_name_ = "Conv2dGradFilter";
input_size_ = 0;
dy_size_ = 0;
output_size_ = 0;
padded_size_ = 0;
workspace_size_ = 0;
use_pad_ = 0;
input_size_list_.clear();
output_size_list_.clear();
workspace_size_list_.clear();
}
protected:
void InitResource() override {
cudnn_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCudnnHandle();

View File

@ -26,5 +26,19 @@ MS_REG_GPU_KERNEL_ONE(
Conv2DBackpropInput,
KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
ConvGradInputBkwGpuKernelMod, half)
MS_REG_GPU_KERNEL_TWO(Conv2DBackpropInput,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeFloat32),
ConvGradInputBkwGpuKernelMod, float, int64_t)
MS_REG_GPU_KERNEL_TWO(Conv2DBackpropInput,
KernelAttr()
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeFloat16),
ConvGradInputBkwGpuKernelMod, half, int64_t)
} // namespace kernel
} // namespace mindspore

View File

@ -47,8 +47,10 @@ constexpr size_t kWidth2DStrideIndex = 1;
constexpr size_t k2DDilationSize = 4;
constexpr size_t kHeight2DDilationIndex = 2;
constexpr size_t kWidth2DDilationIndex = 3;
constexpr auto StaticInput = 2;
constexpr auto DynamicInput = 3;
template <typename T>
template <typename T, typename S = int64_t>
class ConvGradInputBkwGpuKernelMod : public NativeGpuKernelMod {
public:
ConvGradInputBkwGpuKernelMod()
@ -129,21 +131,21 @@ class ConvGradInputBkwGpuKernelMod : public NativeGpuKernelMod {
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
InitResource();
(void)CheckParam(kernel_node);
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
data_format_ = AnfAlgo::GetInputFormat(kernel_node, 0);
auto format_attr = GetAttr<std::string>(kernel_node, "format");
if (format_attr == kOpFormat_NHWC) {
data_format_ = kOpFormat_NHWC;
if (is_dynamic_attr_ && !get_dynamic_attr_value_) {
return true;
}
auto dy_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
auto filter_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
if (CheckNull(dy_shape, filter_shape)) {
return true;
}
auto format_attr = GetAttr<std::string>(kernel_node, "format");
if (format_attr == kOpFormat_NHWC) {
data_format_ = kOpFormat_NHWC;
}
std::vector<size_t> input_shape;
GetInputShape(kernel_node, &input_shape);
if (data_format_ == kOpFormat_NHWC) {
@ -155,7 +157,6 @@ class ConvGradInputBkwGpuKernelMod : public NativeGpuKernelMod {
CheckTensorSize({input_shape, dy_shape, filter_shape});
SetNCHW(input_shape, &n_, &c_, &old_height_, &old_width_, data_format_);
Set4DDesc(dy_shape, input_shape, filter_shape);
group_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "group"));
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnSetConvolutionGroupCount(conv_desc_, group_),
"cudnnSetConvGroupCount failed");
@ -243,6 +244,44 @@ class ConvGradInputBkwGpuKernelMod : public NativeGpuKernelMod {
"cudnnDestroyTensorDescriptor failed");
}
void ResetResource() noexcept override {
MS_LOG(ERROR) << "czrrr conv2d_grad_input ResetResource";
cudnn_handle_ = nullptr;
w_desc_ = nullptr;
conv_desc_ = nullptr;
dy_desc_ = nullptr;
dx_desc_ = nullptr;
padded_descriptor_ = nullptr;
algo_ = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;
pad_mode_ = "";
data_format_ = kOpFormat_NCHW;
cudnn_data_type_ = CUDNN_DATA_FLOAT;
compute_format_ = CUDNN_TENSOR_NCHW;
old_height_ = 0;
old_width_ = 0;
pad_height_ = 0;
pad_width_ = 0;
pad_top_ = 0;
pad_left_ = 0;
n_ = 0;
c_ = 0;
stride_.clear();
dilation_.clear();
group_ = 1;
is_null_input_ = false;
kernel_name_ = "Conv2dGradInput";
dy_size_ = 0;
w_size_ = 0;
output_size_ = 0;
padded_size_ = 0;
workspace_size_ = 0;
use_pad_ = 0;
beta_ = 0;
input_size_list_.clear();
output_size_list_.clear();
workspace_size_list_.clear();
}
protected:
void InitResource() override {
cudnn_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCudnnHandle();
@ -293,14 +332,28 @@ class ConvGradInputBkwGpuKernelMod : public NativeGpuKernelMod {
private:
void CheckParam(const CNodePtr &kernel_node) {
kernel_node_ = kernel_node;
size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 2) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num;
if (input_num != StaticInput && input_num != DynamicInput) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2 or 3, but got " << input_num;
}
if (input_num == DynamicInput) {
is_dynamic_attr_ = true;
}
if (GetDynamicAttrIntValue(kernel_node, kShapeIndex_, &input_shape_)) {
get_dynamic_attr_value_ = true;
}
if (is_dynamic_attr_ && !get_dynamic_attr_value_) {
input_size_list_.push_back(0);
input_size_list_.push_back(0);
output_size_list_.push_back(0);
}
size_t output_num = common::AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num;
}
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
data_format_ = AnfAlgo::GetInputFormat(kernel_node, 0);
}
void SelectAlgorithm(cudnnTensorDescriptor_t dx_desc_real) {
constexpr int requested_algo_count = 1;
@ -418,6 +471,10 @@ class ConvGradInputBkwGpuKernelMod : public NativeGpuKernelMod {
size_t workspace_size_;
bool use_pad_;
float beta_;
bool is_dynamic_attr_{false};
bool get_dynamic_attr_value_{false};
std::vector<int64_t> input_shape_;
static constexpr size_t kShapeIndex_{2};
};
} // namespace kernel
} // namespace mindspore