forked from mindspore-Ecosystem/mindspore
!31281 gpu dynamic shape
Merge pull request !31281 from changzherui/gpu_dynamic_shape
This commit is contained in:
commit
c1a9e313b0
|
@ -792,9 +792,19 @@ const std::set<std::string> DynamicShapeConstInputToAttrCPU = {
|
|||
kCastOpName, kExpandDimsOpName, kEmbeddingLookupOpName, kReduceMinOpName, kReduceMeanOpName,
|
||||
kReduceMaxOpName, kReduceAllOpName, kReduceAnyOpName, kConcatOpName, kReduceSumOpName};
|
||||
|
||||
const std::set<std::string> DynamicShapeConstInputToAttrGPU = {
|
||||
kCastOpName, kExpandDimsOpName, kReshapeOpName, kEmbeddingLookupOpName, kTransposeOpName, kReduceSumOpName,
|
||||
kReduceMinOpName, kReduceMeanOpName, kReduceMaxOpName, kReduceAllOpName, kReduceAnyOpName, kConcatOpName};
|
||||
const std::set<std::string> DynamicShapeConstInputToAttrGPU = {kCastOpName,
|
||||
kExpandDimsOpName,
|
||||
kReshapeOpName,
|
||||
kEmbeddingLookupOpName,
|
||||
kTransposeOpName,
|
||||
kReduceSumOpName,
|
||||
kReduceMinOpName,
|
||||
kReduceMeanOpName,
|
||||
kReduceMaxOpName,
|
||||
kReduceAllOpName,
|
||||
kReduceAnyOpName,
|
||||
kConcatOpName,
|
||||
kConv2DBackpropFilterOpName};
|
||||
|
||||
// The map between kernel's output and input ref relationship.
|
||||
// Key is the output index while the value is input index which will be used as the reference of output.
|
||||
|
|
|
@ -42,5 +42,104 @@ MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeUIn
|
|||
StridedSliceGradGpuKernelMod, uchar)
|
||||
MS_REG_GPU_KERNEL_ONE(StridedSliceGrad, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool),
|
||||
StridedSliceGradGpuKernelMod, bool)
|
||||
MS_REG_GPU_KERNEL_TWO(StridedSliceGrad,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeFloat64),
|
||||
StridedSliceGradGpuKernelMod, double, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(StridedSliceGrad,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeFloat32),
|
||||
StridedSliceGradGpuKernelMod, float, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(StridedSliceGrad,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeInt64),
|
||||
StridedSliceGradGpuKernelMod, int64_t, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(StridedSliceGrad,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeInt32),
|
||||
StridedSliceGradGpuKernelMod, int, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(StridedSliceGrad,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt16)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeInt16),
|
||||
StridedSliceGradGpuKernelMod, short, int64_t) // NOLINT
|
||||
MS_REG_GPU_KERNEL_TWO(StridedSliceGrad,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt8)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeInt8),
|
||||
StridedSliceGradGpuKernelMod, int8_t, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(StridedSliceGrad,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeUInt64),
|
||||
StridedSliceGradGpuKernelMod, uint64_t, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(StridedSliceGrad,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeUInt32),
|
||||
StridedSliceGradGpuKernelMod, uint32_t, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(StridedSliceGrad,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt16)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeUInt16),
|
||||
StridedSliceGradGpuKernelMod, uint16_t, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(StridedSliceGrad,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeUInt8),
|
||||
StridedSliceGradGpuKernelMod, uchar, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(StridedSliceGrad,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeBool)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeBool),
|
||||
StridedSliceGradGpuKernelMod, bool, int64_t)
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -27,7 +27,8 @@
|
|||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
template <typename T>
|
||||
constexpr int DynamicInputNum = 5;
|
||||
template <typename T, typename S = int64_t>
|
||||
class StridedSliceGradGpuKernelMod : public NativeGpuKernelMod, public StridedSliceGpuCommon {
|
||||
public:
|
||||
StridedSliceGradGpuKernelMod() = default;
|
||||
|
@ -49,8 +50,19 @@ class StridedSliceGradGpuKernelMod : public NativeGpuKernelMod, public StridedSl
|
|||
bool Init(const CNodePtr &kernel_node) override {
|
||||
auto kernel_name = common::AnfAlgo::GetCNodeName(kernel_node);
|
||||
kernel_node_ = kernel_node;
|
||||
std::vector<int64_t> shapex = GetAttr<std::vector<int64_t>>(kernel_node, "shapex");
|
||||
for (auto x : shapex) {
|
||||
size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num == DynamicInputNum) {
|
||||
is_dynamic_attr_ = true;
|
||||
}
|
||||
if (is_dynamic_attr_) {
|
||||
GetDynamicAttrIntValue(kernel_node, kShapexIndex_, &shapex_);
|
||||
GetDynamicAttrIntValue(kernel_node, kBeginIndex_, &begin_);
|
||||
GetDynamicAttrIntValue(kernel_node, kEndIndex_, &end_);
|
||||
GetDynamicAttrIntValue(kernel_node, kStrideIndex_, &strides_);
|
||||
} else {
|
||||
shapex_ = GetAttr<std::vector<int64_t>>(kernel_node, "shapex");
|
||||
}
|
||||
for (auto x : shapex_) {
|
||||
input_shape_.push_back(static_cast<size_t>(x));
|
||||
}
|
||||
if (input_shape_.size() > MAX_DIMS) {
|
||||
|
@ -58,10 +70,19 @@ class StridedSliceGradGpuKernelMod : public NativeGpuKernelMod, public StridedSl
|
|||
<< ", but got " << input_shape_.size();
|
||||
}
|
||||
|
||||
CollectInfo(kernel_node);
|
||||
CollectInfo(kernel_node, is_dynamic_attr_);
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
void ResetResource() noexcept override {
|
||||
ResetSizeLists();
|
||||
begin_.clear();
|
||||
end_.clear();
|
||||
strides_.clear();
|
||||
input_shape_.clear();
|
||||
output_shape_.clear();
|
||||
is_dynamic_attr_ = false;
|
||||
}
|
||||
|
||||
protected:
|
||||
void InitSizeLists() override {
|
||||
|
@ -77,6 +98,14 @@ class StridedSliceGradGpuKernelMod : public NativeGpuKernelMod, public StridedSl
|
|||
}
|
||||
output_size_list_.push_back(size1);
|
||||
}
|
||||
bool is_null_input_{false};
|
||||
bool is_dynamic_attr_{false};
|
||||
bool get_dynamic_attr_value_{false};
|
||||
static constexpr size_t kShapexIndex_{1};
|
||||
static constexpr size_t kBeginIndex_{2};
|
||||
static constexpr size_t kEndIndex_{3};
|
||||
static constexpr size_t kStrideIndex_{4};
|
||||
std::vector<int64_t> shapex_;
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -21,7 +21,7 @@ namespace kernel {
|
|||
MS_REG_GPU_KERNEL_TWO(
|
||||
UnsortedSegmentSum,
|
||||
KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat64),
|
||||
UnsortedSegmentSumGpuKernelMod, double, int)
|
||||
UnsortedSegmentSumGpuKernelMod, double, int32_t)
|
||||
|
||||
MS_REG_GPU_KERNEL_TWO(
|
||||
UnsortedSegmentSum,
|
||||
|
@ -31,7 +31,7 @@ MS_REG_GPU_KERNEL_TWO(
|
|||
MS_REG_GPU_KERNEL_TWO(
|
||||
UnsortedSegmentSum,
|
||||
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32),
|
||||
UnsortedSegmentSumGpuKernelMod, float, int)
|
||||
UnsortedSegmentSumGpuKernelMod, float, int32_t)
|
||||
|
||||
MS_REG_GPU_KERNEL_TWO(
|
||||
UnsortedSegmentSum,
|
||||
|
@ -41,7 +41,7 @@ MS_REG_GPU_KERNEL_TWO(
|
|||
MS_REG_GPU_KERNEL_TWO(
|
||||
UnsortedSegmentSum,
|
||||
KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat16),
|
||||
UnsortedSegmentSumGpuKernelMod, half, int)
|
||||
UnsortedSegmentSumGpuKernelMod, half, int32_t)
|
||||
|
||||
MS_REG_GPU_KERNEL_TWO(
|
||||
UnsortedSegmentSum,
|
||||
|
@ -51,29 +51,78 @@ MS_REG_GPU_KERNEL_TWO(
|
|||
MS_REG_GPU_KERNEL_TWO(
|
||||
UnsortedSegmentSum,
|
||||
KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
|
||||
UnsortedSegmentSumGpuKernelMod, int, int)
|
||||
UnsortedSegmentSumGpuKernelMod, int32_t, int32_t)
|
||||
|
||||
MS_REG_GPU_KERNEL_TWO(
|
||||
UnsortedSegmentSum,
|
||||
KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt32),
|
||||
UnsortedSegmentSumGpuKernelMod, int, int64_t)
|
||||
|
||||
// Re-registration with 3 inputs - dynamic shape mode - sets of Int64/Int32 num segments types
|
||||
// Re-registration with 3 inputs - dynamic shape mode
|
||||
// Int32
|
||||
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat64)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeFloat64),
|
||||
UnsortedSegmentSumGpuKernelMod, double, int32_t)
|
||||
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeFloat64),
|
||||
UnsortedSegmentSumGpuKernelMod, double, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeFloat32),
|
||||
UnsortedSegmentSumGpuKernelMod, float, int32_t)
|
||||
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeFloat32),
|
||||
UnsortedSegmentSumGpuKernelMod, float, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeFloat16),
|
||||
UnsortedSegmentSumGpuKernelMod, half, int32_t)
|
||||
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeFloat16),
|
||||
UnsortedSegmentSumGpuKernelMod, half, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeInt32),
|
||||
UnsortedSegmentSumGpuKernelMod, int32_t, int32_t)
|
||||
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeInt32),
|
||||
UnsortedSegmentSumGpuKernelMod, int32_t, int64_t)
|
||||
// Int64
|
||||
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat64)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeFloat64),
|
||||
UnsortedSegmentSumGpuKernelMod, double, int)
|
||||
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat64)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeFloat64),
|
||||
UnsortedSegmentSumGpuKernelMod, double, int)
|
||||
|
||||
UnsortedSegmentSumGpuKernelMod, double, int32_t)
|
||||
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat64)
|
||||
|
@ -81,29 +130,13 @@ MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
|||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeFloat64),
|
||||
UnsortedSegmentSumGpuKernelMod, double, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeFloat64),
|
||||
UnsortedSegmentSumGpuKernelMod, double, int64_t)
|
||||
|
||||
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeFloat32),
|
||||
UnsortedSegmentSumGpuKernelMod, float, int)
|
||||
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeFloat32),
|
||||
UnsortedSegmentSumGpuKernelMod, float, int)
|
||||
|
||||
UnsortedSegmentSumGpuKernelMod, float, int32_t)
|
||||
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
|
@ -111,29 +144,13 @@ MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
|||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeFloat32),
|
||||
UnsortedSegmentSumGpuKernelMod, float, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeFloat32),
|
||||
UnsortedSegmentSumGpuKernelMod, float, int64_t)
|
||||
|
||||
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeFloat16),
|
||||
UnsortedSegmentSumGpuKernelMod, half, int)
|
||||
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeFloat16),
|
||||
UnsortedSegmentSumGpuKernelMod, half, int)
|
||||
|
||||
UnsortedSegmentSumGpuKernelMod, half, int32_t)
|
||||
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
|
@ -141,42 +158,19 @@ MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
|||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeFloat16),
|
||||
UnsortedSegmentSumGpuKernelMod, half, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeFloat16),
|
||||
UnsortedSegmentSumGpuKernelMod, half, int64_t)
|
||||
|
||||
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeInt32),
|
||||
UnsortedSegmentSumGpuKernelMod, int, int)
|
||||
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeInt32),
|
||||
UnsortedSegmentSumGpuKernelMod, int, int)
|
||||
|
||||
UnsortedSegmentSumGpuKernelMod, int32_t, int32_t)
|
||||
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeInt32),
|
||||
UnsortedSegmentSumGpuKernelMod, int, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(UnsortedSegmentSum,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeInt32),
|
||||
UnsortedSegmentSumGpuKernelMod, int, int64_t)
|
||||
UnsortedSegmentSumGpuKernelMod, int32_t, int64_t)
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -26,5 +26,19 @@ MS_REG_GPU_KERNEL_ONE(
|
|||
Conv2D,
|
||||
KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
|
||||
Conv2dFwdGpuKernelMod, half)
|
||||
MS_REG_GPU_KERNEL_ONE(Conv2D,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeFloat32),
|
||||
Conv2dFwdGpuKernelMod, float)
|
||||
MS_REG_GPU_KERNEL_ONE(Conv2D,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeFloat16),
|
||||
Conv2dFwdGpuKernelMod, half)
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -228,6 +228,41 @@ class ConvGradFilterBkwGpuKernelMod : public NativeGpuKernelMod {
|
|||
"cudnnDestroyTensorDescriptor failed");
|
||||
}
|
||||
|
||||
void ResetResource() noexcept override {
|
||||
cudnn_handle_ = nullptr;
|
||||
dw_desc_ = nullptr;
|
||||
conv_desc_ = nullptr;
|
||||
dy_desc_ = nullptr;
|
||||
x_desc_ = nullptr;
|
||||
padded_descriptor_ = nullptr;
|
||||
algo_ = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0;
|
||||
pad_mode_ = "";
|
||||
data_format_ = kOpFormat_NCHW;
|
||||
format_attr_ = kOpFormat_NCHW;
|
||||
cudnn_data_type_ = CUDNN_DATA_FLOAT;
|
||||
compute_format_ = CUDNN_TENSOR_NCHW;
|
||||
old_height_ = 0;
|
||||
old_width_ = 0;
|
||||
pad_height_ = 0;
|
||||
pad_width_ = 0;
|
||||
pad_top_ = 0;
|
||||
pad_left_ = 0;
|
||||
n_ = 0;
|
||||
c_ = 0;
|
||||
group_ = 1;
|
||||
is_null_input_ = false;
|
||||
kernel_name_ = "Conv2dGradFilter";
|
||||
input_size_ = 0;
|
||||
dy_size_ = 0;
|
||||
output_size_ = 0;
|
||||
padded_size_ = 0;
|
||||
workspace_size_ = 0;
|
||||
use_pad_ = 0;
|
||||
input_size_list_.clear();
|
||||
output_size_list_.clear();
|
||||
workspace_size_list_.clear();
|
||||
}
|
||||
|
||||
protected:
|
||||
void InitResource() override {
|
||||
cudnn_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCudnnHandle();
|
||||
|
|
|
@ -26,5 +26,19 @@ MS_REG_GPU_KERNEL_ONE(
|
|||
Conv2DBackpropInput,
|
||||
KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
|
||||
ConvGradInputBkwGpuKernelMod, half)
|
||||
MS_REG_GPU_KERNEL_TWO(Conv2DBackpropInput,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeFloat32),
|
||||
ConvGradInputBkwGpuKernelMod, float, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(Conv2DBackpropInput,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddOutputAttr(kNumberTypeFloat16),
|
||||
ConvGradInputBkwGpuKernelMod, half, int64_t)
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -47,8 +47,10 @@ constexpr size_t kWidth2DStrideIndex = 1;
|
|||
constexpr size_t k2DDilationSize = 4;
|
||||
constexpr size_t kHeight2DDilationIndex = 2;
|
||||
constexpr size_t kWidth2DDilationIndex = 3;
|
||||
constexpr auto StaticInput = 2;
|
||||
constexpr auto DynamicInput = 3;
|
||||
|
||||
template <typename T>
|
||||
template <typename T, typename S = int64_t>
|
||||
class ConvGradInputBkwGpuKernelMod : public NativeGpuKernelMod {
|
||||
public:
|
||||
ConvGradInputBkwGpuKernelMod()
|
||||
|
@ -129,21 +131,21 @@ class ConvGradInputBkwGpuKernelMod : public NativeGpuKernelMod {
|
|||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
|
||||
kernel_node_ = kernel_node;
|
||||
InitResource();
|
||||
(void)CheckParam(kernel_node);
|
||||
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
|
||||
data_format_ = AnfAlgo::GetInputFormat(kernel_node, 0);
|
||||
auto format_attr = GetAttr<std::string>(kernel_node, "format");
|
||||
if (format_attr == kOpFormat_NHWC) {
|
||||
data_format_ = kOpFormat_NHWC;
|
||||
if (is_dynamic_attr_ && !get_dynamic_attr_value_) {
|
||||
return true;
|
||||
}
|
||||
|
||||
auto dy_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
|
||||
auto filter_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
|
||||
if (CheckNull(dy_shape, filter_shape)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
auto format_attr = GetAttr<std::string>(kernel_node, "format");
|
||||
if (format_attr == kOpFormat_NHWC) {
|
||||
data_format_ = kOpFormat_NHWC;
|
||||
}
|
||||
std::vector<size_t> input_shape;
|
||||
GetInputShape(kernel_node, &input_shape);
|
||||
if (data_format_ == kOpFormat_NHWC) {
|
||||
|
@ -155,7 +157,6 @@ class ConvGradInputBkwGpuKernelMod : public NativeGpuKernelMod {
|
|||
CheckTensorSize({input_shape, dy_shape, filter_shape});
|
||||
SetNCHW(input_shape, &n_, &c_, &old_height_, &old_width_, data_format_);
|
||||
Set4DDesc(dy_shape, input_shape, filter_shape);
|
||||
|
||||
group_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "group"));
|
||||
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnSetConvolutionGroupCount(conv_desc_, group_),
|
||||
"cudnnSetConvGroupCount failed");
|
||||
|
@ -243,6 +244,44 @@ class ConvGradInputBkwGpuKernelMod : public NativeGpuKernelMod {
|
|||
"cudnnDestroyTensorDescriptor failed");
|
||||
}
|
||||
|
||||
void ResetResource() noexcept override {
|
||||
MS_LOG(ERROR) << "czrrr conv2d_grad_input ResetResource";
|
||||
cudnn_handle_ = nullptr;
|
||||
w_desc_ = nullptr;
|
||||
conv_desc_ = nullptr;
|
||||
dy_desc_ = nullptr;
|
||||
dx_desc_ = nullptr;
|
||||
padded_descriptor_ = nullptr;
|
||||
algo_ = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;
|
||||
pad_mode_ = "";
|
||||
data_format_ = kOpFormat_NCHW;
|
||||
cudnn_data_type_ = CUDNN_DATA_FLOAT;
|
||||
compute_format_ = CUDNN_TENSOR_NCHW;
|
||||
old_height_ = 0;
|
||||
old_width_ = 0;
|
||||
pad_height_ = 0;
|
||||
pad_width_ = 0;
|
||||
pad_top_ = 0;
|
||||
pad_left_ = 0;
|
||||
n_ = 0;
|
||||
c_ = 0;
|
||||
stride_.clear();
|
||||
dilation_.clear();
|
||||
group_ = 1;
|
||||
is_null_input_ = false;
|
||||
kernel_name_ = "Conv2dGradInput";
|
||||
dy_size_ = 0;
|
||||
w_size_ = 0;
|
||||
output_size_ = 0;
|
||||
padded_size_ = 0;
|
||||
workspace_size_ = 0;
|
||||
use_pad_ = 0;
|
||||
beta_ = 0;
|
||||
input_size_list_.clear();
|
||||
output_size_list_.clear();
|
||||
workspace_size_list_.clear();
|
||||
}
|
||||
|
||||
protected:
|
||||
void InitResource() override {
|
||||
cudnn_handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCudnnHandle();
|
||||
|
@ -293,14 +332,28 @@ class ConvGradInputBkwGpuKernelMod : public NativeGpuKernelMod {
|
|||
|
||||
private:
|
||||
void CheckParam(const CNodePtr &kernel_node) {
|
||||
kernel_node_ = kernel_node;
|
||||
size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 2) {
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2, but got " << input_num;
|
||||
if (input_num != StaticInput && input_num != DynamicInput) {
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 2 or 3, but got " << input_num;
|
||||
}
|
||||
if (input_num == DynamicInput) {
|
||||
is_dynamic_attr_ = true;
|
||||
}
|
||||
if (GetDynamicAttrIntValue(kernel_node, kShapeIndex_, &input_shape_)) {
|
||||
get_dynamic_attr_value_ = true;
|
||||
}
|
||||
if (is_dynamic_attr_ && !get_dynamic_attr_value_) {
|
||||
input_size_list_.push_back(0);
|
||||
input_size_list_.push_back(0);
|
||||
output_size_list_.push_back(0);
|
||||
}
|
||||
size_t output_num = common::AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs should be 1, but got " << output_num;
|
||||
}
|
||||
cudnn_data_type_ = GetCudnnDataType(TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0)));
|
||||
data_format_ = AnfAlgo::GetInputFormat(kernel_node, 0);
|
||||
}
|
||||
void SelectAlgorithm(cudnnTensorDescriptor_t dx_desc_real) {
|
||||
constexpr int requested_algo_count = 1;
|
||||
|
@ -418,6 +471,10 @@ class ConvGradInputBkwGpuKernelMod : public NativeGpuKernelMod {
|
|||
size_t workspace_size_;
|
||||
bool use_pad_;
|
||||
float beta_;
|
||||
bool is_dynamic_attr_{false};
|
||||
bool get_dynamic_attr_value_{false};
|
||||
std::vector<int64_t> input_shape_;
|
||||
static constexpr size_t kShapeIndex_{2};
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
Loading…
Reference in New Issue