diff --git a/docs/api/api_python/ops/mindspore.ops.PadV3.rst b/docs/api/api_python/ops/mindspore.ops.PadV3.rst
index acf800fd8b2..6c47d0a6cf3 100644
--- a/docs/api/api_python/ops/mindspore.ops.PadV3.rst
+++ b/docs/api/api_python/ops/mindspore.ops.PadV3.rst
@@ -6,7 +6,7 @@ mindspore.ops.PadV3
     根据参数 `mode` 和 `paddings_contiguous` 对输入进行填充。

     参数:
-        - **mode** (str,可选) - 填充模式,支持"constant" 、"reflect" 和 "edge"。默认值:"constant"。
+        - **mode** (str,可选) - 填充模式,支持"constant" 、"reflect"、"edge" 和 "circular"。默认值:"constant"。
         - **paddings_contiguous** (bool,可选) - 是否连续填充。如果为True, `paddings` 格式为[begin0, end0, begin1, end1, ...],如果为False,`paddings` 格式为[begin0, begin1, ..., end1, end2, ...]。默认值:True。

     输入:
@@ -23,11 +23,12 @@ mindspore.ops.PadV3
         - **ValueError** - `mode` 不是string类型或者不在支持的列表里。
         - **ValueError** - `mode` 是"constant"的同时 `paddings` 元素个数不是偶数。
         - **ValueError** - `mode` 是"constant"的同时 `paddings` 元素个数大于输入维度乘以2。
-        - **ValueError** - `mode` 是"edge"或"reflect"的同时 `paddings` 元素个数不是2、4或6。
-        - **ValueError** - `mode` 是"edge"或"reflect", `x` 的维度是3, `paddings` 元素个数不是2。
-        - **ValueError** - `mode` 是"edge"或"reflect", `x` 的维度是4, `paddings` 元素个数不是4。
-        - **ValueError** - `mode` 是"edge"或"reflect"的同时 `x` 的维度小于3。
-        - **ValueError** - `mode` 是"edge"的同时 `x` 的维度大于5。
+        - **ValueError** - `mode` 是"edge"、"reflect"或"circular"的同时 `paddings` 元素个数不是2、4或6。
+        - **ValueError** - `mode` 是"edge"、"reflect"或"circular", `x` 的维度是3, `paddings` 元素个数不是2。
+        - **ValueError** - `mode` 是"edge"、"reflect"或"circular", `x` 的维度是4, `paddings` 元素个数不是4。
+        - **ValueError** - `mode` 是"circular", `x` 的维度是5, `paddings` 元素个数不是6。
+        - **ValueError** - `mode` 是"edge"、"reflect"或"circular"的同时 `x` 的维度小于3。
+        - **ValueError** - `mode` 是"edge"或"circular"的同时 `x` 的维度大于5。
         - **ValueError** - `mode` 是"reflect"的同时 `x` 的维度大于4。
         - **ValueError** - `mode` 是"reflect"的同时填充值大于对应 `x` 的维度。
         - **ValueError** - 填充之后,输出shape数不大于零。
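For non-negative paddings, the new "circular" mode wraps values around from the opposite edge of each padded axis, which is what NumPy's "wrap" mode (and PyTorch's "circular" mode) computes. A quick reference using the 1x3x3 input from the tests at the end of this patch, a sketch for intuition rather than MindSpore code:

```python
import numpy as np

x = np.arange(9, dtype=np.float32).reshape(1, 3, 3)
# pad the last axis by (left=1, right=2); "wrap" is NumPy's circular padding
ref = np.pad(x, ((0, 0), (0, 0), (1, 2)), mode="wrap")
print(ref)
# [[[2. 0. 1. 2. 0. 1.]
#   [5. 3. 4. 5. 3. 4.]
#   [8. 6. 7. 8. 6. 7.]]]
```

Negative `paddings` entries (cropping) are where "circular" needs more care than plain wrapping; the `IndexCalculate` changes below handle that case.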
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/pad_v3_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/pad_v3_cpu_kernel.cc
index 784f8644d07..d19387e46d7 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/pad_v3_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/pad_v3_cpu_kernel.cc
@@ -16,6 +16,7 @@
 #include "plugin/device/cpu/kernel/pad_v3_cpu_kernel.h"
 #include "plugin/device/cpu/hal/device/cpu_device_address.h"
+#include "mindspore/core/ops/op_name.h"
 #include "mindspore/core/ops/pad_v3.h"
 namespace mindspore {
@@ -25,16 +26,13 @@
 constexpr auto kPadV3 = "PadV3";
 constexpr const size_t kConstantInputsNum = 3;
 constexpr const size_t kOtherInputsNum = 2;
 constexpr const size_t kOutputsNum = 1;
-constexpr int64_t kInput3D = 3;
-constexpr int64_t kInput4D = 4;
-constexpr int64_t kInput5D = 5;
 constexpr int64_t kPadding1D = 2;
 constexpr int64_t kPadding2D = 4;
 constexpr int64_t kPadding3D = 6;
 constexpr int64_t kNum2 = 2;
 constexpr int64_t kNum3 = 3;
 constexpr int64_t kNum4 = 4;
-const std::vector<std::string> mode_list = {"constant", "reflect", "edge"};
+const std::vector<std::string> mode_list = {ops::kConstant, ops::kReflect, ops::kEdge, ops::kCircular};
 }  // namespace

 bool PadV3CpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
@@ -208,11 +206,12 @@ void PadV3CpuKernelMod::OtherModeCompute1D(T *input_ptr, T *output_ptr, int64_t
   int64_t nplane = 0;
   int64_t input_w = input_shape_[kNum2];
   int64_t output_w = output_shape_.end()[-1];
-  int64_t pad_l = paddings_[0];
+  int64_t pad_l = paddings_[kIndex0];
+  int64_t pad_r = paddings_[kIndex1];
   int64_t i_start_x = std::max(int64_t(0), -pad_l);
   int64_t o_start_x = std::max(int64_t(0), pad_l);
   for (int64_t j = 0; j < output_w; ++j) {
-    auto ip_x = IndexCalculate(pad_l, j, input_w, o_start_x, i_start_x);
+    auto ip_x = IndexCalculate(pad_l, pad_r, j, input_w, o_start_x, i_start_x);
     T *dest_p = output_ptr + p * output_w * (nplane + 1) + j;
     T *src_p = input_ptr + p * input_w * (nplane + 1) + ip_x;
     *dest_p = *src_p;
@@ -221,21 +220,23 @@ template <typename T>
 void PadV3CpuKernelMod::OtherModeCompute2D(T *input_ptr, T *output_ptr, int64_t p) const {
-  int64_t pad_l = paddings_[0];
-  int64_t pad_t = paddings_[kNum2];
+  int64_t pad_l = paddings_[kIndex0];
+  int64_t pad_r = paddings_[kIndex1];
+  int64_t pad_t = paddings_[kIndex2];
+  int64_t pad_d = paddings_[kIndex3];
   int64_t nplane = 0;
-  int64_t input_h = input_shape_[kNum2];
-  int64_t input_w = input_shape_[kNum3];
-  int64_t output_h = input_h + pad_t + paddings_[kNum3];
-  int64_t output_w = input_w + pad_l + paddings_[1];
+  int64_t input_h = input_shape_[kIndex2];
+  int64_t input_w = input_shape_[kIndex3];
+  int64_t output_h = input_h + pad_t + paddings_[kIndex3];
+  int64_t output_w = input_w + pad_l + paddings_[kIndex1];
   int64_t i_start_x = std::max(int64_t(0), -pad_l);
   int64_t i_start_y = std::max(int64_t(0), -pad_t);
   int64_t o_start_x = std::max(int64_t(0), pad_l);
   int64_t o_start_y = std::max(int64_t(0), pad_t);
   for (int64_t i = 0; i < output_h; ++i) {
     for (int64_t j = 0; j < output_w; ++j) {
-      auto ip_x = IndexCalculate(pad_l, j, input_w, o_start_x, i_start_x);
-      auto ip_y = IndexCalculate(pad_t, i, input_h, o_start_y, i_start_y);
+      auto ip_x = IndexCalculate(pad_l, pad_r, j, input_w, o_start_x, i_start_x);
+      auto ip_y = IndexCalculate(pad_t, pad_d, i, input_h, o_start_y, i_start_y);
       T *dest_p = output_ptr + p * output_w * output_h * (nplane + 1) + i * output_w + j;
       T *src_p = input_ptr + p * input_w * input_h * (nplane + 1) + ip_y * input_w + ip_x;
       *dest_p = *src_p;
@@ -245,9 +246,12 @@ template <typename T>
 void PadV3CpuKernelMod::OtherModeCompute3D(T *input_ptr, T *output_ptr, int64_t
-  int64_t pad_l = paddings_[0];
-  int64_t pad_t = paddings_[kNum2];
-  int64_t pad_f = paddings_[kNum4];
+  int64_t pad_l = paddings_[kIndex0];
+  int64_t pad_r = paddings_[kIndex1];
+  int64_t pad_t = paddings_[kIndex2];
+  int64_t pad_d = paddings_[kIndex3];
+  int64_t pad_f = paddings_[kIndex4];
+  int64_t pad_b = paddings_[kIndex5];
   int64_t nplane = 0;
   int64_t input_d = input_shape_[kNum2];
   int64_t input_h = input_shape_[kNum3];
@@ -264,9 +268,9 @@
   for (int64_t k = 0; k < output_d; ++k) {
     for (int64_t j = 0; j < output_h; ++j) {
       for (int64_t i = 0; i < output_w; ++i) {
-        auto ip_x = IndexCalculate(pad_l, i, input_w, o_start_x, i_start_x);
-        auto ip_y = IndexCalculate(pad_t, j, input_h, o_start_y, i_start_y);
-        auto ip_z = IndexCalculate(pad_f, k, input_d, o_start_z, i_start_z);
+        auto ip_x = IndexCalculate(pad_l, pad_r, i, input_w, o_start_x, i_start_x);
+        auto ip_y = IndexCalculate(pad_t, pad_d, j, input_h, o_start_y, i_start_y);
+        auto ip_z = IndexCalculate(pad_f, pad_b, k, input_d, o_start_z, i_start_z);
         T *dest_p = output_ptr + p * output_w * output_h * output_d * (nplane + 1) + k * output_w * output_h +
                     j * output_w + i;
         T *src_p =
@@ -277,22 +281,26 @@
   }
 }

-int64_t PadV3CpuKernelMod::IndexCalculate(int64_t pad_value, int64_t now, int64_t input_value, int64_t o_start,
-                                          int64_t i_start) const {
+int64_t PadV3CpuKernelMod::IndexCalculate(int64_t pad_value, int64_t pad_end, int64_t now, int64_t input_value,
+                                          int64_t o_start, int64_t i_start) const {
   int64_t ip = 0;
   if (now < pad_value) {
-    if (mode_ == "reflect") {
+    if (mode_ == ops::kReflect) {
       ip = pad_value + pad_value - now;
-    } else if (mode_ == "edge") {
+    } else if (mode_ == ops::kEdge) {
       ip = pad_value;
+    } else if (mode_ == ops::kCircular) {
+      ip = input_value + now + std::min(int64_t(0), pad_end);
     }
   } else if (now >= pad_value && now < input_value + pad_value) {
     ip = now;
   } else {
-    if (mode_ == "reflect") {
+    if (mode_ == ops::kReflect) {
       ip = (input_value + pad_value - 1) + (input_value + pad_value - 1) - now;
-    } else if (mode_ == "edge") {
+    } else if (mode_ == ops::kEdge) {
      ip = input_value + pad_value - 1;
+    } else if (mode_ == ops::kCircular) {
+      ip = now - input_value - std::min(int64_t(0), pad_value);
    }
  }
  ip = ip - o_start + i_start;
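The circular branch of `IndexCalculate` maps each output coordinate `now` back to the input coordinate it copies from, including the case where one side carries a negative (cropping) pad. A Python rendering of the same per-axis arithmetic (a re-derivation for illustration, not the shipped code):

```python
def circular_index(now, pad_begin, pad_end, size):
    """Input coordinate that output coordinate `now` reads from."""
    if now < pad_begin:                      # left pad: wrap to the tail,
        ip = size + now + min(0, pad_end)    #   shifted if the right side is cropped
    elif now < size + pad_begin:             # body: straight copy
        ip = now
    else:                                    # right pad: wrap to the head,
        ip = now - size - min(0, pad_begin)  #   shifted if the left side is cropped
    return ip - max(0, pad_begin) + max(0, -pad_begin)

# width 3, paddings (1, 2)  -> sources [2, 0, 1, 2, 0, 1]
print([circular_index(i, 1, 2, 3) for i in range(6)])
# width 3, paddings (1, -1) -> sources [1, 0, 1], the width mapping of the 4-D test below
print([circular_index(i, 1, -1, 3) for i in range(3)])
```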
@@ -307,7 +315,7 @@ bool PadV3CpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs, cons
   }
   auto input_ptr = static_cast<T *>(inputs[0]->addr);
   auto output_ptr = static_cast<T *>(outputs[0]->addr);
-  if (mode_ == "constant") {
+  if (mode_ == ops::kConstant) {
     T constant_values = *(static_cast<T *>(inputs[2]->addr));
     for (int64_t i = 0; i < input_dim_ / kNum2; ++i) {
       int64_t u = paddings_[i * kNum2];
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/pad_v3_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/pad_v3_cpu_kernel.h
index 6a603a7d061..950ff8c344d 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/pad_v3_cpu_kernel.h
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/pad_v3_cpu_kernel.h
@@ -77,7 +77,8 @@ class PadV3CpuKernelMod : public NativeCpuKernelMod, public MatchKernelHelper<PadV3CpuKernelMod>
   void OtherModeCompute3D(T *input_ptr, T *output_ptr, int64_t p) const;

-  int64_t IndexCalculate(int64_t pad_value, int64_t now, int64_t input_value, int64_t o_start, int64_t i_start) const;
+  int64_t IndexCalculate(int64_t pad_value, int64_t pad_end, int64_t now, int64_t input_value, int64_t o_start,
+                         int64_t i_start) const;

   bool paddings_contiguous_;
   int64_t parallelSliceNum_{1};
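The grad kernel that follows inverts this map: every element of the incoming gradient is routed back to the input position it was copied from in the forward pass, and contributions landing on the same position accumulate (the `*dest_p += *src_p;` below). A one-axis sketch, checked against `test_padv3grad_circular_3d` at the end of this patch:

```python
import numpy as np

def circular_index(now, pad_begin, pad_end, size):
    # same per-axis map as the forward sketch above
    if now < pad_begin:
        ip = size + now + min(0, pad_end)
    elif now < size + pad_begin:
        ip = now
    else:
        ip = now - size - min(0, pad_begin)
    return ip - max(0, pad_begin) + max(0, -pad_begin)

def circular_pad_grad_1d(dy, pad_begin, pad_end, in_size):
    dx = np.zeros(in_size, dtype=dy.dtype)
    for now, g in enumerate(dy):
        dx[circular_index(now, pad_begin, pad_end, in_size)] += g  # accumulate duplicates
    return dx

# forward: width 2 padded by (1, 1); grad row [1, 2, 3, 4] folds back to [6, 4]
print(circular_pad_grad_1d(np.array([1., 2., 3., 4.]), 1, 1, 2))  # [6. 4.]
```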
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/pad_v3_grad_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/pad_v3_grad_cpu_kernel.cc
index 434529da538..5ce28ee61cd 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/pad_v3_grad_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/pad_v3_grad_cpu_kernel.cc
@@ -16,6 +16,7 @@
 #include "plugin/device/cpu/kernel/pad_v3_grad_cpu_kernel.h"
 #include "plugin/device/cpu/hal/device/cpu_device_address.h"
+#include "mindspore/core/ops/op_name.h"
 #include "mindspore/core/ops/grad/pad_v3_grad.h"
 namespace mindspore {
@@ -30,6 +31,9 @@
 constexpr int64_t k1DNum = 2;
 constexpr int64_t kpad_l = 0;
 constexpr int64_t kpad_t = 2;
 constexpr int64_t kpad_f = 4;
+constexpr int64_t kpad_r = 1;
+constexpr int64_t kpad_d = 3;
+constexpr int64_t kpad_b = 5;
 constexpr int64_t kwidth = 1;
 constexpr int64_t kheight = 2;
 constexpr int64_t kchannel = 3;
@@ -40,7 +44,7 @@
 constexpr int64_t k2Num = 2;
 constexpr int64_t padding_pos_2 = 2;
 constexpr int64_t padding_pos_3 = 3;
 constexpr int64_t padding_pos_4 = 4;
-const std::vector<std::string> mode_list = {"reflect", "edge"};
+const std::vector<std::string> mode_list = {ops::kReflect, ops::kEdge, ops::kCircular};
 }  // namespace

 bool PadV3GradCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
@@ -55,7 +59,7 @@ bool PadV3GradCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std
   mode_ = kernel_ptr->get_mode();
   const bool is_mode_available = std::find(mode_list.begin(), mode_list.end(), mode_) != mode_list.end();
   if (is_mode_available == false) {
-    MS_LOG(ERROR) << "For '" << kernel_name_ << "', the 'mode' should be 'constant', 'reflect' or 'edge', but got "
+    MS_LOG(ERROR) << "For '" << kernel_name_ << "', the 'mode' should be 'reflect', 'edge' or 'circular', but got "
                   << mode_;
     return false;
   }
@@ -148,7 +152,7 @@ void PadV3GradCpuKernelMod::PadV3GradCompute(T *input, T *output, int64_t p) con
 template <typename T>
 void PadV3GradCpuKernelMod::PadV3GradCompute1D(T *input, T *output, int64_t p) const {
   for (int j = 0; j < input_w_; j++) {
-    auto ip_x = IndexCalculate(pad_l_, j, output_w_, o_start_x_, i_start_x_);
+    auto ip_x = IndexCalculate(pad_l_, pad_r_, j, output_w_, o_start_x_, i_start_x_);
     T *src_p = input + p * input_w_ + j;
     T *dest_p = output + p * output_w_ + ip_x;
     *dest_p += *src_p;
@@ -158,8 +162,8 @@
 void PadV3GradCpuKernelMod::PadV3GradCompute2D(T *input, T *output, int64_t p, int64_t i) const {
   for (int j = 0; j < input_w_; j++) {
-    auto ip_x = IndexCalculate(pad_l_, j, output_w_, o_start_x_, i_start_x_);
-    auto ip_y = IndexCalculate(pad_t_, i, output_h_, o_start_y_, i_start_y_);
+    auto ip_x = IndexCalculate(pad_l_, pad_r_, j, output_w_, o_start_x_, i_start_x_);
+    auto ip_y = IndexCalculate(pad_t_, pad_d_, i, output_h_, o_start_y_, i_start_y_);
     T *src_p = input + p * input_w_ * input_h_ + i * input_w_ + j;
     T *dest_p = output + p * output_w_ * output_h_ + ip_y * output_w_ + ip_x;
     *dest_p += *src_p;
@@ -170,9 +174,9 @@ template <typename T>
 void PadV3GradCpuKernelMod::PadV3GradCompute3D(T *input, T *output, int64_t p, int64_t z) const {
   for (int i = 0; i < input_h_; i++) {
     for (int j = 0; j < input_w_; j++) {
-      auto ip_x = IndexCalculate(pad_l_, j, output_w_, o_start_x_, i_start_x_);
-      auto ip_y = IndexCalculate(pad_t_, i, output_h_, o_start_y_, i_start_y_);
-      auto ip_z = IndexCalculate(pad_f_, z, output_c_, o_start_z_, i_start_z_);
+      auto ip_x = IndexCalculate(pad_l_, pad_r_, j, output_w_, o_start_x_, i_start_x_);
+      auto ip_y = IndexCalculate(pad_t_, pad_d_, i, output_h_, o_start_y_, i_start_y_);
+      auto ip_z = IndexCalculate(pad_f_, pad_b_, z, output_c_, o_start_z_, i_start_z_);
       T *src_p = input + p * input_w_ * input_h_ * input_c_ + z * input_w_ * input_h_ + i * input_w_ + j;
       T *dest_p =
         output + p * output_w_ * output_h_ * output_c_ + ip_z * output_w_ * output_h_ + ip_y * output_w_ + ip_x;
@@ -181,22 +185,26 @@
   }
 }

-int64_t PadV3GradCpuKernelMod::IndexCalculate(int64_t pad_value, int64_t now, int64_t output_value, int64_t o_start,
-                                              int64_t i_start) const {
+int64_t PadV3GradCpuKernelMod::IndexCalculate(int64_t pad_value, int64_t pad_end, int64_t now, int64_t output_value,
+                                              int64_t o_start, int64_t i_start) const {
   int64_t ip = 0;
   if (now < pad_value) {
-    if (mode_ == "reflect") {
+    if (mode_ == ops::kReflect) {
       ip = pad_value + pad_value - now;
-    } else if (mode_ == "edge") {
+    } else if (mode_ == ops::kEdge) {
       ip = pad_value;
+    } else if (mode_ == ops::kCircular) {
+      ip = output_value + now + std::min(int64_t(0), pad_end);
     }
   } else if (now >= pad_value && now < output_value + pad_value) {
     ip = now;
   } else {
-    if (mode_ == "reflect") {
+    if (mode_ == ops::kReflect) {
       ip = (output_value + pad_value - 1) + (output_value + pad_value - 1) - now;
-    } else if (mode_ == "edge") {
+    } else if (mode_ == ops::kEdge) {
       ip = output_value + pad_value - 1;
+    } else if (mode_ == ops::kCircular) {
+      ip = now - output_value - std::min(int64_t(0), pad_value);
     }
   }
   ip = ip - o_start + i_start;
@@ -209,7 +217,6 @@ bool PadV3GradCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs,
   if (!GetPaddings(inputs)) {
     MS_LOG(EXCEPTION) << "get paddings failed";
   }
-
   output_w_ = output_shape_.end()[-kwidth];
   output_h_ = output_shape_.end()[-kheight];
   output_c_ = output_shape_.end()[-kchannel];
@@ -227,6 +234,9 @@ bool PadV3GradCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs,
   pad_l_ = paddings_[kpad_l];
   pad_t_ = paddings_[kpad_t];
   pad_f_ = paddings_[kpad_f];
+  pad_r_ = paddings_[kpad_r];
+  pad_d_ = paddings_[kpad_d];
+  pad_b_ = paddings_[kpad_b];

   int64_t output_num_ = 1;
   for (int64_t i = 0; i < input_dim_; i++) {
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/pad_v3_grad_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/pad_v3_grad_cpu_kernel.h
index 9561ee9579e..46b23527c94 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/pad_v3_grad_cpu_kernel.h
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/pad_v3_grad_cpu_kernel.h
@@ -73,7 +73,8 @@ class PadV3GradCpuKernelMod : public NativeCpuKernelMod {
   template <typename T>
   void PadV3GradCompute3D(T *input, T *output, int64_t p, int64_t z) const;

-  int64_t IndexCalculate(int64_t pad_value, int64_t now, int64_t output_value, int64_t o_start, int64_t i_start) const;
+  int64_t IndexCalculate(int64_t pad_value, int64_t pad_end, int64_t now, int64_t output_value, int64_t o_start,
+                         int64_t i_start) const;

   using SelectFunc = std::function &,
@@ -103,6 +104,9 @@ class PadV3GradCpuKernelMod : public NativeCpuKernelMod {
   int64_t pad_l_{0};
   int64_t pad_t_{0};
   int64_t pad_f_{0};
+  int64_t pad_r_{0};
+  int64_t pad_d_{0};
+  int64_t pad_b_{0};
   int64_t parallelSliceNum_{1};
   int64_t paddings_num_{0};
   int64_t input_dim_{0};
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_class/pad_v3_grad_helper.h b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_class/pad_v3_grad_helper.h
index b915d39e5ff..70d834b08f9 100644
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_class/pad_v3_grad_helper.h
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_class/pad_v3_grad_helper.h
@@ -104,7 +104,6 @@ class PadV3GradHelperGpuKernel : public GpuKernelHelperBase {
     if (flag != 0) {
       return flag;
     }
-    // call cuda kernel
     if (mode_ == ops::kReflect) {
       CalReflectPadGrad3d(input_size_, input_ptr, input_shape_5d_[0], input_shape_5d_[1], input_shape_5d_[2],
@@ -116,6 +115,13 @@
                        input_shape_5d_[3], input_shape_5d_[4], output_shape_5d_[2], output_shape_5d_[3],
                        output_shape_5d_[4], paddings_3d_[0].first, paddings_3d_[1].first, paddings_3d_[2].first,
                        output_ptr, device_id_, reinterpret_cast<cudaStream_t>(cuda_stream));
+    } else if (mode_ == ops::kCircular) {
+      CalCircularPadGrad3d(input_size_, input_ptr, input_shape_5d_[kIndex2], input_shape_5d_[kIndex3],
+                           input_shape_5d_[kIndex4], output_shape_5d_[kIndex2], output_shape_5d_[kIndex3],
+                           output_shape_5d_[kIndex4], paddings_3d_[kIndex0].first, paddings_3d_[kIndex1].first,
+                           paddings_3d_[kIndex2].first, paddings_3d_[kIndex0].second, paddings_3d_[kIndex1].second,
+                           paddings_3d_[kIndex2].second, output_ptr, device_id_,
+                           reinterpret_cast<cudaStream_t>(cuda_stream));
     }
     return 0;
   }
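Both GPU helpers appear to canonicalise the problem to five dimensions: `input_shape_5d_` left-pads the shape with size-1 axes and `paddings_3d_` holds three (begin, end) pairs for depth, height and width, so a single 3-D kernel serves 3-D, 4-D and 5-D inputs alike. A sketch of that idea under this assumption (the reshape trick is illustrative, not the helper's exact code):

```python
import numpy as np

x = np.arange(6, dtype=np.float32).reshape(1, 2, 3)   # 3-D input (N, C, W)
x5d = x.reshape(1, 2, 1, 1, 3)                        # -> (N, C, D=1, H=1, W)
paddings_3d = [(0, 0), (0, 0), (1, 2)]                # (depth, height, width); only W padded
padded = np.pad(x5d, [(0, 0), (0, 0)] + paddings_3d, mode="wrap")
print(padded.reshape(1, 2, -1))                       # same as padding the 3-D input directly
```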
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_class/pad_v3_helper.h b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_class/pad_v3_helper.h
index 5d12f7c6533..13e3ca93f80 100644
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_class/pad_v3_helper.h
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_class/pad_v3_helper.h
@@ -132,6 +132,13 @@ class PadV3HelperGpuKernel : public GpuKernelHelperBase {
                      input_shape_5d_[3], input_shape_5d_[4], output_shape_5d_[2], output_shape_5d_[3],
                      output_shape_5d_[4], paddings_3d_[0].first, paddings_3d_[1].first, paddings_3d_[2].first,
                      output_ptr, device_id_, reinterpret_cast<cudaStream_t>(cuda_stream));
+    } else if (mode_ == ops::kCircular) {
+      CalCircularPad3d(output_size_, input_ptr, input_shape_5d_[kIndex2], input_shape_5d_[kIndex3],
+                       input_shape_5d_[kIndex4], output_shape_5d_[kIndex2], output_shape_5d_[kIndex3],
+                       output_shape_5d_[kIndex4], paddings_3d_[kIndex0].first, paddings_3d_[kIndex1].first,
+                       paddings_3d_[kIndex2].first, paddings_3d_[kIndex0].second, paddings_3d_[kIndex1].second,
+                       paddings_3d_[kIndex2].second, output_ptr, device_id_,
+                       reinterpret_cast<cudaStream_t>(cuda_stream));
     }
     return 0;
   }
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/pad_v3_impl.cu b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/pad_v3_impl.cu
index 5fd9805f45e..d1cd809301f 100644
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/pad_v3_impl.cu
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/pad_v3_impl.cu
@@ -68,6 +68,87 @@ __global__ void ConstantPadGrad3d(const size_t size, const T *dy, const int64_t
   }
 }

+template <typename T>
+__global__ void CircularPad3d(const size_t size, const T *input, const int64_t old_depth, const int64_t old_height,
+                              const int64_t old_width, const int64_t padded_depth, const int64_t padded_height,
+                              const int64_t padded_width, const int64_t pad_head, const int64_t pad_top,
+                              const int64_t pad_left, const int64_t pad_back, const int64_t pad_down,
+                              const int64_t pad_right, T *output) {
+  for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim.x) {
+    int64_t nc = pos / padded_width;
+    const int64_t out_w = pos % padded_width;
+    const int64_t out_h = nc % padded_height;
+    nc /= padded_height;
+    const int64_t out_d = nc % padded_depth;
+    nc /= padded_depth;
+
+    int in_d = ((out_d - pad_head) % old_depth + old_depth) % old_depth;
+    int in_h = ((out_h - pad_top) % old_height + old_height) % old_height;
+    int in_w = ((out_w - pad_left) % old_width + old_width) % old_width;
+    if (out_d < pad_head) {
+      in_d = (in_d + imin(0, pad_back) + old_depth) % old_depth;
+    }
+    if (out_d >= old_depth + pad_head) {
+      in_d = (in_d + imax(0, -pad_head) + old_depth) % old_depth;
+    }
+    if (out_h < pad_top) {
+      in_h = (in_h + imin(0, pad_down) + old_height) % old_height;
+    }
+    if (out_h >= old_height + pad_top) {
+      in_h = (in_h + imax(0, -pad_top) + old_height) % old_height;
+    }
+    if (out_w < pad_left) {
+      in_w = (in_w + imin(0, pad_right) + old_width) % old_width;
+    }
+    if (out_w >= old_width + pad_left) {
+      in_w = (in_w + imax(0, -pad_left) + old_width) % old_width;
+    }
+    output[pos] = input[(nc * old_depth * old_height + in_d * old_height + in_h) * old_width + in_w];
+  }
+}
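The forward kernel above assigns one thread per flattened output element `pos`, peels off the width, height and depth coordinates, then wraps each coordinate independently; the `imin`/`imax` corrections reproduce the negative-padding shifts of the CPU path. The same decomposition and wrap in Python, for reference (an illustration of the indexing, not the CUDA source):

```python
def decompose(pos, padded_depth, padded_height, padded_width):
    nc, out_w = divmod(pos, padded_width)    # mirrors pos % padded_width, pos / padded_width
    nc, out_h = divmod(nc, padded_height)
    nc, out_d = divmod(nc, padded_depth)
    return nc, out_d, out_h, out_w

def wrap(out_i, pad_begin, pad_end, old):
    in_i = (out_i - pad_begin) % old         # Python's % is already non-negative
    if out_i < pad_begin:                    # left pad region
        in_i = (in_i + min(0, pad_end)) % old
    if out_i >= old + pad_begin:             # right pad region
        in_i = (in_i + max(0, -pad_begin)) % old
    return in_i

# width 3 padded by (1, 2): output columns read inputs [2, 0, 1, 2, 0, 1]
print([wrap(i, 1, 2, 3) for i in range(6)])
```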
+
+template <typename T>
+__global__ void CircularPadGrad3d(const size_t size, const T *input, const int64_t old_depth, const int64_t old_height,
+                                  const int64_t old_width, const int64_t padded_depth, const int64_t padded_height,
+                                  const int64_t padded_width, const int64_t pad_head, const int64_t pad_top,
+                                  const int64_t pad_left, const int64_t pad_back, const int64_t pad_down,
+                                  const int64_t pad_right, T *output) {
+  for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < (size); pos += blockDim.x * gridDim.x) {
+    int nc = pos / old_width;
+    const int out_w = pos % old_width;
+    const int out_h = nc % old_height;
+    nc /= old_height;
+    const int out_d = nc % old_depth;
+    nc /= old_depth;
+
+    int in_d = ((out_d - pad_head) % padded_depth + padded_depth) % padded_depth;
+    int in_h = ((out_h - pad_top) % padded_height + padded_height) % padded_height;
+    int in_w = ((out_w - pad_left) % padded_width + padded_width) % padded_width;
+
+    if (out_d < pad_head) {
+      in_d = (in_d + imin(0, pad_back) + padded_depth) % padded_depth;
+    }
+    if (out_d >= padded_depth + pad_head) {
+      in_d = (in_d + imax(0, -pad_head) + padded_depth) % padded_depth;
+    }
+    if (out_h < pad_top) {
+      in_h = (in_h + imin(0, pad_down) + padded_height) % padded_height;
+    }
+    if (out_h >= padded_height + pad_top) {
+      in_h = (in_h + imax(0, -pad_top) + padded_height) % padded_height;
+    }
+    if (out_w < pad_left) {
+      in_w = (in_w + imin(0, pad_right) + padded_width) % padded_width;
+    }
+    if (out_w >= padded_width + pad_left) {
+      in_w = (in_w + imax(0, -pad_left) + padded_width) % padded_width;
+    }
+
+    int index = (nc * padded_depth * padded_height + in_d * padded_height + in_h) * padded_width + in_w;
+    MsAtomicAdd(&output[index], input[pos]);
+  }
+}
+
 template <typename T>
 __global__ void ReflectPad3d(const size_t size, const T *input, const int64_t num, const int64_t channels,
                              const int64_t old_depth, const int64_t old_height, const int64_t old_width,
@@ -221,6 +302,28 @@ void CalConstantPadGrad3d(const size_t size, const T *dy, const int64_t num, con
                           padded_width, padded_dhw, padded_hw, pad_head, pad_top, pad_left, dx);
 }

+template <typename T>
+void CalCircularPad3d(const size_t size, const T *input, const int64_t old_depth, const int64_t old_height,
+                      const int64_t old_width, const int64_t padded_depth, const int64_t padded_height,
+                      const int64_t padded_width, const int64_t pad_head, const int64_t pad_top, const int64_t pad_left,
+                      const int64_t pad_back, const int64_t pad_down, const int64_t pad_right, T *output,
+                      const uint32_t &device_id, cudaStream_t cuda_stream) {
+  CircularPad3d<<<CUDA_BLOCKS(device_id, size), CUDA_THREADS(device_id), 0, cuda_stream>>>(
+    size, input, old_depth, old_height, old_width, padded_depth, padded_height, padded_width, pad_head, pad_top,
+    pad_left, pad_back, pad_down, pad_right, output);
+}
+
+template <typename T>
+void CalCircularPadGrad3d(const size_t size, const T *input, const int64_t old_depth, const int64_t old_height,
+                          const int64_t old_width, const int64_t padded_depth, const int64_t padded_height,
+                          const int64_t padded_width, const int64_t pad_head, const int64_t pad_top,
+                          const int64_t pad_left, const int64_t pad_back, const int64_t pad_down,
+                          const int64_t pad_right, T *output, const uint32_t &device_id, cudaStream_t cuda_stream) {
+  CircularPadGrad3d<<<CUDA_BLOCKS(device_id, size), CUDA_THREADS(device_id), 0, cuda_stream>>>(
+    size, input, old_depth, old_height, old_width, padded_depth, padded_height, padded_width, pad_head, pad_top,
+    pad_left, pad_back, pad_down, pad_right, output);
+}
+
 template <typename T>
 void CalReflectPad3d(const size_t size, const T *input, const int64_t num, const int64_t channels,
                      const int64_t old_depth, const int64_t old_height, const int64_t old_width,
@@ -286,6 +389,13 @@ template CUDA_LIB_EXPORT void CalConstantPad3d<double>(
   const int64_t old_height, const int64_t old_width, const int64_t padded_depth, const int64_t padded_height,
   const int64_t padded_width, const int64_t pad_head, const int64_t pad_top, const int64_t pad_left,
   const double *pad_value, double *output, const uint32_t &device_id, cudaStream_t cuda_stream);
+
+template CUDA_LIB_EXPORT void CalCircularPad3d<double>(
+  const
size_t size, const double *input, const int64_t old_depth, const int64_t old_height, const int64_t old_width, + const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, + const int64_t pad_top, const int64_t pad_left, const int64_t pad_back, const int64_t pad_down, + const int64_t pad_right, double *output, const uint32_t &device_id, cudaStream_t cuda_stream); + template CUDA_LIB_EXPORT void CalReflectPad3d(const size_t size, const double *input, const int64_t num, const int64_t channels, const int64_t old_depth, const int64_t old_height, const int64_t old_width, @@ -316,11 +426,24 @@ template CUDA_LIB_EXPORT void CalEdgePadGrad3d(const size_t size, double const int64_t pad_top, const int64_t pad_left, double *output, const uint32_t &device_id, cudaStream_t cuda_stream); +template CUDA_LIB_EXPORT void CalCircularPadGrad3d( + const size_t size, const double *input, const int64_t old_depth, const int64_t old_height, const int64_t old_width, + const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, + const int64_t pad_top, const int64_t pad_left, const int64_t pad_back, const int64_t pad_down, + const int64_t pad_right, double *output, const uint32_t &device_id, cudaStream_t cuda_stream); + template CUDA_LIB_EXPORT void CalConstantPad3d( const size_t size, const float *input, const int64_t num, const int64_t channels, const int64_t old_depth, const int64_t old_height, const int64_t old_width, const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, const int64_t pad_top, const int64_t pad_left, const float *pad_value, float *output, const uint32_t &device_id, cudaStream_t cuda_stream); + +template CUDA_LIB_EXPORT void CalCircularPad3d( + const size_t size, const float *input, const int64_t old_depth, const int64_t old_height, const int64_t old_width, + const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, + const int64_t pad_top, const int64_t pad_left, const int64_t pad_back, const int64_t pad_down, + const int64_t pad_right, float *output, const uint32_t &device_id, cudaStream_t cuda_stream); + template CUDA_LIB_EXPORT void CalReflectPad3d(const size_t size, const float *input, const int64_t num, const int64_t channels, const int64_t old_depth, const int64_t old_height, const int64_t old_width, @@ -351,11 +474,24 @@ template CUDA_LIB_EXPORT void CalEdgePadGrad3d(const size_t size, float * const int64_t pad_top, const int64_t pad_left, float *output, const uint32_t &device_id, cudaStream_t cuda_stream); +template CUDA_LIB_EXPORT void CalCircularPadGrad3d( + const size_t size, const float *input, const int64_t old_depth, const int64_t old_height, const int64_t old_width, + const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, + const int64_t pad_top, const int64_t pad_left, const int64_t pad_back, const int64_t pad_down, + const int64_t pad_right, float *output, const uint32_t &device_id, cudaStream_t cuda_stream); + template CUDA_LIB_EXPORT void CalConstantPad3d( const size_t size, const half *input, const int64_t num, const int64_t channels, const int64_t old_depth, const int64_t old_height, const int64_t old_width, const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, const int64_t pad_top, const int64_t pad_left, const half *pad_value, half *output, const uint32_t 
&device_id, cudaStream_t cuda_stream); + +template CUDA_LIB_EXPORT void CalCircularPad3d( + const size_t size, const half *input, const int64_t old_depth, const int64_t old_height, const int64_t old_width, + const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, + const int64_t pad_top, const int64_t pad_left, const int64_t pad_back, const int64_t pad_down, + const int64_t pad_right, half *output, const uint32_t &device_id, cudaStream_t cuda_stream); + template CUDA_LIB_EXPORT void CalReflectPad3d(const size_t size, const half *input, const int64_t num, const int64_t channels, const int64_t old_depth, const int64_t old_height, const int64_t old_width, @@ -386,11 +522,24 @@ template CUDA_LIB_EXPORT void CalEdgePadGrad3d(const size_t size, half *in const int64_t pad_top, const int64_t pad_left, half *output, const uint32_t &device_id, cudaStream_t cuda_stream); +template CUDA_LIB_EXPORT void CalCircularPadGrad3d( + const size_t size, const half *input, const int64_t old_depth, const int64_t old_height, const int64_t old_width, + const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, + const int64_t pad_top, const int64_t pad_left, const int64_t pad_back, const int64_t pad_down, + const int64_t pad_right, half *output, const uint32_t &device_id, cudaStream_t cuda_stream); + template CUDA_LIB_EXPORT void CalConstantPad3d( const size_t size, const int64_t *input, const int64_t num, const int64_t channels, const int64_t old_depth, const int64_t old_height, const int64_t old_width, const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, const int64_t pad_top, const int64_t pad_left, const int64_t *pad_value, int64_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); + +template CUDA_LIB_EXPORT void CalCircularPad3d( + const size_t size, const int64_t *input, const int64_t old_depth, const int64_t old_height, const int64_t old_width, + const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, + const int64_t pad_top, const int64_t pad_left, const int64_t pad_back, const int64_t pad_down, + const int64_t pad_right, int64_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); + template CUDA_LIB_EXPORT void CalReflectPad3d(const size_t size, const int64_t *input, const int64_t num, const int64_t channels, const int64_t old_depth, const int64_t old_height, const int64_t old_width, @@ -419,11 +568,24 @@ template CUDA_LIB_EXPORT void CalEdgePadGrad3d(const size_t size, int64 const int64_t pad_top, const int64_t pad_left, int64_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); +template CUDA_LIB_EXPORT void CalCircularPadGrad3d( + const size_t size, const int64_t *input, const int64_t old_depth, const int64_t old_height, const int64_t old_width, + const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, + const int64_t pad_top, const int64_t pad_left, const int64_t pad_back, const int64_t pad_down, + const int64_t pad_right, int64_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); + template CUDA_LIB_EXPORT void CalConstantPad3d( const size_t size, const int32_t *input, const int64_t num, const int64_t channels, const int64_t old_depth, const int64_t old_height, const int64_t old_width, const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, 
const int64_t pad_top, const int64_t pad_left, const int32_t *pad_value, int32_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); + +template CUDA_LIB_EXPORT void CalCircularPad3d( + const size_t size, const int32_t *input, const int64_t old_depth, const int64_t old_height, const int64_t old_width, + const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, + const int64_t pad_top, const int64_t pad_left, const int64_t pad_back, const int64_t pad_down, + const int64_t pad_right, int32_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); + template CUDA_LIB_EXPORT void CalReflectPad3d(const size_t size, const int32_t *input, const int64_t num, const int64_t channels, const int64_t old_depth, const int64_t old_height, const int64_t old_width, @@ -452,11 +614,24 @@ template CUDA_LIB_EXPORT void CalEdgePadGrad3d(const size_t size, int32 const int64_t pad_top, const int64_t pad_left, int32_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); +template CUDA_LIB_EXPORT void CalCircularPadGrad3d( + const size_t size, const int32_t *input, const int64_t old_depth, const int64_t old_height, const int64_t old_width, + const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, + const int64_t pad_top, const int64_t pad_left, const int64_t pad_back, const int64_t pad_down, + const int64_t pad_right, int32_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); + template CUDA_LIB_EXPORT void CalConstantPad3d( const size_t size, const int16_t *input, const int64_t num, const int64_t channels, const int64_t old_depth, const int64_t old_height, const int64_t old_width, const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, const int64_t pad_top, const int64_t pad_left, const int16_t *pad_value, int16_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); + +template CUDA_LIB_EXPORT void CalCircularPad3d( + const size_t size, const int16_t *input, const int64_t old_depth, const int64_t old_height, const int64_t old_width, + const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, + const int64_t pad_top, const int64_t pad_left, const int64_t pad_back, const int64_t pad_down, + const int64_t pad_right, int16_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); + template CUDA_LIB_EXPORT void CalReflectPad3d(const size_t size, const int16_t *input, const int64_t num, const int64_t channels, const int64_t old_depth, const int64_t old_height, const int64_t old_width, @@ -485,11 +660,24 @@ template CUDA_LIB_EXPORT void CalEdgePadGrad3d(const size_t size, int16 const int64_t pad_top, const int64_t pad_left, int16_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); +template CUDA_LIB_EXPORT void CalCircularPadGrad3d( + const size_t size, const int16_t *input, const int64_t old_depth, const int64_t old_height, const int64_t old_width, + const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, + const int64_t pad_top, const int64_t pad_left, const int64_t pad_back, const int64_t pad_down, + const int64_t pad_right, int16_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); + template CUDA_LIB_EXPORT void CalConstantPad3d( const size_t size, const int8_t *input, const int64_t num, const int64_t channels, const int64_t old_depth, const int64_t old_height, const int64_t 
old_width, const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, const int64_t pad_top, const int64_t pad_left, const int8_t *pad_value, int8_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); + +template CUDA_LIB_EXPORT void CalCircularPad3d( + const size_t size, const int8_t *input, const int64_t old_depth, const int64_t old_height, const int64_t old_width, + const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, + const int64_t pad_top, const int64_t pad_left, const int64_t pad_back, const int64_t pad_down, + const int64_t pad_right, int8_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); + template CUDA_LIB_EXPORT void CalReflectPad3d(const size_t size, const int8_t *input, const int64_t num, const int64_t channels, const int64_t old_depth, const int64_t old_height, const int64_t old_width, @@ -520,11 +708,24 @@ template CUDA_LIB_EXPORT void CalEdgePadGrad3d(const size_t size, int8_t const int64_t pad_top, const int64_t pad_left, int8_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); +template CUDA_LIB_EXPORT void CalCircularPadGrad3d( + const size_t size, const int8_t *input, const int64_t old_depth, const int64_t old_height, const int64_t old_width, + const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, + const int64_t pad_top, const int64_t pad_left, const int64_t pad_back, const int64_t pad_down, + const int64_t pad_right, int8_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); + template CUDA_LIB_EXPORT void CalConstantPad3d( const size_t size, const uint64_t *input, const int64_t num, const int64_t channels, const int64_t old_depth, const int64_t old_height, const int64_t old_width, const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, const int64_t pad_top, const int64_t pad_left, const uint64_t *pad_value, uint64_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); + +template CUDA_LIB_EXPORT void CalCircularPad3d( + const size_t size, const uint64_t *input, const int64_t old_depth, const int64_t old_height, const int64_t old_width, + const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, + const int64_t pad_top, const int64_t pad_left, const int64_t pad_back, const int64_t pad_down, + const int64_t pad_right, uint64_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); + template CUDA_LIB_EXPORT void CalReflectPad3d(const size_t size, const uint64_t *input, const int64_t num, const int64_t channels, const int64_t old_depth, const int64_t old_height, const int64_t old_width, @@ -551,11 +752,24 @@ template CUDA_LIB_EXPORT void CalEdgePadGrad3d( const int64_t padded_width, const int64_t pad_head, const int64_t pad_top, const int64_t pad_left, uint64_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); +template CUDA_LIB_EXPORT void CalCircularPadGrad3d( + const size_t size, const uint64_t *input, const int64_t old_depth, const int64_t old_height, const int64_t old_width, + const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, + const int64_t pad_top, const int64_t pad_left, const int64_t pad_back, const int64_t pad_down, + const int64_t pad_right, uint64_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); + template CUDA_LIB_EXPORT void CalConstantPad3d( 
const size_t size, const uint32_t *input, const int64_t num, const int64_t channels, const int64_t old_depth, const int64_t old_height, const int64_t old_width, const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, const int64_t pad_top, const int64_t pad_left, const uint32_t *pad_value, uint32_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); + +template CUDA_LIB_EXPORT void CalCircularPad3d( + const size_t size, const uint32_t *input, const int64_t old_depth, const int64_t old_height, const int64_t old_width, + const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, + const int64_t pad_top, const int64_t pad_left, const int64_t pad_back, const int64_t pad_down, + const int64_t pad_right, uint32_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); + template CUDA_LIB_EXPORT void CalReflectPad3d(const size_t size, const uint32_t *input, const int64_t num, const int64_t channels, const int64_t old_depth, const int64_t old_height, const int64_t old_width, @@ -582,11 +796,24 @@ template CUDA_LIB_EXPORT void CalEdgePadGrad3d( const int64_t padded_width, const int64_t pad_head, const int64_t pad_top, const int64_t pad_left, uint32_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); +template CUDA_LIB_EXPORT void CalCircularPadGrad3d( + const size_t size, const uint32_t *input, const int64_t old_depth, const int64_t old_height, const int64_t old_width, + const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, + const int64_t pad_top, const int64_t pad_left, const int64_t pad_back, const int64_t pad_down, + const int64_t pad_right, uint32_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); + template CUDA_LIB_EXPORT void CalConstantPad3d( const size_t size, const uint16_t *input, const int64_t num, const int64_t channels, const int64_t old_depth, const int64_t old_height, const int64_t old_width, const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, const int64_t pad_top, const int64_t pad_left, const uint16_t *pad_value, uint16_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); + +template CUDA_LIB_EXPORT void CalCircularPad3d( + const size_t size, const uint16_t *input, const int64_t old_depth, const int64_t old_height, const int64_t old_width, + const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, + const int64_t pad_top, const int64_t pad_left, const int64_t pad_back, const int64_t pad_down, + const int64_t pad_right, uint16_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); + template CUDA_LIB_EXPORT void CalReflectPad3d(const size_t size, const uint16_t *input, const int64_t num, const int64_t channels, const int64_t old_depth, const int64_t old_height, const int64_t old_width, @@ -613,11 +840,24 @@ template CUDA_LIB_EXPORT void CalEdgePadGrad3d( const int64_t padded_width, const int64_t pad_head, const int64_t pad_top, const int64_t pad_left, uint16_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); +template CUDA_LIB_EXPORT void CalCircularPadGrad3d( + const size_t size, const uint16_t *input, const int64_t old_depth, const int64_t old_height, const int64_t old_width, + const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, + const int64_t pad_top, const int64_t pad_left, 
const int64_t pad_back, const int64_t pad_down, + const int64_t pad_right, uint16_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); + template CUDA_LIB_EXPORT void CalConstantPad3d( const size_t size, const uint8_t *input, const int64_t num, const int64_t channels, const int64_t old_depth, const int64_t old_height, const int64_t old_width, const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, const int64_t pad_top, const int64_t pad_left, const uint8_t *pad_value, uint8_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); + +template CUDA_LIB_EXPORT void CalCircularPad3d( + const size_t size, const uint8_t *input, const int64_t old_depth, const int64_t old_height, const int64_t old_width, + const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, + const int64_t pad_top, const int64_t pad_left, const int64_t pad_back, const int64_t pad_down, + const int64_t pad_right, uint8_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); + template CUDA_LIB_EXPORT void CalReflectPad3d(const size_t size, const uint8_t *input, const int64_t num, const int64_t channels, const int64_t old_depth, const int64_t old_height, const int64_t old_width, @@ -646,11 +886,24 @@ template CUDA_LIB_EXPORT void CalEdgePadGrad3d(const size_t size, uint8 const int64_t pad_top, const int64_t pad_left, uint8_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); +template CUDA_LIB_EXPORT void CalCircularPadGrad3d( + const size_t size, const uint8_t *input, const int64_t old_depth, const int64_t old_height, const int64_t old_width, + const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, + const int64_t pad_top, const int64_t pad_left, const int64_t pad_back, const int64_t pad_down, + const int64_t pad_right, uint8_t *output, const uint32_t &device_id, cudaStream_t cuda_stream); + template CUDA_LIB_EXPORT void CalConstantPad3d>( const size_t size, const Complex *input, const int64_t num, const int64_t channels, const int64_t old_depth, const int64_t old_height, const int64_t old_width, const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, const int64_t pad_top, const int64_t pad_left, const Complex *pad_value, Complex *output, const uint32_t &device_id, cudaStream_t cuda_stream); + +template CUDA_LIB_EXPORT void CalCircularPad3d>( + const size_t size, const Complex *input, const int64_t old_depth, const int64_t old_height, + const int64_t old_width, const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, + const int64_t pad_head, const int64_t pad_top, const int64_t pad_left, const int64_t pad_back, const int64_t pad_down, + const int64_t pad_right, Complex *output, const uint32_t &device_id, cudaStream_t cuda_stream); + template CUDA_LIB_EXPORT void CalReflectPad3d>( const size_t size, const Complex *input, const int64_t num, const int64_t channels, const int64_t old_depth, const int64_t old_height, const int64_t old_width, const int64_t padded_depth, const int64_t padded_height, @@ -673,11 +926,24 @@ template CUDA_LIB_EXPORT void CalEdgePadGrad3d>( const int64_t padded_width, const int64_t pad_head, const int64_t pad_top, const int64_t pad_left, Complex *output, const uint32_t &device_id, cudaStream_t cuda_stream); +template CUDA_LIB_EXPORT void CalCircularPadGrad3d>( + const size_t size, const Complex *input, const int64_t old_depth, 
const int64_t old_height, + const int64_t old_width, const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, + const int64_t pad_head, const int64_t pad_top, const int64_t pad_left, const int64_t pad_back, const int64_t pad_down, + const int64_t pad_right, Complex *output, const uint32_t &device_id, cudaStream_t cuda_stream); + template CUDA_LIB_EXPORT void CalConstantPad3d>( const size_t size, const Complex *input, const int64_t num, const int64_t channels, const int64_t old_depth, const int64_t old_height, const int64_t old_width, const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, const int64_t pad_top, const int64_t pad_left, const Complex *pad_value, Complex *output, const uint32_t &device_id, cudaStream_t cuda_stream); + +template CUDA_LIB_EXPORT void CalCircularPad3d>( + const size_t size, const Complex *input, const int64_t old_depth, const int64_t old_height, + const int64_t old_width, const int64_t padded_depth, const int64_t padded_height, const int64_t padded_width, + const int64_t pad_head, const int64_t pad_top, const int64_t pad_left, const int64_t pad_back, const int64_t pad_down, + const int64_t pad_right, Complex *output, const uint32_t &device_id, cudaStream_t cuda_stream); + template CUDA_LIB_EXPORT void CalReflectPad3d>( const size_t size, const Complex *input, const int64_t num, const int64_t channels, const int64_t old_depth, const int64_t old_height, const int64_t old_width, const int64_t padded_depth, const int64_t padded_height, diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/pad_v3_impl.cuh b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/pad_v3_impl.cuh index 52642649f16..960a5b4370c 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/pad_v3_impl.cuh +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/pad_v3_impl.cuh @@ -19,6 +19,14 @@ #include #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h" +template +CUDA_LIB_EXPORT void CalCircularPad3d(const size_t size, const T *input, const int64_t old_depth, + const int64_t old_height, const int64_t old_width, const int64_t padded_depth, + const int64_t padded_height, const int64_t padded_width, const int64_t pad_head, + const int64_t pad_top, const int64_t pad_left, const int64_t pad_back, + const int64_t pad_down, const int64_t pad_right, T *output, + const uint32_t &device_id, cudaStream_t cuda_stream); + template CUDA_LIB_EXPORT void CalConstantPad3d(const size_t size, const T *input, const int64_t num, const int64_t channels, const int64_t old_depth, const int64_t old_height, const int64_t old_width, @@ -66,4 +74,12 @@ CUDA_LIB_EXPORT void CalEdgePadGrad3d(const size_t size, T *input, const int64_t const int64_t pad_left, T *output, const uint32_t &device_id, cudaStream_t cuda_stream); +template +CUDA_LIB_EXPORT void CalCircularPadGrad3d(const size_t size, const T *input, const int64_t old_depth, + const int64_t old_height, const int64_t old_width, const int64_t padded_depth, + const int64_t padded_height, const int64_t padded_width, + const int64_t pad_head, const int64_t pad_top, const int64_t pad_left, + const int64_t pad_back, const int64_t pad_down, const int64_t pad_right, + T *output, const uint32_t &device_id, cudaStream_t cuda_stream); + #endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_OPS_PAD_V3_IMPL_CUH_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/pad_v3_grad_gpu_kernel.cc 
b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/pad_v3_grad_gpu_kernel.cc index 316892ac4f5..ddfcb5c45ff 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/nn/pad_v3_grad_gpu_kernel.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/nn/pad_v3_grad_gpu_kernel.cc @@ -18,7 +18,7 @@ namespace mindspore { namespace kernel { namespace { -const std::vector mode_list = {ops::kReflect, ops::kEdge}; +const std::vector mode_list = {ops::kReflect, ops::kEdge, ops::kCircular}; template std::unique_ptr CreatePadV3GradKernelPtr(const std::string &kernel_name, const uint32_t &device_id) { @@ -52,6 +52,30 @@ const std::vector> kernel_attr = CreatePadV3GradKernelPtr}, {KernelAttr().AddInputAttr(kNumberTypeComplex64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeComplex64), CreatePadV3GradKernelPtr, int64_t>}, + {KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat64), + CreatePadV3GradKernelPtr}, + {KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32), + CreatePadV3GradKernelPtr}, + {KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat16), + CreatePadV3GradKernelPtr}, + {KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt64), + CreatePadV3GradKernelPtr}, + {KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), + CreatePadV3GradKernelPtr}, + {KernelAttr().AddInputAttr(kNumberTypeInt16).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt16), + CreatePadV3GradKernelPtr}, + {KernelAttr().AddInputAttr(kNumberTypeInt8).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt8), + CreatePadV3GradKernelPtr}, + {KernelAttr().AddInputAttr(kNumberTypeUInt64).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeUInt64), + CreatePadV3GradKernelPtr}, + {KernelAttr().AddInputAttr(kNumberTypeUInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeUInt32), + CreatePadV3GradKernelPtr}, + {KernelAttr().AddInputAttr(kNumberTypeUInt16).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeUInt16), + CreatePadV3GradKernelPtr}, + {KernelAttr().AddInputAttr(kNumberTypeUInt8).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeUInt8), + CreatePadV3GradKernelPtr}, + {KernelAttr().AddInputAttr(kNumberTypeComplex64).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeComplex64), + CreatePadV3GradKernelPtr, int64_t>}, }; } // namespace diff --git a/mindspore/core/ops/op_name.h b/mindspore/core/ops/op_name.h index 4fd47b9f0fb..d8d99778e24 100644 --- a/mindspore/core/ops/op_name.h +++ b/mindspore/core/ops/op_name.h @@ -340,6 +340,7 @@ constexpr auto kHalfPixelCenters = "half_pixel_centers"; constexpr auto kConstant = "constant"; constexpr auto kReflect = "reflect"; constexpr auto kEdge = "edge"; +constexpr auto kCircular = "circular"; constexpr auto kLr = "lr"; constexpr auto kL1 = "l1"; constexpr auto kL2 = "l2"; diff --git a/mindspore/core/ops/pad_v3.cc b/mindspore/core/ops/pad_v3.cc index cfcffc1b95c..512b2cc89fa 100644 --- a/mindspore/core/ops/pad_v3.cc +++ b/mindspore/core/ops/pad_v3.cc @@ -139,14 +139,15 @@ abstract::ShapePtr PadV3InferShape(const PrimitivePtr &primitive, const std::vec std::vector paddings_val; auto mode = GetValue(primitive->GetAttr(kAttrMode)); if (mode != kConstant) { - (void)CheckAndConvertUtils::CheckInteger("input dims for edge or reflect mode", size, kGreaterEqual, kOtherMinDims, - prim_name); - } 
-  if (mode == kReflect) {
-    ReflectModeCheck(prim_name, paddings_size, x_shape, paddings_arg, size);
-  } else if (mode == kEdge) {
-    (void)CheckAndConvertUtils::CheckInteger("input dims for edge mode", size, kLessEqual, kEdgeMaxDims, prim_name);
+    (void)CheckAndConvertUtils::CheckInteger("input dims for edge, reflect or circular mode", size, kGreaterEqual,
+                                             kOtherMinDims, prim_name);
+    if (mode == kReflect) {
+      ReflectModeCheck(prim_name, paddings_size, x_shape, paddings_arg, size);
+    } else {
+      (void)CheckAndConvertUtils::CheckInteger("input dims for edge or circular mode", size, kLessEqual, kEdgeMaxDims,
+                                               prim_name);
+    }
   }
+
   PaddingsSizeCheck(primitive, paddings_size, size);
   for (int64_t i = 0; i < paddings_size; ++i) {
     paddings_val.push_back(int64_t(paddings_arg[LongToSize(i)]));
diff --git a/mindspore/python/mindspore/ops/operations/_grad_ops.py b/mindspore/python/mindspore/ops/operations/_grad_ops.py
index 89336189b1c..8262f8f9ae2 100644
--- a/mindspore/python/mindspore/ops/operations/_grad_ops.py
+++ b/mindspore/python/mindspore/ops/operations/_grad_ops.py
@@ -2211,7 +2211,7 @@ class PadV3Grad(Primitive):
         """Initialize Padv3Grad"""
         self.add_prim_attr("cust_aicpu", self.name)
         self.init_prim_io_names(inputs=['x', 'paddings'], outputs=['y'])
-        validator.check_string(mode, ['reflect', 'edge'], 'mode', self.name)
+        validator.check_string(mode, ['reflect', 'edge', 'circular'], 'mode', self.name)
         validator.check_bool(paddings_contiguous, "paddings_contiguous", self.name)
         self.set_const_input_indexes([1])
         self.mode = mode
diff --git a/mindspore/python/mindspore/ops/operations/nn_ops.py b/mindspore/python/mindspore/ops/operations/nn_ops.py
index 1d40f2bc713..06f51f53539 100644
--- a/mindspore/python/mindspore/ops/operations/nn_ops.py
+++ b/mindspore/python/mindspore/ops/operations/nn_ops.py
@@ -4275,7 +4275,7 @@ class PadV3(Primitive):
     Args:
         mode (str, optional): An optional string indicates padding mode,
-            support "constant", "reflect", "edge". Default: "constant".
+            support "constant", "reflect", "edge" or "circular". Default: "constant".
         paddings_contiguous (bool, optional): An optional bool value indicates if the padding is paddings_contiguous.
             If true, paddings is arranged as [begin0, end0, begin1, end1, ...]
             If false, paddings is arranged as [begin0, begin1, ..., end1, end2, ...]
             Default: True.
@@ -4296,13 +4297,14 @@
         ValueError: If `mode` is not a str or not in support modes.
         ValueError: If `mode` is "constant", the element's number of `paddings` not be even.
         ValueError: If `mode` is "constant", the element's number of `paddings` large than input dim * 2.
-        ValueError: If `mode` is "edge" or "reflect", the element's number of `paddings` is not 2, 4 or 6.
-        ValueError: If `mode` is "edge" or "reflect", `x` dims equals 3,
+        ValueError: If `mode` is "edge", "reflect" or "circular", the element's number of `paddings` is not 2, 4 or 6.
+        ValueError: If `mode` is "edge", "reflect" or "circular", `x` dims equals 3,
             the element's number of `paddings` is not 2.
-        ValueError: If `mode` is "edge" or "reflect", `x` dims equals 4,
+        ValueError: If `mode` is "edge", "reflect" or "circular", `x` dims equals 4,
             the element's number of `paddings` is not 4.
-        ValueError: If `mode` is "edge" or "reflect", `x` dims smaller than 3.
-        ValueError: If `mode` is "edge", x dims bigger than 5.
+        ValueError: If `mode` is "circular", `x` dims equals 5, the element's number of `paddings` is not 6.
+        ValueError: If `mode` is "edge", "reflect" or "circular", `x` dims smaller than 3.
+ ValueError: If `mode` is "edge" or "circular", x dims bigger than 5. ValueError: If `mode` is "reflect", x dims bigger than 4. ValueError: If `mode` is "reflect", padding size bigger than the corresponding `x` dimension. ValueError: After padding, output's shape number is not greater than 0. @@ -4346,7 +4347,7 @@ class PadV3(Primitive): def __init__(self, mode='constant', paddings_contiguous=True): """Initialize PadV3""" self.init_prim_io_names(inputs=['x', 'paddings', 'constant_value'], outputs=['y']) - validator.check_string(mode, ['constant', 'reflect', 'edge'], 'mode', self.name) + validator.check_string(mode, ['constant', 'reflect', 'edge', 'circular'], 'mode', self.name) validator.check_bool(paddings_contiguous, "paddings_contiguous", self.name) self.mode = mode self.paddings_contiguous = paddings_contiguous diff --git a/tests/st/ops/cpu/test_pad_v3.py b/tests/st/ops/cpu/test_pad_v3.py index ad625787eb3..5a2ef3dbe3e 100644 --- a/tests/st/ops/cpu/test_pad_v3.py +++ b/tests/st/ops/cpu/test_pad_v3.py @@ -1,4 +1,4 @@ -# Copyright 2022 Huawei Technologies Co., Ltd +# Copyright 2022-2023 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -38,6 +38,83 @@ class NetDynamic(nn.Cell): return out +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_padv3_circular_dynamic_shape_3d(): + """ + Feature: test padv3 x and padding dynamic shape + Description: test padv3 dynamic shape + Expectation: Success + """ + context.set_context(device_target="CPU", save_graphs=False) + x = Tensor(np.arange(9).reshape(1, 3, 3).astype(np.float32)) + padding = Tensor((1, 2), dtype=mindspore.int64) + + net = NetDynamic('circular') + + x_dyn = Tensor(shape=(1, 3, None), dtype=x.dtype) + padding_dyn = Tensor(shape=(None,), dtype=padding.dtype) + net.set_inputs(x_dyn, padding_dyn) + + out = net(x, padding) + expect = np.array([[[2, 0, 1, 2, 0, 1], + [5, 3, 4, 5, 3, 4], + [8, 6, 7, 8, 6, 7]]]).astype(np.float32) + np.testing.assert_almost_equal(expect, out.asnumpy()) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_padv3_circular_dynamic_shape_4d(): + """ + Feature: test padv3 x and padding dynamic shape + Description: test padv3 dynamic shape + Expectation: Success + """ + context.set_context(device_target="CPU", save_graphs=False) + x = Tensor(np.arange(9).reshape(1, 1, 3, 3).astype(np.float64)) + padding = Tensor((1, -1, 1, 2), dtype=mindspore.int32) + + net = NetDynamic('circular') + + x_dyn = Tensor(shape=(1, 1, 3, None), dtype=x.dtype) + padding_dyn = Tensor(shape=(None,), dtype=padding.dtype) + net.set_inputs(x_dyn, padding_dyn) + + out = net(x, padding) + expect = np.array([[[[7, 6, 7], [1, 0, 1], [4, 3, 4], + [7, 6, 7], [1, 0, 1], [4, 3, 4]]]]).astype(np.float64) + np.testing.assert_almost_equal(expect, out.asnumpy()) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_padv3_circular_dynamic_shape_5d(): + """ + Feature: test padv3 x and padding dynamic shape + Description: test padv3 dynamic shape + Expectation: Success + """ + context.set_context(device_target="CPU", save_graphs=False) + x = Tensor(np.arange(18).reshape(1, 1, 2, 3, 3).astype(np.float64)) + padding = Tensor((0, 1, 1, -1, 0, -1), dtype=mindspore.int32) + + net = NetDynamic('circular') + + x_dyn = Tensor(shape=(1, 1, None, 3, None), dtype=x.dtype) + padding_dyn = Tensor(shape=(None,), dtype=padding.dtype) + 
diff --git a/tests/st/ops/cpu/test_pad_v3.py b/tests/st/ops/cpu/test_pad_v3.py
index ad625787eb3..5a2ef3dbe3e 100644
--- a/tests/st/ops/cpu/test_pad_v3.py
+++ b/tests/st/ops/cpu/test_pad_v3.py
@@ -1,4 +1,4 @@
-# Copyright 2022 Huawei Technologies Co., Ltd
+# Copyright 2022-2023 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -38,6 +38,83 @@ class NetDynamic(nn.Cell):
         return out
 
 
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu
+@pytest.mark.env_onecard
+def test_padv3_circular_dynamic_shape_3d():
+    """
+    Feature: test padv3 x and padding dynamic shape
+    Description: test padv3 circular mode with dynamic shape
+    Expectation: Success
+    """
+    context.set_context(device_target="CPU", save_graphs=False)
+    x = Tensor(np.arange(9).reshape(1, 3, 3).astype(np.float32))
+    padding = Tensor((1, 2), dtype=mindspore.int64)
+
+    net = NetDynamic('circular')
+
+    x_dyn = Tensor(shape=(1, 3, None), dtype=x.dtype)
+    padding_dyn = Tensor(shape=(None,), dtype=padding.dtype)
+    net.set_inputs(x_dyn, padding_dyn)
+
+    out = net(x, padding)
+    expect = np.array([[[2, 0, 1, 2, 0, 1],
+                        [5, 3, 4, 5, 3, 4],
+                        [8, 6, 7, 8, 6, 7]]]).astype(np.float32)
+    np.testing.assert_almost_equal(expect, out.asnumpy())
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu
+@pytest.mark.env_onecard
+def test_padv3_circular_dynamic_shape_4d():
+    """
+    Feature: test padv3 x and padding dynamic shape
+    Description: test padv3 circular mode with dynamic shape
+    Expectation: Success
+    """
+    context.set_context(device_target="CPU", save_graphs=False)
+    x = Tensor(np.arange(9).reshape(1, 1, 3, 3).astype(np.float64))
+    padding = Tensor((1, -1, 1, 2), dtype=mindspore.int32)
+
+    net = NetDynamic('circular')
+
+    x_dyn = Tensor(shape=(1, 1, 3, None), dtype=x.dtype)
+    padding_dyn = Tensor(shape=(None,), dtype=padding.dtype)
+    net.set_inputs(x_dyn, padding_dyn)
+
+    out = net(x, padding)
+    expect = np.array([[[[7, 6, 7], [1, 0, 1], [4, 3, 4],
+                         [7, 6, 7], [1, 0, 1], [4, 3, 4]]]]).astype(np.float64)
+    np.testing.assert_almost_equal(expect, out.asnumpy())
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu
+@pytest.mark.env_onecard
+def test_padv3_circular_dynamic_shape_5d():
+    """
+    Feature: test padv3 x and padding dynamic shape
+    Description: test padv3 circular mode with dynamic shape
+    Expectation: Success
+    """
+    context.set_context(device_target="CPU", save_graphs=False)
+    x = Tensor(np.arange(18).reshape(1, 1, 2, 3, 3).astype(np.float64))
+    padding = Tensor((0, 1, 1, -1, 0, -1), dtype=mindspore.int32)
+
+    net = NetDynamic('circular')
+
+    x_dyn = Tensor(shape=(1, 1, None, 3, None), dtype=x.dtype)
+    padding_dyn = Tensor(shape=(None,), dtype=padding.dtype)
+    net.set_inputs(x_dyn, padding_dyn)
+
+    out = net(x, padding)
+    expect = np.array([[[[[3, 4, 5, 3],
+                          [0, 1, 2, 0],
+                          [3, 4, 5, 3]]]]]).astype(np.float64)
+    np.testing.assert_almost_equal(expect, out.asnumpy())
+
+
 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu
 @pytest.mark.env_onecard
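The backward kernel exercised by the new grad test file below should be a scatter-add: every element of the incoming gradient is added onto the input position it was copied from in the forward pass, so wrapped positions accumulate several contributions and cropped positions receive zero. A sketch building on circular_pad above (circular_pad_grad is likewise illustrative, not the kernel code):

import numpy as np

def circular_pad_grad(dout, x_shape, paddings):
    # Forward-pad an array of flat input indices, then accumulate dout
    # back onto those indices; positions cropped by negative paddings
    # never appear in the index map and thus get zero gradient.
    idx = circular_pad(np.arange(np.prod(x_shape)).reshape(x_shape), paddings)
    dx = np.zeros(np.prod(x_shape), dtype=dout.dtype)
    np.add.at(dx, idx.ravel(), dout.ravel())
    return dx.reshape(x_shape)

dout = np.arange(1, 9, dtype=np.float32).reshape(1, 2, 4)
print(circular_pad_grad(dout, (1, 2, 2), [1, 1]))
# [[[ 6.  4.]
#   [14. 12.]]]  <- the expectation in test_padv3grad_circular_3d below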
+    Expectation: Success
+    """
+    context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="CPU")
+    net = PadV3GradNet('circular')
+
+    x = Tensor(np.arange(80).reshape(1, 1, 5, 4, 4).astype(np.float64))
+    paddings = Tensor(np.array([1, 0, 1, 1, 2, 1], dtype=np.int64))
+
+    output = net(x, paddings)
+    expect = np.array([[[[[246., 252., 498.], [222., 228., 450.]],
+                         [[164., 168., 332.], [148., 152., 300.]]]]]).astype(np.float64)
+    np.testing.assert_almost_equal(expect, output.asnumpy())
diff --git a/tests/st/ops/gpu/test_pad_v3.py b/tests/st/ops/gpu/test_pad_v3.py
index cd8eafcac86..9ef17117cca 100644
--- a/tests/st/ops/gpu/test_pad_v3.py
+++ b/tests/st/ops/gpu/test_pad_v3.py
@@ -1,4 +1,4 @@
-# Copyright 2022 Huawei Technologies Co., Ltd
+# Copyright 2022-2023 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -53,6 +53,83 @@ class NetDynamic(nn.Cell):
         return out
 
 
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_padv3_circular_dynamic_shape_3d():
+    """
+    Feature: test padv3 x and padding dynamic shape
+    Description: test padv3 circular mode with dynamic shape
+    Expectation: Success
+    """
+    context.set_context(device_target="GPU", save_graphs=False)
+    x = Tensor(np.arange(9).reshape(1, 3, 3).astype(np.float32))
+    padding = Tensor((1, 2), dtype=mindspore.int64)
+
+    net = NetDynamic('circular')
+
+    x_dyn = Tensor(shape=(1, 3, None), dtype=x.dtype)
+    padding_dyn = Tensor(shape=(None,), dtype=padding.dtype)
+    net.set_inputs(x_dyn, padding_dyn)
+
+    out = net(x, padding)
+    expect = np.array([[[2, 0, 1, 2, 0, 1],
+                        [5, 3, 4, 5, 3, 4],
+                        [8, 6, 7, 8, 6, 7]]]).astype(np.float32)
+    np.testing.assert_almost_equal(expect, out.asnumpy())
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_padv3_circular_dynamic_shape_4d():
+    """
+    Feature: test padv3 x and padding dynamic shape
+    Description: test padv3 circular mode with dynamic shape
+    Expectation: Success
+    """
+    context.set_context(device_target="GPU", save_graphs=False)
+    x = Tensor(np.arange(9).reshape(1, 1, 3, 3).astype(np.float64))
+    padding = Tensor((1, -1, 1, 2), dtype=mindspore.int32)
+
+    net = NetDynamic('circular')
+
+    x_dyn = Tensor(shape=(1, 1, 3, None), dtype=x.dtype)
+    padding_dyn = Tensor(shape=(None,), dtype=padding.dtype)
+    net.set_inputs(x_dyn, padding_dyn)
+
+    out = net(x, padding)
+    expect = np.array([[[[7, 6, 7], [1, 0, 1], [4, 3, 4],
+                         [7, 6, 7], [1, 0, 1], [4, 3, 4]]]]).astype(np.float64)
+    np.testing.assert_almost_equal(expect, out.asnumpy())
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_padv3_circular_dynamic_shape_5d():
+    """
+    Feature: test padv3 x and padding dynamic shape
+    Description: test padv3 circular mode with dynamic shape
+    Expectation: Success
+    """
+    context.set_context(device_target="GPU", save_graphs=False)
+    x = Tensor(np.arange(18).reshape(1, 1, 2, 3, 3).astype(np.float64))
+    padding = Tensor((0, 1, 1, -1, 0, -1), dtype=mindspore.int32)
+
+    net = NetDynamic('circular')
+
+    x_dyn = Tensor(shape=(1, 1, None, 3, None), dtype=x.dtype)
+    padding_dyn = Tensor(shape=(None,), dtype=padding.dtype)
+    net.set_inputs(x_dyn, padding_dyn)
+
+    out = net(x, padding)
+    expect = np.array([[[[[3, 4, 5, 3],
+                          [0, 1, 2, 0],
+                          [3, 4, 5, 3]]]]]).astype(np.float64)
+    np.testing.assert_almost_equal(expect, out.asnumpy())
+
+
 @pytest.mark.level0
 @pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
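Where PyTorch is available, torch.nn.functional.pad with mode='circular' can serve as an independent oracle for the non-negative-padding cases, since its pad tuple uses the same last-dimension-first (begin, end) layout. That the two semantics coincide for every edge case is an assumption, but they agree on these tests, e.g. for the 3-D forward case:

import torch
import torch.nn.functional as F

t = torch.arange(9, dtype=torch.float32).reshape(1, 3, 3)
print(F.pad(t, (1, 2), mode='circular'))
# tensor([[[2., 0., 1., 2., 0., 1.],
#          [5., 3., 4., 5., 3., 4.],
#          [8., 6., 7., 8., 6., 7.]]])  <- matches the MindSpore expect array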
diff --git a/tests/st/ops/gpu/test_pad_v3_grad.py b/tests/st/ops/gpu/test_pad_v3_grad.py
new file mode 100644
index 00000000000..12615808c39
--- /dev/null
+++ b/tests/st/ops/gpu/test_pad_v3_grad.py
@@ -0,0 +1,92 @@
+# Copyright 2023 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import pytest
+import numpy as np
+import mindspore
+import mindspore.nn as nn
+import mindspore.context as context
+from mindspore import Tensor
+import mindspore.ops.operations._grad_ops as Grad
+
+
+class PadV3GradNet(nn.Cell):
+    def __init__(self, mode):
+        super(PadV3GradNet, self).__init__()
+        self.op = Grad.PadV3Grad(mode)
+
+    def construct(self, x, paddings):
+        return self.op(x, paddings)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_padv3grad_circular_3d():
+    """
+    Feature: test PadV3Grad
+    Description: test PadV3Grad circular mode.
+    Expectation: Success
+    """
+    context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="GPU")
+    net = PadV3GradNet('circular')
+
+    x = Tensor([1, 2, 3, 4, 5, 6, 7, 8], dtype=mindspore.float32).reshape((1, 2, 4))
+    paddings = Tensor(np.array([1, 1], dtype=np.int32))
+
+    output = net(x, paddings)
+    expect = np.array([[[6, 4], [14, 12]]]).astype(np.float32)
+    np.testing.assert_almost_equal(expect, output.asnumpy())
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_padv3grad_circular_4d():
+    """
+    Feature: test PadV3Grad
+    Description: test PadV3Grad circular mode.
+    Expectation: Success
+    """
+    context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="GPU")
+    net = PadV3GradNet('circular')
+
+    x = Tensor(np.arange(18).reshape(1, 1, 3, 6).astype(np.float32))
+    paddings = Tensor(np.array([1, 2, 1, 0], dtype=np.int64))
+
+    output = net(x, paddings)
+    expect = np.array([[[[17., 19., 15.], [34., 38., 30.]]]]).astype(np.float32)
+    np.testing.assert_almost_equal(expect, output.asnumpy())
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_padv3grad_circular_5d():
+    """
+    Feature: test PadV3Grad
+    Description: test PadV3Grad circular mode.
+    Expectation: Success
+    """
+    context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="GPU")
+    net = PadV3GradNet('circular')
+
+    x = Tensor(np.arange(80).reshape(1, 1, 5, 4, 4).astype(np.float64))
+    paddings = Tensor(np.array([1, 0, 1, 1, 2, 1], dtype=np.int64))
+
+    output = net(x, paddings)
+    expect = np.array([[[[[246., 252., 498.], [222., 228., 450.]],
+                         [[164., 168., 332.], [148., 152., 300.]]]]]).astype(np.float64)
+    np.testing.assert_almost_equal(expect, output.asnumpy())
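The two NumPy sketches above also reproduce the 5-D gradient expectation in this file: each padded dimension of the (1, 1, 5, 4, 4) incoming gradient shrinks by its begin + end, so the recovered input shape is (1, 1, 2, 2, 3). Reusing the illustrative circular_pad_grad from the earlier note:

import numpy as np

dout = np.arange(80).reshape(1, 1, 5, 4, 4).astype(np.float64)
print(circular_pad_grad(dout, (1, 1, 2, 2, 3), [1, 0, 1, 1, 2, 1]))
# [[[[[246. 252. 498.]
#     [222. 228. 450.]]
#    [[164. 168. 332.]
#     [148. 152. 300.]]]]]  <- the expect array in test_padv3grad_circular_5d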