diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_grad_impl.cu b/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_grad_impl.cu
index ce8617283c6..5aa087e7f51 100644
--- a/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_grad_impl.cu
+++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/broadcast_grad_impl.cu
@@ -110,7 +110,13 @@ void NoBroadcastGrad(const int &nums, enum BroadcastGradOpType op, const T *x1,
 template void NoBroadcastGrad(const int &nums, enum BroadcastGradOpType op, const float *x1, const float *x2,
                               const float *dy, float *dx1, float *dx2, cudaStream_t stream);
+template void NoBroadcastGrad(const int &nums, enum BroadcastGradOpType op, const int *x1, const int *x2,
+                              const int *dy, int *dx1, int *dx2, cudaStream_t stream);
 template void BroadcastGrad(const int &l0, const int &l1, const int &l2, const int &l3, const int &r0, const int &r1,
                             const int &r2, const int &r3, const int &d0, const int &d1, const int &d2, const int &d3,
                             enum BroadcastGradOpType op, const float *x1, const float *x2, const float *dy,
                             float *dx1, float *dx2, cudaStream_t stream);
+template void BroadcastGrad(const int &l0, const int &l1, const int &l2, const int &l3, const int &r0, const int &r1,
+                            const int &r2, const int &r3, const int &d0, const int &d1, const int &d2, const int &d3,
+                            enum BroadcastGradOpType op, const int *x1, const int *x2, const int *dy, int *dx1,
+                            int *dx2, cudaStream_t stream);
diff --git a/mindspore/ccsrc/kernel/gpu/math/broadcast_gpu_kernel.cc b/mindspore/ccsrc/kernel/gpu/math/broadcast_gpu_kernel.cc
index d7c70c010fa..e299946780e 100644
--- a/mindspore/ccsrc/kernel/gpu/math/broadcast_gpu_kernel.cc
+++ b/mindspore/ccsrc/kernel/gpu/math/broadcast_gpu_kernel.cc
@@ -90,5 +90,11 @@ MS_REG_GPU_KERNEL_TWO(
 MS_REG_GPU_KERNEL_TWO(
   TensorAdd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
   BroadcastOpGpuKernel, int, int)
+MS_REG_GPU_KERNEL_TWO(
+  Minimum, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
+  BroadcastOpGpuKernel, int, int)
+MS_REG_GPU_KERNEL_TWO(
+  Maximum, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
+  BroadcastOpGpuKernel, int, int)
 }  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/kernel/gpu/math/broadcast_grad_gpu_kernel.cc b/mindspore/ccsrc/kernel/gpu/math/broadcast_grad_gpu_kernel.cc
index edc51d4ffd7..85598cf9406 100644
--- a/mindspore/ccsrc/kernel/gpu/math/broadcast_grad_gpu_kernel.cc
+++ b/mindspore/ccsrc/kernel/gpu/math/broadcast_grad_gpu_kernel.cc
@@ -34,5 +34,21 @@ MS_REG_GPU_KERNEL_ONE(MaximumGrad,
                         .AddOutputAttr(kNumberTypeFloat32)
                         .AddOutputAttr(kNumberTypeFloat32),
                       BroadcastOpGradGpuKernel, float)
+MS_REG_GPU_KERNEL_ONE(MinimumGrad,
+                      KernelAttr()
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddOutputAttr(kNumberTypeInt32)
+                        .AddOutputAttr(kNumberTypeInt32),
+                      BroadcastOpGradGpuKernel, int)
+MS_REG_GPU_KERNEL_ONE(MaximumGrad,
+                      KernelAttr()
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddOutputAttr(kNumberTypeInt32)
+                        .AddOutputAttr(kNumberTypeInt32),
+                      BroadcastOpGradGpuKernel, int)
 }  // namespace kernel
 }  // namespace mindspore
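The three hunks above wire int32 through the whole path: the CUDA template instantiations, the forward Minimum/Maximum registrations, and the MinimumGrad/MaximumGrad registrations. A minimal usage sketch of the forward ops this enables (assuming a GPU build of MindSpore; P.Minimum and P.Maximum are the public primitives backed by BroadcastOpGpuKernel):

import numpy as np

import mindspore.context as context
from mindspore import Tensor
from mindspore.ops import operations as P

# PyNative mode lets us call the primitives directly, without wrapping them in a Cell.
context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")

x = Tensor(np.array([[1, 5, 3]]).astype(np.int32))
y = Tensor(np.array([[2]]).astype(np.int32))  # broadcast against x along axis 1

print(P.Minimum()(x, y).asnumpy())  # [[1 2 2]]
print(P.Maximum()(x, y).asnumpy())  # [[2 5 3]]

Before this patch, these calls would fail to select a GPU kernel for int32 inputs, since only the float32 variants were registered.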
diff --git a/tests/st/ops/gpu/test_maximum_op.py b/tests/st/ops/gpu/test_maximum_op.py
index eafd9e5136c..9566554231f 100644
--- a/tests/st/ops/gpu/test_maximum_op.py
+++ b/tests/st/ops/gpu/test_maximum_op.py
@@ -222,3 +222,27 @@ def test_broadcast_diff_dims():
     output_ms = net(Tensor(x1_np), Tensor(x2_np), Tensor(dy_np))
     assert np.allclose(output_ms[0].asnumpy(), expect_dx1)
     assert np.allclose(output_ms[1].asnumpy(), expect_dx2)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_maximum_int():
+    x = Tensor(np.array([[1, 2, 3]]).astype(np.int32))
+    y = Tensor(np.array([[2]]).astype(np.int32))
+    expect = [[2, 2, 3]]
+    error = np.ones(shape=[1, 3]) * 1.0e-5
+
+    context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
+    max_op = Net()
+    output = max_op(x, y)
+    diff = output.asnumpy() - expect
+    assert np.all(diff < error)
+    assert np.all(-diff < error)
+
+    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
+    max_op_2 = Net()
+    output = max_op_2(x, y)
+    diff = output.asnumpy() - expect
+    assert np.all(diff < error)
+    assert np.all(-diff < error)
diff --git a/tests/st/ops/gpu/test_minimum_op.py b/tests/st/ops/gpu/test_minimum_op.py
index c5669b17e05..2a14a5bb042 100644
--- a/tests/st/ops/gpu/test_minimum_op.py
+++ b/tests/st/ops/gpu/test_minimum_op.py
@@ -218,3 +218,21 @@ def test_broadcast_diff_dims():
     output_ms = net(Tensor(x1_np), Tensor(x2_np), Tensor(dy_np))
     assert np.allclose(output_ms[0].asnumpy(), expect_dx1)
     assert np.allclose(output_ms[1].asnumpy(), expect_dx2)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_broadcast_int32():
+    context.set_context(mode=context.GRAPH_MODE, device_target='GPU')
+
+    x1_np = (np.random.rand(3, 4) * 100).astype(np.int32)
+    x2_np = (np.random.rand(3, 4) * 100).astype(np.int32)
+    dy_np = (np.random.rand(3, 4) * 100).astype(np.int32)
+
+    net = Grad(MinimumNet())
+    output_ms = net(Tensor(x1_np), Tensor(x2_np), Tensor(dy_np))
+    output0_np = np.where(x1_np < x2_np, dy_np, 0)
+    output1_np = np.where(x1_np < x2_np, 0, dy_np)
+    assert np.allclose(output_ms[0].asnumpy(), output0_np)
+    assert np.allclose(output_ms[1].asnumpy(), output1_np)
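Note on the int32 grad test: np.random.rand() draws floats in [0, 1), so casting straight to int32 would truncate every input to zero and the assertions would pass vacuously; scaling by 100 before the cast (as done above) keeps the inputs non-degenerate. For reference, a plain-numpy sketch of the gradient-routing convention the test asserts (values are illustrative, not from the test): dy flows to whichever input holds the elementwise minimum, and the strict x1 < x2 mask sends tie elements to dx2; MaximumGrad mirrors this with the opposite comparison.

import numpy as np

x1 = np.array([[3, 7, 5]], dtype=np.int32)
x2 = np.array([[5, 5, 5]], dtype=np.int32)
dy = np.array([[1, 1, 1]], dtype=np.int32)

mask = x1 < x2                # strict comparison: ties (x1 == x2) count as "not less"
dx1 = np.where(mask, dy, 0)   # [[1 0 0]] -- dy flows to x1 where x1 is strictly smaller
dx2 = np.where(mask, 0, dy)   # [[0 1 1]] -- x2 receives dy elsewhere, including ties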