diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu index d79942bd43b..827bec11f9b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/broadcast_impl.cu @@ -256,6 +256,10 @@ template void Broadcast(const int &l0, const int &l1, const int &l2, const int & const int &r2, const int &r3, const int &d0, const int &d1, const int &d2, const int &d3, enum BroadcastOpType op, const int *input0, const int *input1, int *output, cudaStream_t stream); +template void Broadcast(const int &l0, const int &l1, const int &l2, const int &l3, const int &r0, const int &r1, + const int &r2, const int &r3, const int &d0, const int &d1, const int &d2, const int &d3, + enum BroadcastOpType op, const int *input0, const int *input1, bool *output, + cudaStream_t stream); template void NoBroadcast(const int &nums, enum BroadcastOpType op, const float *input0, const float *input1, bool *output, cudaStream_t stream); template void NoBroadcast(const int &nums, enum BroadcastOpType op, const float *input0, const float *input1, @@ -266,6 +270,8 @@ template void NoBroadcast(const int &nums, enum BroadcastOpType op, const half * half *output, cudaStream_t stream); template void NoBroadcast(const int &nums, enum BroadcastOpType op, const int *input0, const int *input1, int *output, cudaStream_t stream); +template void NoBroadcast(const int &nums, enum BroadcastOpType op, const int *input0, const int *input1, + bool *output, cudaStream_t stream); template void BroadcastTo(const int &i0, const int &i1, const int &i2, const int &i3, const int &o0, const int &o1, const int &o2, const int &o3, const float *input_addr, float *output_addr, cudaStream_t stream); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.cc 
b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.cc index 7232e9a3f55..ccccd767a84 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/broadcast_gpu_kernel.cc @@ -103,6 +103,9 @@ MS_REG_GPU_KERNEL_TWO( BroadcastOpGpuKernel, half, half) // int32 +MS_REG_GPU_KERNEL_TWO( + Less, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeBool), + BroadcastOpGpuKernel, int, bool) MS_REG_GPU_KERNEL_TWO( TensorAdd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), BroadcastOpGpuKernel, int, int) diff --git a/tests/st/ops/gpu/test_broadcast_op.py b/tests/st/ops/gpu/test_broadcast_op.py index 3f97a229e89..202517729a3 100644 --- a/tests/st/ops/gpu/test_broadcast_op.py +++ b/tests/st/ops/gpu/test_broadcast_op.py @@ -29,6 +29,8 @@ def test_nobroadcast(): x1_np = np.random.rand(10, 20).astype(np.float32) x2_np = np.random.rand(10, 20).astype(np.float32) + x1_np_int32 = np.random.randint(0, 100, (10, 20)).astype(np.int32) + x2_np_int32 = np.random.randint(0, 100, (10, 20)).astype(np.int32) output_ms = P.Minimum()(Tensor(x1_np), Tensor(x2_np)) output_np = np.minimum(x1_np, x2_np) @@ -45,6 +47,9 @@ def test_nobroadcast(): output_ms = P.Less()(Tensor(x1_np), Tensor(x2_np)) output_np = x1_np < x2_np assert np.allclose(output_ms.asnumpy(), output_np) + output_ms = P.Less()(Tensor(x1_np_int32), Tensor(x2_np_int32)) + output_np = x1_np_int32 < x2_np_int32 + assert np.allclose(output_ms.asnumpy(), output_np) output_ms = P.Pow()(Tensor(x1_np), Tensor(x2_np)) output_np = np.power(x1_np, x2_np) @@ -71,6 +76,8 @@ def test_broadcast(): x1_np = np.random.rand(3, 1, 5, 1).astype(np.float32) x2_np = np.random.rand(1, 4, 1, 6).astype(np.float32) + x1_np_int32 = np.random.randint(0, 100, (3, 1, 5, 1)).astype(np.int32) + x2_np_int32 = np.random.randint(0, 100, (1, 4, 1, 6)).astype(np.int32) 
output_ms = P.Minimum()(Tensor(x1_np), Tensor(x2_np)) output_np = np.minimum(x1_np, x2_np) @@ -87,6 +94,9 @@ def test_broadcast(): output_ms = P.Less()(Tensor(x1_np), Tensor(x2_np)) output_np = x1_np < x2_np assert np.allclose(output_ms.asnumpy(), output_np) + output_ms = P.Less()(Tensor(x1_np_int32), Tensor(x2_np_int32)) + output_np = x1_np_int32 < x2_np_int32 + assert np.allclose(output_ms.asnumpy(), output_np) output_ms = P.Pow()(Tensor(x1_np), Tensor(x2_np)) output_np = np.power(x1_np, x2_np) @@ -113,6 +123,8 @@ def test_broadcast_diff_dims(): x1_np = np.random.rand(2).astype(np.float32) x2_np = np.random.rand(2, 1).astype(np.float32) + x1_np_int32 = np.random.randint(0, 100, (2)).astype(np.int32) + x2_np_int32 = np.random.randint(0, 100, (2, 1)).astype(np.int32) output_ms = P.Minimum()(Tensor(x1_np), Tensor(x2_np)) output_np = np.minimum(x1_np, x2_np) @@ -129,6 +141,9 @@ def test_broadcast_diff_dims(): output_ms = P.Less()(Tensor(x1_np), Tensor(x2_np)) output_np = x1_np < x2_np assert np.allclose(output_ms.asnumpy(), output_np) + output_ms = P.Less()(Tensor(x1_np_int32), Tensor(x2_np_int32)) + output_np = x1_np_int32 < x2_np_int32 + assert np.allclose(output_ms.asnumpy(), output_np) output_ms = P.Pow()(Tensor(x1_np), Tensor(x2_np)) output_np = np.power(x1_np, x2_np)