forked from mindspore-Ecosystem/mindspore
add int32 calculation for Less GPU kernel
commit 3b41023a6b
parent 9940c723d5
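In short: this change adds an int32 code path for the Less operator on GPU. It consists of new explicit template instantiations with int inputs and bool outputs in the CUDA broadcast implementation, a matching GPU kernel registration, and int32 cases in the broadcast tests. A minimal sketch of what the change enables (the context setup is illustrative and assumes a GPU build of MindSpore):

    import numpy as np
    from mindspore import Tensor, context
    from mindspore.ops import operations as P

    # Illustrative setup; assumes a GPU build of MindSpore.
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")

    # With this commit, Less accepts int32 inputs on GPU and returns a bool tensor.
    output = P.Less()(Tensor(np.array([1, 5], np.int32)), Tensor(np.array([3, 2], np.int32)))
    print(output.asnumpy())  # [ True False]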
@@ -256,6 +256,10 @@ template void Broadcast(const int &l0, const int &l1, const int &l2, const int &
                         const int &r2, const int &r3, const int &d0, const int &d1, const int &d2, const int &d3,
                         enum BroadcastOpType op, const int *input0, const int *input1, int *output,
                         cudaStream_t stream);
+template void Broadcast(const int &l0, const int &l1, const int &l2, const int &l3, const int &r0, const int &r1,
+                        const int &r2, const int &r3, const int &d0, const int &d1, const int &d2, const int &d3,
+                        enum BroadcastOpType op, const int *input0, const int *input1, bool *output,
+                        cudaStream_t stream);
 template void NoBroadcast(const int &nums, enum BroadcastOpType op, const float *input0, const float *input1,
                           bool *output, cudaStream_t stream);
 template void NoBroadcast(const int &nums, enum BroadcastOpType op, const float *input0, const float *input1,
@@ -266,6 +270,8 @@ template void NoBroadcast(const int &nums, enum BroadcastOpType op, const half *
                           half *output, cudaStream_t stream);
 template void NoBroadcast(const int &nums, enum BroadcastOpType op, const int *input0, const int *input1,
                           int *output, cudaStream_t stream);
+template void NoBroadcast(const int &nums, enum BroadcastOpType op, const int *input0, const int *input1,
+                          bool *output, cudaStream_t stream);
 template void BroadcastTo(const int &i0, const int &i1, const int &i2, const int &i3, const int &o0, const int &o1,
                           const int &o2, const int &o3, const float *input_addr, float *output_addr,
                           cudaStream_t stream);
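The two hunks above mirror the split in the CUDA implementation: NoBroadcast is the same-shape fast path, while Broadcast takes the two 4-D input shapes (l0..l3 and r0..r3) plus the broadcast output shape (d0..d3). A NumPy-only sketch of the shape arithmetic the new int-in/bool-out Broadcast instantiation serves, using the shapes from the float broadcast test below:

    import numpy as np

    x1 = np.random.randint(0, 100, (3, 1, 5, 1)).astype(np.int32)
    x2 = np.random.randint(0, 100, (1, 4, 1, 6)).astype(np.int32)

    # Each size-1 axis is stretched to match the other operand:
    # (3, 1, 5, 1) vs (1, 4, 1, 6) -> output shape d = (3, 4, 5, 6).
    out = x1 < x2
    assert out.shape == (3, 4, 5, 6)
    assert out.dtype == np.bool_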
@@ -103,6 +103,9 @@ MS_REG_GPU_KERNEL_TWO(
   BroadcastOpGpuKernel, half, half)
 
 // int32
+MS_REG_GPU_KERNEL_TWO(
+  Less, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeBool),
+  BroadcastOpGpuKernel, int, bool)
 MS_REG_GPU_KERNEL_TWO(
   TensorAdd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
   BroadcastOpGpuKernel, int, int)
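In MS_REG_GPU_KERNEL_TWO, the two trailing template arguments are the input and output element types, so `BroadcastOpGpuKernel, int, bool` pairs the (Int32, Int32) -> Bool KernelAttr with the int-in/bool-out instantiations added above. A same-shape reference check mirroring the test added below (the setup line is illustrative, assuming a GPU build):

    import numpy as np
    from mindspore import Tensor, context
    from mindspore.ops import operations as P

    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")  # illustrative

    x1 = np.random.randint(0, 100, (10, 20)).astype(np.int32)
    x2 = np.random.randint(0, 100, (10, 20)).astype(np.int32)

    # Same-shape int32 inputs exercise the NoBroadcast path for int
    # inputs and bool outputs.
    out = P.Less()(Tensor(x1), Tensor(x2))
    assert np.allclose(out.asnumpy(), x1 < x2)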
@@ -29,6 +29,8 @@ def test_nobroadcast():
 
     x1_np = np.random.rand(10, 20).astype(np.float32)
     x2_np = np.random.rand(10, 20).astype(np.float32)
+    x1_np_int32 = np.random.randint(0, 100, (10, 20)).astype(np.int32)
+    x2_np_int32 = np.random.randint(0, 100, (10, 20)).astype(np.int32)
 
     output_ms = P.Minimum()(Tensor(x1_np), Tensor(x2_np))
     output_np = np.minimum(x1_np, x2_np)
@@ -45,6 +47,9 @@ def test_nobroadcast():
     output_ms = P.Less()(Tensor(x1_np), Tensor(x2_np))
     output_np = x1_np < x2_np
     assert np.allclose(output_ms.asnumpy(), output_np)
+    output_ms = P.Less()(Tensor(x1_np_int32), Tensor(x2_np_int32))
+    output_np = x1_np_int32 < x2_np_int32
+    assert np.allclose(output_ms.asnumpy(), output_np)
 
     output_ms = P.Pow()(Tensor(x1_np), Tensor(x2_np))
     output_np = np.power(x1_np, x2_np)
@@ -71,6 +76,8 @@ def test_broadcast():
 
     x1_np = np.random.rand(3, 1, 5, 1).astype(np.float32)
     x2_np = np.random.rand(1, 4, 1, 6).astype(np.float32)
+    x1_np_int32 = np.random.randint(0, 100, (3, 1, 5, 1)).astype(np.int32)
+    x2_np_int32 = np.random.randint(0, 100, (3, 1, 5, 1)).astype(np.int32)
 
     output_ms = P.Minimum()(Tensor(x1_np), Tensor(x2_np))
     output_np = np.minimum(x1_np, x2_np)
@@ -87,6 +94,9 @@ def test_broadcast():
     output_ms = P.Less()(Tensor(x1_np), Tensor(x2_np))
     output_np = x1_np < x2_np
     assert np.allclose(output_ms.asnumpy(), output_np)
+    output_ms = P.Less()(Tensor(x1_np_int32), Tensor(x2_np_int32))
+    output_np = x1_np_int32 < x2_np_int32
+    assert np.allclose(output_ms.asnumpy(), output_np)
 
     output_ms = P.Pow()(Tensor(x1_np), Tensor(x2_np))
     output_np = np.power(x1_np, x2_np)
@@ -113,6 +123,8 @@ def test_broadcast_diff_dims():
 
     x1_np = np.random.rand(2).astype(np.float32)
     x2_np = np.random.rand(2, 1).astype(np.float32)
+    x1_np_int32 = np.random.randint(0, 100, (2)).astype(np.int32)
+    x2_np_int32 = np.random.randint(0, 100, (2, 1)).astype(np.int32)
 
     output_ms = P.Minimum()(Tensor(x1_np), Tensor(x2_np))
     output_np = np.minimum(x1_np, x2_np)
@@ -129,6 +141,9 @@ def test_broadcast_diff_dims():
     output_ms = P.Less()(Tensor(x1_np), Tensor(x2_np))
     output_np = x1_np < x2_np
     assert np.allclose(output_ms.asnumpy(), output_np)
+    output_ms = P.Less()(Tensor(x1_np_int32), Tensor(x2_np_int32))
+    output_np = x1_np_int32 < x2_np_int32
+    assert np.allclose(output_ms.asnumpy(), output_np)
 
     output_ms = P.Pow()(Tensor(x1_np), Tensor(x2_np))
     output_np = np.power(x1_np, x2_np)
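The diff-dims int32 case above also relies on NumPy-style rank promotion: a shape-(2,) array compared against a shape-(2, 1) array broadcasts to (2, 2). A self-contained check of the expected semantics with NumPy alone:

    import numpy as np

    x1 = np.random.randint(0, 100, (2,)).astype(np.int32)
    x2 = np.random.randint(0, 100, (2, 1)).astype(np.int32)

    # (2,) is promoted to (1, 2), then broadcast against (2, 1) -> (2, 2).
    out = x1 < x2
    assert out.shape == (2, 2)
    assert out.dtype == np.bool_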