Forked from mindspore-Ecosystem/mindspore

GPU Minimum & Maximum kernels support int32

commit 480bf4151b (parent b3f91a4f22)
@@ -110,7 +110,13 @@ void NoBroadcastGrad(const int &nums, enum BroadcastGradOpType op, const T *x1,
 
 template void NoBroadcastGrad(const int &nums, enum BroadcastGradOpType op, const float *x1, const float *x2,
                               const float *dy, float *dx1, float *dx2, cudaStream_t stream);
+template void NoBroadcastGrad(const int &nums, enum BroadcastGradOpType op, const int *x1, const int *x2,
+                              const int *dy, int *dx1, int *dx2, cudaStream_t stream);
 template void BroadcastGrad(const int &l0, const int &l1, const int &l2, const int &l3, const int &r0, const int &r1,
                             const int &r2, const int &r3, const int &d0, const int &d1, const int &d2, const int &d3,
                             enum BroadcastGradOpType op, const float *x1, const float *x2, const float *dy, float *dx1,
                             float *dx2, cudaStream_t stream);
+template void BroadcastGrad(const int &l0, const int &l1, const int &l2, const int &l3, const int &r0, const int &r1,
+                            const int &r2, const int &r3, const int &d0, const int &d1, const int &d2, const int &d3,
+                            enum BroadcastGradOpType op, const int *x1, const int *x2, const int *dy, int *dx1,
+                            int *dx2, cudaStream_t stream);
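These explicit instantiations expose the existing float gradient kernels to int. For orientation, here is a NumPy reference (illustrative function names, not from the source) of the routing the Minimum variant implements, matching the np.where expectations in the int32 test further down; the Maximum variant is the symmetric assumption:

import numpy as np

def minimum_grad_ref(x1, x2, dy):
    # Gradient dy flows to x1 where x1 is strictly smaller; ties and the
    # rest go to x2, matching np.where(x1 < x2, ...) in the test below.
    dx1 = np.where(x1 < x2, dy, 0)
    dx2 = np.where(x1 < x2, 0, dy)
    return dx1, dx2

def maximum_grad_ref(x1, x2, dy):
    # Assumed symmetric routing for Maximum (not directly shown in this diff).
    dx1 = np.where(x1 > x2, dy, 0)
    dx2 = np.where(x1 > x2, 0, dy)
    return dx1, dx2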
@@ -90,5 +90,11 @@ MS_REG_GPU_KERNEL_TWO(
 MS_REG_GPU_KERNEL_TWO(
   TensorAdd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
   BroadcastOpGpuKernel, int, int)
+MS_REG_GPU_KERNEL_TWO(
+  Minimum, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
+  BroadcastOpGpuKernel, int, int)
+MS_REG_GPU_KERNEL_TWO(
+  Maximum, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
+  BroadcastOpGpuKernel, int, int)
 } // namespace kernel
 } // namespace mindspore
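With these registrations, the forward Minimum and Maximum primitives accept int32 tensors on GPU. A minimal usage sketch mirroring the tests later in this commit (PyNative mode, scalar broadcasting):

import numpy as np
import mindspore.context as context
from mindspore import Tensor
from mindspore.ops import operations as P

context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")

x = Tensor(np.array([[1, 20, 5]], dtype=np.int32))
y = Tensor(np.array([[5]], dtype=np.int32))  # broadcast against x

print(P.Minimum()(x, y))  # [[1 5 5]]
print(P.Maximum()(x, y))  # [[5 20 5]]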
@@ -34,5 +34,21 @@ MS_REG_GPU_KERNEL_ONE(MaximumGrad,
                         .AddOutputAttr(kNumberTypeFloat32)
                         .AddOutputAttr(kNumberTypeFloat32),
                       BroadcastOpGradGpuKernel, float)
+MS_REG_GPU_KERNEL_ONE(MinimumGrad,
+                      KernelAttr()
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddOutputAttr(kNumberTypeInt32)
+                        .AddOutputAttr(kNumberTypeInt32),
+                      BroadcastOpGradGpuKernel, int)
+MS_REG_GPU_KERNEL_ONE(MaximumGrad,
+                      KernelAttr()
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddInputAttr(kNumberTypeInt32)
+                        .AddOutputAttr(kNumberTypeInt32)
+                        .AddOutputAttr(kNumberTypeInt32),
+                      BroadcastOpGradGpuKernel, int)
 } // namespace kernel
 } // namespace mindspore
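These MS_REG_GPU_KERNEL_ONE entries let autodiff dispatch the int32 MinimumGrad/MaximumGrad kernels (three int32 inputs: x1, x2, and the sensitivity dy; two int32 outputs: dx1, dx2). A sketch of how they get exercised from Python; the GradOperation signature has varied across MindSpore releases, so the keyword form here is an assumption:

import numpy as np
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P
from mindspore.ops import composite as C

context.set_context(mode=context.GRAPH_MODE, device_target="GPU")

class MinNet(nn.Cell):
    def __init__(self):
        super(MinNet, self).__init__()
        self.min = P.Minimum()

    def construct(self, x1, x2):
        return self.min(x1, x2)

# get_all=True returns gradients for both inputs; sens_param=True lets us
# feed dy explicitly, which is what routes through the MinimumGrad kernel.
grad = C.GradOperation(get_all=True, sens_param=True)
x1 = Tensor(np.array([1, 5, 3], dtype=np.int32))
x2 = Tensor(np.array([4, 2, 3], dtype=np.int32))
dy = Tensor(np.array([7, 8, 9], dtype=np.int32))
dx1, dx2 = grad(MinNet())(x1, x2, dy)  # dy routed element-wise to the smaller input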
@@ -222,3 +222,27 @@ def test_broadcast_diff_dims():
     output_ms = net(Tensor(x1_np), Tensor(x2_np), Tensor(dy_np))
     assert np.allclose(output_ms[0].asnumpy(), expect_dx1)
     assert np.allclose(output_ms[1].asnumpy(), expect_dx2)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_maximum_int():
+    x = Tensor(np.array([[1, 2, 3]]).astype(np.int32))
+    y = Tensor(np.array([[2]]).astype(np.int32))
+    expect = [[2, 2, 3]]
+    error = np.ones(shape=[1, 3]) * 1.0e-5
+
+    context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
+    max_op = Net()
+    output = max_op(x, y)
+    diff = output.asnumpy() - expect
+    assert np.all(diff < error)
+    assert np.all(-diff < error)
+
+    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
+    max_op_2 = Net()
+    output = max_op_2(x, y)
+    diff = output.asnumpy() - expect
+    assert np.all(diff < error)
+    assert np.all(-diff < error)
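Net here is a helper defined earlier in the test file and not shown in this diff; it presumably just wraps P.Maximum. A minimal sketch under that assumption:

import mindspore.nn as nn
from mindspore.ops import operations as P

class Net(nn.Cell):
    # Hypothetical reconstruction of the wrapper the test instantiates.
    def __init__(self):
        super(Net, self).__init__()
        self.max = P.Maximum()

    def construct(self, x, y):
        return self.max(x, y)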
@@ -218,3 +218,21 @@ def test_broadcast_diff_dims():
     output_ms = net(Tensor(x1_np), Tensor(x2_np), Tensor(dy_np))
     assert np.allclose(output_ms[0].asnumpy(), expect_dx1)
     assert np.allclose(output_ms[1].asnumpy(), expect_dx2)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_broadcast_int32():
+    context.set_context(mode=context.GRAPH_MODE, save_graphs=True, device_target='GPU')
+
+    x1_np = (np.random.rand(3, 4) * 100).astype(np.int32)
+    x2_np = (np.random.rand(3, 4) * 100).astype(np.int32)
+    dy_np = (np.random.rand(3, 4) * 100).astype(np.int32)
+
+    net = Grad(MinimumNet())
+    output_ms = net(Tensor(x1_np), Tensor(x2_np), Tensor(dy_np))
+    output0_np = np.where(x1_np < x2_np, dy_np, 0)
+    output1_np = np.where(x1_np < x2_np, 0, dy_np)
+    assert np.allclose(output_ms[0].asnumpy(), output0_np)
+    assert np.allclose(output_ms[1].asnumpy(), output1_np)
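Two notes on this test. First, np.random.rand() yields floats in [0, 1), so the inputs are scaled before the int32 cast; without scaling the test data would be all zeros and the assertions vacuous. Second, Grad and MinimumNet are helpers defined earlier in the test file and not shown in this diff; a plausible sketch, inferred from the three-argument call with dy passed as the sensitivity:

import mindspore.nn as nn
from mindspore.ops import operations as P
from mindspore.ops import composite as C

class MinimumNet(nn.Cell):
    def __init__(self):
        super(MinimumNet, self).__init__()
        self.min = P.Minimum()

    def construct(self, x1, x2):
        return self.min(x1, x2)

class Grad(nn.Cell):
    # Wraps a network and returns gradients w.r.t. all inputs, with the
    # output sensitivity (dy) supplied as an explicit third argument.
    def __init__(self, network):
        super(Grad, self).__init__()
        self.grad = C.GradOperation(get_all=True, sens_param=True)
        self.network = network

    def construct(self, x1, x2, sens):
        return self.grad(self.network)(x1, x2, sens)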