gpu maximum & minimum kernel with fp16 input
This commit is contained in:
parent
202795eb05
commit
3b54e55223
|
@ -202,7 +202,8 @@ void ElewiseArith(const int &nums, enum BroadcastOpType op, const T *x0, const T
|
|||
template <>
|
||||
void ElewiseArith(const int &nums, enum BroadcastOpType op, const half *x0, const half *x1, half *y,
|
||||
cudaStream_t stream) {
|
||||
if (nums % 2 == 0) {
|
||||
// `>` return true iff both half result are true. fallback to half
|
||||
if (nums % 2 == 0 && op != BROADCAST_TYPE_MINIMUM && op != BROADCAST_TYPE_MAXIMUM && op != BROADCAST_TYPE_ABSGRAD) {
|
||||
ElewiseArithKernel<half2>(nums / 2, op, reinterpret_cast<const half2 *>(x0), reinterpret_cast<const half2 *>(x1),
|
||||
reinterpret_cast<half2 *>(y), stream);
|
||||
} else {
|
||||
|
|
|
@ -68,6 +68,48 @@ def test_nobroadcast():
|
|||
assert np.allclose(output_ms.asnumpy(), output_np)
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_nobroadcast_fp16():
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target='GPU')
|
||||
|
||||
x1_np = np.random.rand(10, 20).astype(np.float16)
|
||||
x2_np = np.random.rand(10, 20).astype(np.float16)
|
||||
|
||||
output_ms = P.Minimum()(Tensor(x1_np), Tensor(x2_np))
|
||||
output_np = np.minimum(x1_np, x2_np)
|
||||
assert np.allclose(output_ms.asnumpy(), output_np)
|
||||
|
||||
output_ms = P.Maximum()(Tensor(x1_np), Tensor(x2_np))
|
||||
output_np = np.maximum(x1_np, x2_np)
|
||||
assert np.allclose(output_ms.asnumpy(), output_np)
|
||||
|
||||
output_ms = P.Greater()(Tensor(x1_np), Tensor(x2_np))
|
||||
output_np = x1_np > x2_np
|
||||
assert np.allclose(output_ms.asnumpy(), output_np)
|
||||
|
||||
output_ms = P.Less()(Tensor(x1_np), Tensor(x2_np))
|
||||
output_np = x1_np < x2_np
|
||||
assert np.allclose(output_ms.asnumpy(), output_np)
|
||||
|
||||
output_ms = P.Pow()(Tensor(x1_np), Tensor(x2_np))
|
||||
output_np = np.power(x1_np, x2_np)
|
||||
assert np.allclose(output_ms.asnumpy(), output_np)
|
||||
|
||||
output_ms = P.RealDiv()(Tensor(x1_np), Tensor(x2_np))
|
||||
output_np = x1_np / x2_np
|
||||
assert np.allclose(output_ms.asnumpy(), output_np)
|
||||
|
||||
output_ms = P.Mul()(Tensor(x1_np), Tensor(x2_np))
|
||||
output_np = x1_np * x2_np
|
||||
assert np.allclose(output_ms.asnumpy(), output_np)
|
||||
|
||||
output_ms = P.Sub()(Tensor(x1_np), Tensor(x2_np))
|
||||
output_np = x1_np - x2_np
|
||||
assert np.allclose(output_ms.asnumpy(), output_np)
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
@pytest.mark.env_onecard
|
||||
|
|
Loading…
Reference in New Issue