Add float16 dtype support to the erf and erfc GPU kernels

2020-09-16 14:31:22 -04:00 · 2020-09-16 14:31:22 -04:00 · 8132e56417
parent 6f5be6b876
commit 8132e56417
6 changed files with 34 additions and 6 deletions
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/erf_impl.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/erf_impl.cu
@@ -18,7 +18,7 @@
 template <typename T>
 __global__ void ErfKernel(T *input, T *output, size_t count) {
  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {
-    output[i] = (T)erf(input[i]);
+    output[i] = static_cast<T>(erf(static_cast<float>(input[i])));
  }
  return;
 }
@@ -30,3 +30,4 @@ void Erf(T *input, T *output, size_t count, cudaStream_t cuda_stream) {
 }

 template void Erf<float>(float *input, float *output, size_t count, cudaStream_t cuda_stream);
+template void Erf<half>(half *input, half *output, size_t count, cudaStream_t cuda_stream);
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/erfc_impl.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/erfc_impl.cu
@@ -18,7 +18,7 @@
 template <typename T>
 __global__ void ErfcKernel(T *input, T *output, size_t count) {
  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {
-    output[i] = (T)erfc(input[i]);
+    output[i] = static_cast<T>(erfc(static_cast<float>(input[i])));
  }
  return;
 }
@@ -30,3 +30,4 @@ void Erfc(T *input, T *output, size_t count, cudaStream_t cuda_stream) {
 }

 template void Erfc<float>(float *input, float *output, size_t count, cudaStream_t cuda_stream);
+template void Erfc<half>(half *input, half *output, size_t count, cudaStream_t cuda_stream);
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/erf_gpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/erf_gpu_kernel.cc
@@ -20,5 +20,7 @@ namespace mindspore {
 namespace kernel {
 MS_REG_GPU_KERNEL_ONE(Erf, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                      ErfGpuKernel, float)
+MS_REG_GPU_KERNEL_ONE(Erf, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
+                      ErfGpuKernel, half)
 }  // namespace kernel
 }  // namespace mindspore
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/erfc_gpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/erfc_gpu_kernel.cc
@@ -20,5 +20,7 @@ namespace mindspore {
 namespace kernel {
 MS_REG_GPU_KERNEL_ONE(Erfc, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                      ErfcGpuKernel, float)
+MS_REG_GPU_KERNEL_ONE(Erfc, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
+                      ErfcGpuKernel, half)
 }  // namespace kernel
 }  // namespace mindspore
--- a/tests/st/ops/gpu/test_erf_op.py
+++ b/tests/st/ops/gpu/test_erf_op.py
@@ -37,10 +37,21 @@ class NetErf(nn.Cell):
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
-def test_exp():
+def test_erf_fp32():
    erf = NetErf()
-    x = np.array([2.0, 3.0, 4.0, 5.0]).astype(np.float32)
+    x = np.random.rand(3, 8).astype(np.float32)
    output = erf(Tensor(x, dtype=dtype.float32))
    expect = special.erf(x)
    tol = 1e-6
    assert (np.abs(output.asnumpy() - expect) < tol).all()
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_erf_fp16():
+    erf = NetErf()
+    x = np.random.rand(3, 8).astype(np.float16)
+    output = erf(Tensor(x, dtype=dtype.float16))
+    expect = special.erf(x)
+    tol = 1e-3
+    assert (np.abs(output.asnumpy() - expect) < tol).all()
--- a/tests/st/ops/gpu/test_erfc_op.py
+++ b/tests/st/ops/gpu/test_erfc_op.py
@@ -37,10 +37,21 @@ class NetErfc(nn.Cell):
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
-def test_exp():
+def test_erfc_fp32():
    erfc = NetErfc()
-    x = np.array([2.0, 3.0, 4.0, 5.0]).astype(np.float32)
+    x = np.random.rand(3, 8).astype(np.float32)
    output = erfc(Tensor(x, dtype=dtype.float32))
    expect = special.erfc(x)
    tol = 1e-6
    assert (np.abs(output.asnumpy() - expect) < tol).all()
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_erfc_fp16():
+    erfc = NetErfc()
+    x = np.random.rand(3, 8).astype(np.float16)
+    output = erfc(Tensor(x, dtype=dtype.float16))
+    expect = special.erfc(x)
+    tol = 1e-3
+    assert (np.abs(output.asnumpy() - expect) < tol).all()