From ce6d61fba83d926c8dfacedae4d25c44e28ab893 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Mon, 6 Aug 2018 21:51:52 +0000
Subject: [PATCH] AMDGPU: Conversions always produce canonical results

Not sure why this was checking for denormals for f16. My interpretation
of the IEEE standard is that conversions should produce a canonical
result, and the ISA manual says denormals are created when appropriate.

llvm-svn: 339064
---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp |  9 ++---
 .../AMDGPU/fcanonicalize-elimination.ll   | 36 ++++++++++++++++++-
 2 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 22d33f568e86..c598c6d69ccf 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6775,16 +6775,11 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
   case ISD::FSQRT:
   case ISD::FDIV:
   case ISD::FREM:
+  case ISD::FP_ROUND:
+  case ISD::FP_EXTEND:
   case AMDGPUISD::FMUL_LEGACY:
   case AMDGPUISD::FMAD_FTZ:
     return true;
-  case ISD::FP_ROUND:
-    return Op.getValueType().getScalarType() != MVT::f16 ||
-           Subtarget->hasFP16Denormals();
-
-  case ISD::FP_EXTEND:
-    return Op.getOperand(0).getValueType().getScalarType() != MVT::f16 ||
-           Subtarget->hasFP16Denormals();
 
   // It can/will be lowered or combined as a bit operation.
   // Need to check their input recursively to handle.
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
index 4005c4d94cc8..e3748e650b39 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
@@ -215,6 +215,22 @@ define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16(half ad
   ret void
 }
 
+; GCN-LABEL: test_fold_canonicalize_fpextend_value_f32_f16_flushf16:
+; GCN: v_cvt_f32_f16_e32 [[V:v[0-9]+]], v{{[0-9]+}}
+; GCN-NOT: v_mul
+; GCN-NOT: v_max
+; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
+define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16_flushf16(half addrspace(1)* %arg, float addrspace(1)* %out) #2 {
+  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
+  %load = load half, half addrspace(1)* %gep, align 2
+  %v = fpext half %load to float
+  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
+  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
+  store float %canonicalized, float addrspace(1)* %gep2, align 4
+  ret void
+}
+
 ; GCN-LABEL: test_fold_canonicalize_fpround_value_f32_f64:
 ; GCN: v_cvt_f32_f64_e32 [[V:v[0-9]+]], v[{{[0-9:]+}}]
 ; GCN-NOT: v_mul
@@ -233,8 +249,9 @@ define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f32_f64(double a
 
 ; GCN-LABEL: test_fold_canonicalize_fpround_value_f16_f32:
 ; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
+; GCN-NOT: v_max
+; GCN-NOT: v_mul
 ; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
-; GCN-NOT: 1.0
 define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32(float addrspace(1)* %arg, half addrspace(1)* %out) {
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
@@ -246,6 +263,22 @@ define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32(float ad
   ret void
 }
 
+; GCN-LABEL: test_fold_canonicalize_fpround_value_f16_f32_flushf16:
+; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
+; GCN-NOT: v_max
+; GCN-NOT: v_mul
+; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
+define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32_flushf16(float addrspace(1)* %arg, half addrspace(1)* %out) #2 {
+  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
+  %load = load float, float addrspace(1)* %gep, align 4
+  %v = fptrunc float %load to half
+  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
+  %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
+  store half %canonicalized, half addrspace(1)* %gep2, align 2
+  ret void
+}
+
 ; GCN-LABEL: test_fold_canonicalize_fpround_value_v2f16_v2f32:
 ; GCN-DAG: v_cvt_f16_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
 ; VI-DAG: v_cvt_f16_f32_sdwa [[V1:v[0-9]+]], v{{[0-9]+}}
@@ -738,3 +771,4 @@ declare double @llvm.maxnum.f64(double, double) #0
 
 attributes #0 = { nounwind readnone }
 attributes #1 = { "no-nans-fp-math"="true" }
+attributes #2 = { "target-features"="-fp64-fp16-denormals" }
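
Not part of the patch: a minimal standalone reproducer, in the spirit of the flushf16 tests added above, that can be used to check the fold locally. The function name and the llc invocation in the comments are illustrative assumptions, not taken from the commit; the attribute string mirrors attribute group #2 added to the test file.

; Sketch only: with f16 denormals flushed, the canonicalize of a conversion
; result is still expected to fold away, leaving just the v_cvt instruction
; and no v_max/v_mul canonicalization code.
; Assumed check: llc -march=amdgcn -mcpu=gfx900 < reproducer.ll
define half @canonicalize_of_fptrunc(float %x) #0 {
  %v = fptrunc float %x to half
  %canon = call half @llvm.canonicalize.f16(half %v)
  ret half %canon
}

declare half @llvm.canonicalize.f16(half)

attributes #0 = { "target-features"="-fp64-fp16-denormals" }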