From ce84130f8562c8c990362502f03d04187a0be581 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Thu, 22 Dec 2016 03:05:37 +0000
Subject: [PATCH] AMDGPU: Implement f16 fcanonicalize

llvm-svn: 290300
---
Notes (text between "---" and the diffstat is not part of the commit):

* f16 fcanonicalize is selected to V_MUL_F16_e64 with CONST.FP16_ONE
  (0x3C00) as the first source, mirroring the f32 pattern that follows it.
* performFCanonicalizeCombine now also returns a 0.0 constant for f16 when
  the subtarget reports no FP16 denormal support (the enclosing condition
  lies outside the hunk shown here).
* In the test, attribute group #2 disables fp16/fp64 denormals and #3
  enables them; the test_no_denormals_* functions use #2 and the
  test_denormals_* functions use #3, so neither set depends on the
  subtarget's default denormal mode.
* NOTE(review): test_fold_canonicalize_qnan_f16 passes 0xH7C00, which is the
  half encoding of +infinity rather than a quiet NaN (e.g. 0xH7E00); it is
  left unchanged here - confirm whether the name or the input is intended.

 llvm/lib/Target/AMDGPU/AMDGPUInstructions.td  |   1 +
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |   3 +
 llvm/lib/Target/AMDGPU/SIInstructions.td      |   5 +
 llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll | 172 ++++++++++++++++++
 4 files changed, 181 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index a7a995156e2e..c49866da5d3e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -391,6 +391,7 @@ int TWO_PI = 0x40c90fdb;
 int PI = 0x40490fdb;
 int TWO_PI_INV = 0x3e22f983;
 int FP_UINT_MAX_PLUS_1 = 0x4f800000;    // 1 << 32 in floating point encoding
+int FP16_ONE = 0x3C00;
 int FP32_ONE = 0x3f800000;
 int FP32_NEG_ONE = 0xbf800000;
 int FP64_ONE = 0x3ff0000000000000;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 0558c59782c1..b9302582fa89 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3648,6 +3648,9 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
 
     if (VT == MVT::f64 && !Subtarget->hasFP64Denormals())
       return DAG.getConstantFP(0.0, SDLoc(N), VT);
+
+    if (VT == MVT::f16 && !Subtarget->hasFP16Denormals())
+      return DAG.getConstantFP(0.0, SDLoc(N), VT);
   }
 
   if (C.isNaN()) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 54865956ae2f..bc35c2edc8d3 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1021,6 +1021,11 @@ defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
 
 def : BFEPattern <V_BFE_U32, S_MOV_B32>;
 
+def : Pat<
+  (fcanonicalize f16:$src),
+  (V_MUL_F16_e64 0, (i32 CONST.FP16_ONE), 0, $src, 0, 0)
+>;
+
 def : Pat<
   (fcanonicalize f32:$src),
   (V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), 0, $src, 0, 0)
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
new file mode 100644
index 000000000000..fb693de0e39d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
@@ -0,0 +1,172 @@
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+declare half @llvm.canonicalize.f16(half) #0
+
+; GCN-LABEL: {{^}}v_test_canonicalize_var_f16:
+; GCN: v_mul_f16_e32 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}}
+; GCN: buffer_store_short [[REG]]
+define void @v_test_canonicalize_var_f16(half addrspace(1)* %out) #1 {
+  %val = load half, half addrspace(1)* %out
+  %canonicalized = call half @llvm.canonicalize.f16(half %val)
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}s_test_canonicalize_var_f16:
+; GCN: v_mul_f16_e64 [[REG:v[0-9]+]], 1.0, {{s[0-9]+}}
+; GCN: buffer_store_short [[REG]]
+define void @s_test_canonicalize_var_f16(half addrspace(1)* %out, i16 zeroext %val.arg) #1 {
+  %val = bitcast i16 %val.arg to half
+  %canonicalized = call half @llvm.canonicalize.f16(half %val)
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_fold_canonicalize_p0_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_fold_canonicalize_p0_f16(half addrspace(1)* %out) #1 {
+  %canonicalized = call half @llvm.canonicalize.f16(half 0.0)
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_fold_canonicalize_n0_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff8000{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_fold_canonicalize_n0_f16(half addrspace(1)* %out) #1 {
+  %canonicalized = call half @llvm.canonicalize.f16(half -0.0)
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_fold_canonicalize_p1_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c00{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_fold_canonicalize_p1_f16(half addrspace(1)* %out) #1 {
+  %canonicalized = call half @llvm.canonicalize.f16(half 1.0)
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_fold_canonicalize_n1_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffbc00{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_fold_canonicalize_n1_f16(half addrspace(1)* %out) #1 {
+  %canonicalized = call half @llvm.canonicalize.f16(half -1.0)
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_fold_canonicalize_literal_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4c00{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_fold_canonicalize_literal_f16(half addrspace(1)* %out) #1 {
+  %canonicalized = call half @llvm.canonicalize.f16(half 16.0)
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal0_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_no_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #2 {
+  %canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF)
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal0_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3ff{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_denormals_fold_canonicalize_denormal0_f16(half addrspace(1)* %out) #3 {
+  %canonicalized = call half @llvm.canonicalize.f16(half 0xH03FF)
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_no_denormals_fold_canonicalize_denormal1_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_no_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #2 {
+  %canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF)
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_denormals_fold_canonicalize_denormal1_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff83ff{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_denormals_fold_canonicalize_denormal1_f16(half addrspace(1)* %out) #3 {
+  %canonicalized = call half @llvm.canonicalize.f16(half 0xH83FF)
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7c00{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_fold_canonicalize_qnan_f16(half addrspace(1)* %out) #1 {
+  %canonicalized = call half @llvm.canonicalize.f16(half 0xH7C00)
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg1_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_fold_canonicalize_qnan_value_neg1_f16(half addrspace(1)* %out) #1 {
+  %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 -1 to half))
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_fold_canonicalize_qnan_value_neg2_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_fold_canonicalize_qnan_value_neg2_f16(half addrspace(1)* %out) #1 {
+  %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 -2 to half))
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_fold_canonicalize_snan0_value_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_fold_canonicalize_snan0_value_f16(half addrspace(1)* %out) #1 {
+  %canonicalized = call half @llvm.canonicalize.f16(half 0xH7C01)
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_fold_canonicalize_snan1_value_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_fold_canonicalize_snan1_value_f16(half addrspace(1)* %out) #1 {
+  %canonicalized = call half @llvm.canonicalize.f16(half 0xH7DFF)
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_fold_canonicalize_snan2_value_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_fold_canonicalize_snan2_value_f16(half addrspace(1)* %out) #1 {
+  %canonicalized = call half @llvm.canonicalize.f16(half 0xHFDFF)
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_fold_canonicalize_snan3_value_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7e00{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @test_fold_canonicalize_snan3_value_f16(half addrspace(1)* %out) #1 {
+  %canonicalized = call half @llvm.canonicalize.f16(half 0xHFC01)
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind "target-features"="-fp16-denormals,-fp64-denormals" }
+attributes #3 = { nounwind "target-features"="+fp16-denormals,+fp64-denormals" }