diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 2bc3d7fa5089..82d1bc270a49 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3107,6 +3107,10 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
   }
 }
 
+bool SITargetLowering::hasBitPreservingFPLogic(EVT VT) const {
+  return isTypeLegal(VT.getScalarType());
+}
+
 bool SITargetLowering::enableAggressiveFMAFusion(EVT VT) const {
   // This currently forces unfolding various combinations of fsub into fma with
   // free fneg'd operands. As long as we have fast FMA (controlled by
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 91380f8c5885..3e1d0a4a1f36 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -246,6 +246,8 @@ public:
   MachineBasicBlock *
   EmitInstrWithCustomInserter(MachineInstr &MI,
                               MachineBasicBlock *BB) const override;
+
+  bool hasBitPreservingFPLogic(EVT VT) const override;
   bool enableAggressiveFMAFusion(EVT VT) const override;
   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                          EVT VT) const override;
diff --git a/llvm/test/CodeGen/AMDGPU/fabs.ll b/llvm/test/CodeGen/AMDGPU/fabs.ll
index 600c6cd8230e..550ad7956c92 100644
--- a/llvm/test/CodeGen/AMDGPU/fabs.ll
+++ b/llvm/test/CodeGen/AMDGPU/fabs.ll
@@ -83,7 +83,7 @@ define amdgpu_kernel void @fabs_fn_fold(float addrspace(1)* %out, float %in0, fl
   ret void
 }
 
-; GCN-LABEL: {{^}}fabs_fold:
+; FUNC-LABEL: {{^}}fabs_fold:
 ; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
 ; VI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
 ; GCN-NOT: and
@@ -95,6 +95,18 @@ define amdgpu_kernel void @fabs_fold(float addrspace(1)* %out, float %in0, float
   ret void
 }
 
+; Make sure we turn some integer operations back into fabs
+; FUNC-LABEL: {{^}}bitpreserve_fabs_f32:
+; GCN: v_add_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|, 1.0
+define amdgpu_kernel void @bitpreserve_fabs_f32(float addrspace(1)* %out, float %in) {
+  %in.bc = bitcast float %in to i32
+  %int.abs = and i32 %in.bc, 2147483647
+  %bc = bitcast i32 %int.abs to float
+  %fadd = fadd float %bc, 1.0
+  store float %fadd, float addrspace(1)* %out
+  ret void
+}
+
 declare float @fabs(float) readnone
 declare float @llvm.fabs.f32(float) readnone
 declare <2 x float> @llvm.fabs.v2f32(<2 x float>) readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fneg.ll b/llvm/test/CodeGen/AMDGPU/fneg.ll
index d1eabfb13c9a..94ec61622bd2 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg.ll
@@ -84,3 +84,15 @@ define amdgpu_kernel void @fneg_fold_f32(float addrspace(1)* %out, float %in) {
   store float %fmul, float addrspace(1)* %out
   ret void
 }
+
+; Make sure we turn some integer operations back into fneg
+; FUNC-LABEL: {{^}}bitpreserve_fneg_f32:
+; GCN: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -4.0
+define amdgpu_kernel void @bitpreserve_fneg_f32(float addrspace(1)* %out, float %in) {
+  %in.bc = bitcast float %in to i32
+  %int.neg = xor i32 %in.bc, 2147483648
+  %bc = bitcast i32 %int.neg to float
+  %fmul = fmul float %bc, 4.0
+  store float %fmul, float addrspace(1)* %out
+  ret void
+}
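
For context: hasBitPreservingFPLogic is the TargetLowering hook (default false) that permits DAGCombiner to fold a bitwise op on a bitcasted float back into the equivalent FP node, since an `and` with 0x7fffffff clears the sign bit (fabs) and an `xor` with 0x80000000 flips it (fneg). The IR below is an illustrative sketch, not part of the patch: it shows what the new bitpreserve_fabs_f32 test effectively becomes once the combine fires, which is why the checked assembly can use the free |...| source modifier on the add rather than a separate v_and_b32.

; Illustrative equivalent of @bitpreserve_fabs_f32 after the combine
; (function name is hypothetical, not in the patch):
define amdgpu_kernel void @bitpreserve_fabs_f32_combined(float addrspace(1)* %out, float %in) {
  %fabs = call float @llvm.fabs.f32(float %in)   ; replaces the bitcast/and/bitcast chain
  %fadd = fadd float %fabs, 1.0
  store float %fadd, float addrspace(1)* %out
  ret void
}

declare float @llvm.fabs.f32(float) readnone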