forked from OSchip/llvm-project
AMDGPU: Implement hasBitPreservingFPLogic
llvm-svn: 315754
This commit is contained in:
parent
868783e855
commit
e11d8aca77
|
@ -3107,6 +3107,10 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
|
|||
}
|
||||
}
|
||||
|
||||
/// Reports whether bit-preserving floating-point logic may be assumed for
/// \p VT on this target.
///
/// The decision is made purely on the scalar type obtained from \p VT: the
/// result is whatever isTypeLegal() answers for VT.getScalarType().
///
/// NOTE(review): presumably this is the TargetLowering hook that allows the
/// DAG combiner to turn integer sign-bit masking of bitcast floats (and/xor)
/// back into fabs/fneg -- confirm against the base-class documentation.
///
/// \param VT value type being queried.
/// \returns true when the scalar type of \p VT is legal, false otherwise.
bool SITargetLowering::hasBitPreservingFPLogic(EVT VT) const {
|
||||
return isTypeLegal(VT.getScalarType());
|
||||
}
|
||||
|
||||
bool SITargetLowering::enableAggressiveFMAFusion(EVT VT) const {
|
||||
// This currently forces unfolding various combinations of fsub into fma with
|
||||
// free fneg'd operands. As long as we have fast FMA (controlled by
|
||||
|
|
|
@ -246,6 +246,8 @@ public:
|
|||
MachineBasicBlock *
|
||||
EmitInstrWithCustomInserter(MachineInstr &MI,
|
||||
MachineBasicBlock *BB) const override;
|
||||
|
||||
bool hasBitPreservingFPLogic(EVT VT) const override;
|
||||
bool enableAggressiveFMAFusion(EVT VT) const override;
|
||||
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
|
||||
EVT VT) const override;
|
||||
|
|
|
@ -83,7 +83,7 @@ define amdgpu_kernel void @fabs_fn_fold(float addrspace(1)* %out, float %in0, fl
|
|||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}fabs_fold:
|
||||
; FUNC-LABEL: {{^}}fabs_fold:
|
||||
; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
|
||||
; VI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
|
||||
; GCN-NOT: and
|
||||
|
@ -95,6 +95,18 @@ define amdgpu_kernel void @fabs_fold(float addrspace(1)* %out, float %in0, float
|
|||
ret void
|
||||
}
|
||||
|
||||
; Make sure we turn some integer operations back into fabs
|
||||
; FUNC-LABEL: {{^}}bitpreserve_fabs_f32:
|
||||
; GCN: v_add_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|, 1.0
|
||||
; Kernel that computes an absolute value with integer operations rather than
; a call to the fabs intrinsic. The float input is bitcast to i32, masked, and
; bitcast back to float. The trailing fadd gives the result a floating-point
; user, so the expected output above shows the abs as a |...| source modifier
; on the add instead of a separate integer and.
define amdgpu_kernel void @bitpreserve_fabs_f32(float addrspace(1)* %out, float %in) {
|
||||
%in.bc = bitcast float %in to i32
|
||||
; 2147483647 is 0x7fffffff, so this keeps every bit except bit 31.
%int.abs = and i32 %in.bc, 2147483647
|
||||
%bc = bitcast i32 %int.abs to float
|
||||
%fadd = fadd float %bc, 1.0
|
||||
store float %fadd, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @fabs(float) readnone
|
||||
declare float @llvm.fabs.f32(float) readnone
|
||||
declare <2 x float> @llvm.fabs.v2f32(<2 x float>) readnone
|
||||
|
|
|
@ -84,3 +84,15 @@ define amdgpu_kernel void @fneg_fold_f32(float addrspace(1)* %out, float %in) {
|
|||
store float %fmul, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; Make sure we turn some integer operations back into fneg
|
||||
; FUNC-LABEL: {{^}}bitpreserve_fneg_f32:
|
||||
; GCN: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -4.0
|
||||
; Kernel that negates a float with integer operations rather than an fneg.
; The float input is bitcast to i32, bit 31 is flipped with xor, and the
; result is bitcast back to float. The trailing fmul gives the result a
; floating-point user, so the expected output above shows the negate folded
; into the multiply (constant -4.0) instead of a separate integer xor.
define amdgpu_kernel void @bitpreserve_fneg_f32(float addrspace(1)* %out, float %in) {
|
||||
%in.bc = bitcast float %in to i32
|
||||
; 2147483648 is 0x80000000, so this toggles bit 31 only.
%int.neg = xor i32 %in.bc, 2147483648
|
||||
%bc = bitcast i32 %int.neg to float
|
||||
%fmul = fmul float %bc, 4.0
|
||||
store float %fmul, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue