forked from OSchip/llvm-project
AMDGPU: Conversions always produce canonical results
I'm not sure why this was checking for denormals for f16. My interpretation of the IEEE standard is that conversions should produce a canonical result, and the ISA manual says denormals are created when appropriate.

llvm-svn: 339064
This commit is contained in:
parent
ddbabc6b7c
commit
ce6d61fba8
|
@ -6775,16 +6775,11 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
|
|||
case ISD::FSQRT:
|
||||
case ISD::FDIV:
|
||||
case ISD::FREM:
|
||||
case ISD::FP_ROUND:
|
||||
case ISD::FP_EXTEND:
|
||||
case AMDGPUISD::FMUL_LEGACY:
|
||||
case AMDGPUISD::FMAD_FTZ:
|
||||
return true;
|
||||
case ISD::FP_ROUND:
|
||||
return Op.getValueType().getScalarType() != MVT::f16 ||
|
||||
Subtarget->hasFP16Denormals();
|
||||
|
||||
case ISD::FP_EXTEND:
|
||||
return Op.getOperand(0).getValueType().getScalarType() != MVT::f16 ||
|
||||
Subtarget->hasFP16Denormals();
|
||||
|
||||
// It can/will be lowered or combined as a bit operation.
|
||||
// Need to check their input recursively to handle.
|
||||
|
|
|
@ -215,6 +215,22 @@ define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16(half ad
|
|||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: test_fold_canonicalize_fpextend_value_f32_f16_flushf16:
|
||||
; GCN: v_cvt_f32_f16_e32 [[V:v[0-9]+]], v{{[0-9]+}}
|
||||
; GCN-NOT: v_mul
|
||||
; GCN-NOT: v_max
|
||||
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
|
||||
define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16_flushf16(half addrspace(1)* %arg, float addrspace(1)* %out) #2 {
|
||||
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
|
||||
%load = load half, half addrspace(1)* %gep, align 2
|
||||
%v = fpext half %load to float
|
||||
%canonicalized = tail call float @llvm.canonicalize.f32(float %v)
|
||||
%gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
|
||||
store float %canonicalized, float addrspace(1)* %gep2, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: test_fold_canonicalize_fpround_value_f32_f64:
|
||||
; GCN: v_cvt_f32_f64_e32 [[V:v[0-9]+]], v[{{[0-9:]+}}]
|
||||
; GCN-NOT: v_mul
|
||||
|
@ -233,8 +249,9 @@ define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f32_f64(double a
|
|||
|
||||
; GCN-LABEL: test_fold_canonicalize_fpround_value_f16_f32:
|
||||
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
|
||||
; GCN-NOT: v_max
|
||||
; GCN-NOT: v_mul
|
||||
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
|
||||
; GCN-NOT: 1.0
|
||||
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32(float addrspace(1)* %arg, half addrspace(1)* %out) {
|
||||
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
|
||||
|
@ -246,6 +263,22 @@ define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32(float ad
|
|||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: test_fold_canonicalize_fpround_value_f16_f32_flushf16:
|
||||
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
|
||||
; GCN-NOT: v_max
|
||||
; GCN-NOT: v_mul
|
||||
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
|
||||
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32_flushf16(float addrspace(1)* %arg, half addrspace(1)* %out) #2 {
|
||||
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
|
||||
%load = load float, float addrspace(1)* %gep, align 4
|
||||
%v = fptrunc float %load to half
|
||||
%canonicalized = tail call half @llvm.canonicalize.f16(half %v)
|
||||
%gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
|
||||
store half %canonicalized, half addrspace(1)* %gep2, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: test_fold_canonicalize_fpround_value_v2f16_v2f32:
|
||||
; GCN-DAG: v_cvt_f16_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
|
||||
; VI-DAG: v_cvt_f16_f32_sdwa [[V1:v[0-9]+]], v{{[0-9]+}}
|
||||
|
@ -738,3 +771,4 @@ declare double @llvm.maxnum.f64(double, double) #0
|
|||
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { "no-nans-fp-math"="true" }
|
||||
attributes #2 = { "target-features"="-fp64-fp16-denormals" }
|
||||
|
|
Loading…
Reference in New Issue