[FPEnv] Don't transform FSUB(-0,X)->FNEG(X) in SelectionDAGBuilder.

This patch stops unconditionally transforming FSUB(-0,X) into an FNEG(X) while building the DAG. There is also one small change to handle the new FSUB(-0,X) similarly to FNEG(X) in the AMDGPU backend.

Differential Revision: https://reviews.llvm.org/D84056
This commit is contained in:
Cameron McInally 2020-08-03 10:19:33 -05:00
parent 10851f9db5
commit 31c7a2fd5c
12 changed files with 155 additions and 162 deletions

View File

@ -3005,20 +3005,6 @@ void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
}
// Lower an IR 'fsub'. Special-cases "fsub -0.0, X" by emitting an
// ISD::FNEG node instead of an ISD::FSUB.
// NOTE(review): this is the unconditional FSUB(-0,X) -> FNEG(X) fold that
// the surrounding patch (D84056) removes from the DAG builder — per the
// commit message the two are not interchangeable under a non-default FP
// environment, so the fold must not be done unconditionally here.
void SelectionDAGBuilder::visitFSub(const User &I) {
// -0.0 - X --> fneg
Type *Ty = I.getType();
// Match a constant LHS equal to the canonical negative-zero for Ty
// (scalar or vector); ConstantFP constants are uniqued, so pointer
// equality against getZeroValueForNegation(Ty) suffices.
if (isa<Constant>(I.getOperand(0)) &&
I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
SDValue Op2 = getValue(I.getOperand(1));
// Build FNEG(Op2) and record it as the value of this instruction.
setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(),
Op2.getValueType(), Op2));
return;
}
// Not the -0.0 pattern: emit an ordinary FSUB binary node.
visitBinary(I, ISD::FSUB);
}
void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) {
SDNodeFlags Flags;

View File

@ -692,7 +692,7 @@ private:
void visitAdd(const User &I) { visitBinary(I, ISD::ADD); }
void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); }
void visitSub(const User &I) { visitBinary(I, ISD::SUB); }
void visitFSub(const User &I);
void visitFSub(const User &I) { visitBinary(I, ISD::FSUB); }
void visitMul(const User &I) { visitBinary(I, ISD::MUL); }
void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); }
void visitURem(const User &I) { visitBinary(I, ISD::UREM); }

View File

@ -3795,8 +3795,15 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
SDValue Res = DAG.getNode(AMDGPUISD::FMED3, SL, VT, Ops, N0->getFlags());
if (Res.getOpcode() != AMDGPUISD::FMED3)
return SDValue(); // Op got folded away.
if (!N0.hasOneUse())
DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
if (!N0.hasOneUse()) {
SDValue Neg = DAG.getNode(ISD::FNEG, SL, VT, Res);
DAG.ReplaceAllUsesWith(N0, Neg);
for (SDNode *U : Neg->uses())
DCI.AddToWorklist(U);
}
return Res;
}
case ISD::FP_EXTEND:

View File

@ -311,7 +311,7 @@ define amdgpu_kernel void @test_no_fold_canonicalize_fneg_value_f32(float addrsp
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
%load = load float, float addrspace(1)* %gep, align 4
%v = fsub float -0.0, %load
%v = fneg float %load
%canonicalized = tail call float @llvm.canonicalize.f32(float %v)
store float %canonicalized, float addrspace(1)* %gep, align 4
ret void
@ -327,7 +327,7 @@ define amdgpu_kernel void @test_fold_canonicalize_fneg_value_f32(float addrspace
%gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
%load = load float, float addrspace(1)* %gep, align 4
%v0 = fadd float %load, 0.0
%v = fsub float -0.0, %v0
%v = fneg float %v0
%canonicalized = tail call float @llvm.canonicalize.f32(float %v)
store float %canonicalized, float addrspace(1)* %gep, align 4
ret void

View File

@ -77,7 +77,7 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_f16(half addrspace(1)* %
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f16(half addrspace(1)* %out) #1 {
%val = load half, half addrspace(1)* %out
%val.fabs = call half @llvm.fabs.f16(half %val)
%val.fabs.fneg = fsub half -0.0, %val.fabs
%val.fabs.fneg = fneg half %val.fabs
%canonicalized = call half @llvm.canonicalize.f16(half %val.fabs.fneg)
store half %canonicalized, half addrspace(1)* %out
ret void
@ -91,7 +91,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f16(half addrspace(
; CI: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}}
define amdgpu_kernel void @v_test_canonicalize_fneg_var_f16(half addrspace(1)* %out) #1 {
%val = load half, half addrspace(1)* %out
%val.fneg = fsub half -0.0, %val
%val.fneg = fneg half %val
%canonicalized = call half @llvm.canonicalize.f16(half %val.fneg)
store half %canonicalized, half addrspace(1)* %out
ret void
@ -103,7 +103,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_f16(half addrspace(1)* %
; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_var_f16(half addrspace(1)* %out) #2 {
%val = load half, half addrspace(1)* %out
%val.fneg = fsub half -0.0, %val
%val.fneg = fneg half %val
%canonicalized = call half @llvm.canonicalize.f16(half %val.fneg)
store half %canonicalized, half addrspace(1)* %out
ret void
@ -120,7 +120,7 @@ define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_var_f16(half ad
define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_fabs_var_f16(half addrspace(1)* %out) #2 {
%val = load half, half addrspace(1)* %out
%val.fabs = call half @llvm.fabs.f16(half %val)
%val.fabs.fneg = fsub half -0.0, %val.fabs
%val.fabs.fneg = fneg half %val.fabs
%canonicalized = call half @llvm.canonicalize.f16(half %val.fabs.fneg)
store half %canonicalized, half addrspace(1)* %out
ret void
@ -323,7 +323,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_v2f16(<2 x half> ad
%gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
%val = load <2 x half>, <2 x half> addrspace(1)* %gep
%val.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
%val.fabs.fneg = fsub <2 x half> <half -0.0, half -0.0>, %val.fabs
%val.fabs.fneg = fneg <2 x half> %val.fabs
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val.fabs.fneg)
store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
ret void
@ -340,7 +340,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_v2f16(<2 x half> addrspa
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
%val = load <2 x half>, <2 x half> addrspace(1)* %gep
%fneg.val = fsub <2 x half> <half -0.0, half -0.0>, %val
%fneg.val = fneg <2 x half> %val
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %fneg.val)
store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out
ret void

View File

@ -57,7 +57,7 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_f32(float addrspace(1)*
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f32(float addrspace(1)* %out) #1 {
%val = load float, float addrspace(1)* %out
%val.fabs = call float @llvm.fabs.f32(float %val)
%val.fabs.fneg = fsub float -0.0, %val.fabs
%val.fabs.fneg = fneg float %val.fabs
%canonicalized = call float @llvm.canonicalize.f32(float %val.fabs.fneg)
store float %canonicalized, float addrspace(1)* %out
ret void
@ -69,7 +69,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f32(float addrspace
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_var_f32(float addrspace(1)* %out) #1 {
%val = load float, float addrspace(1)* %out
%val.fneg = fsub float -0.0, %val
%val.fneg = fneg float %val
%canonicalized = call float @llvm.canonicalize.f32(float %val.fneg)
store float %canonicalized, float addrspace(1)* %out
ret void
@ -264,7 +264,7 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_f64(double addrspace(1)*
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f64(double addrspace(1)* %out) #1 {
%val = load double, double addrspace(1)* %out
%val.fabs = call double @llvm.fabs.f64(double %val)
%val.fabs.fneg = fsub double -0.0, %val.fabs
%val.fabs.fneg = fneg double %val.fabs
%canonicalized = call double @llvm.canonicalize.f64(double %val.fabs.fneg)
store double %canonicalized, double addrspace(1)* %out
ret void
@ -275,7 +275,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f64(double addrspac
; GCN: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[REG]]
define amdgpu_kernel void @v_test_canonicalize_fneg_var_f64(double addrspace(1)* %out) #1 {
%val = load double, double addrspace(1)* %out
%val.fneg = fsub double -0.0, %val
%val.fneg = fneg double %val
%canonicalized = call double @llvm.canonicalize.f64(double %val.fneg)
store double %canonicalized, double addrspace(1)* %out
ret void

View File

@ -311,7 +311,7 @@ define amdgpu_kernel void @div_v4_c_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
; GCN: global_store_dwordx4
define amdgpu_kernel void @div_v4_c_by_minus_x_25ulp(<4 x float> addrspace(1)* %arg) {
%load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
%neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %load
%neg = fneg <4 x float> %load
%div = fdiv <4 x float> <float 2.000000e+00, float 1.000000e+00, float -1.000000e+00, float -2.000000e+00>, %neg, !fpmath !0
store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16
ret void

View File

@ -256,7 +256,7 @@ define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f16(half addrspace(1)* %out,
%r1 = load volatile half, half addrspace(1)* %gep.0
%r2 = load volatile half, half addrspace(1)* %gep.1
%r1.fneg = fsub half -0.000000e+00, %r1
%r1.fneg = fneg half %r1
%r3 = tail call half @llvm.fmuladd.f16(half -2.0, half %r1.fneg, half %r2)
store half %r3, half addrspace(1)* %gep.out
@ -287,7 +287,7 @@ define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f16(half addrspace(1)* %out, half
%r1 = load volatile half, half addrspace(1)* %gep.0
%r2 = load volatile half, half addrspace(1)* %gep.1
%r1.fneg = fsub half -0.000000e+00, %r1
%r1.fneg = fneg half %r1
%r3 = tail call half @llvm.fmuladd.f16(half 2.0, half %r1.fneg, half %r2)
store half %r3, half addrspace(1)* %gep.out
@ -312,7 +312,7 @@ define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f16(half addrspace(1)* %out, half
%r1 = load volatile half, half addrspace(1)* %gep.0
%r2 = load volatile half, half addrspace(1)* %gep.1
%r2.fneg = fsub half -0.000000e+00, %r2
%r2.fneg = fneg half %r2
%r3 = tail call half @llvm.fmuladd.f16(half 2.0, half %r1, half %r2.fneg)
store half %r3, half addrspace(1)* %gep.out
@ -494,8 +494,8 @@ define amdgpu_kernel void @neg_neg_mad_f16(half addrspace(1)* noalias nocapture
%a = load volatile half, half addrspace(1)* %gep0, align 2
%b = load volatile half, half addrspace(1)* %gep1, align 2
%c = load volatile half, half addrspace(1)* %gep2, align 2
%nega = fsub half -0.000000e+00, %a
%negb = fsub half -0.000000e+00, %b
%nega = fneg half %a
%negb = fneg half %b
%mul = fmul half %nega, %negb
%sub = fadd half %mul, %c
store half %sub, half addrspace(1)* %outgep, align 2

View File

@ -274,7 +274,7 @@ define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out,
%r1 = load volatile float, float addrspace(1)* %gep.0
%r2 = load volatile float, float addrspace(1)* %gep.1
%r1.fneg = fsub float -0.000000e+00, %r1
%r1.fneg = fneg float %r1
%r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1.fneg, float %r2)
store float %r3, float addrspace(1)* %gep.out
@ -307,7 +307,7 @@ define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, flo
%r1 = load volatile float, float addrspace(1)* %gep.0
%r2 = load volatile float, float addrspace(1)* %gep.1
%r1.fneg = fsub float -0.000000e+00, %r1
%r1.fneg = fneg float %r1
%r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1.fneg, float %r2)
store float %r3, float addrspace(1)* %gep.out
@ -339,7 +339,7 @@ define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f32(float addrspace(1)* %out, flo
%r1 = load volatile float, float addrspace(1)* %gep.0
%r2 = load volatile float, float addrspace(1)* %gep.1
%r2.fneg = fsub float -0.000000e+00, %r2
%r2.fneg = fneg float %r2
%r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1, float %r2.fneg)
store float %r3, float addrspace(1)* %gep.out
@ -517,8 +517,8 @@ define amdgpu_kernel void @neg_neg_mad_f32(float addrspace(1)* noalias nocapture
%a = load volatile float, float addrspace(1)* %gep0, align 4
%b = load volatile float, float addrspace(1)* %gep1, align 4
%c = load volatile float, float addrspace(1)* %gep2, align 4
%nega = fsub float -0.000000e+00, %a
%negb = fsub float -0.000000e+00, %b
%nega = fneg float %a
%negb = fneg float %b
%mul = fmul float %nega, %negb
%sub = fadd float %mul, %c
store float %sub, float addrspace(1)* %outgep, align 4

View File

@ -26,7 +26,7 @@ define amdgpu_kernel void @v_fneg_add_f32(float addrspace(1)* %out, float addrsp
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%add = fadd float %a, %b
%fneg = fsub float -0.000000e+00, %add
%fneg = fneg float %add
store float %fneg, float addrspace(1)* %out.gep
ret void
}
@ -47,7 +47,7 @@ define amdgpu_kernel void @v_fneg_add_store_use_add_f32(float addrspace(1)* %out
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%add = fadd float %a, %b
%fneg = fsub float -0.000000e+00, %add
%fneg = fneg float %add
store volatile float %fneg, float addrspace(1)* %out
store volatile float %add, float addrspace(1)* %out
ret void
@ -75,7 +75,7 @@ define amdgpu_kernel void @v_fneg_add_multi_use_add_f32(float addrspace(1)* %out
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%add = fadd float %a, %b
%fneg = fsub float -0.000000e+00, %add
%fneg = fneg float %add
%use1 = fmul float %add, 4.0
store volatile float %fneg, float addrspace(1)* %out
store volatile float %use1, float addrspace(1)* %out
@ -100,9 +100,9 @@ define amdgpu_kernel void @v_fneg_add_fneg_x_f32(float addrspace(1)* %out, float
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.a = fsub float -0.000000e+00, %a
%fneg.a = fneg float %a
%add = fadd float %fneg.a, %b
%fneg = fsub float -0.000000e+00, %add
%fneg = fneg float %add
store volatile float %fneg, float addrspace(1)* %out
ret void
}
@ -124,9 +124,9 @@ define amdgpu_kernel void @v_fneg_add_x_fneg_f32(float addrspace(1)* %out, float
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.b = fsub float -0.000000e+00, %b
%fneg.b = fneg float %b
%add = fadd float %a, %fneg.b
%fneg = fsub float -0.000000e+00, %add
%fneg = fneg float %add
store volatile float %fneg, float addrspace(1)* %out
ret void
}
@ -148,10 +148,10 @@ define amdgpu_kernel void @v_fneg_add_fneg_fneg_f32(float addrspace(1)* %out, fl
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.a = fsub float -0.000000e+00, %a
%fneg.b = fsub float -0.000000e+00, %b
%fneg.a = fneg float %a
%fneg.b = fneg float %b
%add = fadd float %fneg.a, %fneg.b
%fneg = fsub float -0.000000e+00, %add
%fneg = fneg float %add
store volatile float %fneg, float addrspace(1)* %out
ret void
}
@ -177,9 +177,9 @@ define amdgpu_kernel void @v_fneg_add_store_use_fneg_x_f32(float addrspace(1)* %
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.a = fsub float -0.000000e+00, %a
%fneg.a = fneg float %a
%add = fadd float %fneg.a, %b
%fneg = fsub float -0.000000e+00, %add
%fneg = fneg float %add
store volatile float %fneg, float addrspace(1)* %out
store volatile float %fneg.a, float addrspace(1)* %out
ret void
@ -205,9 +205,9 @@ define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(float addrspace(1)* %
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.a = fsub float -0.000000e+00, %a
%fneg.a = fneg float %a
%add = fadd float %fneg.a, %b
%fneg = fsub float -0.000000e+00, %add
%fneg = fneg float %add
%use1 = fmul float %fneg.a, %c
store volatile float %fneg, float addrspace(1)* %out
store volatile float %use1, float addrspace(1)* %out
@ -226,7 +226,7 @@ define amdgpu_ps float @fneg_fadd_0(float inreg %tmp2, float inreg %tmp6, <4 x i
%tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8
%.i188 = fadd float %tmp9, 0.000000e+00
%tmp10 = fcmp uge float %.i188, %tmp2
%tmp11 = fsub float -0.000000e+00, %.i188
%tmp11 = fneg float %.i188
%.i092 = select i1 %tmp10, float %tmp2, float %tmp11
%tmp12 = fcmp ule float %.i092, 0.000000e+00
%.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000
@ -249,7 +249,7 @@ define amdgpu_ps float @fneg_fadd_0_nsz(float inreg %tmp2, float inreg %tmp6, <4
%tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8
%.i188 = fadd float %tmp9, 0.000000e+00
%tmp10 = fcmp uge float %.i188, %tmp2
%tmp11 = fsub float -0.000000e+00, %.i188
%tmp11 = fneg float %.i188
%.i092 = select i1 %tmp10, float %tmp2, float %tmp11
%tmp12 = fcmp ule float %.i092, 0.000000e+00
%.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000
@ -274,7 +274,7 @@ define amdgpu_kernel void @v_fneg_mul_f32(float addrspace(1)* %out, float addrsp
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%mul = fmul float %a, %b
%fneg = fsub float -0.000000e+00, %mul
%fneg = fneg float %mul
store float %fneg, float addrspace(1)* %out.gep
ret void
}
@ -295,7 +295,7 @@ define amdgpu_kernel void @v_fneg_mul_store_use_mul_f32(float addrspace(1)* %out
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%mul = fmul float %a, %b
%fneg = fsub float -0.000000e+00, %mul
%fneg = fneg float %mul
store volatile float %fneg, float addrspace(1)* %out
store volatile float %mul, float addrspace(1)* %out
ret void
@ -318,7 +318,7 @@ define amdgpu_kernel void @v_fneg_mul_multi_use_mul_f32(float addrspace(1)* %out
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%mul = fmul float %a, %b
%fneg = fsub float -0.000000e+00, %mul
%fneg = fneg float %mul
%use1 = fmul float %mul, 4.0
store volatile float %fneg, float addrspace(1)* %out
store volatile float %use1, float addrspace(1)* %out
@ -338,9 +338,9 @@ define amdgpu_kernel void @v_fneg_mul_fneg_x_f32(float addrspace(1)* %out, float
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.a = fsub float -0.000000e+00, %a
%fneg.a = fneg float %a
%mul = fmul float %fneg.a, %b
%fneg = fsub float -0.000000e+00, %mul
%fneg = fneg float %mul
store volatile float %fneg, float addrspace(1)* %out
ret void
}
@ -358,9 +358,9 @@ define amdgpu_kernel void @v_fneg_mul_x_fneg_f32(float addrspace(1)* %out, float
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.b = fsub float -0.000000e+00, %b
%fneg.b = fneg float %b
%mul = fmul float %a, %fneg.b
%fneg = fsub float -0.000000e+00, %mul
%fneg = fneg float %mul
store volatile float %fneg, float addrspace(1)* %out
ret void
}
@ -378,10 +378,10 @@ define amdgpu_kernel void @v_fneg_mul_fneg_fneg_f32(float addrspace(1)* %out, fl
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.a = fsub float -0.000000e+00, %a
%fneg.b = fsub float -0.000000e+00, %b
%fneg.a = fneg float %a
%fneg.b = fneg float %b
%mul = fmul float %fneg.a, %fneg.b
%fneg = fsub float -0.000000e+00, %mul
%fneg = fneg float %mul
store volatile float %fneg, float addrspace(1)* %out
ret void
}
@ -402,9 +402,9 @@ define amdgpu_kernel void @v_fneg_mul_store_use_fneg_x_f32(float addrspace(1)* %
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.a = fsub float -0.000000e+00, %a
%fneg.a = fneg float %a
%mul = fmul float %fneg.a, %b
%fneg = fsub float -0.000000e+00, %mul
%fneg = fneg float %mul
store volatile float %fneg, float addrspace(1)* %out
store volatile float %fneg.a, float addrspace(1)* %out
ret void
@ -425,9 +425,9 @@ define amdgpu_kernel void @v_fneg_mul_multi_use_fneg_x_f32(float addrspace(1)* %
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.a = fsub float -0.000000e+00, %a
%fneg.a = fneg float %a
%mul = fmul float %fneg.a, %b
%fneg = fsub float -0.000000e+00, %mul
%fneg = fneg float %mul
%use1 = fmul float %fneg.a, %c
store volatile float %fneg, float addrspace(1)* %out
store volatile float %use1, float addrspace(1)* %out
@ -454,7 +454,7 @@ define amdgpu_kernel void @v_fneg_minnum_f32_ieee(float addrspace(1)* %out, floa
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%min = call float @llvm.minnum.f32(float %a, float %b)
%fneg = fsub float -0.000000e+00, %min
%fneg = fneg float %min
store float %fneg, float addrspace(1)* %out.gep
ret void
}
@ -466,7 +466,7 @@ define amdgpu_kernel void @v_fneg_minnum_f32_ieee(float addrspace(1)* %out, floa
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_minnum_f32_no_ieee(float %a, float %b) #0 {
%min = call float @llvm.minnum.f32(float %a, float %b)
%fneg = fsub float -0.000000e+00, %min
%fneg = fneg float %min
ret float %fneg
}
@ -509,7 +509,7 @@ define amdgpu_kernel void @v_fneg_posk_minnum_f32_ieee(float addrspace(1)* %out,
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%min = call float @llvm.minnum.f32(float 4.0, float %a)
%fneg = fsub float -0.000000e+00, %min
%fneg = fneg float %min
store float %fneg, float addrspace(1)* %out.gep
ret void
}
@ -520,7 +520,7 @@ define amdgpu_kernel void @v_fneg_posk_minnum_f32_ieee(float addrspace(1)* %out,
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_posk_minnum_f32_no_ieee(float %a) #0 {
%min = call float @llvm.minnum.f32(float 4.0, float %a)
%fneg = fsub float -0.000000e+00, %min
%fneg = fneg float %min
ret float %fneg
}
@ -536,7 +536,7 @@ define amdgpu_kernel void @v_fneg_negk_minnum_f32_ieee(float addrspace(1)* %out,
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%min = call float @llvm.minnum.f32(float -4.0, float %a)
%fneg = fsub float -0.000000e+00, %min
%fneg = fneg float %min
store float %fneg, float addrspace(1)* %out.gep
ret void
}
@ -547,7 +547,7 @@ define amdgpu_kernel void @v_fneg_negk_minnum_f32_ieee(float addrspace(1)* %out,
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_negk_minnum_f32_no_ieee(float %a) #0 {
%min = call float @llvm.minnum.f32(float -4.0, float %a)
%fneg = fsub float -0.000000e+00, %min
%fneg = fneg float %min
ret float %fneg
}
@ -562,7 +562,7 @@ define amdgpu_kernel void @v_fneg_0_minnum_f32(float addrspace(1)* %out, float a
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%min = call float @llvm.minnum.f32(float 0.0, float %a)
%fneg = fsub float -0.000000e+00, %min
%fneg = fneg float %min
store float %fneg, float addrspace(1)* %out.gep
ret void
}
@ -579,7 +579,7 @@ define amdgpu_kernel void @v_fneg_neg0_minnum_f32_ieee(float addrspace(1)* %out,
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%min = call float @llvm.minnum.f32(float -0.0, float %a)
%fneg = fsub float -0.000000e+00, %min
%fneg = fneg float %min
store float %fneg, float addrspace(1)* %out.gep
ret void
}
@ -602,7 +602,7 @@ define amdgpu_kernel void @v_fneg_inv2pi_minnum_f32(float addrspace(1)* %out, fl
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
%fneg = fsub float -0.000000e+00, %min
%fneg = fneg float %min
store float %fneg, float addrspace(1)* %out.gep
ret void
}
@ -624,7 +624,7 @@ define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f32(float addrspace(1)* %out
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%min = call float @llvm.minnum.f32(float 0xBFC45F3060000000, float %a)
%fneg = fsub float -0.000000e+00, %min
%fneg = fneg float %min
store float %fneg, float addrspace(1)* %out.gep
ret void
}
@ -730,7 +730,7 @@ define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f64(double addrspace(1)* %ou
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_neg0_minnum_f32_no_ieee(float %a) #0 {
%min = call float @llvm.minnum.f32(float -0.0, float %a)
%fneg = fsub float -0.000000e+00, %min
%fneg = fneg float %min
ret float %fneg
}
@ -750,7 +750,7 @@ define amdgpu_kernel void @v_fneg_0_minnum_foldable_use_f32_ieee(float addrspace
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%min = call float @llvm.minnum.f32(float 0.0, float %a)
%fneg = fsub float -0.000000e+00, %min
%fneg = fneg float %min
%mul = fmul float %fneg, %b
store float %mul, float addrspace(1)* %out.gep
ret void
@ -779,7 +779,7 @@ define amdgpu_kernel void @v_fneg_inv2pi_minnum_foldable_use_f32(float addrspace
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
%fneg = fsub float -0.000000e+00, %min
%fneg = fneg float %min
%mul = fmul float %fneg, %b
store float %mul, float addrspace(1)* %out.gep
ret void
@ -793,7 +793,7 @@ define amdgpu_kernel void @v_fneg_inv2pi_minnum_foldable_use_f32(float addrspace
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_0_minnum_foldable_use_f32_no_ieee(float %a, float %b) #0 {
%min = call float @llvm.minnum.f32(float 0.0, float %a)
%fneg = fsub float -0.000000e+00, %min
%fneg = fneg float %min
%mul = fmul float %fneg, %b
ret float %mul
}
@ -816,7 +816,7 @@ define amdgpu_kernel void @v_fneg_minnum_multi_use_minnum_f32_ieee(float addrspa
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%min = call float @llvm.minnum.f32(float %a, float %b)
%fneg = fsub float -0.000000e+00, %min
%fneg = fneg float %min
%use1 = fmul float %min, 4.0
store volatile float %fneg, float addrspace(1)* %out
store volatile float %use1, float addrspace(1)* %out
@ -831,7 +831,7 @@ define amdgpu_kernel void @v_fneg_minnum_multi_use_minnum_f32_ieee(float addrspa
; GCN-NEXT: ; return
define amdgpu_ps <2 x float> @v_fneg_minnum_multi_use_minnum_f32_no_ieee(float %a, float %b) #0 {
%min = call float @llvm.minnum.f32(float %a, float %b)
%fneg = fsub float -0.000000e+00, %min
%fneg = fneg float %min
%use1 = fmul float %min, 4.0
%ins0 = insertelement <2 x float> undef, float %fneg, i32 0
%ins1 = insertelement <2 x float> %ins0, float %use1, i32 1
@ -859,7 +859,7 @@ define amdgpu_kernel void @v_fneg_maxnum_f32_ieee(float addrspace(1)* %out, floa
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%max = call float @llvm.maxnum.f32(float %a, float %b)
%fneg = fsub float -0.000000e+00, %max
%fneg = fneg float %max
store float %fneg, float addrspace(1)* %out.gep
ret void
}
@ -871,7 +871,7 @@ define amdgpu_kernel void @v_fneg_maxnum_f32_ieee(float addrspace(1)* %out, floa
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_maxnum_f32_no_ieee(float %a, float %b) #0 {
%max = call float @llvm.maxnum.f32(float %a, float %b)
%fneg = fsub float -0.000000e+00, %max
%fneg = fneg float %max
ret float %fneg
}
@ -914,7 +914,7 @@ define amdgpu_kernel void @v_fneg_posk_maxnum_f32_ieee(float addrspace(1)* %out,
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%max = call float @llvm.maxnum.f32(float 4.0, float %a)
%fneg = fsub float -0.000000e+00, %max
%fneg = fneg float %max
store float %fneg, float addrspace(1)* %out.gep
ret void
}
@ -925,7 +925,7 @@ define amdgpu_kernel void @v_fneg_posk_maxnum_f32_ieee(float addrspace(1)* %out,
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_posk_maxnum_f32_no_ieee(float %a) #0 {
%max = call float @llvm.maxnum.f32(float 4.0, float %a)
%fneg = fsub float -0.000000e+00, %max
%fneg = fneg float %max
ret float %fneg
}
@ -941,7 +941,7 @@ define amdgpu_kernel void @v_fneg_negk_maxnum_f32_ieee(float addrspace(1)* %out,
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%max = call float @llvm.maxnum.f32(float -4.0, float %a)
%fneg = fsub float -0.000000e+00, %max
%fneg = fneg float %max
store float %fneg, float addrspace(1)* %out.gep
ret void
}
@ -952,7 +952,7 @@ define amdgpu_kernel void @v_fneg_negk_maxnum_f32_ieee(float addrspace(1)* %out,
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_negk_maxnum_f32_no_ieee(float %a) #0 {
%max = call float @llvm.maxnum.f32(float -4.0, float %a)
%fneg = fsub float -0.000000e+00, %max
%fneg = fneg float %max
ret float %fneg
}
@ -967,7 +967,7 @@ define amdgpu_kernel void @v_fneg_0_maxnum_f32(float addrspace(1)* %out, float a
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%max = call float @llvm.maxnum.f32(float 0.0, float %a)
%fneg = fsub float -0.000000e+00, %max
%fneg = fneg float %max
store float %fneg, float addrspace(1)* %out.gep
ret void
}
@ -984,7 +984,7 @@ define amdgpu_kernel void @v_fneg_neg0_maxnum_f32_ieee(float addrspace(1)* %out,
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%max = call float @llvm.maxnum.f32(float -0.0, float %a)
%fneg = fsub float -0.000000e+00, %max
%fneg = fneg float %max
store float %fneg, float addrspace(1)* %out.gep
ret void
}
@ -995,7 +995,7 @@ define amdgpu_kernel void @v_fneg_neg0_maxnum_f32_ieee(float addrspace(1)* %out,
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_neg0_maxnum_f32_no_ieee(float %a) #0 {
%max = call float @llvm.maxnum.f32(float -0.0, float %a)
%fneg = fsub float -0.000000e+00, %max
%fneg = fneg float %max
ret float %fneg
}
@ -1015,7 +1015,7 @@ define amdgpu_kernel void @v_fneg_0_maxnum_foldable_use_f32_ieee(float addrspace
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%max = call float @llvm.maxnum.f32(float 0.0, float %a)
%fneg = fsub float -0.000000e+00, %max
%fneg = fneg float %max
%mul = fmul float %fneg, %b
store float %mul, float addrspace(1)* %out.gep
ret void
@ -1029,7 +1029,7 @@ define amdgpu_kernel void @v_fneg_0_maxnum_foldable_use_f32_ieee(float addrspace
; GCN-NEXT: ; return
define amdgpu_ps float @v_fneg_0_maxnum_foldable_use_f32_no_ieee(float %a, float %b) #0 {
%max = call float @llvm.maxnum.f32(float 0.0, float %a)
%fneg = fsub float -0.000000e+00, %max
%fneg = fneg float %max
%mul = fmul float %fneg, %b
ret float %mul
}
@ -1052,7 +1052,7 @@ define amdgpu_kernel void @v_fneg_maxnum_multi_use_maxnum_f32_ieee(float addrspa
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%max = call float @llvm.maxnum.f32(float %a, float %b)
%fneg = fsub float -0.000000e+00, %max
%fneg = fneg float %max
%use1 = fmul float %max, 4.0
store volatile float %fneg, float addrspace(1)* %out
store volatile float %use1, float addrspace(1)* %out
@ -1067,7 +1067,7 @@ define amdgpu_kernel void @v_fneg_maxnum_multi_use_maxnum_f32_ieee(float addrspa
; GCN-NEXT: ; return
define amdgpu_ps <2 x float> @v_fneg_maxnum_multi_use_maxnum_f32_no_ieee(float %a, float %b) #0 {
%max = call float @llvm.maxnum.f32(float %a, float %b)
%fneg = fsub float -0.000000e+00, %max
%fneg = fneg float %max
%use1 = fmul float %max, 4.0
%ins0 = insertelement <2 x float> undef, float %fneg, i32 0
%ins1 = insertelement <2 x float> %ins0, float %use1, i32 1
@ -1099,7 +1099,7 @@ define amdgpu_kernel void @v_fneg_fma_f32(float addrspace(1)* %out, float addrsp
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fma = call float @llvm.fma.f32(float %a, float %b, float %c)
%fneg = fsub float -0.000000e+00, %fma
%fneg = fneg float %fma
store float %fneg, float addrspace(1)* %out.gep
ret void
}
@ -1123,7 +1123,7 @@ define amdgpu_kernel void @v_fneg_fma_store_use_fma_f32(float addrspace(1)* %out
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fma = call float @llvm.fma.f32(float %a, float %b, float %c)
%fneg = fsub float -0.000000e+00, %fma
%fneg = fneg float %fma
store volatile float %fneg, float addrspace(1)* %out
store volatile float %fma, float addrspace(1)* %out
ret void
@ -1154,7 +1154,7 @@ define amdgpu_kernel void @v_fneg_fma_multi_use_fma_f32(float addrspace(1)* %out
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fma = call float @llvm.fma.f32(float %a, float %b, float %c)
%fneg = fsub float -0.000000e+00, %fma
%fneg = fneg float %fma
%use1 = fmul float %fma, 4.0
store volatile float %fneg, float addrspace(1)* %out
store volatile float %use1, float addrspace(1)* %out
@ -1182,9 +1182,9 @@ define amdgpu_kernel void @v_fneg_fma_fneg_x_y_f32(float addrspace(1)* %out, flo
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fneg.a = fsub float -0.000000e+00, %a
%fneg.a = fneg float %a
%fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
%fneg = fsub float -0.000000e+00, %fma
%fneg = fneg float %fma
store volatile float %fneg, float addrspace(1)* %out
ret void
}
@ -1210,9 +1210,9 @@ define amdgpu_kernel void @v_fneg_fma_x_fneg_y_f32(float addrspace(1)* %out, flo
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fneg.b = fsub float -0.000000e+00, %b
%fneg.b = fneg float %b
%fma = call float @llvm.fma.f32(float %a, float %fneg.b, float %c)
%fneg = fsub float -0.000000e+00, %fma
%fneg = fneg float %fma
store volatile float %fneg, float addrspace(1)* %out
ret void
}
@ -1238,10 +1238,10 @@ define amdgpu_kernel void @v_fneg_fma_fneg_fneg_y_f32(float addrspace(1)* %out,
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fneg.a = fsub float -0.000000e+00, %a
%fneg.b = fsub float -0.000000e+00, %b
%fneg.a = fneg float %a
%fneg.b = fneg float %b
%fma = call float @llvm.fma.f32(float %fneg.a, float %fneg.b, float %c)
%fneg = fsub float -0.000000e+00, %fma
%fneg = fneg float %fma
store volatile float %fneg, float addrspace(1)* %out
ret void
}
@ -1267,10 +1267,10 @@ define amdgpu_kernel void @v_fneg_fma_fneg_x_fneg_f32(float addrspace(1)* %out,
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fneg.a = fsub float -0.000000e+00, %a
%fneg.c = fsub float -0.000000e+00, %c
%fneg.a = fneg float %a
%fneg.c = fneg float %c
%fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %fneg.c)
%fneg = fsub float -0.000000e+00, %fma
%fneg = fneg float %fma
store volatile float %fneg, float addrspace(1)* %out
ret void
}
@ -1296,9 +1296,9 @@ define amdgpu_kernel void @v_fneg_fma_x_y_fneg_f32(float addrspace(1)* %out, flo
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fneg.c = fsub float -0.000000e+00, %c
%fneg.c = fneg float %c
%fma = call float @llvm.fma.f32(float %a, float %b, float %fneg.c)
%fneg = fsub float -0.000000e+00, %fma
%fneg = fneg float %fma
store volatile float %fneg, float addrspace(1)* %out
ret void
}
@ -1330,9 +1330,9 @@ define amdgpu_kernel void @v_fneg_fma_store_use_fneg_x_y_f32(float addrspace(1)*
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fneg.a = fsub float -0.000000e+00, %a
%fneg.a = fneg float %a
%fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
%fneg = fsub float -0.000000e+00, %fma
%fneg = fneg float %fma
store volatile float %fneg, float addrspace(1)* %out
store volatile float %fneg.a, float addrspace(1)* %out
ret void
@ -1360,9 +1360,9 @@ define amdgpu_kernel void @v_fneg_fma_multi_use_fneg_x_y_f32(float addrspace(1)*
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fneg.a = fsub float -0.000000e+00, %a
%fneg.a = fneg float %a
%fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
%fneg = fsub float -0.000000e+00, %fma
%fneg = fneg float %fma
%use1 = fmul float %fneg.a, %d
store volatile float %fneg, float addrspace(1)* %out
store volatile float %use1, float addrspace(1)* %out
@ -1394,7 +1394,7 @@ define amdgpu_kernel void @v_fneg_fmad_f32(float addrspace(1)* %out, float addrs
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
%fneg = fsub float -0.000000e+00, %fma
%fneg = fneg float %fma
store float %fneg, float addrspace(1)* %out.gep
ret void
}
@ -1446,7 +1446,7 @@ define amdgpu_kernel void @v_fneg_fmad_multi_use_fmad_f32(float addrspace(1)* %o
%b = load volatile float, float addrspace(1)* %b.gep
%c = load volatile float, float addrspace(1)* %c.gep
%fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
%fneg = fsub float -0.000000e+00, %fma
%fneg = fneg float %fma
%use1 = fmul float %fma, 4.0
store volatile float %fneg, float addrspace(1)* %out
store volatile float %use1, float addrspace(1)* %out
@ -1483,7 +1483,7 @@ define amdgpu_kernel void @v_fneg_fp_extend_fneg_f32_to_f64(double addrspace(1)*
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%fneg.a = fsub float -0.000000e+00, %a
%fneg.a = fneg float %a
%fpext = fpext float %fneg.a to double
%fneg = fsub double -0.000000e+00, %fpext
store double %fneg, double addrspace(1)* %out.gep
@ -1502,7 +1502,7 @@ define amdgpu_kernel void @v_fneg_fp_extend_store_use_fneg_f32_to_f64(double add
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%fneg.a = fsub float -0.000000e+00, %a
%fneg.a = fneg float %a
%fpext = fpext float %fneg.a to double
%fneg = fsub double -0.000000e+00, %fpext
store volatile double %fneg, double addrspace(1)* %out.gep
@ -1559,7 +1559,7 @@ define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(float addr
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile half, half addrspace(1)* %a.gep
%fpext = fpext half %a to float
%fneg = fsub float -0.000000e+00, %fpext
%fneg = fneg float %fpext
store volatile float %fneg, float addrspace(1)* %out.gep
store volatile float %fpext, float addrspace(1)* %out.gep
ret void
@ -1573,7 +1573,7 @@ define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(f
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile half, half addrspace(1)* %a.gep
%fpext = fpext half %a to float
%fneg = fsub float -0.000000e+00, %fpext
%fneg = fneg float %fpext
%mul = fmul float %fpext, 4.0
store volatile float %fneg, float addrspace(1)* %out.gep
store volatile float %mul, float addrspace(1)* %out.gep
@ -1595,7 +1595,7 @@ define amdgpu_kernel void @v_fneg_fp_round_f64_to_f32(float addrspace(1)* %out,
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile double, double addrspace(1)* %a.gep
%fpround = fptrunc double %a to float
%fneg = fsub float -0.000000e+00, %fpround
%fneg = fneg float %fpround
store float %fneg, float addrspace(1)* %out.gep
ret void
}
@ -1612,7 +1612,7 @@ define amdgpu_kernel void @v_fneg_fp_round_fneg_f64_to_f32(float addrspace(1)* %
%a = load volatile double, double addrspace(1)* %a.gep
%fneg.a = fsub double -0.000000e+00, %a
%fpround = fptrunc double %fneg.a to float
%fneg = fsub float -0.000000e+00, %fpround
%fneg = fneg float %fpround
store float %fneg, float addrspace(1)* %out.gep
ret void
}
@ -1631,7 +1631,7 @@ define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrs
%a = load volatile double, double addrspace(1)* %a.gep
%fneg.a = fsub double -0.000000e+00, %a
%fpround = fptrunc double %fneg.a to float
%fneg = fsub float -0.000000e+00, %fpround
%fneg = fneg float %fpround
store volatile float %fneg, float addrspace(1)* %out.gep
store volatile double %fneg.a, double addrspace(1)* undef
ret void
@ -1652,7 +1652,7 @@ define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f64_to_f32(float addrs
%a = load volatile double, double addrspace(1)* %a.gep
%fneg.a = fsub double -0.000000e+00, %a
%fpround = fptrunc double %fneg.a to float
%fneg = fsub float -0.000000e+00, %fpround
%fneg = fneg float %fpround
%use1 = fmul double %fneg.a, %c
store volatile float %fneg, float addrspace(1)* %out.gep
store volatile double %use1, double addrspace(1)* undef
@ -1685,7 +1685,7 @@ define amdgpu_kernel void @v_fneg_fp_round_fneg_f32_to_f16(half addrspace(1)* %o
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%fneg.a = fsub float -0.000000e+00, %a
%fneg.a = fneg float %a
%fpround = fptrunc float %fneg.a to half
%fneg = fsub half -0.000000e+00, %fpround
store half %fneg, half addrspace(1)* %out.gep
@ -1705,7 +1705,7 @@ define amdgpu_kernel void @v_fneg_multi_use_fp_round_fneg_f64_to_f32(float addrs
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile double, double addrspace(1)* %a.gep
%fpround = fptrunc double %a to float
%fneg = fsub float -0.000000e+00, %fpround
%fneg = fneg float %fpround
store volatile float %fneg, float addrspace(1)* %out.gep
store volatile float %fpround, float addrspace(1)* %out.gep
ret void
@ -1723,7 +1723,7 @@ define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f32_to_f16(half addrsp
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%fneg.a = fsub float -0.000000e+00, %a
%fneg.a = fneg float %a
%fpround = fptrunc float %fneg.a to half
%fneg = fsub half -0.000000e+00, %fpround
store volatile half %fneg, half addrspace(1)* %out.gep
@ -1743,7 +1743,7 @@ define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f32_to_f16(half addrsp
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%fneg.a = fsub float -0.000000e+00, %a
%fneg.a = fneg float %a
%fpround = fptrunc float %fneg.a to half
%fneg = fsub half -0.000000e+00, %fpround
%use1 = fmul float %fneg.a, %c
@ -1767,7 +1767,7 @@ define amdgpu_kernel void @v_fneg_rcp_f32(float addrspace(1)* %out, float addrsp
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%rcp = call float @llvm.amdgcn.rcp.f32(float %a)
%fneg = fsub float -0.000000e+00, %rcp
%fneg = fneg float %rcp
store float %fneg, float addrspace(1)* %out.gep
ret void
}
@ -1782,9 +1782,9 @@ define amdgpu_kernel void @v_fneg_rcp_fneg_f32(float addrspace(1)* %out, float a
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%fneg.a = fsub float -0.000000e+00, %a
%fneg.a = fneg float %a
%rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
%fneg = fsub float -0.000000e+00, %rcp
%fneg = fneg float %rcp
store float %fneg, float addrspace(1)* %out.gep
ret void
}
@ -1801,9 +1801,9 @@ define amdgpu_kernel void @v_fneg_rcp_store_use_fneg_f32(float addrspace(1)* %ou
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%fneg.a = fsub float -0.000000e+00, %a
%fneg.a = fneg float %a
%rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
%fneg = fsub float -0.000000e+00, %rcp
%fneg = fneg float %rcp
store volatile float %fneg, float addrspace(1)* %out.gep
store volatile float %fneg.a, float addrspace(1)* undef
ret void
@ -1821,9 +1821,9 @@ define amdgpu_kernel void @v_fneg_rcp_multi_use_fneg_f32(float addrspace(1)* %ou
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%fneg.a = fsub float -0.000000e+00, %a
%fneg.a = fneg float %a
%rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
%fneg = fsub float -0.000000e+00, %rcp
%fneg = fneg float %rcp
%use1 = fmul float %fneg.a, %c
store volatile float %fneg, float addrspace(1)* %out.gep
store volatile float %use1, float addrspace(1)* undef
@ -1848,7 +1848,7 @@ define amdgpu_kernel void @v_fneg_mul_legacy_f32(float addrspace(1)* %out, float
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
%fneg = fsub float -0.000000e+00, %mul
%fneg = fneg float %mul
store float %fneg, float addrspace(1)* %out.gep
ret void
}
@ -1869,7 +1869,7 @@ define amdgpu_kernel void @v_fneg_mul_legacy_store_use_mul_legacy_f32(float addr
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
%fneg = fsub float -0.000000e+00, %mul
%fneg = fneg float %mul
store volatile float %fneg, float addrspace(1)* %out
store volatile float %mul, float addrspace(1)* %out
ret void
@ -1891,7 +1891,7 @@ define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_mul_legacy_f32(float addr
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
%fneg = fsub float -0.000000e+00, %mul
%fneg = fneg float %mul
%use1 = call float @llvm.amdgcn.fmul.legacy(float %mul, float 4.0)
store volatile float %fneg, float addrspace(1)* %out
store volatile float %use1, float addrspace(1)* %out
@ -1911,9 +1911,9 @@ define amdgpu_kernel void @v_fneg_mul_legacy_fneg_x_f32(float addrspace(1)* %out
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.a = fsub float -0.000000e+00, %a
%fneg.a = fneg float %a
%mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
%fneg = fsub float -0.000000e+00, %mul
%fneg = fneg float %mul
store volatile float %fneg, float addrspace(1)* %out
ret void
}
@ -1931,9 +1931,9 @@ define amdgpu_kernel void @v_fneg_mul_legacy_x_fneg_f32(float addrspace(1)* %out
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.b = fsub float -0.000000e+00, %b
%fneg.b = fneg float %b
%mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %fneg.b)
%fneg = fsub float -0.000000e+00, %mul
%fneg = fneg float %mul
store volatile float %fneg, float addrspace(1)* %out
ret void
}
@ -1951,10 +1951,10 @@ define amdgpu_kernel void @v_fneg_mul_legacy_fneg_fneg_f32(float addrspace(1)* %
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.a = fsub float -0.000000e+00, %a
%fneg.b = fsub float -0.000000e+00, %b
%fneg.a = fneg float %a
%fneg.b = fneg float %b
%mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %fneg.b)
%fneg = fsub float -0.000000e+00, %mul
%fneg = fneg float %mul
store volatile float %fneg, float addrspace(1)* %out
ret void
}
@ -1974,9 +1974,9 @@ define amdgpu_kernel void @v_fneg_mul_legacy_store_use_fneg_x_f32(float addrspac
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.a = fsub float -0.000000e+00, %a
%fneg.a = fneg float %a
%mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
%fneg = fsub float -0.000000e+00, %mul
%fneg = fneg float %mul
store volatile float %fneg, float addrspace(1)* %out
store volatile float %fneg.a, float addrspace(1)* %out
ret void
@ -1997,9 +1997,9 @@ define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_fneg_x_f32(float addrspac
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%b = load volatile float, float addrspace(1)* %b.gep
%fneg.a = fsub float -0.000000e+00, %a
%fneg.a = fneg float %a
%mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
%fneg = fsub float -0.000000e+00, %mul
%fneg = fneg float %mul
%use1 = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %c)
store volatile float %fneg, float addrspace(1)* %out
store volatile float %use1, float addrspace(1)* %out
@ -2023,7 +2023,7 @@ define amdgpu_kernel void @v_fneg_sin_f32(float addrspace(1)* %out, float addrsp
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
%a = load volatile float, float addrspace(1)* %a.gep
%sin = call float @llvm.sin.f32(float %a)
%fneg = fsub float -0.000000e+00, %sin
%fneg = fneg float %sin
store float %fneg, float addrspace(1)* %out.gep
ret void
}

View File

@ -11,7 +11,7 @@ define amdgpu_kernel void @test_a(i32 addrspace(1)* %out, float %in) {
entry:
%0 = fcmp olt float %in, 0.000000e+00
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fsub float -0.000000e+00, %1
%2 = fneg float %1
%3 = fptosi float %2 to i32
%4 = bitcast i32 %3 to float
%5 = bitcast float %4 to i32
@ -39,7 +39,7 @@ define amdgpu_kernel void @test_b(i32 addrspace(1)* %out, float %in) {
entry:
%0 = fcmp olt float %in, 0.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fsub float -0.000000e+00, %1
%2 = fneg float %1
%3 = fptosi float %2 to i32
%4 = bitcast i32 %3 to float
%5 = bitcast float %4 to i32

View File

@ -12,7 +12,7 @@ define amdgpu_kernel void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float
entry:
%0 = fcmp une float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fsub float -0.000000e+00, %1
%2 = fneg float %1
%3 = fptosi float %2 to i32
store i32 %3, i32 addrspace(1)* %out
ret void
@ -38,7 +38,7 @@ define amdgpu_kernel void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float
entry:
%0 = fcmp oeq float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fsub float -0.000000e+00, %1
%2 = fneg float %1
%3 = fptosi float %2 to i32
store i32 %3, i32 addrspace(1)* %out
ret void
@ -64,7 +64,7 @@ define amdgpu_kernel void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float
entry:
%0 = fcmp ogt float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fsub float -0.000000e+00, %1
%2 = fneg float %1
%3 = fptosi float %2 to i32
store i32 %3, i32 addrspace(1)* %out
ret void
@ -90,7 +90,7 @@ define amdgpu_kernel void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float
entry:
%0 = fcmp oge float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fsub float -0.000000e+00, %1
%2 = fneg float %1
%3 = fptosi float %2 to i32
store i32 %3, i32 addrspace(1)* %out
ret void
@ -116,7 +116,7 @@ define amdgpu_kernel void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float
entry:
%0 = fcmp ole float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fsub float -0.000000e+00, %1
%2 = fneg float %1
%3 = fptosi float %2 to i32
store i32 %3, i32 addrspace(1)* %out
ret void
@ -142,7 +142,7 @@ define amdgpu_kernel void @fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float
entry:
%0 = fcmp olt float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fsub float -0.000000e+00, %1
%2 = fneg float %1
%3 = fptosi float %2 to i32
store i32 %3, i32 addrspace(1)* %out
ret void