forked from OSchip/llvm-project
[X86] Don't fold (fneg (fma (fneg X), Y, (fneg Z))) to (fma X, Y, Z)
Check that the node has the no-signed-zeros flag (nsz) in getNegatedExpression for x86 before folding. This patch fixes a miscompilation: https://alive2.llvm.org/ce/z/XxwBAJ Reviewed By: RKSimon, spatel Differential Revision: https://reviews.llvm.org/D90901
This commit is contained in:
parent
35e5c3310f
commit
4456805938
|
@ -47045,6 +47045,7 @@ SDValue X86TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
|
|||
EVT VT = Op.getValueType();
|
||||
EVT SVT = VT.getScalarType();
|
||||
unsigned Opc = Op.getOpcode();
|
||||
SDNodeFlags Flags = Op.getNode()->getFlags();
|
||||
switch (Opc) {
|
||||
case ISD::FMA:
|
||||
case X86ISD::FMSUB:
|
||||
|
@ -47059,6 +47060,11 @@ SDValue X86TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
|
|||
!isOperationLegal(ISD::FMA, VT))
|
||||
break;
|
||||
|
||||
// Don't fold (fneg (fma (fneg x), y, (fneg z))) to (fma x, y, z)
|
||||
// if it may have signed zeros.
|
||||
if (!Flags.hasNoSignedZeros())
|
||||
break;
|
||||
|
||||
// This is always negatible for free but we might be able to remove some
|
||||
// extra operand negations as well.
|
||||
SmallVector<SDValue, 4> NewOps(Op.getNumOperands(), SDValue());
|
||||
|
|
|
@ -20,7 +20,7 @@ define <8 x float> @test1(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
|
|||
; X64-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
|
||||
; X64-NEXT: retq
|
||||
%sub.i = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %c
|
||||
%r = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %sub.i) #2
|
||||
%r = tail call nsz <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %sub.i) #2
|
||||
ret <8 x float> %r
|
||||
}
|
||||
|
||||
|
@ -34,7 +34,7 @@ define <4 x float> @test2(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
|||
; X64: # %bb.0:
|
||||
; X64-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
|
||||
; X64-NEXT: retq
|
||||
%t0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
|
||||
%t0 = tail call nsz <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
|
||||
%sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %t0
|
||||
ret <4 x float> %sub.i
|
||||
}
|
||||
|
@ -57,7 +57,7 @@ define <4 x float> @test3(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
|
|||
%b0 = extractelement <4 x float> %b, i64 0
|
||||
%c0 = extractelement <4 x float> %c, i64 0
|
||||
%negb0 = fneg float %b0
|
||||
%t0 = tail call float @llvm.fma.f32(float %a0, float %negb0, float %c0) #2
|
||||
%t0 = tail call nsz float @llvm.fma.f32(float %a0, float %negb0, float %c0) #2
|
||||
%i = insertelement <4 x float> %a, float %t0, i64 0
|
||||
%sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %i
|
||||
ret <4 x float> %sub.i
|
||||
|
@ -74,7 +74,7 @@ define <8 x float> @test4(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
|
|||
; X64-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
|
||||
; X64-NEXT: retq
|
||||
%negc = fneg <8 x float> %c
|
||||
%t0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negc) #2
|
||||
%t0 = tail call nsz <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negc) #2
|
||||
%sub.i = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %t0
|
||||
ret <8 x float> %sub.i
|
||||
}
|
||||
|
@ -91,7 +91,7 @@ define <8 x float> @test5(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
|
|||
; X64-NEXT: retq
|
||||
%sub.c = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %c
|
||||
%negsubc = fneg <8 x float> %sub.c
|
||||
%t0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negsubc) #2
|
||||
%t0 = tail call nsz <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negsubc) #2
|
||||
ret <8 x float> %t0
|
||||
}
|
||||
|
||||
|
@ -105,7 +105,7 @@ define <2 x double> @test6(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
|
|||
; X64: # %bb.0:
|
||||
; X64-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
|
||||
; X64-NEXT: retq
|
||||
%t0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2
|
||||
%t0 = tail call nsz <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2
|
||||
%sub.i = fsub <2 x double> <double -0.0, double -0.0>, %t0
|
||||
ret <2 x double> %sub.i
|
||||
}
|
||||
|
@ -125,7 +125,7 @@ define <8 x float> @test7(float %a, <8 x float> %b, <8 x float> %c) {
|
|||
%t0 = insertelement <8 x float> undef, float %a, i32 0
|
||||
%t1 = fsub <8 x float> <float -0.0, float undef, float undef, float undef, float undef, float undef, float undef, float undef>, %t0
|
||||
%t2 = shufflevector <8 x float> %t1, <8 x float> undef, <8 x i32> zeroinitializer
|
||||
%t3 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %t2, <8 x float> %b, <8 x float> %c)
|
||||
%t3 = tail call nsz <8 x float> @llvm.fma.v8f32(<8 x float> %t2, <8 x float> %b, <8 x float> %c)
|
||||
ret <8 x float> %t3
|
||||
|
||||
}
|
||||
|
@ -145,6 +145,6 @@ define <8 x float> @test8(float %a, <8 x float> %b, <8 x float> %c) {
|
|||
%t0 = fsub float -0.0, %a
|
||||
%t1 = insertelement <8 x float> undef, float %t0, i32 0
|
||||
%t2 = shufflevector <8 x float> %t1, <8 x float> undef, <8 x i32> zeroinitializer
|
||||
%t3 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %t2, <8 x float> %b, <8 x float> %c)
|
||||
%t3 = tail call nsz <8 x float> @llvm.fma.v8f32(<8 x float> %t2, <8 x float> %b, <8 x float> %c)
|
||||
ret <8 x float> %t3
|
||||
}
|
||||
|
|
|
@ -29,10 +29,17 @@ define <16 x float> @test1(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
|
|||
}
|
||||
|
||||
define <16 x float> @test2(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
|
||||
; CHECK-NEXT: retq
|
||||
; SKX-LABEL: test2:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
|
||||
; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
;
|
||||
; KNL-LABEL: test2:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
|
||||
; KNL-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; KNL-NEXT: retq
|
||||
%fma = call <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c)
|
||||
%neg = fneg <16 x float> %fma
|
||||
ret <16 x float> %neg
|
||||
|
@ -49,10 +56,17 @@ define <16 x float> @test2_nsz(<16 x float> %a, <16 x float> %b, <16 x float> %c
|
|||
}
|
||||
|
||||
define <16 x float> @test3(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
|
||||
; CHECK-LABEL: test3:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
|
||||
; CHECK-NEXT: retq
|
||||
; SKX-LABEL: test3:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
|
||||
; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
;
|
||||
; KNL-LABEL: test3:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
|
||||
; KNL-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; KNL-NEXT: retq
|
||||
%t0 = fneg <16 x float> %b
|
||||
%t1 = call <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %t0, <16 x float> %c)
|
||||
%sub.i = fneg <16 x float> %t1
|
||||
|
@ -71,10 +85,17 @@ define <16 x float> @test3_nsz(<16 x float> %a, <16 x float> %b, <16 x float> %c
|
|||
}
|
||||
|
||||
define <16 x float> @test4(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
|
||||
; CHECK-LABEL: test4:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
|
||||
; CHECK-NEXT: retq
|
||||
; SKX-LABEL: test4:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
|
||||
; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
;
|
||||
; KNL-LABEL: test4:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
|
||||
; KNL-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; KNL-NEXT: retq
|
||||
%t0 = fneg <16 x float> %b
|
||||
%t1 = fneg <16 x float> %c
|
||||
%t2 = call <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %t0, <16 x float> %t1)
|
||||
|
@ -106,10 +127,17 @@ entry:
|
|||
}
|
||||
|
||||
define <16 x float> @test6(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
|
||||
; CHECK-LABEL: test6:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
; SKX-LABEL: test6:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vfnmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0
|
||||
; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
;
|
||||
; KNL-LABEL: test6:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: vfnmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0
|
||||
; KNL-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||
; KNL-NEXT: retq
|
||||
%t0 = fneg <16 x float> %b
|
||||
%t1 = fneg <16 x float> %c
|
||||
%t2 = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %t0, <16 x float> %t1, i32 10)
|
||||
|
@ -130,10 +158,18 @@ define <16 x float> @test6_nsz(<16 x float> %a, <16 x float> %b, <16 x float> %c
|
|||
}
|
||||
|
||||
define <8 x float> @test7(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
|
||||
; CHECK-LABEL: test7:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
|
||||
; CHECK-NEXT: retq
|
||||
; SKX-LABEL: test7:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
|
||||
; SKX-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0
|
||||
; SKX-NEXT: retq
|
||||
;
|
||||
; KNL-LABEL: test7:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
|
||||
; KNL-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
|
||||
; KNL-NEXT: vxorps %ymm1, %ymm0, %ymm0
|
||||
; KNL-NEXT: retq
|
||||
%t0 = fneg <8 x float> %c
|
||||
%t1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %t0)
|
||||
%sub.i = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %t1
|
||||
|
@ -163,10 +199,17 @@ entry:
|
|||
}
|
||||
|
||||
define <8 x double> @test9(<8 x double> %a, <8 x double> %b, <8 x double> %c) {
|
||||
; CHECK-LABEL: test9:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
|
||||
; CHECK-NEXT: retq
|
||||
; SKX-LABEL: test9:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
|
||||
; SKX-NEXT: vxorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
;
|
||||
; KNL-LABEL: test9:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
|
||||
; KNL-NEXT: vpxorq {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; KNL-NEXT: retq
|
||||
%t0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i32 4)
|
||||
%sub.i = fneg <8 x double> %t0
|
||||
ret <8 x double> %sub.i
|
||||
|
|
|
@ -9,7 +9,8 @@ declare float @llvm.fma.f32(float, float, float)
|
|||
define float @fneg_fma32(float %x, float %y, float %z) {
|
||||
; CHECK-LABEL: fneg_fma32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
|
||||
; CHECK-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
|
||||
; CHECK-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%negx = fneg float %x
|
||||
%negz = fneg float %z
|
||||
|
@ -35,7 +36,8 @@ declare double @llvm.fma.f64(double, double, double)
|
|||
define double @fneg_fma64(double %x, double %y, double %z) {
|
||||
; CHECK-LABEL: fneg_fma64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
|
||||
; CHECK-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
|
||||
; CHECK-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%negx = fneg double %x
|
||||
%negz = fneg double %z
|
||||
|
|
|
@ -1308,10 +1308,10 @@ define float @test_f32_interp(float %x, float %y, float %t) {
|
|||
; AVX512-NOINFS-NEXT: vfmsub213ss {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
|
||||
; AVX512-NOINFS-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1
|
||||
; AVX512-NOINFS-NEXT: retq
|
||||
%t1 = fsub float 1.0, %t
|
||||
%tx = fmul float %x, %t
|
||||
%ty = fmul float %y, %t1
|
||||
%r = fadd float %tx, %ty
|
||||
%t1 = fsub nsz float 1.0, %t
|
||||
%tx = fmul nsz float %x, %t
|
||||
%ty = fmul nsz float %y, %t1
|
||||
%r = fadd nsz float %tx, %ty
|
||||
ret float %r
|
||||
}
|
||||
|
||||
|
@ -1357,10 +1357,10 @@ define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float
|
|||
; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
|
||||
; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1
|
||||
; AVX512-NOINFS-NEXT: retq
|
||||
%t1 = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t
|
||||
%tx = fmul <4 x float> %x, %t
|
||||
%ty = fmul <4 x float> %y, %t1
|
||||
%r = fadd <4 x float> %tx, %ty
|
||||
%t1 = fsub nsz <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t
|
||||
%tx = fmul nsz <4 x float> %x, %t
|
||||
%ty = fmul nsz <4 x float> %y, %t1
|
||||
%r = fadd nsz <4 x float> %tx, %ty
|
||||
ret <4 x float> %r
|
||||
}
|
||||
|
||||
|
@ -1406,10 +1406,10 @@ define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float
|
|||
; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1
|
||||
; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1
|
||||
; AVX512-NOINFS-NEXT: retq
|
||||
%t1 = fsub <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
|
||||
%tx = fmul <8 x float> %x, %t
|
||||
%ty = fmul <8 x float> %y, %t1
|
||||
%r = fadd <8 x float> %tx, %ty
|
||||
%t1 = fsub nsz <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
|
||||
%tx = fmul nsz <8 x float> %x, %t
|
||||
%ty = fmul nsz <8 x float> %y, %t1
|
||||
%r = fadd nsz <8 x float> %tx, %ty
|
||||
ret <8 x float> %r
|
||||
}
|
||||
|
||||
|
@ -1455,10 +1455,10 @@ define double @test_f64_interp(double %x, double %y, double %t) {
|
|||
; AVX512-NOINFS-NEXT: vfmsub213sd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
|
||||
; AVX512-NOINFS-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1
|
||||
; AVX512-NOINFS-NEXT: retq
|
||||
%t1 = fsub double 1.0, %t
|
||||
%tx = fmul double %x, %t
|
||||
%ty = fmul double %y, %t1
|
||||
%r = fadd double %tx, %ty
|
||||
%t1 = fsub nsz double 1.0, %t
|
||||
%tx = fmul nsz double %x, %t
|
||||
%ty = fmul nsz double %y, %t1
|
||||
%r = fadd nsz double %tx, %ty
|
||||
ret double %r
|
||||
}
|
||||
|
||||
|
@ -1504,10 +1504,10 @@ define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x do
|
|||
; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
|
||||
; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1
|
||||
; AVX512-NOINFS-NEXT: retq
|
||||
%t1 = fsub <2 x double> <double 1.0, double 1.0>, %t
|
||||
%tx = fmul <2 x double> %x, %t
|
||||
%ty = fmul <2 x double> %y, %t1
|
||||
%r = fadd <2 x double> %tx, %ty
|
||||
%t1 = fsub nsz <2 x double> <double 1.0, double 1.0>, %t
|
||||
%tx = fmul nsz <2 x double> %x, %t
|
||||
%ty = fmul nsz <2 x double> %y, %t1
|
||||
%r = fadd nsz <2 x double> %tx, %ty
|
||||
ret <2 x double> %r
|
||||
}
|
||||
|
||||
|
@ -1553,10 +1553,10 @@ define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x do
|
|||
; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1
|
||||
; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1
|
||||
; AVX512-NOINFS-NEXT: retq
|
||||
%t1 = fsub <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t
|
||||
%tx = fmul <4 x double> %x, %t
|
||||
%ty = fmul <4 x double> %y, %t1
|
||||
%r = fadd <4 x double> %tx, %ty
|
||||
%t1 = fsub nsz <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t
|
||||
%tx = fmul nsz <4 x double> %x, %t
|
||||
%ty = fmul nsz <4 x double> %y, %t1
|
||||
%r = fadd nsz <4 x double> %tx, %ty
|
||||
ret <4 x double> %r
|
||||
}
|
||||
|
||||
|
@ -1579,9 +1579,9 @@ define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x
|
|||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
|
||||
; AVX512-NEXT: retq
|
||||
%mul = fmul <4 x float> %a0, %a1
|
||||
%add = fadd <4 x float> %mul, %a2
|
||||
%neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
|
||||
%mul = fmul nsz <4 x float> %a0, %a1
|
||||
%add = fadd nsz <4 x float> %mul, %a2
|
||||
%neg = fsub nsz <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
|
||||
ret <4 x float> %neg
|
||||
}
|
||||
|
||||
|
@ -1600,9 +1600,9 @@ define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <
|
|||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
|
||||
; AVX512-NEXT: retq
|
||||
%mul = fmul <4 x double> %a0, %a1
|
||||
%sub = fsub <4 x double> %mul, %a2
|
||||
%neg = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
|
||||
%mul = fmul nsz <4 x double> %a0, %a1
|
||||
%sub = fsub nsz <4 x double> %mul, %a2
|
||||
%neg = fsub nsz <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
|
||||
ret <4 x double> %neg
|
||||
}
|
||||
|
||||
|
@ -1621,10 +1621,10 @@ define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4
|
|||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
|
||||
; AVX512-NEXT: retq
|
||||
%mul = fmul <4 x float> %a0, %a1
|
||||
%neg0 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul
|
||||
%add = fadd <4 x float> %neg0, %a2
|
||||
%neg1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
|
||||
%mul = fmul nsz <4 x float> %a0, %a1
|
||||
%neg0 = fsub nsz <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul
|
||||
%add = fadd nsz <4 x float> %neg0, %a2
|
||||
%neg1 = fsub nsz <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
|
||||
ret <4 x float> %neg1
|
||||
}
|
||||
|
||||
|
@ -1643,10 +1643,10 @@ define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1,
|
|||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
|
||||
; AVX512-NEXT: retq
|
||||
%mul = fmul <4 x double> %a0, %a1
|
||||
%neg0 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul
|
||||
%sub = fsub <4 x double> %neg0, %a2
|
||||
%neg1 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
|
||||
%mul = fmul nsz <4 x double> %a0, %a1
|
||||
%neg0 = fsub nsz <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul
|
||||
%sub = fsub nsz <4 x double> %neg0, %a2
|
||||
%neg1 = fsub nsz <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
|
||||
ret <4 x double> %neg1
|
||||
}
|
||||
|
||||
|
|
|
@ -868,10 +868,10 @@ define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x
|
|||
; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} zmm1 = (zmm2 * zmm1) - zmm1
|
||||
; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm2 * zmm0) - zmm1
|
||||
; AVX512-NOINFS-NEXT: retq
|
||||
%t1 = fsub <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
|
||||
%tx = fmul <16 x float> %x, %t
|
||||
%ty = fmul <16 x float> %y, %t1
|
||||
%r = fadd <16 x float> %tx, %ty
|
||||
%t1 = fsub nsz <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
|
||||
%tx = fmul nsz <16 x float> %x, %t
|
||||
%ty = fmul nsz <16 x float> %y, %t1
|
||||
%r = fadd nsz <16 x float> %tx, %ty
|
||||
ret <16 x float> %r
|
||||
}
|
||||
|
||||
|
@ -927,10 +927,10 @@ define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x do
|
|||
; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} zmm1 = (zmm2 * zmm1) - zmm1
|
||||
; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm2 * zmm0) - zmm1
|
||||
; AVX512-NOINFS-NEXT: retq
|
||||
%t1 = fsub <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %t
|
||||
%tx = fmul <8 x double> %x, %t
|
||||
%ty = fmul <8 x double> %y, %t1
|
||||
%r = fadd <8 x double> %tx, %ty
|
||||
%t1 = fsub nsz <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %t
|
||||
%tx = fmul nsz <8 x double> %x, %t
|
||||
%ty = fmul nsz <8 x double> %y, %t1
|
||||
%r = fadd nsz <8 x double> %tx, %ty
|
||||
ret <8 x double> %r
|
||||
}
|
||||
|
||||
|
@ -955,9 +955,9 @@ define <16 x float> @test_v16f32_fneg_fmadd(<16 x float> %a0, <16 x float> %a1,
|
|||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
|
||||
; AVX512-NEXT: retq
|
||||
%mul = fmul <16 x float> %a0, %a1
|
||||
%add = fadd <16 x float> %mul, %a2
|
||||
%neg = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
|
||||
%mul = fmul nsz <16 x float> %a0, %a1
|
||||
%add = fadd nsz <16 x float> %mul, %a2
|
||||
%neg = fsub nsz <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
|
||||
ret <16 x float> %neg
|
||||
}
|
||||
|
||||
|
@ -978,9 +978,9 @@ define <8 x double> @test_v8f64_fneg_fmsub(<8 x double> %a0, <8 x double> %a1, <
|
|||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vfnmadd213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
|
||||
; AVX512-NEXT: retq
|
||||
%mul = fmul <8 x double> %a0, %a1
|
||||
%sub = fsub <8 x double> %mul, %a2
|
||||
%neg = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
|
||||
%mul = fmul nsz <8 x double> %a0, %a1
|
||||
%sub = fsub nsz <8 x double> %mul, %a2
|
||||
%neg = fsub nsz <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
|
||||
ret <8 x double> %neg
|
||||
}
|
||||
|
||||
|
@ -1001,10 +1001,10 @@ define <16 x float> @test_v16f32_fneg_fnmadd(<16 x float> %a0, <16 x float> %a1,
|
|||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
|
||||
; AVX512-NEXT: retq
|
||||
%mul = fmul <16 x float> %a0, %a1
|
||||
%neg0 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %mul
|
||||
%add = fadd <16 x float> %neg0, %a2
|
||||
%neg1 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
|
||||
%mul = fmul nsz <16 x float> %a0, %a1
|
||||
%neg0 = fsub nsz <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %mul
|
||||
%add = fadd nsz <16 x float> %neg0, %a2
|
||||
%neg1 = fsub nsz <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
|
||||
ret <16 x float> %neg1
|
||||
}
|
||||
|
||||
|
@ -1025,10 +1025,10 @@ define <8 x double> @test_v8f64_fneg_fnmsub(<8 x double> %a0, <8 x double> %a1,
|
|||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
|
||||
; AVX512-NEXT: retq
|
||||
%mul = fmul <8 x double> %a0, %a1
|
||||
%neg0 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %mul
|
||||
%sub = fsub <8 x double> %neg0, %a2
|
||||
%neg1 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
|
||||
%mul = fmul nsz <8 x double> %a0, %a1
|
||||
%neg0 = fsub nsz <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %mul
|
||||
%sub = fsub nsz <8 x double> %neg0, %a2
|
||||
%neg1 = fsub nsz <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
|
||||
ret <8 x double> %neg1
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue