[X86] Don't fold (fneg (fma (fneg X), Y, (fneg Z))) to (fma X, Y, Z)

Check for the no-signed-zeros flag (nsz) in getNegatedExpression for X86.
This patch fixes a miscompilation: https://alive2.llvm.org/ce/z/XxwBAJ
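
For illustration, a minimal src/tgt pair in the spirit of the Alive2 report
(the concrete values below are chosen to expose the signed-zero difference;
they are not copied from the report):

    define float @src(float %x, float %y, float %z) {
      %nx = fneg float %x
      %nz = fneg float %z
      %m = call float @llvm.fma.f32(float %nx, float %y, float %nz)
      %r = fneg float %m
      ret float %r
    }

    ; Rewriting @src as @tgt is wrong without nsz: for
    ; x = 0.0, y = 0.0, z = -0.0,
    ;   @src returns fneg(fma(-0.0, 0.0, +0.0)) = fneg(+0.0) = -0.0,
    ; while
    ;   @tgt returns fma(0.0, 0.0, -0.0) = +0.0 + -0.0 = +0.0.
    define float @tgt(float %x, float %y, float %z) {
      %r = call float @llvm.fma.f32(float %x, float %y, float %z)
      ret float %r
    }

    declare float @llvm.fma.f32(float, float, float)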

Reviewed By: RKSimon, spatel

Differential Revision: https://reviews.llvm.org/D90901
Author: Jim Lin
Date: 2021-05-21 22:50:44 +08:00
parent 35e5c3310f
commit 4456805938
6 changed files with 145 additions and 94 deletions

@@ -47045,6 +47045,7 @@ SDValue X86TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
   EVT VT = Op.getValueType();
   EVT SVT = VT.getScalarType();
   unsigned Opc = Op.getOpcode();
+  SDNodeFlags Flags = Op.getNode()->getFlags();
   switch (Opc) {
   case ISD::FMA:
   case X86ISD::FMSUB:
@@ -47059,6 +47060,11 @@ SDValue X86TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
         !isOperationLegal(ISD::FMA, VT))
       break;
 
+    // Don't fold (fneg (fma (fneg x), y, (fneg z))) to (fma x, y, z)
+    // if it may have signed zeros.
+    if (!Flags.hasNoSignedZeros())
+      break;
+
     // This is always negatible for free but we might be able to remove some
     // extra operand negations as well.
     SmallVector<SDValue, 4> NewOps(Op.getNumOperands(), SDValue());
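
The flags are read off the FMA node itself, and they come from the fast-math
flags on the IR call, as the test updates below show. A minimal sketch of IR
for which the fold remains legal (the function name is illustrative):

    define float @fold_ok(float %x, float %y, float %z) {
      %nx = fneg float %x
      %nz = fneg float %z
      ; nsz permits folding fneg(fma(-x, y, -z)) down to fma(x, y, z).
      %m = call nsz float @llvm.fma.f32(float %nx, float %y, float %nz)
      %r = fneg float %m
      ret float %r
    }

    declare float @llvm.fma.f32(float, float, float)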

@@ -20,7 +20,7 @@ define <8 x float> @test1(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
 ; X64-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
 ; X64-NEXT: retq
   %sub.i = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %c
-  %r = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %sub.i) #2
+  %r = tail call nsz <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %sub.i) #2
   ret <8 x float> %r
 }
 
@@ -34,7 +34,7 @@ define <4 x float> @test2(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; X64: # %bb.0:
 ; X64-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
 ; X64-NEXT: retq
-  %t0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
+  %t0 = tail call nsz <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
   %sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %t0
   ret <4 x float> %sub.i
 }
@@ -57,7 +57,7 @@ define <4 x float> @test3(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
   %b0 = extractelement <4 x float> %b, i64 0
   %c0 = extractelement <4 x float> %c, i64 0
   %negb0 = fneg float %b0
-  %t0 = tail call float @llvm.fma.f32(float %a0, float %negb0, float %c0) #2
+  %t0 = tail call nsz float @llvm.fma.f32(float %a0, float %negb0, float %c0) #2
   %i = insertelement <4 x float> %a, float %t0, i64 0
   %sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %i
   ret <4 x float> %sub.i
@@ -74,7 +74,7 @@ define <8 x float> @test4(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
 ; X64-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
 ; X64-NEXT: retq
   %negc = fneg <8 x float> %c
-  %t0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negc) #2
+  %t0 = tail call nsz <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negc) #2
   %sub.i = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %t0
   ret <8 x float> %sub.i
 }
@@ -91,7 +91,7 @@ define <8 x float> @test5(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
 ; X64-NEXT: retq
   %sub.c = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %c
   %negsubc = fneg <8 x float> %sub.c
-  %t0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negsubc) #2
+  %t0 = tail call nsz <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negsubc) #2
   ret <8 x float> %t0
 }
 
@@ -105,7 +105,7 @@ define <2 x double> @test6(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
 ; X64: # %bb.0:
 ; X64-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
 ; X64-NEXT: retq
-  %t0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2
+  %t0 = tail call nsz <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2
   %sub.i = fsub <2 x double> <double -0.0, double -0.0>, %t0
   ret <2 x double> %sub.i
 }
@@ -125,7 +125,7 @@ define <8 x float> @test7(float %a, <8 x float> %b, <8 x float> %c) {
   %t0 = insertelement <8 x float> undef, float %a, i32 0
   %t1 = fsub <8 x float> <float -0.0, float undef, float undef, float undef, float undef, float undef, float undef, float undef>, %t0
   %t2 = shufflevector <8 x float> %t1, <8 x float> undef, <8 x i32> zeroinitializer
-  %t3 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %t2, <8 x float> %b, <8 x float> %c)
+  %t3 = tail call nsz <8 x float> @llvm.fma.v8f32(<8 x float> %t2, <8 x float> %b, <8 x float> %c)
   ret <8 x float> %t3
 }
 
@@ -145,6 +145,6 @@ define <8 x float> @test8(float %a, <8 x float> %b, <8 x float> %c) {
   %t0 = fsub float -0.0, %a
   %t1 = insertelement <8 x float> undef, float %t0, i32 0
   %t2 = shufflevector <8 x float> %t1, <8 x float> undef, <8 x i32> zeroinitializer
-  %t3 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %t2, <8 x float> %b, <8 x float> %c)
+  %t3 = tail call nsz <8 x float> @llvm.fma.v8f32(<8 x float> %t2, <8 x float> %b, <8 x float> %c)
   ret <8 x float> %t3
 }

@@ -29,10 +29,17 @@ define <16 x float> @test1(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
 }
 
 define <16 x float> @test2(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
-; CHECK-LABEL: test2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
-; CHECK-NEXT: retq
+; SKX-LABEL: test2:
+; SKX: # %bb.0:
+; SKX-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
+; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; SKX-NEXT: retq
+;
+; KNL-LABEL: test2:
+; KNL: # %bb.0:
+; KNL-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
+; KNL-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; KNL-NEXT: retq
   %fma = call <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c)
   %neg = fneg <16 x float> %fma
   ret <16 x float> %neg
@@ -49,10 +56,17 @@ define <16 x float> @test2_nsz(<16 x float> %a, <16 x float> %b, <16 x float> %c
 }
 
 define <16 x float> @test3(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
-; CHECK-LABEL: test3:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
-; CHECK-NEXT: retq
+; SKX-LABEL: test3:
+; SKX: # %bb.0:
+; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
+; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; SKX-NEXT: retq
+;
+; KNL-LABEL: test3:
+; KNL: # %bb.0:
+; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
+; KNL-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; KNL-NEXT: retq
   %t0 = fneg <16 x float> %b
   %t1 = call <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %t0, <16 x float> %c)
   %sub.i = fneg <16 x float> %t1
@@ -71,10 +85,17 @@ define <16 x float> @test3_nsz(<16 x float> %a, <16 x float> %b, <16 x float> %c
 }
 
 define <16 x float> @test4(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
-; CHECK-LABEL: test4:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
-; CHECK-NEXT: retq
+; SKX-LABEL: test4:
+; SKX: # %bb.0:
+; SKX-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
+; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; SKX-NEXT: retq
+;
+; KNL-LABEL: test4:
+; KNL: # %bb.0:
+; KNL-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
+; KNL-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; KNL-NEXT: retq
   %t0 = fneg <16 x float> %b
   %t1 = fneg <16 x float> %c
   %t2 = call <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %t0, <16 x float> %t1)
@@ -106,10 +127,17 @@ entry:
 }
 
 define <16 x float> @test6(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
-; CHECK-LABEL: test6:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0
-; CHECK-NEXT: retq
+; SKX-LABEL: test6:
+; SKX: # %bb.0:
+; SKX-NEXT: vfnmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0
+; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; SKX-NEXT: retq
+;
+; KNL-LABEL: test6:
+; KNL: # %bb.0:
+; KNL-NEXT: vfnmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0
+; KNL-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; KNL-NEXT: retq
   %t0 = fneg <16 x float> %b
   %t1 = fneg <16 x float> %c
   %t2 = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %t0, <16 x float> %t1, i32 10)
@@ -130,10 +158,18 @@ define <16 x float> @test6_nsz(<16 x float> %a, <16 x float> %b, <16 x float> %c
 }
 
 define <8 x float> @test7(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
-; CHECK-LABEL: test7:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
-; CHECK-NEXT: retq
+; SKX-LABEL: test7:
+; SKX: # %bb.0:
+; SKX-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
+; SKX-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0
+; SKX-NEXT: retq
+;
+; KNL-LABEL: test7:
+; KNL: # %bb.0:
+; KNL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
+; KNL-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
+; KNL-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; KNL-NEXT: retq
   %t0 = fneg <8 x float> %c
   %t1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %t0)
   %sub.i = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %t1
@@ -163,10 +199,17 @@ entry:
 }
 
 define <8 x double> @test9(<8 x double> %a, <8 x double> %b, <8 x double> %c) {
-; CHECK-LABEL: test9:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
-; CHECK-NEXT: retq
+; SKX-LABEL: test9:
+; SKX: # %bb.0:
+; SKX-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
+; SKX-NEXT: vxorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; SKX-NEXT: retq
+;
+; KNL-LABEL: test9:
+; KNL: # %bb.0:
+; KNL-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
+; KNL-NEXT: vpxorq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; KNL-NEXT: retq
   %t0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i32 4)
   %sub.i = fneg <8 x double> %t0
   ret <8 x double> %sub.i

@@ -9,7 +9,8 @@ declare float @llvm.fma.f32(float, float, float)
 define float @fneg_fma32(float %x, float %y, float %z) {
 ; CHECK-LABEL: fneg_fma32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
+; CHECK-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
+; CHECK-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm0
 ; CHECK-NEXT: retq
   %negx = fneg float %x
   %negz = fneg float %z
@@ -35,7 +36,8 @@ declare double @llvm.fma.f64(double, double, double)
 define double @fneg_fma64(double %x, double %y, double %z) {
 ; CHECK-LABEL: fneg_fma64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
+; CHECK-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
+; CHECK-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
 ; CHECK-NEXT: retq
   %negx = fneg double %x
   %negz = fneg double %z

@@ -1308,10 +1308,10 @@ define float @test_f32_interp(float %x, float %y, float %t) {
 ; AVX512-NOINFS-NEXT: vfmsub213ss {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
 ; AVX512-NOINFS-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1
 ; AVX512-NOINFS-NEXT: retq
-  %t1 = fsub float 1.0, %t
-  %tx = fmul float %x, %t
-  %ty = fmul float %y, %t1
-  %r = fadd float %tx, %ty
+  %t1 = fsub nsz float 1.0, %t
+  %tx = fmul nsz float %x, %t
+  %ty = fmul nsz float %y, %t1
+  %r = fadd nsz float %tx, %ty
   ret float %r
 }
 
@@ -1357,10 +1357,10 @@ define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float
 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1
 ; AVX512-NOINFS-NEXT: retq
-  %t1 = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t
-  %tx = fmul <4 x float> %x, %t
-  %ty = fmul <4 x float> %y, %t1
-  %r = fadd <4 x float> %tx, %ty
+  %t1 = fsub nsz <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t
+  %tx = fmul nsz <4 x float> %x, %t
+  %ty = fmul nsz <4 x float> %y, %t1
+  %r = fadd nsz <4 x float> %tx, %ty
   ret <4 x float> %r
 }
 
@@ -1406,10 +1406,10 @@ define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float
 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1
 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1
 ; AVX512-NOINFS-NEXT: retq
-  %t1 = fsub <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
-  %tx = fmul <8 x float> %x, %t
-  %ty = fmul <8 x float> %y, %t1
-  %r = fadd <8 x float> %tx, %ty
+  %t1 = fsub nsz <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
+  %tx = fmul nsz <8 x float> %x, %t
+  %ty = fmul nsz <8 x float> %y, %t1
+  %r = fadd nsz <8 x float> %tx, %ty
   ret <8 x float> %r
 }
 
@@ -1455,10 +1455,10 @@ define double @test_f64_interp(double %x, double %y, double %t) {
 ; AVX512-NOINFS-NEXT: vfmsub213sd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
 ; AVX512-NOINFS-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1
 ; AVX512-NOINFS-NEXT: retq
-  %t1 = fsub double 1.0, %t
-  %tx = fmul double %x, %t
-  %ty = fmul double %y, %t1
-  %r = fadd double %tx, %ty
+  %t1 = fsub nsz double 1.0, %t
+  %tx = fmul nsz double %x, %t
+  %ty = fmul nsz double %y, %t1
+  %r = fadd nsz double %tx, %ty
   ret double %r
 }
 
@@ -1504,10 +1504,10 @@ define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x do
 ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
 ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1
 ; AVX512-NOINFS-NEXT: retq
-  %t1 = fsub <2 x double> <double 1.0, double 1.0>, %t
-  %tx = fmul <2 x double> %x, %t
-  %ty = fmul <2 x double> %y, %t1
-  %r = fadd <2 x double> %tx, %ty
+  %t1 = fsub nsz <2 x double> <double 1.0, double 1.0>, %t
+  %tx = fmul nsz <2 x double> %x, %t
+  %ty = fmul nsz <2 x double> %y, %t1
+  %r = fadd nsz <2 x double> %tx, %ty
   ret <2 x double> %r
 }
 
@@ -1553,10 +1553,10 @@ define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x do
 ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1
 ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1
 ; AVX512-NOINFS-NEXT: retq
-  %t1 = fsub <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t
-  %tx = fmul <4 x double> %x, %t
-  %ty = fmul <4 x double> %y, %t1
-  %r = fadd <4 x double> %tx, %ty
+  %t1 = fsub nsz <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t
+  %tx = fmul nsz <4 x double> %x, %t
+  %ty = fmul nsz <4 x double> %y, %t1
+  %r = fadd nsz <4 x double> %tx, %ty
   ret <4 x double> %r
 }
 
@@ -1579,9 +1579,9 @@ define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
 ; AVX512-NEXT: retq
-  %mul = fmul <4 x float> %a0, %a1
-  %add = fadd <4 x float> %mul, %a2
-  %neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
+  %mul = fmul nsz <4 x float> %a0, %a1
+  %add = fadd nsz <4 x float> %mul, %a2
+  %neg = fsub nsz <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
   ret <4 x float> %neg
 }
 
@@ -1600,9 +1600,9 @@ define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
 ; AVX512-NEXT: retq
-  %mul = fmul <4 x double> %a0, %a1
-  %sub = fsub <4 x double> %mul, %a2
-  %neg = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
+  %mul = fmul nsz <4 x double> %a0, %a1
+  %sub = fsub nsz <4 x double> %mul, %a2
+  %neg = fsub nsz <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
   ret <4 x double> %neg
 }
 
@@ -1621,10 +1621,10 @@ define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
 ; AVX512-NEXT: retq
-  %mul = fmul <4 x float> %a0, %a1
-  %neg0 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul
-  %add = fadd <4 x float> %neg0, %a2
-  %neg1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
+  %mul = fmul nsz <4 x float> %a0, %a1
+  %neg0 = fsub nsz <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul
+  %add = fadd nsz <4 x float> %neg0, %a2
+  %neg1 = fsub nsz <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
   ret <4 x float> %neg1
 }
 
@@ -1643,10 +1643,10 @@ define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1,
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
 ; AVX512-NEXT: retq
-  %mul = fmul <4 x double> %a0, %a1
-  %neg0 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul
-  %sub = fsub <4 x double> %neg0, %a2
-  %neg1 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
+  %mul = fmul nsz <4 x double> %a0, %a1
+  %neg0 = fsub nsz <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul
+  %sub = fsub nsz <4 x double> %neg0, %a2
+  %neg1 = fsub nsz <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
   ret <4 x double> %neg1
 }

@@ -868,10 +868,10 @@ define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x
 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} zmm1 = (zmm2 * zmm1) - zmm1
 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm2 * zmm0) - zmm1
 ; AVX512-NOINFS-NEXT: retq
-  %t1 = fsub <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
-  %tx = fmul <16 x float> %x, %t
-  %ty = fmul <16 x float> %y, %t1
-  %r = fadd <16 x float> %tx, %ty
+  %t1 = fsub nsz <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
+  %tx = fmul nsz <16 x float> %x, %t
+  %ty = fmul nsz <16 x float> %y, %t1
+  %r = fadd nsz <16 x float> %tx, %ty
   ret <16 x float> %r
 }
 
@@ -927,10 +927,10 @@ define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x do
 ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} zmm1 = (zmm2 * zmm1) - zmm1
 ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm2 * zmm0) - zmm1
 ; AVX512-NOINFS-NEXT: retq
-  %t1 = fsub <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %t
-  %tx = fmul <8 x double> %x, %t
-  %ty = fmul <8 x double> %y, %t1
-  %r = fadd <8 x double> %tx, %ty
+  %t1 = fsub nsz <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %t
+  %tx = fmul nsz <8 x double> %x, %t
+  %ty = fmul nsz <8 x double> %y, %t1
+  %r = fadd nsz <8 x double> %tx, %ty
   ret <8 x double> %r
 }
 
@@ -955,9 +955,9 @@ define <16 x float> @test_v16f32_fneg_fmadd(<16 x float> %a0, <16 x float> %a1,
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
 ; AVX512-NEXT: retq
-  %mul = fmul <16 x float> %a0, %a1
-  %add = fadd <16 x float> %mul, %a2
-  %neg = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
+  %mul = fmul nsz <16 x float> %a0, %a1
+  %add = fadd nsz <16 x float> %mul, %a2
+  %neg = fsub nsz <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
   ret <16 x float> %neg
 }
 
@@ -978,9 +978,9 @@ define <8 x double> @test_v8f64_fneg_fmsub(<8 x double> %a0, <8 x double> %a1, <
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vfnmadd213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
 ; AVX512-NEXT: retq
-  %mul = fmul <8 x double> %a0, %a1
-  %sub = fsub <8 x double> %mul, %a2
-  %neg = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
+  %mul = fmul nsz <8 x double> %a0, %a1
+  %sub = fsub nsz <8 x double> %mul, %a2
+  %neg = fsub nsz <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
   ret <8 x double> %neg
 }
 
@@ -1001,10 +1001,10 @@ define <16 x float> @test_v16f32_fneg_fnmadd(<16 x float> %a0, <16 x float> %a1,
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
 ; AVX512-NEXT: retq
-  %mul = fmul <16 x float> %a0, %a1
-  %neg0 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %mul
-  %add = fadd <16 x float> %neg0, %a2
-  %neg1 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
+  %mul = fmul nsz <16 x float> %a0, %a1
+  %neg0 = fsub nsz <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %mul
+  %add = fadd nsz <16 x float> %neg0, %a2
+  %neg1 = fsub nsz <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
   ret <16 x float> %neg1
 }
 
@@ -1025,10 +1025,10 @@ define <8 x double> @test_v8f64_fneg_fnmsub(<8 x double> %a0, <8 x double> %a1,
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
 ; AVX512-NEXT: retq
-  %mul = fmul <8 x double> %a0, %a1
-  %neg0 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %mul
-  %sub = fsub <8 x double> %neg0, %a2
-  %neg1 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
+  %mul = fmul nsz <8 x double> %a0, %a1
+  %neg0 = fsub nsz <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %mul
+  %sub = fsub nsz <8 x double> %neg0, %a2
+  %neg1 = fsub nsz <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
   ret <8 x double> %neg1
 }