diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 56508fc88408..d8f9c5f7270d 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5220,6 +5220,8 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
     SelectCode(Res.getNode());
     return;
   }
+  case ISD::STRICT_FADD:
+  case ISD::STRICT_FSUB:
   case ISD::STRICT_FP_TO_SINT:
   case ISD::STRICT_FP_TO_UINT:
   case ISD::STRICT_FP_ROUND:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 07d774852544..441cad36b3ea 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -817,8 +817,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::STORE, MVT::v2f32, Custom);
 
     // FIXME: Currently mutated to non-strict form in X86ISelDAGToDAG::Select,
-    // but its sufficient to pretend their Legal since they will be someday.
+    // but it's sufficient to pretend they're Legal since they will be someday.
     setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
   }
 
   if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
@@ -1109,8 +1113,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
 
     // FIXME: Currently mutated to non-strict form in X86ISelDAGToDAG::Select,
-    // but its sufficient to pretend their Legal since they will be someday.
+    // but it's sufficient to pretend they're Legal since they will be someday.
     setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
+    setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
+    setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
+    setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal);
+    setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal);
 
     if (!Subtarget.hasAVX512())
       setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
@@ -1375,7 +1383,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
 
     // FIXME: Currently mutated to non-strict form in X86ISelDAGToDAG::Select,
     // but its sufficient to pretend their Legal since they will be someday.
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f32, Legal); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f32, Legal); + setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal); + setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal); + setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal); + setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal); setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal); setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal); diff --git a/llvm/test/CodeGen/X86/vec-strict-128.ll b/llvm/test/CodeGen/X86/vec-strict-128.ll index fea87eb16694..d8de98ca89bf 100644 --- a/llvm/test/CodeGen/X86/vec-strict-128.ll +++ b/llvm/test/CodeGen/X86/vec-strict-128.ll @@ -18,22 +18,12 @@ declare <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float>, <4 x define <2 x double> @f1(<2 x double> %a, <2 x double> %b) #0 { ; SSE-LABEL: f1: ; SSE: # %bb.0: -; SSE-NEXT: movapd %xmm0, %xmm2 -; SSE-NEXT: addsd %xmm1, %xmm2 -; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] -; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] -; SSE-NEXT: addsd %xmm1, %xmm0 -; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0] -; SSE-NEXT: movapd %xmm2, %xmm0 +; SSE-NEXT: addpd %xmm1, %xmm0 ; SSE-NEXT: ret{{[l|q]}} ; ; AVX-LABEL: f1: ; AVX: # %bb.0: -; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] -; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} %ret = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %a, <2 x double> %b, metadata !"round.dynamic", @@ -44,42 +34,12 @@ define <2 x double> @f1(<2 x double> %a, <2 x double> %b) #0 { define <4 x float> @f2(<4 x float> %a, <4 x float> %b) #0 { ; SSE-LABEL: f2: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm1, %xmm2 -; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1],xmm1[2,3] -; SSE-NEXT: movaps %xmm0, %xmm3 -; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm0[2,3] -; SSE-NEXT: addss %xmm2, %xmm3 -; SSE-NEXT: movaps %xmm1, %xmm2 -; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] -; SSE-NEXT: movaps %xmm0, %xmm4 -; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1] -; SSE-NEXT: addss %xmm2, %xmm4 -; SSE-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] -; SSE-NEXT: movaps %xmm0, %xmm2 -; SSE-NEXT: addss %xmm1, %xmm2 -; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] -; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] -; SSE-NEXT: addss %xmm1, %xmm0 -; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm4[0] -; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: addps %xmm1, %xmm0 ; SSE-NEXT: ret{{[l|q]}} ; ; AVX-LABEL: f2: ; AVX: # %bb.0: -; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm1[1,1,3,3] -; AVX-NEXT: vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3] -; AVX-NEXT: vaddss %xmm3, %xmm4, %xmm3 -; AVX-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3] -; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0] -; AVX-NEXT: vpermilpd {{.*#+}} xmm4 = xmm0[1,0] -; AVX-NEXT: vaddss %xmm3, %xmm4, %xmm3 -; AVX-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3] -; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3] -; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0] +; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} %ret = call <4 x float> 
@llvm.experimental.constrained.fadd.v4f32(<4 x float> %a, <4 x float> %b, metadata !"round.dynamic", @@ -90,22 +50,12 @@ define <4 x float> @f2(<4 x float> %a, <4 x float> %b) #0 { define <2 x double> @f3(<2 x double> %a, <2 x double> %b) #0 { ; SSE-LABEL: f3: ; SSE: # %bb.0: -; SSE-NEXT: movapd %xmm0, %xmm2 -; SSE-NEXT: subsd %xmm1, %xmm2 -; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] -; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] -; SSE-NEXT: subsd %xmm1, %xmm0 -; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0] -; SSE-NEXT: movapd %xmm2, %xmm0 +; SSE-NEXT: subpd %xmm1, %xmm0 ; SSE-NEXT: ret{{[l|q]}} ; ; AVX-LABEL: f3: ; AVX: # %bb.0: -; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] -; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; AVX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} %ret = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double> %a, <2 x double> %b, metadata !"round.dynamic", @@ -116,42 +66,12 @@ define <2 x double> @f3(<2 x double> %a, <2 x double> %b) #0 { define <4 x float> @f4(<4 x float> %a, <4 x float> %b) #0 { ; SSE-LABEL: f4: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm1, %xmm2 -; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1],xmm1[2,3] -; SSE-NEXT: movaps %xmm0, %xmm3 -; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm0[2,3] -; SSE-NEXT: subss %xmm2, %xmm3 -; SSE-NEXT: movaps %xmm1, %xmm2 -; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] -; SSE-NEXT: movaps %xmm0, %xmm4 -; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1] -; SSE-NEXT: subss %xmm2, %xmm4 -; SSE-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] -; SSE-NEXT: movaps %xmm0, %xmm2 -; SSE-NEXT: subss %xmm1, %xmm2 -; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] -; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] -; SSE-NEXT: subss %xmm1, %xmm0 -; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm4[0] -; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: subps %xmm1, %xmm0 ; SSE-NEXT: ret{{[l|q]}} ; ; AVX-LABEL: f4: ; AVX: # %bb.0: -; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm1[1,1,3,3] -; AVX-NEXT: vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3] -; AVX-NEXT: vsubss %xmm3, %xmm4, %xmm3 -; AVX-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3] -; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0] -; AVX-NEXT: vpermilpd {{.*#+}} xmm4 = xmm0[1,0] -; AVX-NEXT: vsubss %xmm3, %xmm4, %xmm3 -; AVX-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3] -; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3] -; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0] +; AVX-NEXT: vsubps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: ret{{[l|q]}} %ret = call <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float> %a, <4 x float> %b, metadata !"round.dynamic", diff --git a/llvm/test/CodeGen/X86/vec-strict-256.ll b/llvm/test/CodeGen/X86/vec-strict-256.ll index 23971a140c87..5b44c6c77b3d 100644 --- a/llvm/test/CodeGen/X86/vec-strict-256.ll +++ b/llvm/test/CodeGen/X86/vec-strict-256.ll @@ -16,19 +16,7 @@ declare <8 x float> @llvm.experimental.constrained.fdiv.v8f32(<8 x float>, <8 x define <4 x double> @f1(<4 x double> %a, <4 x double> %b) #0 { ; CHECK-LABEL: f1: ; CHECK: # %bb.0: -; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2 -; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm3 -; 
CHECK-NEXT: vaddsd %xmm2, %xmm3, %xmm4 -; CHECK-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] -; CHECK-NEXT: vaddsd %xmm2, %xmm3, %xmm2 -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm4[0],xmm2[0] -; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm3 -; CHECK-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm3[0],xmm0[0] -; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 +; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: ret{{[l|q]}} %ret = call <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double> %a, <4 x double> %b, metadata !"round.dynamic", @@ -39,35 +27,7 @@ define <4 x double> @f1(<4 x double> %a, <4 x double> %b) #0 { define <8 x float> @f2(<8 x float> %a, <8 x float> %b) #0 { ; CHECK-LABEL: f2: ; CHECK: # %bb.0: -; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2 -; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm3 -; CHECK-NEXT: vaddss %xmm2, %xmm3, %xmm4 -; CHECK-NEXT: vmovshdup {{.*#+}} xmm5 = xmm2[1,1,3,3] -; CHECK-NEXT: vmovshdup {{.*#+}} xmm6 = xmm3[1,1,3,3] -; CHECK-NEXT: vaddss %xmm5, %xmm6, %xmm5 -; CHECK-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm5 = xmm2[1,0] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm6 = xmm3[1,0] -; CHECK-NEXT: vaddss %xmm5, %xmm6, %xmm5 -; CHECK-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm5[0],xmm4[3] -; CHECK-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; CHECK-NEXT: vpermilps {{.*#+}} xmm3 = xmm3[3,1,2,3] -; CHECK-NEXT: vaddss %xmm2, %xmm3, %xmm2 -; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm4[0,1,2],xmm2[0] -; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm3 -; CHECK-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3] -; CHECK-NEXT: vmovshdup {{.*#+}} xmm5 = xmm0[1,1,3,3] -; CHECK-NEXT: vaddss %xmm4, %xmm5, %xmm4 -; CHECK-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[2,3] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm4 = xmm1[1,0] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm5 = xmm0[1,0] -; CHECK-NEXT: vaddss %xmm4, %xmm5, %xmm4 -; CHECK-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3] -; CHECK-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3] -; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0] -; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 +; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: ret{{[l|q]}} %ret = call <8 x float> @llvm.experimental.constrained.fadd.v8f32(<8 x float> %a, <8 x float> %b, metadata !"round.dynamic", @@ -78,19 +38,7 @@ define <8 x float> @f2(<8 x float> %a, <8 x float> %b) #0 { define <4 x double> @f3(<4 x double> %a, <4 x double> %b) #0 { ; CHECK-LABEL: f3: ; CHECK: # %bb.0: -; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2 -; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm3 -; CHECK-NEXT: vsubsd %xmm2, %xmm3, %xmm4 -; CHECK-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] -; CHECK-NEXT: vsubsd %xmm2, %xmm3, %xmm2 -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm4[0],xmm2[0] -; CHECK-NEXT: vsubsd %xmm1, %xmm0, %xmm3 -; CHECK-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; CHECK-NEXT: vsubsd %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm3[0],xmm0[0] -; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 +; CHECK-NEXT: vsubpd %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: ret{{[l|q]}} %ret = call <4 x double> 
@llvm.experimental.constrained.fsub.v4f64(<4 x double> %a, <4 x double> %b, metadata !"round.dynamic", @@ -101,35 +49,7 @@ define <4 x double> @f3(<4 x double> %a, <4 x double> %b) #0 { define <8 x float> @f4(<8 x float> %a, <8 x float> %b) #0 { ; CHECK-LABEL: f4: ; CHECK: # %bb.0: -; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2 -; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm3 -; CHECK-NEXT: vsubss %xmm2, %xmm3, %xmm4 -; CHECK-NEXT: vmovshdup {{.*#+}} xmm5 = xmm2[1,1,3,3] -; CHECK-NEXT: vmovshdup {{.*#+}} xmm6 = xmm3[1,1,3,3] -; CHECK-NEXT: vsubss %xmm5, %xmm6, %xmm5 -; CHECK-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm5 = xmm2[1,0] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm6 = xmm3[1,0] -; CHECK-NEXT: vsubss %xmm5, %xmm6, %xmm5 -; CHECK-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm5[0],xmm4[3] -; CHECK-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; CHECK-NEXT: vpermilps {{.*#+}} xmm3 = xmm3[3,1,2,3] -; CHECK-NEXT: vsubss %xmm2, %xmm3, %xmm2 -; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm4[0,1,2],xmm2[0] -; CHECK-NEXT: vsubss %xmm1, %xmm0, %xmm3 -; CHECK-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3] -; CHECK-NEXT: vmovshdup {{.*#+}} xmm5 = xmm0[1,1,3,3] -; CHECK-NEXT: vsubss %xmm4, %xmm5, %xmm4 -; CHECK-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[2,3] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm4 = xmm1[1,0] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm5 = xmm0[1,0] -; CHECK-NEXT: vsubss %xmm4, %xmm5, %xmm4 -; CHECK-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3] -; CHECK-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3] -; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; CHECK-NEXT: vsubss %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0] -; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 +; CHECK-NEXT: vsubps %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: ret{{[l|q]}} %ret = call <8 x float> @llvm.experimental.constrained.fsub.v8f32(<8 x float> %a, <8 x float> %b, metadata !"round.dynamic", diff --git a/llvm/test/CodeGen/X86/vec-strict-512.ll b/llvm/test/CodeGen/X86/vec-strict-512.ll index d4d9538f8705..aa23735cbe2a 100644 --- a/llvm/test/CodeGen/X86/vec-strict-512.ll +++ b/llvm/test/CodeGen/X86/vec-strict-512.ll @@ -14,35 +14,7 @@ declare <16 x float> @llvm.experimental.constrained.fdiv.v16f32(<16 x float>, <1 define <8 x double> @f1(<8 x double> %a, <8 x double> %b) #0 { ; CHECK-LABEL: f1: ; CHECK: # %bb.0: -; CHECK-NEXT: vextractf32x4 $3, %zmm1, %xmm2 -; CHECK-NEXT: vextractf32x4 $3, %zmm0, %xmm3 -; CHECK-NEXT: vaddsd %xmm2, %xmm3, %xmm4 -; CHECK-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] -; CHECK-NEXT: vaddsd %xmm2, %xmm3, %xmm2 -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm4[0],xmm2[0] -; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm3 -; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm4 -; CHECK-NEXT: vaddsd %xmm3, %xmm4, %xmm5 -; CHECK-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] -; CHECK-NEXT: vaddsd %xmm3, %xmm4, %xmm3 -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm5[0],xmm3[0] -; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 -; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm3 -; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm4 -; CHECK-NEXT: vaddsd %xmm3, %xmm4, %xmm5 -; CHECK-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] -; CHECK-NEXT: vaddsd %xmm3, %xmm4, %xmm3 -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm5[0],xmm3[0] -; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm4 -; CHECK-NEXT: vpermilpd {{.*#+}} xmm1 = 
xmm1[1,0] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm4[0],xmm0[0] -; CHECK-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 -; CHECK-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 +; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: ret{{[l|q]}} %ret = call <8 x double> @llvm.experimental.constrained.fadd.v8f64(<8 x double> %a, <8 x double> %b, metadata !"round.dynamic", @@ -53,67 +25,7 @@ define <8 x double> @f1(<8 x double> %a, <8 x double> %b) #0 { define <16 x float> @f2(<16 x float> %a, <16 x float> %b) #0 { ; CHECK-LABEL: f2: ; CHECK: # %bb.0: -; CHECK-NEXT: vextractf32x4 $3, %zmm1, %xmm2 -; CHECK-NEXT: vextractf32x4 $3, %zmm0, %xmm3 -; CHECK-NEXT: vaddss %xmm2, %xmm3, %xmm4 -; CHECK-NEXT: vmovshdup {{.*#+}} xmm5 = xmm2[1,1,3,3] -; CHECK-NEXT: vmovshdup {{.*#+}} xmm6 = xmm3[1,1,3,3] -; CHECK-NEXT: vaddss %xmm5, %xmm6, %xmm5 -; CHECK-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm5 = xmm2[1,0] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm6 = xmm3[1,0] -; CHECK-NEXT: vaddss %xmm5, %xmm6, %xmm5 -; CHECK-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm5[0],xmm4[3] -; CHECK-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; CHECK-NEXT: vpermilps {{.*#+}} xmm3 = xmm3[3,1,2,3] -; CHECK-NEXT: vaddss %xmm2, %xmm3, %xmm2 -; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm4[0,1,2],xmm2[0] -; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm3 -; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm4 -; CHECK-NEXT: vaddss %xmm3, %xmm4, %xmm5 -; CHECK-NEXT: vmovshdup {{.*#+}} xmm6 = xmm3[1,1,3,3] -; CHECK-NEXT: vmovshdup {{.*#+}} xmm7 = xmm4[1,1,3,3] -; CHECK-NEXT: vaddss %xmm6, %xmm7, %xmm6 -; CHECK-NEXT: vinsertps {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[2,3] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm6 = xmm3[1,0] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm7 = xmm4[1,0] -; CHECK-NEXT: vaddss %xmm6, %xmm7, %xmm6 -; CHECK-NEXT: vinsertps {{.*#+}} xmm5 = xmm5[0,1],xmm6[0],xmm5[3] -; CHECK-NEXT: vpermilps {{.*#+}} xmm3 = xmm3[3,1,2,3] -; CHECK-NEXT: vpermilps {{.*#+}} xmm4 = xmm4[3,1,2,3] -; CHECK-NEXT: vaddss %xmm3, %xmm4, %xmm3 -; CHECK-NEXT: vinsertps {{.*#+}} xmm3 = xmm5[0,1,2],xmm3[0] -; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 -; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm3 -; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm4 -; CHECK-NEXT: vaddss %xmm3, %xmm4, %xmm5 -; CHECK-NEXT: vmovshdup {{.*#+}} xmm6 = xmm3[1,1,3,3] -; CHECK-NEXT: vmovshdup {{.*#+}} xmm7 = xmm4[1,1,3,3] -; CHECK-NEXT: vaddss %xmm6, %xmm7, %xmm6 -; CHECK-NEXT: vinsertps {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[2,3] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm6 = xmm3[1,0] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm7 = xmm4[1,0] -; CHECK-NEXT: vaddss %xmm6, %xmm7, %xmm6 -; CHECK-NEXT: vinsertps {{.*#+}} xmm5 = xmm5[0,1],xmm6[0],xmm5[3] -; CHECK-NEXT: vpermilps {{.*#+}} xmm3 = xmm3[3,1,2,3] -; CHECK-NEXT: vpermilps {{.*#+}} xmm4 = xmm4[3,1,2,3] -; CHECK-NEXT: vaddss %xmm3, %xmm4, %xmm3 -; CHECK-NEXT: vinsertps {{.*#+}} xmm3 = xmm5[0,1,2],xmm3[0] -; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm4 -; CHECK-NEXT: vmovshdup {{.*#+}} xmm5 = xmm1[1,1,3,3] -; CHECK-NEXT: vmovshdup {{.*#+}} xmm6 = xmm0[1,1,3,3] -; CHECK-NEXT: vaddss %xmm5, %xmm6, %xmm5 -; CHECK-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm5 = xmm1[1,0] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm6 = xmm0[1,0] -; CHECK-NEXT: vaddss %xmm5, %xmm6, %xmm5 -; CHECK-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm5[0],xmm4[3] -; CHECK-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3] -; 
CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm4[0,1,2],xmm0[0] -; CHECK-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 -; CHECK-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 +; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: ret{{[l|q]}} %ret = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %a, <16 x float> %b, metadata !"round.dynamic", @@ -124,35 +36,7 @@ define <16 x float> @f2(<16 x float> %a, <16 x float> %b) #0 { define <8 x double> @f3(<8 x double> %a, <8 x double> %b) #0 { ; CHECK-LABEL: f3: ; CHECK: # %bb.0: -; CHECK-NEXT: vextractf32x4 $3, %zmm1, %xmm2 -; CHECK-NEXT: vextractf32x4 $3, %zmm0, %xmm3 -; CHECK-NEXT: vsubsd %xmm2, %xmm3, %xmm4 -; CHECK-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] -; CHECK-NEXT: vsubsd %xmm2, %xmm3, %xmm2 -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm4[0],xmm2[0] -; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm3 -; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm4 -; CHECK-NEXT: vsubsd %xmm3, %xmm4, %xmm5 -; CHECK-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] -; CHECK-NEXT: vsubsd %xmm3, %xmm4, %xmm3 -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm5[0],xmm3[0] -; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 -; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm3 -; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm4 -; CHECK-NEXT: vsubsd %xmm3, %xmm4, %xmm5 -; CHECK-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0] -; CHECK-NEXT: vsubsd %xmm3, %xmm4, %xmm3 -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm5[0],xmm3[0] -; CHECK-NEXT: vsubsd %xmm1, %xmm0, %xmm4 -; CHECK-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; CHECK-NEXT: vsubsd %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm4[0],xmm0[0] -; CHECK-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 -; CHECK-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 +; CHECK-NEXT: vsubpd %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: ret{{[l|q]}} %ret = call <8 x double> @llvm.experimental.constrained.fsub.v8f64(<8 x double> %a, <8 x double> %b, metadata !"round.dynamic", @@ -163,67 +47,7 @@ define <8 x double> @f3(<8 x double> %a, <8 x double> %b) #0 { define <16 x float> @f4(<16 x float> %a, <16 x float> %b) #0 { ; CHECK-LABEL: f4: ; CHECK: # %bb.0: -; CHECK-NEXT: vextractf32x4 $3, %zmm1, %xmm2 -; CHECK-NEXT: vextractf32x4 $3, %zmm0, %xmm3 -; CHECK-NEXT: vsubss %xmm2, %xmm3, %xmm4 -; CHECK-NEXT: vmovshdup {{.*#+}} xmm5 = xmm2[1,1,3,3] -; CHECK-NEXT: vmovshdup {{.*#+}} xmm6 = xmm3[1,1,3,3] -; CHECK-NEXT: vsubss %xmm5, %xmm6, %xmm5 -; CHECK-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm5 = xmm2[1,0] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm6 = xmm3[1,0] -; CHECK-NEXT: vsubss %xmm5, %xmm6, %xmm5 -; CHECK-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm5[0],xmm4[3] -; CHECK-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; CHECK-NEXT: vpermilps {{.*#+}} xmm3 = xmm3[3,1,2,3] -; CHECK-NEXT: vsubss %xmm2, %xmm3, %xmm2 -; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm4[0,1,2],xmm2[0] -; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm3 -; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm4 -; CHECK-NEXT: vsubss %xmm3, %xmm4, %xmm5 -; CHECK-NEXT: vmovshdup {{.*#+}} xmm6 = xmm3[1,1,3,3] -; CHECK-NEXT: vmovshdup {{.*#+}} xmm7 = xmm4[1,1,3,3] -; CHECK-NEXT: vsubss %xmm6, %xmm7, %xmm6 -; CHECK-NEXT: vinsertps {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[2,3] -; 
CHECK-NEXT: vpermilpd {{.*#+}} xmm6 = xmm3[1,0] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm7 = xmm4[1,0] -; CHECK-NEXT: vsubss %xmm6, %xmm7, %xmm6 -; CHECK-NEXT: vinsertps {{.*#+}} xmm5 = xmm5[0,1],xmm6[0],xmm5[3] -; CHECK-NEXT: vpermilps {{.*#+}} xmm3 = xmm3[3,1,2,3] -; CHECK-NEXT: vpermilps {{.*#+}} xmm4 = xmm4[3,1,2,3] -; CHECK-NEXT: vsubss %xmm3, %xmm4, %xmm3 -; CHECK-NEXT: vinsertps {{.*#+}} xmm3 = xmm5[0,1,2],xmm3[0] -; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 -; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm3 -; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm4 -; CHECK-NEXT: vsubss %xmm3, %xmm4, %xmm5 -; CHECK-NEXT: vmovshdup {{.*#+}} xmm6 = xmm3[1,1,3,3] -; CHECK-NEXT: vmovshdup {{.*#+}} xmm7 = xmm4[1,1,3,3] -; CHECK-NEXT: vsubss %xmm6, %xmm7, %xmm6 -; CHECK-NEXT: vinsertps {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[2,3] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm6 = xmm3[1,0] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm7 = xmm4[1,0] -; CHECK-NEXT: vsubss %xmm6, %xmm7, %xmm6 -; CHECK-NEXT: vinsertps {{.*#+}} xmm5 = xmm5[0,1],xmm6[0],xmm5[3] -; CHECK-NEXT: vpermilps {{.*#+}} xmm3 = xmm3[3,1,2,3] -; CHECK-NEXT: vpermilps {{.*#+}} xmm4 = xmm4[3,1,2,3] -; CHECK-NEXT: vsubss %xmm3, %xmm4, %xmm3 -; CHECK-NEXT: vinsertps {{.*#+}} xmm3 = xmm5[0,1,2],xmm3[0] -; CHECK-NEXT: vsubss %xmm1, %xmm0, %xmm4 -; CHECK-NEXT: vmovshdup {{.*#+}} xmm5 = xmm1[1,1,3,3] -; CHECK-NEXT: vmovshdup {{.*#+}} xmm6 = xmm0[1,1,3,3] -; CHECK-NEXT: vsubss %xmm5, %xmm6, %xmm5 -; CHECK-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm5 = xmm1[1,0] -; CHECK-NEXT: vpermilpd {{.*#+}} xmm6 = xmm0[1,0] -; CHECK-NEXT: vsubss %xmm5, %xmm6, %xmm5 -; CHECK-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm5[0],xmm4[3] -; CHECK-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3] -; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; CHECK-NEXT: vsubss %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm4[0,1,2],xmm0[0] -; CHECK-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 -; CHECK-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 +; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: ret{{[l|q]}} %ret = call <16 x float> @llvm.experimental.constrained.fsub.v16f32(<16 x float> %a, <16 x float> %b, metadata !"round.dynamic", diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll index 9f8f066ec9c0..8905eecf6ca3 100644 --- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -559,19 +559,14 @@ entry: define <2 x double> @constrained_vector_fadd_v2f64() #0 { ; CHECK-LABEL: constrained_vector_fadd_v2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: addsd %xmm0, %xmm1 -; CHECK-NEXT: addsd {{.*}}(%rip), %xmm0 -; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] +; CHECK-NEXT: addpd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fadd_v2f64: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm1 -; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] +; AVX-NEXT: vaddpd {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq entry: %add = call <2 x double> 
@llvm.experimental.constrained.fadd.v2f64( @@ -618,24 +613,22 @@ entry: define <3 x double> @constrained_vector_fadd_v3f64() #0 { ; CHECK-LABEL: constrained_vector_fadd_v3f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xorpd %xmm2, %xmm2 -; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: addsd %xmm1, %xmm2 -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: addsd %xmm1, %xmm0 +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] +; CHECK-NEXT: addpd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: xorpd %xmm1, %xmm1 ; CHECK-NEXT: addsd {{.*}}(%rip), %xmm1 -; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movapd %xmm0, %xmm1 +; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fadd_v3f64: ; AVX: # %bb.0: # %entry ; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm1, %xmm2 -; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308] +; AVX-NEXT: vaddpd {{.*}}(%rip), %xmm1, %xmm1 ; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX-NEXT: retq entry: @@ -651,29 +644,23 @@ entry: define <4 x double> @constrained_vector_fadd_v4f64() #0 { ; CHECK-LABEL: constrained_vector_fadd_v4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero -; CHECK-NEXT: addsd %xmm1, %xmm2 -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: addsd %xmm1, %xmm0 -; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero -; CHECK-NEXT: addsd %xmm1, %xmm2 -; CHECK-NEXT: addsd {{.*}}(%rip), %xmm1 -; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; CHECK-NEXT: movapd {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308] +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.0E+0,1.0000000000000001E-1] +; CHECK-NEXT: addpd %xmm1, %xmm0 +; CHECK-NEXT: addpd {{.*}}(%rip), %xmm1 ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_fadd_v4f64: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm1 -; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm2 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm2 -; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: constrained_vector_fadd_v4f64: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308] +; AVX1-NEXT: vaddpd {{.*}}(%rip), %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX512-LABEL: constrained_vector_fadd_v4f64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vbroadcastsd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308] +; AVX512-NEXT: vaddpd {{.*}}(%rip), %ymm0, %ymm0 +; AVX512-NEXT: retq entry: %add = call <4 x double> @llvm.experimental.constrained.fadd.v4f64( <4 x double> @constrained_vector_fsub_v2f64() #0 { ; CHECK-LABEL: constrained_vector_fsub_v2f64: ; CHECK: # %bb.0: # 
%entry -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: movapd %xmm0, %xmm1 -; CHECK-NEXT: subsd {{.*}}(%rip), %xmm1 -; CHECK-NEXT: subsd {{.*}}(%rip), %xmm0 -; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] +; CHECK-NEXT: subpd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fsub_v2f64: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm0, %xmm1 -; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] +; AVX-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq entry: %sub = call <2 x double> @llvm.experimental.constrained.fsub.v2f64( @@ -771,12 +753,12 @@ define <3 x double> @constrained_vector_fsub_v3f64() #0 { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xorpd %xmm0, %xmm0 ; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: movapd %xmm1, %xmm2 -; CHECK-NEXT: subsd %xmm0, %xmm2 -; CHECK-NEXT: movapd %xmm1, %xmm0 -; CHECK-NEXT: subsd {{.*}}(%rip), %xmm0 -; CHECK-NEXT: subsd {{.*}}(%rip), %xmm1 -; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: subsd %xmm0, %xmm1 +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] +; CHECK-NEXT: subpd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movapd %xmm0, %xmm1 +; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) ; CHECK-NEXT: retq ; @@ -785,9 +767,8 @@ define <3 x double> @constrained_vector_fsub_v3f64() #0 { ; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm1, %xmm2 -; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [-1.7976931348623157E+308,-1.7976931348623157E+308] +; AVX-NEXT: vsubpd {{.*}}(%rip), %xmm1, %xmm1 ; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX-NEXT: retq entry: @@ -803,29 +784,23 @@ entry: define <4 x double> @constrained_vector_fsub_v4f64() #0 { ; CHECK-LABEL: constrained_vector_fsub_v4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: movapd %xmm1, %xmm2 -; CHECK-NEXT: subsd {{.*}}(%rip), %xmm2 +; CHECK-NEXT: movapd {{.*#+}} xmm1 = [-1.7976931348623157E+308,-1.7976931348623157E+308] ; CHECK-NEXT: movapd %xmm1, %xmm0 -; CHECK-NEXT: subsd {{.*}}(%rip), %xmm0 -; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; CHECK-NEXT: movapd %xmm1, %xmm2 -; CHECK-NEXT: subsd {{.*}}(%rip), %xmm2 -; CHECK-NEXT: subsd {{.*}}(%rip), %xmm1 -; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; CHECK-NEXT: subpd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: subpd {{.*}}(%rip), %xmm1 ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_fsub_v4f64: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm0, %xmm1 -; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm0, %xmm2 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm0, %xmm2 -; AVX-NEXT: vsubsd {{.*}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX-NEXT: retq +; AVX1-LABEL: constrained_vector_fsub_v4f64: +; AVX1: # %bb.0: # %entry 
+; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308] +; AVX1-NEXT: vsubpd {{.*}}(%rip), %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX512-LABEL: constrained_vector_fsub_v4f64: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vbroadcastsd {{.*#+}} ymm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308] +; AVX512-NEXT: vsubpd {{.*}}(%rip), %ymm0, %ymm0 +; AVX512-NEXT: retq entry: %sub = call <4 x double> @llvm.experimental.constrained.fsub.v4f64( <4 x double>