diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 702e163b46ac..b6d134ff0fb5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7779,16 +7779,18 @@ static SDValue lowerVectorShuffleAsElementInsertion(
     MVT VT, SDLoc DL, SDValue V1, SDValue V2, ArrayRef<int> Mask,
     const X86Subtarget *Subtarget, SelectionDAG &DAG) {
   SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+  MVT ExtVT = VT;
+  MVT EltVT = VT.getVectorElementType();
 
   int V2Index = std::find_if(Mask.begin(), Mask.end(), [&Mask](int M) {
                   return M >= (int)Mask.size();
                 }) - Mask.begin();
+  bool IsV1Zeroable = true;
   for (int i = 0, Size = Mask.size(); i < Size; ++i)
-    if (i != V2Index && !Zeroable[i])
-      return SDValue(); // Not inserting into a zero vector.
-
-  MVT ExtVT = VT;
-  MVT EltVT = VT.getVectorElementType();
+    if (i != V2Index && !Zeroable[i]) {
+      IsV1Zeroable = false;
+      break;
+    }
 
   // Check for a single input from a SCALAR_TO_VECTOR node.
   // FIXME: All of this should be canonicalized into INSERT_VECTOR_ELT and
@@ -7800,6 +7802,11 @@ static SDValue lowerVectorShuffleAsElementInsertion(
     // We need to zext the scalar if it is smaller than an i32.
     V2S = DAG.getNode(ISD::BITCAST, DL, EltVT, V2S);
     if (EltVT == MVT::i8 || EltVT == MVT::i16) {
+      // Using zext to expand a narrow element won't work for non-zero
+      // insertions.
+      if (!IsV1Zeroable)
+        return SDValue();
+
       // Zero-extend directly to i32.
       ExtVT = MVT::v4i32;
       V2S = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, V2S);
@@ -7812,6 +7819,25 @@ static SDValue lowerVectorShuffleAsElementInsertion(
       return SDValue();
   }
 
+  if (!IsV1Zeroable) {
+    // If V1 can't be treated as a zero vector we have fewer options to lower
+    // this. We can't support integer vectors or non-zero targets cheaply, and
+    // the V1 elements can't be permuted in any way.
+    assert(VT == ExtVT && "Cannot change extended type when non-zeroable!");
+    if (!VT.isFloatingPoint() || V2Index != 0)
+      return SDValue();
+    SmallVector<int, 8> V1Mask(Mask.begin(), Mask.end());
+    V1Mask[V2Index] = -1;
+    if (!isNoopShuffleMask(V1Mask))
+      return SDValue();
+
+    // Otherwise, use MOVSD or MOVSS.
+    assert((EltVT == MVT::f32 || EltVT == MVT::f64) &&
+           "Only two types of floating point element types to handle!");
+    return DAG.getNode(EltVT == MVT::f32 ? X86ISD::MOVSS : X86ISD::MOVSD, DL,
+                       ExtVT, V1, V2);
+  }
+
   V2 = DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT, V2);
   if (ExtVT != VT)
     V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2);
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index f833d043027b..bd00bdd02b5d 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1269,6 +1269,9 @@ let Predicates = [HasAVX] in {
             (VMOVLPDrm VR128:$src1, addr:$src2)>;
   def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
             (VMOVLPDrm VR128:$src1, addr:$src2)>;
+  def : Pat<(v2f64 (X86Movsd VR128:$src1,
+                             (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+            (VMOVLPDrm VR128:$src1, addr:$src2)>;
 
   // Store patterns
   def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
@@ -1316,6 +1319,9 @@ let Predicates = [UseSSE2] in {
             (MOVLPDrm VR128:$src1, addr:$src2)>;
   def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
             (MOVLPDrm VR128:$src1, addr:$src2)>;
+  def : Pat<(v2f64 (X86Movsd VR128:$src1,
+                             (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+            (MOVLPDrm VR128:$src1, addr:$src2)>;
 
   // Store patterns
   def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
index d73736024318..e8613be02724 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -336,13 +336,14 @@ define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20(
 ; SSE2-NEXT:    movdqa %xmm0, %xmm4
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm2[8],xmm4[9],xmm2[9],xmm4[10],xmm2[10],xmm4[11],xmm2[11],xmm4[12],xmm2[12],xmm4[13],xmm2[13],xmm4[14],xmm2[14],xmm4[15],xmm2[15]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm4 = xmm4[3,2,1,0,4,5,6,7]
-; SSE2-NEXT:    shufpd {{.*#+}} xmm4 = xmm4[0],xmm3[1]
+; SSE2-NEXT:    movsd %xmm4, %xmm3
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
-; SSE2-NEXT:    packuswb %xmm4, %xmm0
+; SSE2-NEXT:    movsd %xmm0, %xmm1
+; SSE2-NEXT:    packuswb %xmm3, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
index e15773b067a9..aa837f15e579 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
@@ -211,61 +211,28 @@ define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
   ret <2 x double> %shuffle
 }
 define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
-; SSE2-LABEL: shuffle_v2f64_03:
-; SSE2:       # BB#0:
-; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
-; SSE2-NEXT:    retq
-;
-; SSE3-LABEL: shuffle_v2f64_03:
-; SSE3:       # BB#0:
-; SSE3-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
-; SSE3-NEXT:    retq
-;
-; SSSE3-LABEL: shuffle_v2f64_03:
-; SSSE3:       # BB#0:
-; SSSE3-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
-; SSSE3-NEXT:    retq
-;
-; SSE41-LABEL: shuffle_v2f64_03:
-; SSE41:       # BB#0:
-; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
-; SSE41-NEXT:    retq
+; SSE-LABEL: shuffle_v2f64_03:
+; SSE:       # BB#0:
+; SSE-NEXT:    movsd %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v2f64_03:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT:    vmovsd %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
   ret <2 x double> %shuffle
 }
 define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
-; SSE2-LABEL: shuffle_v2f64_21:
-; SSE2:       # BB#0:
-; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
-; SSE2-NEXT:    movapd %xmm1, %xmm0
-; SSE2-NEXT:    retq
-;
-; SSE3-LABEL: shuffle_v2f64_21:
-; SSE3:       # BB#0:
-; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
-; SSE3-NEXT:    movapd %xmm1, %xmm0
-; SSE3-NEXT:    retq
-;
-; SSSE3-LABEL: shuffle_v2f64_21:
-; SSSE3:       # BB#0:
-; SSSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
-; SSSE3-NEXT:    movapd %xmm1, %xmm0
-; SSSE3-NEXT:    retq
-;
-; SSE41-LABEL: shuffle_v2f64_21:
-; SSE41:       # BB#0:
-; SSE41-NEXT:    blendpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
-; SSE41-NEXT:    movapd %xmm1, %xmm0
-; SSE41-NEXT:    retq
+; SSE-LABEL: shuffle_v2f64_21:
+; SSE:       # BB#0:
+; SSE-NEXT:    movsd %xmm1, %xmm0
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v2f64_21:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT:    vmovsd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
   ret <2 x double> %shuffle
@@ -302,17 +269,20 @@ define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
 define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
 ; SSE2-LABEL: shuffle_v2i64_03:
 ; SSE2:       # BB#0:
-; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT:    movsd %xmm0, %xmm1
+; SSE2-NEXT:    movaps %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE3-LABEL: shuffle_v2i64_03:
 ; SSE3:       # BB#0:
-; SSE3-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE3-NEXT:    movsd %xmm0, %xmm1
+; SSE3-NEXT:    movaps %xmm1, %xmm0
 ; SSE3-NEXT:    retq
 ;
 ; SSSE3-LABEL: shuffle_v2i64_03:
 ; SSSE3:       # BB#0:
-; SSSE3-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSSE3-NEXT:    movsd %xmm0, %xmm1
+; SSSE3-NEXT:    movaps %xmm1, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: shuffle_v2i64_03:
@@ -335,20 +305,20 @@ define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
 define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
 ; SSE2-LABEL: shuffle_v2i64_03_copy:
 ; SSE2:       # BB#0:
-; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
-; SSE2-NEXT:    movapd %xmm1, %xmm0
+; SSE2-NEXT:    movsd %xmm1, %xmm2
+; SSE2-NEXT:    movaps %xmm2, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE3-LABEL: shuffle_v2i64_03_copy:
 ; SSE3:       # BB#0:
-; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
-; SSE3-NEXT:    movapd %xmm1, %xmm0
+; SSE3-NEXT:    movsd %xmm1, %xmm2
+; SSE3-NEXT:    movaps %xmm2, %xmm0
 ; SSE3-NEXT:    retq
 ;
 ; SSSE3-LABEL: shuffle_v2i64_03_copy:
 ; SSSE3:       # BB#0:
-; SSSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
-; SSSE3-NEXT:    movapd %xmm1, %xmm0
+; SSSE3-NEXT:    movsd %xmm1, %xmm2
+; SSSE3-NEXT:    movaps %xmm2, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: shuffle_v2i64_03_copy:
@@ -489,20 +459,17 @@ define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
 define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
 ; SSE2-LABEL: shuffle_v2i64_21:
 ; SSE2:       # BB#0:
-; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
-; SSE2-NEXT:    movapd %xmm1, %xmm0
+; SSE2-NEXT:    movsd %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE3-LABEL: shuffle_v2i64_21:
 ; SSE3:       # BB#0:
-; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
-; SSE3-NEXT:    movapd %xmm1, %xmm0
+; SSE3-NEXT:    movsd %xmm1, %xmm0
 ; SSE3-NEXT:    retq
 ;
 ; SSSE3-LABEL: shuffle_v2i64_21:
 ; SSSE3:       # BB#0:
-; SSSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
-; SSSE3-NEXT:    movapd %xmm1, %xmm0
+; SSSE3-NEXT:    movsd %xmm1, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: shuffle_v2i64_21:
@@ -526,20 +493,20 @@ define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
 define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
 ; SSE2-LABEL: shuffle_v2i64_21_copy:
 ; SSE2:       # BB#0:
-; SSE2-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
-; SSE2-NEXT:    movapd %xmm2, %xmm0
+; SSE2-NEXT:    movsd %xmm2, %xmm1
+; SSE2-NEXT:    movaps %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE3-LABEL: shuffle_v2i64_21_copy:
 ; SSE3:       # BB#0:
-; SSE3-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
-; SSE3-NEXT:    movapd %xmm2, %xmm0
+; SSE3-NEXT:    movsd %xmm2, %xmm1
+; SSE3-NEXT:    movaps %xmm1, %xmm0
 ; SSE3-NEXT:    retq
 ;
 ; SSSE3-LABEL: shuffle_v2i64_21_copy:
 ; SSSE3:       # BB#0:
-; SSSE3-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
-; SSSE3-NEXT:    movapd %xmm2, %xmm0
+; SSSE3-NEXT:    movsd %xmm2, %xmm1
+; SSSE3-NEXT:    movaps %xmm1, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: shuffle_v2i64_21_copy:
@@ -700,23 +667,20 @@ define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
 define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
 ; SSE2-LABEL: shuffle_v2i64_z1:
 ; SSE2:       # BB#0:
-; SSE2-NEXT:    xorpd %xmm1, %xmm1
-; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
-; SSE2-NEXT:    movapd %xmm1, %xmm0
+; SSE2-NEXT:    xorps %xmm1, %xmm1
+; SSE2-NEXT:    movsd %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE3-LABEL: shuffle_v2i64_z1:
 ; SSE3:       # BB#0:
-; SSE3-NEXT:    xorpd %xmm1, %xmm1
-; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
-; SSE3-NEXT:    movapd %xmm1, %xmm0
+; SSE3-NEXT:    xorps %xmm1, %xmm1
+; SSE3-NEXT:    movsd %xmm1, %xmm0
 ; SSE3-NEXT:    retq
 ;
 ; SSSE3-LABEL: shuffle_v2i64_z1:
 ; SSSE3:       # BB#0:
-; SSSE3-NEXT:    xorpd %xmm1, %xmm1
-; SSSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
-; SSSE3-NEXT:    movapd %xmm1, %xmm0
+; SSSE3-NEXT:    xorps %xmm1, %xmm1
+; SSSE3-NEXT:    movsd %xmm1, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: shuffle_v2i64_z1:
@@ -789,38 +753,16 @@ define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
 }
 
 define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
-; SSE2-LABEL: shuffle_v2f64_z1:
-; SSE2:       # BB#0:
-; SSE2-NEXT:    xorpd %xmm1, %xmm1
-; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
-; SSE2-NEXT:    movapd %xmm1, %xmm0
-; SSE2-NEXT:    retq
-;
-; SSE3-LABEL: shuffle_v2f64_z1:
-; SSE3:       # BB#0:
-; SSE3-NEXT:    xorpd %xmm1, %xmm1
-; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
-; SSE3-NEXT:    movapd %xmm1, %xmm0
-; SSE3-NEXT:    retq
-;
-; SSSE3-LABEL: shuffle_v2f64_z1:
-; SSSE3:       # BB#0:
-; SSSE3-NEXT:    xorpd %xmm1, %xmm1
-; SSSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
-; SSSE3-NEXT:    movapd %xmm1, %xmm0
-; SSSE3-NEXT:    retq
-;
-; SSE41-LABEL: shuffle_v2f64_z1:
-; SSE41:       # BB#0:
-; SSE41-NEXT:    xorpd %xmm1, %xmm1
-; SSE41-NEXT:    blendpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
-; SSE41-NEXT:    movapd %xmm1, %xmm0
-; SSE41-NEXT:    retq
+; SSE-LABEL: shuffle_v2f64_z1:
+; SSE:       # BB#0:
+; SSE-NEXT:    xorps %xmm1, %xmm1
+; SSE-NEXT:    movsd %xmm1, %xmm0
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v2f64_z1:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vmovsd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
   ret <2 x double> %shuffle
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
index 10a27f44320a..595447775b58 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -55,7 +55,7 @@ define <4 x double> @shuffle_v4f64_0300(<4 x double> %a, <4 x double> %b) {
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
 ; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
-; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX1-NEXT:    vmovsd %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: shuffle_v4f64_0300:
@@ -382,7 +382,7 @@ define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
 ; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
-; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX1-NEXT:    vmovsd %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: shuffle_v4i64_0300:
@@ -518,7 +518,7 @@ define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
 ; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
+; AVX1-NEXT:    vmovsd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: shuffle_v4i64_4012:
@@ -654,7 +654,7 @@ define <4 x i64> @stress_test1(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
 ; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
-; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX1-NEXT:    vmovsd %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: stress_test1:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
index 2f02f2fc08f4..662b98326113 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -91,7 +91,7 @@ define <8 x double> @shuffle_v8f64_70000000(<8 x double> %a, <8 x double> %b) {
 ; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
 ; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[3,1,2,3]
 ; ALL-NEXT:    vbroadcastsd %xmm0, %ymm0
-; ALL-NEXT:    vblendpd {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3]
+; ALL-NEXT:    vmovsd %xmm1, %xmm0, %xmm1
 ; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -275,12 +275,12 @@ define <8 x double> @shuffle_v8f64_08192a3b(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_08991abb(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_08991abb:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vpermilpd {{.*#+}} ymm2 = ymm0[1,0,2,2]
-; ALL-NEXT:    vpermpd {{.*#+}} ymm3 = ymm1[0,2,3,3]
-; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm2[0],ymm3[1,2,3]
-; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,1,1]
-; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
-; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT:    vpermpd {{.*#+}} ymm2 = ymm1[0,0,1,1]
+; ALL-NEXT:    vmovsd %xmm0, %xmm2, %xmm2
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,2,3,3]
+; ALL-NEXT:    vmovsd %xmm0, %xmm1, %xmm0
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
   ret <8 x double> %shuffle
@@ -303,11 +303,11 @@ define <8 x double> @shuffle_v8f64_091b2d3f(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_09ab1def(<8 x double> %a, <8 x double> %b) {
 ; ALL-LABEL: shuffle_v8f64_09ab1def:
 ; ALL:       # BB#0:
-; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm2
-; ALL-NEXT:    vpermilpd {{.*#+}} ymm3 = ymm0[1,0,2,2]
-; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1,2,3]
-; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
-; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT:    vmovsd %xmm0, %xmm1, %xmm2
+; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm1
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
+; ALL-NEXT:    vmovsd %xmm0, %xmm1, %xmm0
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
   ret <8 x double> %shuffle
@@ -721,7 +721,7 @@ define <8 x double> @shuffle_v8f64_f511235a(<8 x double> %a, <8 x double> %b) {
 ; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2,3]
 ; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm1
 ; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[3,1,2,3]
-; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
+; ALL-NEXT:    vmovsd %xmm1, %xmm0, %xmm0
 ; ALL-NEXT:    vinsertf64x4 $1, %ymm3, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>