diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 90ed8c920565..70203dacef09 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36922,6 +36922,15 @@ static SDValue canonicalizeLaneShuffleWithRepeatedOps(SDValue V,
     return SDValue();
 
   switch (SrcOpc0) {
+  case X86ISD::MOVDDUP: {
+    SDValue LHS = DAG.getBitcast(VT, Src0.getOperand(0));
+    SDValue RHS =
+        DAG.getBitcast(VT, Src1.isUndef() ? Src1 : Src1.getOperand(0));
+    SDValue Res =
+        DAG.getNode(X86ISD::VPERM2X128, DL, VT, LHS, RHS, V.getOperand(2));
+    Res = DAG.getNode(SrcOpc0, DL, SrcVT0, DAG.getBitcast(SrcVT0, Res));
+    return DAG.getBitcast(VT, Res);
+  }
   case X86ISD::VSHLI:
   case X86ISD::VSRLI:
   case X86ISD::VSRAI:
diff --git a/llvm/test/CodeGen/X86/extract-concat.ll b/llvm/test/CodeGen/X86/extract-concat.ll
index 49ac851d88fc..f979f23f82f8 100644
--- a/llvm/test/CodeGen/X86/extract-concat.ll
+++ b/llvm/test/CodeGen/X86/extract-concat.ll
@@ -68,13 +68,12 @@ define <16 x i64> @catcat(<4 x i64> %x) {
 ;
 ; AVX1-LABEL: catcat:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
-; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,2,3]
 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,1,0,1]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm4
 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
+; AVX1-NEXT:    vmovddup {{.*#+}} ymm2 = ymm0[0,0,2,2]
 ; AVX1-NEXT:    vpermilpd {{.*#+}} ymm3 = ymm0[1,1,3,3]
 ; AVX1-NEXT:    vmovaps %ymm4, %ymm0
 ; AVX1-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
index 38600884262c..80acaef8a0a0 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -109,8 +109,8 @@ define <4 x double> @shuffle_v4f64_1000(<4 x double> %a, <4 x double> %b) {
 define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 x double> %b) {
 ; AVX1-LABEL: shuffle_v4f64_2200:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: shuffle_v4f64_2200:
@@ -129,8 +129,8 @@ define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 x double> %b) {
 define <4 x double> @shuffle_v4f64_2222(<4 x double> %a, <4 x double> %b) {
 ; AVX1-LABEL: shuffle_v4f64_2222:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
+; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: shuffle_v4f64_2222:
@@ -149,8 +149,8 @@ define <4 x double> @shuffle_v4f64_2222(<4 x double> %a, <4 x double> %b) {
 define <4 x double> @shuffle_v4f64_2222_bc(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: shuffle_v4f64_2222_bc:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
+; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: shuffle_v4f64_2222_bc:
@@ -856,8 +856,8 @@ define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
 define <4 x i64> @shuffle_v4i64_2200(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: shuffle_v4i64_2200:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: shuffle_v4i64_2200:
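
Note (not part of the patch itself): the new MOVDDUP case canonicalizes
vperm2x128(movddup(x), movddup(y)) into movddup(vperm2x128(x, y)), pushing the
lane shuffle through the duplicate; the RHS guard handles an undef second
operand. The two orders are equivalent: in shuffle_v4f64_2200, movddup first
gives [a0,a0,a2,a2] and the lane swap then yields [a2,a2,a0,a0], while swapping
lanes first gives [a2,a3,a0,a1] and movddup again yields [a2,a2,a0,a0]. The
payoff is that the sunk vperm2x128 can merge with neighbouring lane shuffles,
which is why the catcat diff above drops from 13 to 12 instructions: the
standalone vperm2f128 into ymm2 disappears and the movddup reuses the existing
ymm0 lane swap. A minimal IR sketch of the pattern, using the hypothetical test
name @shuffle_v4f64_2200_repro (it mirrors the existing shuffle_v4f64_2200
test, which exercises exactly this fold):

  define <4 x double> @shuffle_v4f64_2200_repro(<4 x double> %a) {
    ; Mask <2,2,0,0>: duplicate the even elements, then swap 128-bit lanes.
    %r = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
    ret <4 x double> %r
  }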