diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index c982774d6ea5..502503a92643 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -9335,6 +9335,14 @@ static const uint16_t ReplaceableInstrs[][3] = {
   { X86::ORPSrr, X86::ORPDrr, X86::PORrr },
   { X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
   { X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
+  { X86::UNPCKLPDrm, X86::UNPCKLPDrm, X86::PUNPCKLQDQrm },
+  { X86::UNPCKLPDrr, X86::UNPCKLPDrr, X86::PUNPCKLQDQrr },
+  { X86::UNPCKHPDrm, X86::UNPCKHPDrm, X86::PUNPCKHQDQrm },
+  { X86::UNPCKHPDrr, X86::UNPCKHPDrr, X86::PUNPCKHQDQrr },
+  { X86::UNPCKLPSrm, X86::UNPCKLPSrm, X86::PUNPCKLDQrm },
+  { X86::UNPCKLPSrr, X86::UNPCKLPSrr, X86::PUNPCKLDQrr },
+  { X86::UNPCKHPSrm, X86::UNPCKHPSrm, X86::PUNPCKHDQrm },
+  { X86::UNPCKHPSrr, X86::UNPCKHPSrr, X86::PUNPCKHDQrr },
   // AVX 128-bit support
   { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr },
   { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm },
@@ -9355,6 +9363,14 @@ static const uint16_t ReplaceableInstrs[][3] = {
   { X86::VORPSrr, X86::VORPDrr, X86::VPORrr },
   { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
   { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
+  { X86::VUNPCKLPDrm, X86::VUNPCKLPDrm, X86::VPUNPCKLQDQrm },
+  { X86::VUNPCKLPDrr, X86::VUNPCKLPDrr, X86::VPUNPCKLQDQrr },
+  { X86::VUNPCKHPDrm, X86::VUNPCKHPDrm, X86::VPUNPCKHQDQrm },
+  { X86::VUNPCKHPDrr, X86::VUNPCKHPDrr, X86::VPUNPCKHQDQrr },
+  { X86::VUNPCKLPSrm, X86::VUNPCKLPSrm, X86::VPUNPCKLDQrm },
+  { X86::VUNPCKLPSrr, X86::VUNPCKLPSrr, X86::VPUNPCKLDQrr },
+  { X86::VUNPCKHPSrm, X86::VUNPCKHPSrm, X86::VPUNPCKHDQrm },
+  { X86::VUNPCKHPSrr, X86::VUNPCKHPSrr, X86::VPUNPCKHDQrr },
   // AVX 256-bit support
   { X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr },
   { X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm },
@@ -9413,6 +9429,30 @@ static const uint16_t ReplaceableInstrs[][3] = {
   { X86::VPERMILPSZ256ri, X86::VPERMILPSZ256ri, X86::VPSHUFDZ256ri },
   { X86::VPERMILPSZmi, X86::VPERMILPSZmi, X86::VPSHUFDZmi },
   { X86::VPERMILPSZri, X86::VPERMILPSZri, X86::VPSHUFDZri },
+  { X86::VUNPCKLPDZ256rm, X86::VUNPCKLPDZ256rm, X86::VPUNPCKLQDQZ256rm },
+  { X86::VUNPCKLPDZ256rr, X86::VUNPCKLPDZ256rr, X86::VPUNPCKLQDQZ256rr },
+  { X86::VUNPCKHPDZ256rm, X86::VUNPCKHPDZ256rm, X86::VPUNPCKHQDQZ256rm },
+  { X86::VUNPCKHPDZ256rr, X86::VUNPCKHPDZ256rr, X86::VPUNPCKHQDQZ256rr },
+  { X86::VUNPCKLPSZ256rm, X86::VUNPCKLPSZ256rm, X86::VPUNPCKLDQZ256rm },
+  { X86::VUNPCKLPSZ256rr, X86::VUNPCKLPSZ256rr, X86::VPUNPCKLDQZ256rr },
+  { X86::VUNPCKHPSZ256rm, X86::VUNPCKHPSZ256rm, X86::VPUNPCKHDQZ256rm },
+  { X86::VUNPCKHPSZ256rr, X86::VUNPCKHPSZ256rr, X86::VPUNPCKHDQZ256rr },
+  { X86::VUNPCKLPDZ128rm, X86::VUNPCKLPDZ128rm, X86::VPUNPCKLQDQZ128rm },
+  { X86::VUNPCKLPDZ128rr, X86::VUNPCKLPDZ128rr, X86::VPUNPCKLQDQZ128rr },
+  { X86::VUNPCKHPDZ128rm, X86::VUNPCKHPDZ128rm, X86::VPUNPCKHQDQZ128rm },
+  { X86::VUNPCKHPDZ128rr, X86::VUNPCKHPDZ128rr, X86::VPUNPCKHQDQZ128rr },
+  { X86::VUNPCKLPSZ128rm, X86::VUNPCKLPSZ128rm, X86::VPUNPCKLDQZ128rm },
+  { X86::VUNPCKLPSZ128rr, X86::VUNPCKLPSZ128rr, X86::VPUNPCKLDQZ128rr },
+  { X86::VUNPCKHPSZ128rm, X86::VUNPCKHPSZ128rm, X86::VPUNPCKHDQZ128rm },
+  { X86::VUNPCKHPSZ128rr, X86::VUNPCKHPSZ128rr, X86::VPUNPCKHDQZ128rr },
+  { X86::VUNPCKLPDZrm, X86::VUNPCKLPDZrm, X86::VPUNPCKLQDQZrm },
+  { X86::VUNPCKLPDZrr, X86::VUNPCKLPDZrr, X86::VPUNPCKLQDQZrr },
+  { X86::VUNPCKHPDZrm, X86::VUNPCKHPDZrm, X86::VPUNPCKHQDQZrm },
+  { X86::VUNPCKHPDZrr, X86::VUNPCKHPDZrr, X86::VPUNPCKHQDQZrr },
+  { X86::VUNPCKLPSZrm, X86::VUNPCKLPSZrm, X86::VPUNPCKLDQZrm },
+  { X86::VUNPCKLPSZrr, X86::VUNPCKLPSZrr, X86::VPUNPCKLDQZrr },
+  { X86::VUNPCKHPSZrm, X86::VUNPCKHPSZrm, X86::VPUNPCKHDQZrm },
+  { X86::VUNPCKHPSZrr, X86::VUNPCKHPSZrr, X86::VPUNPCKHDQZrr },
 };

 static const uint16_t ReplaceableInstrsAVX2[][3] = {
@@ -9440,6 +9480,14 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = {
   { X86::VBLENDPSYrmi, X86::VBLENDPSYrmi, X86::VPBLENDDYrmi },
   { X86::VPERMILPSYmi, X86::VPERMILPSYmi, X86::VPSHUFDYmi },
   { X86::VPERMILPSYri, X86::VPERMILPSYri, X86::VPSHUFDYri },
+  { X86::VUNPCKLPDYrm, X86::VUNPCKLPDYrm, X86::VPUNPCKLQDQYrm },
+  { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrr, X86::VPUNPCKLQDQYrr },
+  { X86::VUNPCKHPDYrm, X86::VUNPCKHPDYrm, X86::VPUNPCKHQDQYrm },
+  { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrr, X86::VPUNPCKHQDQYrr },
+  { X86::VUNPCKLPSYrm, X86::VUNPCKLPSYrm, X86::VPUNPCKLDQYrm },
+  { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrr, X86::VPUNPCKLDQYrr },
+  { X86::VUNPCKHPSYrm, X86::VUNPCKHPSYrm, X86::VPUNPCKHDQYrm },
+  { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrr, X86::VPUNPCKHDQYrr },
 };

 static const uint16_t ReplaceableInstrsAVX2InsertExtract[][3] = {
diff --git a/llvm/test/CodeGen/X86/avx-shuffle-x86_32.ll b/llvm/test/CodeGen/X86/avx-shuffle-x86_32.ll
index 6defe7efb941..c95ac5694b1b 100644
--- a/llvm/test/CodeGen/X86/avx-shuffle-x86_32.ll
+++ b/llvm/test/CodeGen/X86/avx-shuffle-x86_32.ll
@@ -6,7 +6,7 @@ define <4 x i64> @test1(<4 x i64> %a) nounwind {
 ; CHECK-LABEL: test1:
 ; CHECK: # BB#0:
 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; CHECK-NEXT: retl
 %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32>
 ret <4 x i64>%b
diff --git a/llvm/test/CodeGen/X86/avx-unpack.ll b/llvm/test/CodeGen/X86/avx-unpack.ll
index 7826bc97eec5..88d1de4f00df 100644
--- a/llvm/test/CodeGen/X86/avx-unpack.ll
+++ b/llvm/test/CodeGen/X86/avx-unpack.ll
@@ -120,7 +120,7 @@ define <4 x i64> @unpackhipd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable
 define <4 x i64> @unpackhipd2(<4 x i64>* %src1, <4 x i64>* %src2) nounwind uwtable readnone ssp {
 ; CHECK-LABEL: unpackhipd2:
 ; CHECK: # BB#0:
-; CHECK-NEXT: vmovapd (%rdi), %ymm0
+; CHECK-NEXT: vmovaps (%rdi), %ymm0
 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3]
 ; CHECK-NEXT: retq
 %a = load <4 x i64>, <4 x i64>* %src1
@@ -162,7 +162,7 @@ define <4 x i64> @unpacklopd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable
 define <4 x i64> @unpacklopd2(<4 x i64>* %src1, <4 x i64>* %src2) nounwind uwtable readnone ssp {
 ; CHECK-LABEL: unpacklopd2:
 ; CHECK: # BB#0:
-; CHECK-NEXT: vmovapd (%rdi), %ymm0
+; CHECK-NEXT: vmovaps (%rdi), %ymm0
 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
 ; CHECK-NEXT: retq
 %a = load <4 x i64>, <4 x i64>* %src1
diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
index 22be3c21f40f..8cea36f06320 100644
--- a/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
@@ -3314,12 +3314,12 @@ define <4 x i64> @test_mm256_unpackhi_epi16(<4 x i64> %a0, <4 x i64> %a1) nounwi
 define <4 x i64> @test_mm256_unpackhi_epi32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
 ; X32-LABEL: test_mm256_unpackhi_epi32:
 ; X32: # BB#0:
-; X32-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
+; X32-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_unpackhi_epi32:
 ; X64: # BB#0:
-; X64-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
+; X64-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
 ; X64-NEXT: retq
 %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
 %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
@@ -3331,12 +3331,12 @@ define <4 x i64> @test_mm256_unpackhi_epi32(<4 x i64> %a0, <4 x i64> %a1) nounwi
 define <4 x i64> @test_mm256_unpackhi_epi64(<4 x i64> %a0, <4 x i64> %a1) nounwind {
 ; X32-LABEL: test_mm256_unpackhi_epi64:
 ; X32: # BB#0:
-; X32-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; X32-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_unpackhi_epi64:
 ; X64: # BB#0:
-; X64-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; X64-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
 ; X64-NEXT: retq
 %res = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
 ret <4 x i64> %res
@@ -3379,12 +3379,12 @@ define <4 x i64> @test_mm256_unpacklo_epi16(<4 x i64> %a0, <4 x i64> %a1) nounwi
 define <4 x i64> @test_mm256_unpacklo_epi32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
 ; X32-LABEL: test_mm256_unpacklo_epi32:
 ; X32: # BB#0:
-; X32-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
+; X32-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_unpacklo_epi32:
 ; X64: # BB#0:
-; X64-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
+; X64-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
 ; X64-NEXT: retq
 %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
 %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
@@ -3396,12 +3396,12 @@ define <4 x i64> @test_mm256_unpacklo_epi32(<4 x i64> %a0, <4 x i64> %a1) nounwi
 define <4 x i64> @test_mm256_unpacklo_epi64(<4 x i64> %a0, <4 x i64> %a1) nounwind {
 ; X32-LABEL: test_mm256_unpacklo_epi64:
 ; X32: # BB#0:
-; X32-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; X32-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_unpacklo_epi64:
 ; X64: # BB#0:
-; X64-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; X64-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
 ; X64-NEXT: retq
 %res = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 ret <4 x i64> %res
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
index 6afe0a1769d9..8fc73b647ebd 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
@@ -697,12 +697,12 @@ define <8 x double> @test_mm512_maskz_shuffle_pd(i8 %a0, <8 x double> %a1, <8 x
 define <8 x i64> @test_mm512_unpackhi_epi32(<8 x i64> %a0, <8 x i64> %a1) {
 ; X32-LABEL: test_mm512_unpackhi_epi32:
 ; X32: # BB#0:
-; X32-NEXT: vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
+; X32-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm512_unpackhi_epi32:
 ; X64: # BB#0:
-; X64-NEXT: vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
+; X64-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
 ; X64-NEXT: retq
 %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
 %arg1 = bitcast <8 x i64> %a1 to <16 x i32>
@@ -759,12 +759,12 @@ define <8 x i64> @test_mm512_maskz_unpackhi_epi32(i16 %a0, <8 x i64> %a1, <8 x i
 define <8 x i64> @test_mm512_unpackhi_epi64(<8 x i64> %a0, <8 x i64> %a1) {
 ; X32-LABEL: test_mm512_unpackhi_epi64:
 ; X32: # BB#0:
-; X32-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; X32-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm512_unpackhi_epi64:
 ; X64: # BB#0:
-; X64-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; X64-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
 ; X64-NEXT: retq
 %res = shufflevector <8 x i64> %a0, <8 x i64> %a1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
 ret <8 x i64> %res
@@ -915,12 +915,12 @@ define <16 x float> @test_mm512_maskz_unpackhi_ps(i16 %a0, <16 x float> %a1, <16
 define <8 x i64> @test_mm512_unpacklo_epi32(<8 x i64> %a0, <8 x i64> %a1) {
 ; X32-LABEL: test_mm512_unpacklo_epi32:
 ; X32: # BB#0:
-; X32-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
+; X32-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm512_unpacklo_epi32:
 ; X64: # BB#0:
-; X64-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
+; X64-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; X64-NEXT: retq
 %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
 %arg1 = bitcast <8 x i64> %a1 to <16 x i32>
@@ -977,12 +977,12 @@ define <8 x i64> @test_mm512_maskz_unpacklo_epi32(i16 %a0, <8 x i64> %a1, <8 x i
 define <8 x i64> @test_mm512_unpacklo_epi64(<8 x i64> %a0, <8 x i64> %a1) {
 ; X32-LABEL: test_mm512_unpacklo_epi64:
 ; X32: # BB#0:
-; X32-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; X32-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm512_unpacklo_epi64:
 ; X64: # BB#0:
-; X64-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; X64-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
 ; X64-NEXT: retq
 %res = shufflevector <8 x i64> %a0, <8 x i64> %a1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 ret <8 x i64> %res
diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
index 42f66da3b986..3c078ca65892 100644
--- a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
+++ b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
@@ -1905,8 +1905,8 @@ define <4 x i32> @test_masked_z_16xi32_to_4xi32_perm_mem_mask3(<16 x i32>* %vp)
 define <2 x i64> @test_4xi64_to_2xi64_perm_mask0(<4 x i64> %vec) {
 ; CHECK-LABEL: test_4xi64_to_2xi64_perm_mask0:
 ; CHECK: # BB#0:
-; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
-; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
 %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <2 x i32> <i32 2, i32 0>
@@ -1972,9 +1972,9 @@ define <2 x i64> @test_masked_z_4xi64_to_2xi64_perm_mask1(<4 x i64> %vec) {
 define <2 x i64> @test_4xi64_to_2xi64_perm_mem_mask0(<4 x i64>* %vp) {
 ; CHECK-LABEL: test_4xi64_to_2xi64_perm_mem_mask0:
 ; CHECK: # BB#0:
-; CHECK-NEXT: vmovdqa (%rdi), %ymm0
-; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
-; CHECK-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; CHECK-NEXT: vmovaps (%rdi), %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
 %vec = load <4 x i64>, <4 x i64>* %vp
@@ -2311,9 +2311,9 @@ define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mask7(<8 x i64> %vec) {
 define <2 x i64> @test_8xi64_to_2xi64_perm_mask0(<8 x i64> %vec) {
 ; CHECK-LABEL: test_8xi64_to_2xi64_perm_mask0:
 ; CHECK: # BB#0:
-; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
-; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
 %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <2 x i32> <i32 3, i32 0>
@@ -2681,10 +2681,10 @@ define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mem_mask7(<8 x i64>* %vp) {
 define <2 x i64> @test_8xi64_to_2xi64_perm_mem_mask0(<8 x i64>* %vp) {
 ; CHECK-LABEL: test_8xi64_to_2xi64_perm_mem_mask0:
 ; CHECK: # BB#0:
-; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0
-; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm1
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT: vmovaps (%rdi), %zmm0
+; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm1
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
 %vec = load <8 x i64>, <8 x i64>* %vp
diff --git a/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll b/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
index a687d30d8734..b92161d83f36 100644
--- a/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
+++ b/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
@@ -853,9 +853,9 @@ define <16 x i8> @_clearupper16xi8b(<16 x i8>) nounwind {
 ; SSE-NEXT: andb $15, %r11b
 ; SSE-NEXT: movb %r11b, -{{[0-9]+}}(%rsp)
 ; SSE-NEXT: movb $0, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT: popq %rbx
 ; SSE-NEXT: popq %r14
 ; SSE-NEXT: retq
@@ -1031,9 +1031,9 @@ define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
 ; SSE-NEXT: andb $15, %r11b
 ; SSE-NEXT: movb %r11b, -{{[0-9]+}}(%rsp)
 ; SSE-NEXT: movb $0, -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSE-NEXT: popq %rbx
 ; SSE-NEXT: popq %r14
 ; SSE-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/combine-fcopysign.ll b/llvm/test/CodeGen/X86/combine-fcopysign.ll
index 43e09bfe5fea..c75fde453805 100644
--- a/llvm/test/CodeGen/X86/combine-fcopysign.ll
+++ b/llvm/test/CodeGen/X86/combine-fcopysign.ll
@@ -258,7 +258,7 @@ define <4 x double> @combine_vec_fcopysign_fpext_sgn(<4 x double> %x, <4 x float
 ; SSE-NEXT: andps %xmm8, %xmm0
 ; SSE-NEXT: orps %xmm0, %xmm1
 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
-; SSE-NEXT: movapd %xmm2, %xmm0
+; SSE-NEXT: movaps %xmm2, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: combine_vec_fcopysign_fpext_sgn:
diff --git a/llvm/test/CodeGen/X86/combine-or.ll b/llvm/test/CodeGen/X86/combine-or.ll
index d7f52d247988..6bf22991afe7 100644
--- a/llvm/test/CodeGen/X86/combine-or.ll
+++ b/llvm/test/CodeGen/X86/combine-or.ll
@@ -182,7 +182,7 @@ define <4 x i32> @test13(<4 x i32> %a, <4 x i32> %b) {
 define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test14:
 ; CHECK: # BB#0:
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-NEXT: retq
 %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32>
 %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32>
@@ -207,8 +207,8 @@ define <4 x i32> @test15(<4 x i32> %a, <4 x i32> %b) {
 define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test16:
 ; CHECK: # BB#0:
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
 ; CHECK-NEXT: retq
 %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32>
 %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32>
diff --git a/llvm/test/CodeGen/X86/dagcombine-buildvector.ll b/llvm/test/CodeGen/X86/dagcombine-buildvector.ll
index d60fb734e685..5169e2039b01 100644
--- a/llvm/test/CodeGen/X86/dagcombine-buildvector.ll
+++ b/llvm/test/CodeGen/X86/dagcombine-buildvector.ll
@@ -9,7 +9,7 @@ define void @test(<2 x double>* %dst, <4 x double> %src) nounwind {
 ; CHECK: # BB#0: # %entry
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; CHECK-NEXT: movapd %xmm0, (%eax)
+; CHECK-NEXT: movaps %xmm0, (%eax)
 ; CHECK-NEXT: retl
 entry:
 %tmp7.i = shufflevector <4 x double> %src, <4 x double> undef, <2 x i32> <i32 0, i32 2>
diff --git a/llvm/test/CodeGen/X86/haddsub-undef.ll b/llvm/test/CodeGen/X86/haddsub-undef.ll
index 091d1a22dbcd..8a9bf6458c30 100644
--- a/llvm/test/CodeGen/X86/haddsub-undef.ll
+++ b/llvm/test/CodeGen/X86/haddsub-undef.ll
@@ -172,7 +172,7 @@ define <4 x float> @test8_undef(<4 x float> %a, <4 x float> %b) {
 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; SSE-NEXT: addss %xmm2, %xmm0
 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: movaps %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: test8_undef:
diff --git a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
index 376d18fbb4ee..65318b42f76c 100644
--- a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
+++ b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
@@ -902,16 +902,16 @@ define void @merge_4i32_i32_combine(<4 x i32>* %dst, i32* %src) {
 define <2 x i64> @merge_2i64_i64_12_volatile(i64* %ptr) nounwind uwtable noinline ssp {
 ; SSE-LABEL: merge_2i64_i64_12_volatile:
 ; SSE: # BB#0:
-; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: merge_2i64_i64_12_volatile:
 ; AVX: # BB#0:
-; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT: retq
 ;
 ; X32-SSE1-LABEL: merge_2i64_i64_12_volatile:
diff --git a/llvm/test/CodeGen/X86/mulvi32.ll b/llvm/test/CodeGen/X86/mulvi32.ll
index 58744ad553f6..cb557fef2da9 100644
--- a/llvm/test/CodeGen/X86/mulvi32.ll
+++ b/llvm/test/CodeGen/X86/mulvi32.ll
@@ -308,8 +308,8 @@ define <4 x i64> @_mul4xi32toi64b(<4 x i32>, <4 x i32>) {
 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
 ; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm2[1],xmm0[1]
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm2[1],xmm0[1]
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
diff --git a/llvm/test/CodeGen/X86/oddshuffles.ll b/llvm/test/CodeGen/X86/oddshuffles.ll
index 86bcea4cfbff..72d02ce9b1e2 100644
--- a/llvm/test/CodeGen/X86/oddshuffles.ll
+++ b/llvm/test/CodeGen/X86/oddshuffles.ll
@@ -22,9 +22,9 @@ define void @v3i64(<2 x i64> %a, <2 x i64> %b, <3 x i64>* %p) nounwind {
 ;
 ; AVX1-LABEL: v3i64:
 ; AVX1: # BB#0:
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm0[0],xmm1[0]
 ; AVX1-NEXT: vpextrq $1, %xmm0, 16(%rdi)
-; AVX1-NEXT: vmovapd %xmm1, (%rdi)
+; AVX1-NEXT: vmovdqa %xmm1, (%rdi)
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: v3i64:
diff --git a/llvm/test/CodeGen/X86/scalar-int-to-fp.ll b/llvm/test/CodeGen/X86/scalar-int-to-fp.ll
index c99d3494b8ee..ad1c2d49d23b 100644
--- a/llvm/test/CodeGen/X86/scalar-int-to-fp.ll
+++ b/llvm/test/CodeGen/X86/scalar-int-to-fp.ll
@@ -502,8 +502,8 @@ define double @u64_to_d(i64 %a) nounwind {
 ; AVX512_32-NEXT: movl %esp, %ebp
 ; AVX512_32-NEXT: andl $-8, %esp
 ; AVX512_32-NEXT: subl $8, %esp
-; AVX512_32-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; AVX512_32-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; AVX512_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX512_32-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
 ; AVX512_32-NEXT: vsubpd {{\.LCPI.*}}, %xmm0, %xmm0
 ; AVX512_32-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
 ; AVX512_32-NEXT: vmovlpd %xmm0, (%esp)
@@ -523,8 +523,8 @@ define double @u64_to_d(i64 %a) nounwind {
 ; SSE2_32-NEXT: movl %esp, %ebp
 ; SSE2_32-NEXT: andl $-8, %esp
 ; SSE2_32-NEXT: subl $8, %esp
-; SSE2_32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; SSE2_32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; SSE2_32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE2_32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
 ; SSE2_32-NEXT: subpd {{\.LCPI.*}}, %xmm0
 ; SSE2_32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; SSE2_32-NEXT: addpd %xmm0, %xmm1
diff --git a/llvm/test/CodeGen/X86/sse-align-12.ll b/llvm/test/CodeGen/X86/sse-align-12.ll
index 1ccb5ed4e6f3..688dd56cc00c 100644
--- a/llvm/test/CodeGen/X86/sse-align-12.ll
+++ b/llvm/test/CodeGen/X86/sse-align-12.ll
@@ -54,7 +54,7 @@ define <2 x double> @c(<2 x double>* %y) nounwind {
 define <2 x double> @d(<2 x double>* %y, <2 x double> %z) nounwind {
 ; CHECK-LABEL: d:
 ; CHECK: # BB#0:
-; CHECK-NEXT: movupd (%rdi), %xmm1
+; CHECK-NEXT: movups (%rdi), %xmm1
 ; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; CHECK-NEXT: retq
 %x = load <2 x double>, <2 x double>* %y, align 8
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
index 79fcc9508dd1..836bc885b8cd 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -2256,13 +2256,13 @@ define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4,
 define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
 ; X32-LABEL: test_mm_set_epi32:
 ; X32: # BB#0:
-; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm_set_epi32:
@@ -2288,13 +2288,13 @@ define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind
 define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind {
 ; X32-LABEL: test_mm_set_epi64x:
 ; X32: # BB#0:
-; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm_set_epi64x:
@@ -2319,7 +2319,7 @@ define <2 x double> @test_mm_set_pd(double %a0, double %a1) nounwind {
 ; X64-LABEL: test_mm_set_pd:
 ; X64: # BB#0:
 ; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; X64-NEXT: movapd %xmm1, %xmm0
+; X64-NEXT: movaps %xmm1, %xmm0
 ; X64-NEXT: retq
 %res0 = insertelement <2 x double> undef, double %a1, i32 0
 %res1 = insertelement <2 x double> %res0, double %a0, i32 1
@@ -2665,13 +2665,13 @@ define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4
 define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
 ; X32-LABEL: test_mm_setr_epi32:
 ; X32: # BB#0:
-; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm_setr_epi32:
@@ -2697,13 +2697,13 @@ define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwin
 define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind {
 ; X32-LABEL: test_mm_setr_epi64x:
 ; X32: # BB#0:
-; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm_setr_epi64x:
@@ -3743,12 +3743,12 @@ define <2 x i64> @test_mm_unpackhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
 define <2 x i64> @test_mm_unpackhi_epi32(<2 x i64> %a0, <2 x i64> %a1) {
 ; X32-LABEL: test_mm_unpackhi_epi32:
 ; X32: # BB#0:
-; X32-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X32-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm_unpackhi_epi32:
 ; X64: # BB#0:
-; X64-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X64-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
 ; X64-NEXT: retq
 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
@@ -3760,12 +3760,12 @@ define <2 x i64> @test_mm_unpackhi_epi32(<2 x i64> %a0, <2 x i64> %a1) {
 define <2 x i64> @test_mm_unpackhi_epi64(<2 x i64> %a0, <2 x i64> %a1) {
 ; X32-LABEL: test_mm_unpackhi_epi64:
 ; X32: # BB#0:
-; X32-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; X32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm_unpackhi_epi64:
 ; X64: # BB#0:
-; X64-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; X64-NEXT: retq
 %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
 ret <2 x i64> %res
@@ -3822,12 +3822,12 @@ define <2 x i64> @test_mm_unpacklo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
 define <2 x i64> @test_mm_unpacklo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
 ; X32-LABEL: test_mm_unpacklo_epi32:
 ; X32: # BB#0:
-; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm_unpacklo_epi32:
 ; X64: # BB#0:
-; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; X64-NEXT: retq
 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
@@ -3839,12 +3839,12 @@ define <2 x i64> @test_mm_unpacklo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
 define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) {
 ; X32-LABEL: test_mm_unpacklo_epi64:
 ; X32: # BB#0:
-; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm_unpacklo_epi64:
 ; X64: # BB#0:
-; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-NEXT: retq
 %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
 ret <2 x i64> %res
diff --git a/llvm/test/CodeGen/X86/sse2-schedule.ll b/llvm/test/CodeGen/X86/sse2-schedule.ll
index a4d0f4a245d2..736a622d9d4a 100644
--- a/llvm/test/CodeGen/X86/sse2-schedule.ll
+++ b/llvm/test/CodeGen/X86/sse2-schedule.ll
@@ -2864,13 +2864,13 @@ define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) {
 ; GENERIC-LABEL: test_movsd_reg:
 ; GENERIC: # BB#0:
 ; GENERIC-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
-; GENERIC-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
+; GENERIC-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; ATOM-LABEL: test_movsd_reg:
 ; ATOM: # BB#0:
 ; ATOM-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
-; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
 ; ATOM-NEXT: nop # sched: [1:0.50]
 ; ATOM-NEXT: nop # sched: [1:0.50]
 ; ATOM-NEXT: nop # sched: [1:0.50]
@@ -2880,7 +2880,7 @@ define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) {
 ; SLM-LABEL: test_movsd_reg:
 ; SLM: # BB#0:
 ; SLM-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
-; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
+; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
 ; SLM-NEXT: retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_movsd_reg:
diff --git a/llvm/test/CodeGen/X86/sse2.ll b/llvm/test/CodeGen/X86/sse2.ll
index 5e7def9150e9..72af5db98380 100644
--- a/llvm/test/CodeGen/X86/sse2.ll
+++ b/llvm/test/CodeGen/X86/sse2.ll
@@ -39,9 +39,9 @@ define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
 ;
 ; X64-LABEL: test2:
 ; X64: # BB#0:
-; X64-NEXT: movapd (%rsi), %xmm1
+; X64-NEXT: movaps (%rsi), %xmm1
 ; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; X64-NEXT: movapd %xmm1, (%rdi)
+; X64-NEXT: movaps %xmm1, (%rdi)
 ; X64-NEXT: retq
 %tmp3 = load <2 x double>, <2 x double>* %A, align 16
 %tmp7 = insertelement <2 x double> undef, double %B, i32 0
@@ -340,13 +340,13 @@ define <4 x float> @test15(<4 x float>* %x, <4 x float>* %y) nounwind {
 ; X86: # BB#0: # %entry
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movapd (%ecx), %xmm0
+; X86-NEXT: movaps (%ecx), %xmm0
 ; X86-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test15:
 ; X64: # BB#0: # %entry
-; X64-NEXT: movapd (%rdi), %xmm0
+; X64-NEXT: movaps (%rdi), %xmm0
 ; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
 ; X64-NEXT: retq
 entry:
@@ -362,13 +362,13 @@ define <2 x double> @test16(<4 x double> * nocapture %srcA, <2 x double>* nocap
 ; X86-LABEL: test16:
 ; X86: # BB#0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movapd 96(%eax), %xmm0
+; X86-NEXT: movaps 96(%eax), %xmm0
 ; X86-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test16:
 ; X64: # BB#0:
-; X64-NEXT: movapd 96(%rdi), %xmm0
+; X64-NEXT: movaps 96(%rdi), %xmm0
 ; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
 ; X64-NEXT: retq
 %i5 = getelementptr inbounds <4 x double>, <4 x double>* %srcA, i32 3
diff --git a/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll b/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll
index b5aa26f532ef..38d535589aa7 100644
--- a/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll
+++ b/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll
@@ -343,7 +343,7 @@ define <4 x float> @test14(<4 x float> %A, <4 x float> %B) {
 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: subss %xmm1, %xmm0
 ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
-; SSE-NEXT: movapd %xmm2, %xmm0
+; SSE-NEXT: movaps %xmm2, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: test14:
@@ -418,7 +418,7 @@ define <4 x float> @test16(<4 x float> %A, <4 x float> %B) {
 ; SSE-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
 ; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
 ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; SSE-NEXT: movapd %xmm2, %xmm0
+; SSE-NEXT: movaps %xmm2, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: test16:
diff --git a/llvm/test/CodeGen/X86/sse3.ll b/llvm/test/CodeGen/X86/sse3.ll
index 1e7b9da6a321..3e9b06a57b94 100644
--- a/llvm/test/CodeGen/X86/sse3.ll
+++ b/llvm/test/CodeGen/X86/sse3.ll
@@ -128,14 +128,14 @@ define <8 x i16> @t4(<8 x i16> %A, <8 x i16> %B) nounwind {
 define <8 x i16> @t5(<8 x i16> %A, <8 x i16> %B) nounwind {
 ; X86-LABEL: t5:
 ; X86: # BB#0:
-; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X86-NEXT: movdqa %xmm1, %xmm0
+; X86-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-NEXT: movaps %xmm1, %xmm0
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: t5:
 ; X64: # BB#0:
-; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X64-NEXT: movdqa %xmm1, %xmm0
+; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT: movaps %xmm1, %xmm0
 ; X64-NEXT: retq
 %tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 0, i32 1, i32 10, i32 11, i32 2, i32 3 >
 ret <8 x i16> %tmp
@@ -402,16 +402,16 @@ define <4 x i32> @t17() nounwind {
 ; X86: # BB#0: # %entry
 ; X86-NEXT: movaps (%eax), %xmm0
 ; X86-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; X86-NEXT: pxor %xmm1, %xmm1
-; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-NEXT: xorps %xmm1, %xmm1
+; X86-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: t17:
 ; X64: # BB#0: # %entry
 ; X64-NEXT: movaps (%rax), %xmm0
 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; X64-NEXT: pxor %xmm1, %xmm1
-; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-NEXT: xorps %xmm1, %xmm1
+; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; X64-NEXT: retq
 entry:
 %tmp1 = load <4 x float>, <4 x float>* undef, align 16
diff --git a/llvm/test/CodeGen/X86/sse41.ll b/llvm/test/CodeGen/X86/sse41.ll
index 4a0dc9c1eb17..3ed93a958e29 100644
--- a/llvm/test/CodeGen/X86/sse41.ll
+++ b/llvm/test/CodeGen/X86/sse41.ll
@@ -936,14 +936,14 @@ define <4 x float> @insertps_with_undefs(<4 x float> %a, float* %b) {
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X32-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; X32-NEXT: movapd %xmm1, %xmm0
+; X32-NEXT: movaps %xmm1, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: insertps_with_undefs:
 ; X64: ## BB#0:
 ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; X64-NEXT: movapd %xmm1, %xmm0
+; X64-NEXT: movaps %xmm1, %xmm0
 ; X64-NEXT: retq
 %1 = load float, float* %b, align 4
 %2 = insertelement <4 x float> undef, float %1, i32 0
diff --git a/llvm/test/CodeGen/X86/vec_fp_to_int.ll b/llvm/test/CodeGen/X86/vec_fp_to_int.ll
index 6cfe41ac503d..b307923766fc 100644
--- a/llvm/test/CodeGen/X86/vec_fp_to_int.ll
+++ b/llvm/test/CodeGen/X86/vec_fp_to_int.ll
@@ -110,7 +110,7 @@ define <4 x i32> @fptosi_4f64_to_2i32(<2 x double> %a) {
 ; SSE: # BB#0:
 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm1
 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: fptosi_4f64_to_2i32:
@@ -235,7 +235,7 @@ define <4 x i32> @fptosi_4f64_to_4i32(<4 x double> %a) {
 ; SSE: # BB#0:
 ; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: fptosi_4f64_to_4i32:
@@ -2373,10 +2373,10 @@ define <4 x i32> @fptosi_2f80_to_4i32(<2 x x86_fp80> %a) nounwind {
 ; SSE-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
 ; SSE-NEXT: fistpll -{{[0-9]+}}(%rsp)
 ; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
-; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: xorps %xmm1, %xmm1
 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
 ; SSE-NEXT: retq
 ;
@@ -2386,9 +2386,9 @@ define <4 x i32> @fptosi_2f80_to_4i32(<2 x x86_fp80> %a) nounwind {
 ; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
 ; AVX-NEXT: fisttpll -{{[0-9]+}}(%rsp)
 ; AVX-NEXT: fisttpll -{{[0-9]+}}(%rsp)
-; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
 ; AVX-NEXT: retq
 %cvt = fptosi <2 x x86_fp80> %a to <2 x i32>
diff --git a/llvm/test/CodeGen/X86/vec_insert-2.ll b/llvm/test/CodeGen/X86/vec_insert-2.ll
index 5604049d49ab..bb0951b8eaf7 100644
--- a/llvm/test/CodeGen/X86/vec_insert-2.ll
+++ b/llvm/test/CodeGen/X86/vec_insert-2.ll
@@ -47,7 +47,7 @@ define <2 x double> @t3(double %s, <2 x double> %tmp) nounwind {
 ; X64-LABEL: t3:
 ; X64: # BB#0:
 ; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; X64-NEXT: movapd %xmm1, %xmm0
+; X64-NEXT: movaps %xmm1, %xmm0
 ; X64-NEXT: retq
 %tmp1 = insertelement <2 x double> %tmp, double %s, i32 1
 ret <2 x double> %tmp1
diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
index 38d4afa0950f..add7a0d7dd0c 100644
--- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
@@ -26,7 +26,7 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) {
 ; SSE-NEXT: xorps %xmm0, %xmm0
 ; SSE-NEXT: cvtsi2sdq %rax, %xmm0
 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: movaps %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; VEX-LABEL: sitofp_2i64_to_2f64:
@@ -231,8 +231,8 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
 ; SSE-NEXT: xorps %xmm0, %xmm0
 ; SSE-NEXT: cvtsi2sdq %rax, %xmm0
 ; SSE-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0]
-; SSE-NEXT: movapd %xmm2, %xmm0
-; SSE-NEXT: movapd %xmm3, %xmm1
+; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: movaps %xmm3, %xmm1
 ; SSE-NEXT: retq
 ;
 ; AVX1-LABEL: sitofp_4i64_to_4f64:
@@ -462,12 +462,12 @@ define <2 x double> @uitofp_2i64_to_2f64(<2 x i64> %a) {
 ;
 ; VEX-LABEL: uitofp_2i64_to_2f64:
 ; VEX: # BB#0:
-; VEX-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
-; VEX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; VEX-NEXT: vmovapd {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
+; VEX-NEXT: vunpcklps {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; VEX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
 ; VEX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
-; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; VEX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; VEX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; VEX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; VEX-NEXT: vsubpd %xmm3, %xmm0, %xmm0
 ; VEX-NEXT: vhaddpd %xmm0, %xmm2, %xmm0
 ; VEX-NEXT: retq
@@ -769,45 +769,25 @@ define <4 x double> @uitofp_4i64_to_4f64(<4 x i64> %a) {
 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; SSE-NEXT: retq
 ;
-; AVX1-LABEL: uitofp_4i64_to_4f64:
-; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
-; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; AVX1-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
-; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; AVX1-NEXT: vsubpd %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; AVX1-NEXT: vsubpd %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: uitofp_4i64_to_4f64:
-; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
-; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; AVX2-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
-; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; AVX2-NEXT: vsubpd %xmm4, %xmm1, %xmm1
-; AVX2-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
-; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; AVX2-NEXT: vsubpd %xmm4, %xmm0, %xmm0
-; AVX2-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
-; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; VEX-LABEL: uitofp_4i64_to_4f64:
+; VEX: # BB#0:
+; VEX-NEXT: vextractf128 $1, %ymm0, %xmm1
+; VEX-NEXT: vmovapd {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
+; VEX-NEXT: vunpcklps {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; VEX-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
+; VEX-NEXT: vsubpd %xmm4, %xmm3, %xmm3
+; VEX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; VEX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; VEX-NEXT: vsubpd %xmm4, %xmm1, %xmm1
+; VEX-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
+; VEX-NEXT: vunpcklps {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; VEX-NEXT: vsubpd %xmm4, %xmm3, %xmm3
+; VEX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; VEX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; VEX-NEXT: vsubpd %xmm4, %xmm0, %xmm0
+; VEX-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
+; VEX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; VEX-NEXT: retq
 ;
 ; AVX512F-LABEL: uitofp_4i64_to_4f64:
 ; AVX512F: # BB#0:
@@ -1117,7 +1097,7 @@ define <4 x float> @sitofp_2i64_to_4f32_zero(<2 x i64> %a) {
 ; SSE-NEXT: movq %xmm0, %rax
 ; SSE-NEXT: xorps %xmm0, %xmm0
 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
 ; SSE-NEXT: retq
 ;
@@ -1383,7 +1363,7 @@ define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) {
 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: movaps %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX1-LABEL: sitofp_4i64_to_4f32:
@@ -2210,7 +2190,7 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
 ; SSE-NEXT: .LBB47_12:
 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: movaps %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX1-LABEL: uitofp_4i64_to_4f32:
@@ -2919,13 +2899,13 @@ define <2 x double> @uitofp_load_2i64_to_2f64(<2 x i64> *%a) {
 ;
 ; VEX-LABEL: uitofp_load_2i64_to_2f64:
 ; VEX: # BB#0:
-; VEX-NEXT: vmovdqa (%rdi), %xmm0
-; VEX-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
-; VEX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; VEX-NEXT: vmovapd (%rdi), %xmm0
+; VEX-NEXT: vmovapd {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
+; VEX-NEXT: vunpcklps {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; VEX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
 ; VEX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
-; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; VEX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; VEX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; VEX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; VEX-NEXT: vsubpd %xmm3, %xmm0, %xmm0
 ; VEX-NEXT: vhaddpd %xmm0, %xmm2, %xmm0
 ; VEX-NEXT: retq
@@ -3129,47 +3109,26 @@ define <4 x double> @uitofp_load_4i64_to_4f64(<4 x i64> *%a) {
 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; SSE-NEXT: retq
 ;
-; AVX1-LABEL: uitofp_load_4i64_to_4f64:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa (%rdi), %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
-; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; AVX1-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
-; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; AVX1-NEXT: vsubpd %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; AVX1-NEXT: vsubpd %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: uitofp_load_4i64_to_4f64:
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa (%rdi), %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
-; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; AVX2-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
-; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; AVX2-NEXT: vsubpd %xmm4, %xmm1, %xmm1
-; AVX2-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
-; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; AVX2-NEXT: vsubpd %xmm4, %xmm0, %xmm0
-; AVX2-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
-; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; VEX-LABEL: uitofp_load_4i64_to_4f64:
+; VEX: # BB#0:
+; VEX-NEXT: vmovapd (%rdi), %ymm0
+; VEX-NEXT: vextractf128 $1, %ymm0, %xmm1
+; VEX-NEXT: vmovapd {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
+; VEX-NEXT: vunpcklps {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; VEX-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
+; VEX-NEXT: vsubpd %xmm4, %xmm3, %xmm3
+; VEX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; VEX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; VEX-NEXT: vsubpd %xmm4, %xmm1, %xmm1
+; VEX-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
+; VEX-NEXT: vunpcklps {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; VEX-NEXT: vsubpd %xmm4, %xmm3, %xmm3
+; VEX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; VEX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; VEX-NEXT: vsubpd %xmm4, %xmm0, %xmm0
+; VEX-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
+; VEX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; VEX-NEXT: retq
 ;
 ; AVX512F-LABEL: uitofp_load_4i64_to_4f64:
 ; AVX512F: # BB#0:
diff --git a/llvm/test/CodeGen/X86/vector-interleave.ll b/llvm/test/CodeGen/X86/vector-interleave.ll
index cadb02c6b5f3..9324398ff0eb 100644
--- a/llvm/test/CodeGen/X86/vector-interleave.ll
+++ b/llvm/test/CodeGen/X86/vector-interleave.ll
@@ -63,18 +63,18 @@ define <64 x i16> @interleave8x8(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x
 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
-; AVX1-NEXT: vunpckhps {{.*#+}} xmm9 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; AVX1-NEXT: vunpcklps {{.*#+}} xmm2 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; AVX1-NEXT: vunpckhps {{.*#+}} xmm3 = xmm8[2],xmm1[2],xmm8[3],xmm1[3]
-; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm8[0],xmm1[0],xmm8[1],xmm1[1]
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm9 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm3 = xmm8[2],xmm1[2],xmm8[3],xmm1[3]
+; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm8[0],xmm1[0],xmm8[1],xmm1[1]
 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm7[0],xmm6[0],xmm7[1],xmm6[1],xmm7[2],xmm6[2],xmm7[3],xmm6[3]
 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm6 = xmm7[4],xmm6[4],xmm7[5],xmm6[5],xmm7[6],xmm6[6],xmm7[7],xmm6[7]
-; AVX1-NEXT: vunpckhps {{.*#+}} xmm7 = xmm4[2],xmm6[2],xmm4[3],xmm6[3]
-; AVX1-NEXT: vunpcklps {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1]
-; AVX1-NEXT: vunpckhps {{.*#+}} xmm6 = xmm1[2],xmm5[2],xmm1[3],xmm5[3]
-; AVX1-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1]
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm7 = xmm4[2],xmm6[2],xmm4[3],xmm6[3]
+; AVX1-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1]
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm6 = xmm1[2],xmm5[2],xmm1[3],xmm5[3]
+; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1]
 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
 ; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/vector-rem.ll b/llvm/test/CodeGen/X86/vector-rem.ll
index 3e3e93a7d5b0..aa8d42ae20a4 100644
--- a/llvm/test/CodeGen/X86/vector-rem.ll
+++ b/llvm/test/CodeGen/X86/vector-rem.ll
@@ -108,7 +108,7 @@ define <4 x float> @qux(<4 x float> %t, <4 x float> %u) nounwind {
 ; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; CHECK-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
 ; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
-; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: movaps %xmm1, %xmm0
 ; CHECK-NEXT: addq $72, %rsp
 ; CHECK-NEXT: retq
 %m = frem <4 x float> %t, %u
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
index d9a942bb9a06..7c7ab0166646 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
@@ -303,12 +303,12 @@ define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
 define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
 ; SSE-LABEL: shuffle_v2i64_02:
 ; SSE: # BB#0:
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: shuffle_v2i64_02:
 ; AVX: # BB#0:
-; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT: retq
 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
 ret <2 x i64> %shuffle
@@ -316,13 +316,13 @@ define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
 define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
 ; SSE-LABEL: shuffle_v2i64_02_copy:
 ; SSE: # BB#0:
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE-NEXT: movaps %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: shuffle_v2i64_02_copy:
 ; AVX: # BB#0:
-; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm2[0]
 ; AVX-NEXT: retq
 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
 ret <2 x i64> %shuffle
@@ -475,12 +475,12 @@ define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
 define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
 ; SSE-LABEL: shuffle_v2i64_13:
 ; SSE: # BB#0:
-; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: shuffle_v2i64_13:
 ; AVX: # BB#0:
-; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; AVX-NEXT: retq
 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
 ret <2 x i64> %shuffle
@@ -488,13 +488,13 @@ define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
 define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
 ; SSE-LABEL: shuffle_v2i64_13_copy:
 ; SSE: # BB#0:
-; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
-; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
+; SSE-NEXT: movaps %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: shuffle_v2i64_13_copy:
 ; AVX: # BB#0:
-; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
+; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm2[1]
 ; AVX-NEXT: retq
 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
 ret <2 x i64> %shuffle
@@ -502,13 +502,13 @@ define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
 define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
 ; SSE-LABEL: shuffle_v2i64_20:
 ; SSE: # BB#0:
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: movaps %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: shuffle_v2i64_20:
 ; AVX: # BB#0:
-; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX-NEXT: retq
 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
 ret <2 x i64> %shuffle
@@ -516,13 +516,13 @@ define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
 define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
 ; SSE-LABEL: shuffle_v2i64_20_copy:
 ; SSE: # BB#0:
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
-; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; SSE-NEXT: movaps %xmm2, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: shuffle_v2i64_20_copy:
 ; AVX: # BB#0:
-; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm1[0]
 ; AVX-NEXT: retq
 %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
 ret <2 x i64> %shuffle
@@ -672,13 +672,13 @@ define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
 define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
 ; SSE-LABEL: shuffle_v2i64_31:
 ; SSE: # BB#0:
-; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE-NEXT: movaps %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: shuffle_v2i64_31:
 ; AVX: # BB#0:
-; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
{{.*#+}} xmm0 = xmm1[1],xmm0[1] +; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] ; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle @@ -686,13 +686,13 @@ define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) { define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE-LABEL: shuffle_v2i64_31_copy: ; SSE: # BB#0: -; SSE-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1] -; SSE-NEXT: movdqa %xmm2, %xmm0 +; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] +; SSE-NEXT: movaps %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_31_copy: ; AVX: # BB#0: -; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1] +; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm2[1],xmm1[1] ; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %shuffle @@ -803,26 +803,26 @@ define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) { define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) { ; SSE-LABEL: shuffle_v2f64_1z: ; SSE: # BB#0: -; SSE-NEXT: xorpd %xmm1, %xmm1 +; SSE-NEXT: xorps %xmm1, %xmm1 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE-NEXT: retq ; ; AVX1-LABEL: shuffle_v2f64_1z: ; AVX1: # BB#0: -; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; AVX1-NEXT: retq ; ; AVX2-LABEL: shuffle_v2f64_1z: ; AVX2: # BB#0: -; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v2f64_1z: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] +; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; AVX512VL-NEXT: retq %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> ret <2 x double> %shuffle @@ -831,27 +831,27 @@ define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) { define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) { ; SSE-LABEL: shuffle_v2f64_z0: ; SSE: # BB#0: -; SSE-NEXT: xorpd %xmm1, %xmm1 +; SSE-NEXT: xorps %xmm1, %xmm1 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] -; SSE-NEXT: movapd %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX1-LABEL: shuffle_v2f64_z0: ; AVX1: # BB#0: -; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX1-NEXT: retq ; ; AVX2-LABEL: shuffle_v2f64_z0: ; AVX2: # BB#0: -; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v2f64_z0: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX512VL-NEXT: retq %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> ret <2 x double> %shuffle @@ -906,26 +906,26 @@ define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) { define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) { ; SSE-LABEL: shuffle_v2f64_bitcast_1z: ; SSE: # BB#0: -; SSE-NEXT: xorpd %xmm1, %xmm1 +; SSE-NEXT: xorps %xmm1, %xmm1 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE-NEXT: retq ; ; AVX1-LABEL: shuffle_v2f64_bitcast_1z: ; AVX1: # BB#0: -; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vxorps 
%xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; AVX1-NEXT: retq ; ; AVX2-LABEL: shuffle_v2f64_bitcast_1z: ; AVX2: # BB#0: -; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512VL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] +; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; AVX512VL-NEXT: retq %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float> @@ -1154,14 +1154,14 @@ define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) { define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) { ; SSE-LABEL: insert_mem_hi_v2i64: ; SSE: # BB#0: -; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero -; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX-LABEL: insert_mem_hi_v2i64: ; AVX: # BB#0: -; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: retq %a = load i64, i64* %ptr %v = insertelement <2 x i64> undef, i64 %a, i32 0 @@ -1232,7 +1232,7 @@ define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) { ; SSE-LABEL: insert_reg_hi_v2f64: ; SSE: # BB#0: ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] -; SSE-NEXT: movapd %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: insert_reg_hi_v2f64: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll index d968f6054cb2..418f8881c8a6 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll @@ -499,12 +499,12 @@ define <4 x i32> @shuffle_v4i32_4012(<4 x i32> %a, <4 x i32> %b) { define <4 x i32> @shuffle_v4i32_0145(<4 x i32> %a, <4 x i32> %b) { ; SSE-LABEL: shuffle_v4i32_0145: ; SSE: # BB#0: -; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v4i32_0145: ; AVX: # BB#0: -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle @@ -554,13 +554,13 @@ define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) { define <4 x i32> @shuffle_v4i32_4501(<4 x i32> %a, <4 x i32> %b) { ; SSE-LABEL: shuffle_v4i32_4501: ; SSE: # BB#0: -; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] -; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v4i32_4501: ; AVX: # BB#0: -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle @@ -1525,13 +1525,13 @@ define <4 x i32> @shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b) { define <4 x i32> @shuffle_v4i32_40u1(<4 x i32> %a, <4 x i32> %b) { ; SSE-LABEL: shuffle_v4i32_40u1: ; SSE: # BB#0: -; SSE-NEXT: punpckldq {{.*#+}} xmm1 = 
xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v4i32_40u1: ; AVX: # BB#0: -; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %shuffle @@ -1602,20 +1602,20 @@ define <4 x i32> @shuffle_v4i32_0u1u(<4 x i32> %a, <4 x i32> %b) { define <4 x i32> @shuffle_v4i32_0z1z(<4 x i32> %a) { ; SSE2-LABEL: shuffle_v4i32_0z1z: ; SSE2: # BB#0: -; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: xorps %xmm1, %xmm1 +; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v4i32_0z1z: ; SSE3: # BB#0: -; SSE3-NEXT: pxor %xmm1, %xmm1 -; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE3-NEXT: xorps %xmm1, %xmm1 +; SSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v4i32_0z1z: ; SSSE3: # BB#0: -; SSSE3-NEXT: pxor %xmm1, %xmm1 -; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSSE3-NEXT: xorps %xmm1, %xmm1 +; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_v4i32_0z1z: @@ -1796,12 +1796,12 @@ define <4 x i32> @shuffle_v4i32_0zz3(<4 x i32> %a) { define <4 x i32> @shuffle_v4i32_bitcast_0415(<4 x i32> %a, <4 x i32> %b) { ; SSE-LABEL: shuffle_v4i32_bitcast_0415: ; SSE: # BB#0: -; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v4i32_bitcast_0415: ; AVX: # BB#0: -; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; AVX-NEXT: retq %shuffle32 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> %bitcast64 = bitcast <4 x i32> %shuffle32 to <2 x double> @@ -2206,14 +2206,14 @@ define <4 x i32> @insert_reg_hi_v4i32(i64 %a, <4 x i32> %b) { define <4 x i32> @insert_mem_hi_v4i32(<2 x i32>* %ptr, <4 x i32> %b) { ; SSE-LABEL: insert_mem_hi_v4i32: ; SSE: # BB#0: -; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero -; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX1OR2-LABEL: insert_mem_hi_v4i32: ; AVX1OR2: # BB#0: -; AVX1OR2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; AVX1OR2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX1OR2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX1OR2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX1OR2-NEXT: retq ; ; AVX512VL-LABEL: insert_mem_hi_v4i32: @@ -2286,7 +2286,7 @@ define <4 x float> @insert_reg_hi_v4f32(double %a, <4 x float> %b) { ; SSE-LABEL: insert_reg_hi_v4f32: ; SSE: # BB#0: ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] -; SSE-NEXT: movapd %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: insert_reg_hi_v4f32: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll index 2d72f8ae5c7c..1a7a5010257a 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll @@ 
-1090,7 +1090,7 @@ define <4 x i64> @shuffle_v4i64_0415(<4 x i64> %a, <4 x i64> %b) { define <4 x i64> @shuffle_v4i64_z4z6(<4 x i64> %a) { ; AVX1-LABEL: shuffle_v4i64_z4z6: ; AVX1: # BB#0: -; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2] ; AVX1-NEXT: retq ; @@ -1110,7 +1110,7 @@ define <4 x i64> @shuffle_v4i64_z4z6(<4 x i64> %a) { define <4 x i64> @shuffle_v4i64_5zuz(<4 x i64> %a) { ; AVX1-LABEL: shuffle_v4i64_5zuz: ; AVX1: # BB#0: -; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] ; AVX1-NEXT: retq ; @@ -1128,20 +1128,10 @@ define <4 x i64> @shuffle_v4i64_5zuz(<4 x i64> %a) { } define <4 x i64> @shuffle_v4i64_40u2(<4 x i64> %a, <4 x i64> %b) { -; AVX1-LABEL: shuffle_v4i64_40u2: -; AVX1: # BB#0: -; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v4i64_40u2: -; AVX2: # BB#0: -; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v4i64_40u2: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2] -; AVX512VL-NEXT: retq +; ALL-LABEL: shuffle_v4i64_40u2: +; ALL: # BB#0: +; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2] +; ALL-NEXT: retq %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> ret <4 x i64> %shuffle } @@ -1149,7 +1139,7 @@ define <4 x i64> @shuffle_v4i64_40u2(<4 x i64> %a, <4 x i64> %b) { define <4 x i64> @shuffle_v4i64_15uu(<4 x i64> %a, <4 x i64> %b) { ; ALL-LABEL: shuffle_v4i64_15uu: ; ALL: # BB#0: -; ALL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] +; ALL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; ALL-NEXT: retq %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> ret <4 x i64> %shuffle @@ -1423,20 +1413,10 @@ define <4 x double> @broadcast_v4f64_0000_from_v2i64(<2 x i64> %a0) { } define <4 x double> @bitcast_v4f64_0426(<4 x double> %a, <4 x double> %b) { -; AVX1-LABEL: bitcast_v4f64_0426: -; AVX1: # BB#0: -; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] -; AVX1-NEXT: retq -; -; AVX2-LABEL: bitcast_v4f64_0426: -; AVX2: # BB#0: -; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: bitcast_v4f64_0426: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] -; AVX512VL-NEXT: retq +; ALL-LABEL: bitcast_v4f64_0426: +; ALL: # BB#0: +; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; ALL-NEXT: retq %shuffle64 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> %bitcast32 = bitcast <4 x double> %shuffle64 to <8 x float> %shuffle32 = shufflevector <8 x float> %bitcast32, <8 x float> undef, <8 x i32> diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll index ecd1201d113b..477e9af23428 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll @@ -1126,11 +1126,17 @@ define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) { ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2OR512VL-LABEL: shuffle_v8i32_08080808: -; AVX2OR512VL: # BB#0: -; AVX2OR512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX2OR512VL-NEXT: vpbroadcastq %xmm0, %ymm0 
-; AVX2OR512VL-NEXT: retq +; AVX2-LABEL: shuffle_v8i32_08080808: +; AVX2: # BB#0: +; AVX2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0 +; AVX2-NEXT: retq +; +; AVX512VL-LABEL: shuffle_v8i32_08080808: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX512VL-NEXT: vpbroadcastq %xmm0, %ymm0 +; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> ret <8 x i32> %shuffle } @@ -1180,47 +1186,30 @@ define <8 x i32> @shuffle_v8i32_9810dc54(<8 x i32> %a, <8 x i32> %b) { } define <8 x i32> @shuffle_v8i32_08194c5d(<8 x i32> %a, <8 x i32> %b) { -; AVX1-LABEL: shuffle_v8i32_08194c5d: -; AVX1: # BB#0: -; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; AVX1-NEXT: retq -; -; AVX2OR512VL-LABEL: shuffle_v8i32_08194c5d: -; AVX2OR512VL: # BB#0: -; AVX2OR512VL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; AVX2OR512VL-NEXT: retq +; ALL-LABEL: shuffle_v8i32_08194c5d: +; ALL: # BB#0: +; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> ret <8 x i32> %shuffle } define <8 x i32> @shuffle_v8i32_2a3b6e7f(<8 x i32> %a, <8 x i32> %b) { -; AVX1-LABEL: shuffle_v8i32_2a3b6e7f: -; AVX1: # BB#0: -; AVX1-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; AVX1-NEXT: retq -; -; AVX2OR512VL-LABEL: shuffle_v8i32_2a3b6e7f: -; AVX2OR512VL: # BB#0: -; AVX2OR512VL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; AVX2OR512VL-NEXT: retq +; ALL-LABEL: shuffle_v8i32_2a3b6e7f: +; ALL: # BB#0: +; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> ret <8 x i32> %shuffle } define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) { -; AVX1-LABEL: shuffle_v8i32_08192a3b: -; AVX1: # BB#0: -; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v8i32_08192a3b: -; AVX2: # BB#0: -; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 -; AVX2-NEXT: retq +; AVX1OR2-LABEL: shuffle_v8i32_08192a3b: +; AVX1OR2: # BB#0: +; AVX1OR2-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX1OR2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX1OR2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 +; AVX1OR2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v8i32_08192a3b: ; AVX512VL: # BB#0: @@ -1901,15 +1890,10 @@ define <8 x i32> @shuffle_v8i32_9ubzdefz(<8 x i32> %a) { } define <8 x i32> @shuffle_v8i32_80u1b4uu(<8 x i32> %a, <8 x i32> %b) { -; AVX1-LABEL: shuffle_v8i32_80u1b4uu: -; AVX1: # BB#0: -; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5] -; AVX1-NEXT: retq -; -; AVX2OR512VL-LABEL: shuffle_v8i32_80u1b4uu: -; AVX2OR512VL: # BB#0: -; AVX2OR512VL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5] -; 
AVX2OR512VL-NEXT: retq +; ALL-LABEL: shuffle_v8i32_80u1b4uu: +; ALL: # BB#0: +; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5] +; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> ret <8 x i32> %shuffle } @@ -1936,7 +1920,7 @@ define <8 x i32> @shuffle_v8i32_2222uuuu(<8 x i32> %a, <8 x i32> %b) { define <8 x i32> @shuffle_v8i32_2A3Buuuu(<8 x i32> %a, <8 x i32> %b) { ; ALL-LABEL: shuffle_v8i32_2A3Buuuu: ; ALL: # BB#0: -; ALL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; ALL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; ALL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> ret <8 x i32> %shuffle diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll index bd9011d7bd3f..1aecb33cefb2 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll @@ -76,7 +76,7 @@ define <16 x float> @shuffle_v16f32_01_01_03_00_06_04_05_07_08_08_09_09_15_14_14 define <16 x i32> @shuffle_v16i32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d(<16 x i32> %a, <16 x i32> %b) { ; ALL-LABEL: shuffle_v16i32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d: ; ALL: # BB#0: -; ALL-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] +; ALL-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] ; ALL-NEXT: retq %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> ret <16 x i32> %shuffle @@ -85,8 +85,8 @@ define <16 x i32> @shuffle_v16i32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1 define <16 x i32> @shuffle_v16i32_zz_10_zz_11_zz_14_zz_15_zz_18_zz_19_zz_1c_zz_1d(<16 x i32> %a, <16 x i32> %b) { ; ALL-LABEL: shuffle_v16i32_zz_10_zz_11_zz_14_zz_15_zz_18_zz_19_zz_1c_zz_1d: ; ALL: # BB#0: -; ALL-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; ALL-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] +; ALL-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; ALL-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] ; ALL-NEXT: retq %shuffle = shufflevector <16 x i32> zeroinitializer, <16 x i32> %b, <16 x i32> ret <16 x i32> %shuffle @@ -178,7 +178,7 @@ define <16 x i32> @shuffle_v16i32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_0 define <16 x i32> @shuffle_v16i32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f(<16 x i32> %a, <16 x i32> %b) { ; ALL-LABEL: shuffle_v16i32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f: ; ALL: # BB#0: -; ALL-NEXT: vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; ALL-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] ; ALL-NEXT: retq %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> ret <16 x i32> %shuffle @@ -187,8 +187,8 @@ define <16 x i32> @shuffle_v16i32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1 define <16 x i32> 
@shuffle_v16i32_02_zz_03_zz_06_zz_07_zz_0a_zz_0b_zz_0e_zz_0f_zz(<16 x i32> %a, <16 x i32> %b) { ; ALL-LABEL: shuffle_v16i32_02_zz_03_zz_06_zz_07_zz_0a_zz_0b_zz_0e_zz_0f_zz: ; ALL: # BB#0: -; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; ALL-NEXT: vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] +; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; ALL-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] ; ALL-NEXT: retq %shuffle = shufflevector <16 x i32> %a, <16 x i32> zeroinitializer, <16 x i32> ret <16 x i32> %shuffle diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll index c19b0146db34..a8355fdb43b5 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll @@ -1983,13 +1983,13 @@ define <8 x double> @shuffle_v8f64_0z2z4z6z(<8 x double> %a, <8 x double> %b) { ; ; AVX512F-LABEL: shuffle_v8f64_0z2z4z6z: ; AVX512F: # BB#0: -; AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8f64_0z2z4z6z: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX512F-32-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; AVX512F-32-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x double> %a, <8 x double> zeroinitializer, <8 x i32> @@ -2000,12 +2000,12 @@ define <8 x i64> @shuffle_v8i64_082a4c6e(<8 x i64> %a, <8 x i64> %b) { ; ; AVX512F-LABEL: shuffle_v8i64_082a4c6e: ; AVX512F: # BB#0: -; AVX512F-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; AVX512F-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8i64_082a4c6e: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; AVX512F-32-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> ret <8 x i64> %shuffle @@ -2015,14 +2015,14 @@ define <8 x i64> @shuffle_v8i64_z8zazcze(<8 x i64> %a, <8 x i64> %b) { ; ; AVX512F-LABEL: shuffle_v8i64_z8zazcze: ; AVX512F: # BB#0: -; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8i64_z8zazcze: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512F-32-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; AVX512F-32-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX512F-32-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x i64> zeroinitializer, <8 x i64> %b, <8 x i32> ret <8 x i64> %shuffle @@ -2047,13 +2047,13 
@@ define <8 x double> @shuffle_v8f64_z9zbzdzf(<8 x double> %a, <8 x double> %b) { ; ; AVX512F-LABEL: shuffle_v8f64_z9zbzdzf: ; AVX512F: # BB#0: -; AVX512F-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; AVX512F-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8f64_z9zbzdzf: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; AVX512F-32-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; AVX512F-32-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x double> zeroinitializer, <8 x double> %b, <8 x i32> @@ -2064,12 +2064,12 @@ define <8 x i64> @shuffle_v8i64_193b5d7f(<8 x i64> %a, <8 x i64> %b) { ; ; AVX512F-LABEL: shuffle_v8i64_193b5d7f: ; AVX512F: # BB#0: -; AVX512F-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; AVX512F-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8i64_193b5d7f: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; AVX512F-32-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> ret <8 x i64> %shuffle @@ -2079,14 +2079,14 @@ define <8 x i64> @shuffle_v8i64_1z3z5z7z(<8 x i64> %a, <8 x i64> %b) { ; ; AVX512F-LABEL: shuffle_v8i64_1z3z5z7z: ; AVX512F: # BB#0: -; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8i64_1z3z5z7z: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-32-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; AVX512F-32-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512F-32-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x i64> %a, <8 x i64> zeroinitializer, <8 x i32> ret <8 x i64> %shuffle diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll index ee2b62babca9..e4b0b6859427 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll @@ -914,12 +914,12 @@ define <16 x float> @combine_vpermt2var_vpermi2var_16f32_as_unpckhps(<16 x float define <16 x i32> @vpermt2var_vpermi2var_16i32_as_unpckldq(<16 x i32> %a0, <16 x i32> %a1) { ; X32-LABEL: vpermt2var_vpermi2var_16i32_as_unpckldq: ; X32: # BB#0: -; X32-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] +; X32-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] ; X32-NEXT: retl ; ; X64-LABEL: vpermt2var_vpermi2var_16i32_as_unpckldq: ; X64: # BB#0: -; X64-NEXT: vpunpckldq {{.*#+}} zmm0 
= zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] +; X64-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] ; X64-NEXT: retq %res0 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %a0, <16 x i32> , <16 x i32> %a1, i16 -1) ret <16 x i32> %res0 diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll index b9a0c1d4a370..f1c994d2f6c4 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll @@ -510,15 +510,15 @@ define <16 x i8> @combine_pshufb_as_unpackhi_undef(<16 x i8> %a0) { define <16 x i8> @combine_pshufb_as_unpacklo_zero(<16 x i8> %a0) { ; SSE-LABEL: combine_pshufb_as_unpacklo_zero: ; SSE: # BB#0: -; SSE-NEXT: pxor %xmm1, %xmm1 -; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: xorps %xmm1, %xmm1 +; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_pshufb_as_unpacklo_zero: ; AVX: # BB#0: -; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; AVX-NEXT: retq %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> ) ret <16 x i8> %1 diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll index dcd8a2e341c5..26e2226a614a 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll @@ -1381,12 +1381,12 @@ define <4 x i32> @combine_test7(<4 x i32> %a, <4 x i32> %b) { define <4 x i32> @combine_test8(<4 x i32> %a, <4 x i32> %b) { ; SSE-LABEL: combine_test8: ; SSE: # BB#0: -; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_test8: ; AVX: # BB#0: -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: retq %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> @@ -1396,13 +1396,13 @@ define <4 x i32> @combine_test8(<4 x i32> %a, <4 x i32> %b) { define <4 x i32> @combine_test9(<4 x i32> %a, <4 x i32> %b) { ; SSE-LABEL: combine_test9: ; SSE: # BB#0: -; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_test9: ; AVX: # BB#0: -; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] ; AVX-NEXT: retq %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> @@ -1578,12 +1578,12 @@ define <4 x i32> @combine_test17(<4 x i32> %a, <4 x i32> %b) { define <4 x i32> @combine_test18(<4 x i32> %a, <4 x i32> %b) { ; SSE-LABEL: combine_test18: ; SSE: # BB#0: -; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_test18: ; AVX: # 
BB#0: -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: retq %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> @@ -1593,12 +1593,12 @@ define <4 x i32> @combine_test18(<4 x i32> %a, <4 x i32> %b) { define <4 x i32> @combine_test19(<4 x i32> %a, <4 x i32> %b) { ; SSE-LABEL: combine_test19: ; SSE: # BB#0: -; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_test19: ; AVX: # BB#0: -; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] +; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; AVX-NEXT: retq %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> @@ -1640,29 +1640,20 @@ define <4 x i32> @combine_test20(<4 x i32> %a, <4 x i32> %b) { define <4 x i32> @combine_test21(<8 x i32> %a, <4 x i32>* %ptr) { ; SSE-LABEL: combine_test21: ; SSE: # BB#0: -; SSE-NEXT: movdqa %xmm0, %xmm2 -; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0] -; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] -; SSE-NEXT: movdqa %xmm2, (%rdi) +; SSE-NEXT: movaps %xmm0, %xmm2 +; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] +; SSE-NEXT: movaps %xmm2, (%rdi) ; SSE-NEXT: retq ; -; AVX1-LABEL: combine_test21: -; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm0[0],xmm1[0] -; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] -; AVX1-NEXT: vmovdqa %xmm2, (%rdi) -; AVX1-NEXT: vzeroupper -; AVX1-NEXT: retq -; -; AVX2-LABEL: combine_test21: -; AVX2: # BB#0: -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm0[0],xmm1[0] -; AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] -; AVX2-NEXT: vmovdqa %xmm2, (%rdi) -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq +; AVX-LABEL: combine_test21: +; AVX: # BB#0: +; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm0[0],xmm1[0] +; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] +; AVX-NEXT: vmovaps %xmm2, (%rdi) +; AVX-NEXT: vzeroupper +; AVX-NEXT: retq %1 = shufflevector <8 x i32> %a, <8 x i32> %a, <4 x i32> %2 = shufflevector <8 x i32> %a, <8 x i32> %a, <4 x i32> store <4 x i32> %1, <4 x i32>* %ptr, align 16 @@ -2100,13 +2091,13 @@ define <4 x float> @combine_blend_123(<4 x float> %a, <4 x float> %b) { define <4 x i32> @combine_test_movhl_1(<4 x i32> %a, <4 x i32> %b) { ; SSE-LABEL: combine_test_movhl_1: ; SSE: # BB#0: -; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_test_movhl_1: ; AVX: # BB#0: -; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] ; AVX-NEXT: retq %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> @@ -2116,13 +2107,13 @@ define <4 x i32> @combine_test_movhl_1(<4 x i32> %a, <4 x i32> %b) { define <4 x i32> @combine_test_movhl_2(<4 x i32> %a, <4 x i32> %b) { ; SSE-LABEL: combine_test_movhl_2: ; SSE: # BB#0: -; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; 
; AVX-LABEL: combine_test_movhl_2: ; AVX: # BB#0: -; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] ; AVX-NEXT: retq %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> @@ -2132,13 +2123,13 @@ define <4 x i32> @combine_test_movhl_2(<4 x i32> %a, <4 x i32> %b) { define <4 x i32> @combine_test_movhl_3(<4 x i32> %a, <4 x i32> %b) { ; SSE-LABEL: combine_test_movhl_3: ; SSE: # BB#0: -; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] +; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_test_movhl_3: ; AVX: # BB#0: -; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] ; AVX-NEXT: retq %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> diff --git a/llvm/test/CodeGen/X86/vector-shuffle-sse4a.ll b/llvm/test/CodeGen/X86/vector-shuffle-sse4a.ll index e458bb6fa52f..64cc9af5a2ae 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-sse4a.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-sse4a.ll @@ -212,14 +212,14 @@ define <8 x i16> @shuf_0zzz1zzz(<8 x i16> %a0) { define <4 x i32> @shuf_0z1z(<4 x i32> %a0) { ; AMD10H-LABEL: shuf_0z1z: ; AMD10H: # BB#0: -; AMD10H-NEXT: pxor %xmm1, %xmm1 -; AMD10H-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AMD10H-NEXT: xorps %xmm1, %xmm1 +; AMD10H-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; AMD10H-NEXT: retq ; ; BTVER1-LABEL: shuf_0z1z: ; BTVER1: # BB#0: -; BTVER1-NEXT: pxor %xmm1, %xmm1 -; BTVER1-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; BTVER1-NEXT: xorps %xmm1, %xmm1 +; BTVER1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; BTVER1-NEXT: retq ; ; BTVER2-LABEL: shuf_0z1z: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-variable-128.ll b/llvm/test/CodeGen/X86/vector-shuffle-variable-128.ll index 5e9e78d0b1f0..3dc9e3f72178 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-variable-128.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-variable-128.ll @@ -42,9 +42,9 @@ define <2 x i64> @var_shuffle_v2i64_v2i64_xx_i64(<2 x i64> %x, i32 %i0, i32 %i1) ; SSE-NEXT: andl $1, %edi ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; SSE-NEXT: andl $1, %esi -; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero -; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX-LABEL: var_shuffle_v2i64_v2i64_xx_i64: @@ -54,9 +54,9 @@ define <2 x i64> @var_shuffle_v2i64_v2i64_xx_i64(<2 x i64> %x, i32 %i0, i32 %i1) ; AVX-NEXT: andl $1, %edi ; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; AVX-NEXT: andl $1, %esi -; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX-NEXT: retq %x0 = extractelement <2 x i64> %x, i32 %i0 %x1 = extractelement <2 x i64> %x, i32 %i1 @@ -162,13 +162,13 @@ define <4 x i32> @var_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32 %i0, i32 %i ; SSE2-NEXT: andl $3, %edx ; SSE2-NEXT: movaps %xmm0, 
-{{[0-9]+}}(%rsp) ; SSE2-NEXT: andl $3, %ecx -; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: var_shuffle_v4i32_v4i32_xxxx_i32: @@ -182,13 +182,13 @@ define <4 x i32> @var_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32 %i0, i32 %i ; SSSE3-NEXT: andl $3, %edx ; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; SSSE3-NEXT: andl $3, %ecx -; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSSE3-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; SSSE3-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: var_shuffle_v4i32_v4i32_xxxx_i32: @@ -733,13 +733,13 @@ define <4 x i32> @mem_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32* %i) nounwi ; SSE2-NEXT: andl $3, %edx ; SSE2-NEXT: movl 12(%rdi), %esi ; SSE2-NEXT: andl $3, %esi -; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: mem_shuffle_v4i32_v4i32_xxxx_i32: @@ -753,13 +753,13 @@ define <4 x i32> @mem_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32* %i) nounwi ; SSSE3-NEXT: andl $3, %edx ; SSSE3-NEXT: movl 12(%rdi), %esi ; SSSE3-NEXT: andl $3, %esi -; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; 
SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSSE3-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; SSSE3-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: mem_shuffle_v4i32_v4i32_xxxx_i32: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-variable-256.ll b/llvm/test/CodeGen/X86/vector-shuffle-variable-256.ll index 70c44c435e41..5320050051ae 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-variable-256.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-variable-256.ll @@ -91,49 +91,27 @@ define <4 x double> @var_shuffle_v4f64_v2f64_xxxx_i64(<2 x double> %x, i64 %i0, } define <4 x i64> @var_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind { -; AVX1-LABEL: var_shuffle_v4i64_v4i64_xxxx_i64: -; AVX1: # BB#0: -; AVX1-NEXT: pushq %rbp -; AVX1-NEXT: movq %rsp, %rbp -; AVX1-NEXT: andq $-32, %rsp -; AVX1-NEXT: subq $64, %rsp -; AVX1-NEXT: andl $3, %edi -; AVX1-NEXT: andl $3, %esi -; AVX1-NEXT: andl $3, %edx -; AVX1-NEXT: andl $3, %ecx -; AVX1-NEXT: vmovaps %ymm0, (%rsp) -; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; AVX1-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: movq %rbp, %rsp -; AVX1-NEXT: popq %rbp -; AVX1-NEXT: retq -; -; AVX2-LABEL: var_shuffle_v4i64_v4i64_xxxx_i64: -; AVX2: # BB#0: -; AVX2-NEXT: pushq %rbp -; AVX2-NEXT: movq %rsp, %rbp -; AVX2-NEXT: andq $-32, %rsp -; AVX2-NEXT: subq $64, %rsp -; AVX2-NEXT: andl $3, %edi -; AVX2-NEXT: andl $3, %esi -; AVX2-NEXT: andl $3, %edx -; AVX2-NEXT: andl $3, %ecx -; AVX2-NEXT: vmovaps %ymm0, (%rsp) -; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; AVX2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero -; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 -; AVX2-NEXT: movq %rbp, %rsp -; AVX2-NEXT: popq %rbp -; AVX2-NEXT: retq +; ALL-LABEL: var_shuffle_v4i64_v4i64_xxxx_i64: +; ALL: # BB#0: +; ALL-NEXT: pushq %rbp +; ALL-NEXT: movq %rsp, %rbp +; ALL-NEXT: andq $-32, %rsp +; ALL-NEXT: subq $64, %rsp +; ALL-NEXT: andl $3, %edi +; ALL-NEXT: andl $3, %esi +; ALL-NEXT: andl $3, %edx +; ALL-NEXT: andl $3, %ecx +; ALL-NEXT: vmovaps %ymm0, (%rsp) +; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; ALL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; ALL-NEXT: movq %rbp, %rsp +; ALL-NEXT: popq 
%rbp +; ALL-NEXT: retq %x0 = extractelement <4 x i64> %x, i64 %i0 %x1 = extractelement <4 x i64> %x, i64 %i1 %x2 = extractelement <4 x i64> %x, i64 %i2 @@ -155,10 +133,10 @@ define <4 x i64> @var_shuffle_v4i64_v4i64_xx00_i64(<4 x i64> %x, i64 %i0, i64 %i ; ALL-NEXT: andl $3, %edi ; ALL-NEXT: andl $3, %esi ; ALL-NEXT: vmovaps %ymm0, (%rsp) -; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; ALL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; ALL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; ALL-NEXT: vmovdqa %xmm0, %xmm0 +; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; ALL-NEXT: vmovaps %xmm0, %xmm0 ; ALL-NEXT: movq %rbp, %rsp ; ALL-NEXT: popq %rbp ; ALL-NEXT: retq @@ -174,37 +152,21 @@ define <4 x i64> @var_shuffle_v4i64_v4i64_xx00_i64(<4 x i64> %x, i64 %i0, i64 %i } define <4 x i64> @var_shuffle_v4i64_v2i64_xxxx_i64(<2 x i64> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind { -; AVX1-LABEL: var_shuffle_v4i64_v2i64_xxxx_i64: -; AVX1: # BB#0: -; AVX1-NEXT: andl $1, %edi -; AVX1-NEXT: andl $1, %esi -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) -; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; AVX1-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: var_shuffle_v4i64_v2i64_xxxx_i64: -; AVX2: # BB#0: -; AVX2-NEXT: andl $1, %edi -; AVX2-NEXT: andl $1, %esi -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; AVX2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero -; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 -; AVX2-NEXT: retq +; ALL-LABEL: var_shuffle_v4i64_v2i64_xxxx_i64: +; ALL: # BB#0: +; ALL-NEXT: andl $1, %edi +; ALL-NEXT: andl $1, %esi +; ALL-NEXT: andl $1, %edx +; ALL-NEXT: andl $1, %ecx +; ALL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; ALL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; ALL-NEXT: retq %x0 = extractelement <2 x i64> %x, i64 %i0 %x1 = extractelement <2 x i64> %x, i64 %i1 %x2 = extractelement <2 x i64> %x, i64 %i2 @@ -631,57 +593,31 @@ define <16 x i16> @var_shuffle_v16i16_v8i16_xxxxxxxxxxxxxxxx_i16(<8 x i16> %x, i ; define <4 x i64> @mem_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64* %i) nounwind { -; AVX1-LABEL: mem_shuffle_v4i64_v4i64_xxxx_i64: -; AVX1: # BB#0: -; AVX1-NEXT: pushq %rbp -; AVX1-NEXT: movq %rsp, %rbp -; AVX1-NEXT: andq $-32, %rsp -; AVX1-NEXT: subq $64, %rsp -; AVX1-NEXT: movq (%rdi), %rax -; AVX1-NEXT: movq 8(%rdi), %rcx -; AVX1-NEXT: andl $3, %eax -; AVX1-NEXT: andl $3, %ecx -; AVX1-NEXT: movq 16(%rdi), %rdx -; AVX1-NEXT: andl $3, %edx -; AVX1-NEXT: movq 24(%rdi), %rsi -; AVX1-NEXT: andl $3, %esi -; AVX1-NEXT: vmovaps 
%ymm0, (%rsp) -; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; AVX1-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: movq %rbp, %rsp -; AVX1-NEXT: popq %rbp -; AVX1-NEXT: retq -; -; AVX2-LABEL: mem_shuffle_v4i64_v4i64_xxxx_i64: -; AVX2: # BB#0: -; AVX2-NEXT: pushq %rbp -; AVX2-NEXT: movq %rsp, %rbp -; AVX2-NEXT: andq $-32, %rsp -; AVX2-NEXT: subq $64, %rsp -; AVX2-NEXT: movq (%rdi), %rax -; AVX2-NEXT: movq 8(%rdi), %rcx -; AVX2-NEXT: andl $3, %eax -; AVX2-NEXT: andl $3, %ecx -; AVX2-NEXT: movq 16(%rdi), %rdx -; AVX2-NEXT: andl $3, %edx -; AVX2-NEXT: movq 24(%rdi), %rsi -; AVX2-NEXT: andl $3, %esi -; AVX2-NEXT: vmovaps %ymm0, (%rsp) -; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; AVX2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero -; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 -; AVX2-NEXT: movq %rbp, %rsp -; AVX2-NEXT: popq %rbp -; AVX2-NEXT: retq +; ALL-LABEL: mem_shuffle_v4i64_v4i64_xxxx_i64: +; ALL: # BB#0: +; ALL-NEXT: pushq %rbp +; ALL-NEXT: movq %rsp, %rbp +; ALL-NEXT: andq $-32, %rsp +; ALL-NEXT: subq $64, %rsp +; ALL-NEXT: movq (%rdi), %rax +; ALL-NEXT: movq 8(%rdi), %rcx +; ALL-NEXT: andl $3, %eax +; ALL-NEXT: andl $3, %ecx +; ALL-NEXT: movq 16(%rdi), %rdx +; ALL-NEXT: andl $3, %edx +; ALL-NEXT: movq 24(%rdi), %rsi +; ALL-NEXT: andl $3, %esi +; ALL-NEXT: vmovaps %ymm0, (%rsp) +; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; ALL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; ALL-NEXT: movq %rbp, %rsp +; ALL-NEXT: popq %rbp +; ALL-NEXT: retq %p0 = getelementptr inbounds i64, i64* %i, i32 0 %p1 = getelementptr inbounds i64, i64* %i, i32 1 %p2 = getelementptr inbounds i64, i64* %i, i32 2 @@ -702,45 +638,25 @@ define <4 x i64> @mem_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64* %i) nounwi } define <4 x i64> @mem_shuffle_v4i64_v2i64_xxxx_i64(<2 x i64> %x, i64* %i) nounwind { -; AVX1-LABEL: mem_shuffle_v4i64_v2i64_xxxx_i64: -; AVX1: # BB#0: -; AVX1-NEXT: movq (%rdi), %rax -; AVX1-NEXT: movq 8(%rdi), %rcx -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: andl $1, %ecx -; AVX1-NEXT: movq 16(%rdi), %rdx -; AVX1-NEXT: andl $1, %edx -; AVX1-NEXT: movq 24(%rdi), %rsi -; AVX1-NEXT: andl $1, %esi -; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) -; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; AVX1-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: mem_shuffle_v4i64_v2i64_xxxx_i64: -; AVX2: # BB#0: -; AVX2-NEXT: movq (%rdi), %rax -; AVX2-NEXT: movq 8(%rdi), %rcx -; AVX2-NEXT: andl $1, %eax -; AVX2-NEXT: andl $1, %ecx -; AVX2-NEXT: movq 16(%rdi), %rdx -; AVX2-NEXT: andl $1, %edx -; AVX2-NEXT: movq 
24(%rdi), %rsi -; AVX2-NEXT: andl $1, %esi -; AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) -; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; AVX2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero -; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 -; AVX2-NEXT: retq +; ALL-LABEL: mem_shuffle_v4i64_v2i64_xxxx_i64: +; ALL: # BB#0: +; ALL-NEXT: movq (%rdi), %rax +; ALL-NEXT: movq 8(%rdi), %rcx +; ALL-NEXT: andl $1, %eax +; ALL-NEXT: andl $1, %ecx +; ALL-NEXT: movq 16(%rdi), %rdx +; ALL-NEXT: andl $1, %edx +; ALL-NEXT: movq 24(%rdi), %rsi +; ALL-NEXT: andl $1, %esi +; ALL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; ALL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; ALL-NEXT: retq %p0 = getelementptr inbounds i64, i64* %i, i32 0 %p1 = getelementptr inbounds i64, i64* %i, i32 1 %p2 = getelementptr inbounds i64, i64* %i, i32 2 diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll index 13d90f4b0d71..b0544169dad7 100644 --- a/llvm/test/CodeGen/X86/vector-zext.ll +++ b/llvm/test/CodeGen/X86/vector-zext.ll @@ -772,14 +772,14 @@ entry: define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp { ; SSE2-LABEL: zext_4i32_to_2i64: ; SSE2: # BB#0: # %entry -; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: xorps %xmm1, %xmm1 +; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: zext_4i32_to_2i64: ; SSSE3: # BB#0: # %entry -; SSSE3-NEXT: pxor %xmm1, %xmm1 -; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSSE3-NEXT: xorps %xmm1, %xmm1 +; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: zext_4i32_to_2i64: @@ -800,18 +800,18 @@ entry: define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp { ; SSE2-LABEL: zext_4i32_to_4i64: ; SSE2: # BB#0: # %entry -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; SSE2-NEXT: movaps %xmm0, %xmm1 +; SSE2-NEXT: xorps %xmm2, %xmm2 +; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: zext_4i32_to_4i64: ; SSSE3: # BB#0: # %entry -; SSSE3-NEXT: movdqa %xmm0, %xmm1 -; SSSE3-NEXT: pxor %xmm2, %xmm2 -; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; SSSE3-NEXT: movaps %xmm0, %xmm1 +; SSSE3-NEXT: xorps %xmm2, %xmm2 +; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: zext_4i32_to_4i64: @@ -847,26 +847,26 @@ entry: define <8 x i64> @zext_8i32_to_8i64(<8 x i32> %A) nounwind uwtable readnone ssp { ; SSE2-LABEL: 
zext_8i32_to_8i64:
 ; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movdqa %xmm1, %xmm3
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm4, %xmm4
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
-; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
-; SSE2-NEXT: movdqa %xmm3, %xmm2
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
-; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
+; SSE2-NEXT: movaps %xmm1, %xmm3
+; SSE2-NEXT: movaps %xmm0, %xmm1
+; SSE2-NEXT: xorps %xmm4, %xmm4
+; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
+; SSE2-NEXT: movaps %xmm3, %xmm2
+; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
+; SSE2-NEXT: unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
 ; SSE2-NEXT: retq
 ;
 ; SSSE3-LABEL: zext_8i32_to_8i64:
 ; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movdqa %xmm1, %xmm3
-; SSSE3-NEXT: movdqa %xmm0, %xmm1
-; SSSE3-NEXT: pxor %xmm4, %xmm4
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
-; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
-; SSSE3-NEXT: movdqa %xmm3, %xmm2
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
-; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
+; SSSE3-NEXT: movaps %xmm1, %xmm3
+; SSSE3-NEXT: movaps %xmm0, %xmm1
+; SSSE3-NEXT: xorps %xmm4, %xmm4
+; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
+; SSSE3-NEXT: movaps %xmm3, %xmm2
+; SSSE3-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
+; SSSE3-NEXT: unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
 ; SSSE3-NEXT: retq
 ;
 ; SSE41-LABEL: zext_8i32_to_8i64:
@@ -1432,16 +1432,16 @@ entry:
 define <2 x i64> @load_zext_2i32_to_2i64(<2 x i32> *%ptr) {
 ; SSE2-LABEL: load_zext_2i32_to_2i64:
 ; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSE2-NEXT: retq
 ;
 ; SSSE3-LABEL: load_zext_2i32_to_2i64:
 ; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; SSSE3-NEXT: pxor %xmm1, %xmm1
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSSE3-NEXT: xorps %xmm1, %xmm1
+; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSSE3-NEXT: retq
 ;
 ; SSE41-LABEL: load_zext_2i32_to_2i64:
@@ -1462,20 +1462,20 @@ entry:
 define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {
 ; SSE2-LABEL: load_zext_4i32_to_4i64:
 ; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movdqa (%rdi), %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE2-NEXT: movaps (%rdi), %xmm1
+; SSE2-NEXT: xorps %xmm2, %xmm2
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
 ; SSE2-NEXT: retq
 ;
 ; SSSE3-LABEL: load_zext_4i32_to_4i64:
 ; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movdqa (%rdi), %xmm1
-; SSSE3-NEXT: pxor %xmm2, %xmm2
-; SSSE3-NEXT: movdqa %xmm1, %xmm0
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSSE3-NEXT: movaps (%rdi), %xmm1
+; SSSE3-NEXT: xorps %xmm2, %xmm2
+; SSSE3-NEXT: movaps %xmm1, %xmm0
+; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
 ; SSSE3-NEXT: retq
 ;
 ; SSE41-LABEL: load_zext_4i32_to_4i64:
@@ -1612,18 +1612,18 @@ entry:
 define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
 ; SSE2-LABEL: shuf_zext_4i32_to_4i64:
 ; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE2-NEXT: movaps %xmm0, %xmm1
+; SSE2-NEXT: xorps %xmm2, %xmm2
+; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
 ; SSE2-NEXT: retq
 ;
 ; SSSE3-LABEL: shuf_zext_4i32_to_4i64:
 ; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movdqa %xmm0, %xmm1
-; SSSE3-NEXT: pxor %xmm2, %xmm2
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSSE3-NEXT: movaps %xmm0, %xmm1
+; SSSE3-NEXT: xorps %xmm2, %xmm2
+; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
 ; SSSE3-NEXT: retq
 ;
 ; SSE41-LABEL: shuf_zext_4i32_to_4i64:
@@ -2032,14 +2032,14 @@ entry:
 define <2 x i64> @shuf_zext_4i32_to_2i64_offset2(<4 x i32> %A) nounwind uwtable readnone ssp {
 ; SSE-LABEL: shuf_zext_4i32_to_2i64_offset2:
 ; SSE: # BB#0: # %entry
-; SSE-NEXT: pxor %xmm1, %xmm1
-; SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: shuf_zext_4i32_to_2i64_offset2:
 ; AVX: # BB#0: # %entry
-; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
 ; AVX-NEXT: retq
 entry:
   %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <4 x i32> 
diff --git a/llvm/test/CodeGen/X86/widen_extract-1.ll b/llvm/test/CodeGen/X86/widen_extract-1.ll
index 332dc940b17b..760c33f5c86a 100644
--- a/llvm/test/CodeGen/X86/widen_extract-1.ll
+++ b/llvm/test/CodeGen/X86/widen_extract-1.ll
@@ -15,7 +15,7 @@ define void @convert(<2 x double>* %dst.addr, <3 x double> %src) {
 ; X64-LABEL: convert:
 ; X64: # BB#0: # %entry
 ; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; X64-NEXT: movapd %xmm0, (%rdi)
+; X64-NEXT: movaps %xmm0, (%rdi)
 ; X64-NEXT: retq
 entry:
   %val = shufflevector <3 x double> %src, <3 x double> undef, <2 x i32> 
diff --git a/llvm/test/CodeGen/X86/x86-interleaved-access.ll b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
index e442d342bdc2..d11ce42767e3 100644
--- a/llvm/test/CodeGen/X86/x86-interleaved-access.ll
+++ b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
@@ -115,10 +115,10 @@ define <4 x double> @load_factorf64_1(<16 x double>* %ptr) {
 define <4 x i64> @load_factori64_4(<16 x i64>* %ptr) {
 ; AVX1-LABEL: load_factori64_4:
 ; AVX1: # BB#0:
-; AVX1-NEXT: vmovupd (%rdi), %ymm0
-; AVX1-NEXT: vmovupd 32(%rdi), %ymm1
-; AVX1-NEXT: vmovupd 64(%rdi), %ymm2
-; AVX1-NEXT: vmovupd 96(%rdi), %ymm3
+; AVX1-NEXT: vmovups (%rdi), %ymm0
+; AVX1-NEXT: vmovups 32(%rdi), %ymm1
+; AVX1-NEXT: vmovups 64(%rdi), %ymm2
+; AVX1-NEXT: vmovups 96(%rdi), %ymm3
 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm0[0,1],ymm2[0,1]
 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm1[0,1],ymm3[0,1]
 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
@@ -180,10 +180,10 @@ define void @store_factorf64_4(<16 x double>* %ptr, <4 x double> %v0, <4 x doubl
 ; AVX1-NEXT: vunpcklpd {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
 ; AVX1-NEXT: vunpckhpd {{.*#+}} ymm4 = ymm4[1],ymm5[1],ymm4[3],ymm5[3]
 ; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
-; AVX1-NEXT: vmovupd %ymm0, 96(%rdi)
-; AVX1-NEXT: vmovupd %ymm3, 64(%rdi)
-; AVX1-NEXT: vmovupd %ymm4, 32(%rdi)
-; AVX1-NEXT: vmovupd %ymm2, (%rdi)
+; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
+; AVX1-NEXT: vmovups %ymm3, 64(%rdi)
+; AVX1-NEXT: vmovups %ymm4, 32(%rdi)
+; AVX1-NEXT: vmovups %ymm2, (%rdi)
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
 ;
@@ -197,10 +197,10 @@ define void @store_factorf64_4(<16 x double>* %ptr, <4 x double> %v0, <4 x doubl
 ; AVX2-NEXT: vunpcklpd {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
 ; AVX2-NEXT: vunpckhpd {{.*#+}} ymm4 = ymm4[1],ymm5[1],ymm4[3],ymm5[3]
 ; AVX2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
-; AVX2-NEXT: vmovupd %ymm0, 96(%rdi)
-; AVX2-NEXT: vmovupd %ymm3, 64(%rdi)
-; AVX2-NEXT: vmovupd %ymm4, 32(%rdi)
-; AVX2-NEXT: vmovupd %ymm2, (%rdi)
+; AVX2-NEXT: vmovups %ymm0, 96(%rdi)
+; AVX2-NEXT: vmovups %ymm3, 64(%rdi)
+; AVX2-NEXT: vmovups %ymm4, 32(%rdi)
+; AVX2-NEXT: vmovups %ymm2, (%rdi)
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
 ;
@@ -216,8 +216,8 @@ define void @store_factorf64_4(<16 x double>* %ptr, <4 x double> %v0, <4 x doubl
 ; AVX512-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
 ; AVX512-NEXT: vinsertf64x4 $1, %ymm4, %zmm2, %zmm1
 ; AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm3, %zmm0
-; AVX512-NEXT: vmovupd %zmm0, 64(%rdi)
-; AVX512-NEXT: vmovupd %zmm1, (%rdi)
+; AVX512-NEXT: vmovups %zmm0, 64(%rdi)
+; AVX512-NEXT: vmovups %zmm1, (%rdi)
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
   %s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> 
@@ -238,44 +238,44 @@ define void @store_factori64_4(<16 x i64>* %ptr, <4 x i64> %v0, <4 x i64> %v1, <
 ; AVX1-NEXT: vunpcklpd {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
 ; AVX1-NEXT: vunpckhpd {{.*#+}} ymm4 = ymm4[1],ymm5[1],ymm4[3],ymm5[3]
 ; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
-; AVX1-NEXT: vmovupd %ymm0, 96(%rdi)
-; AVX1-NEXT: vmovupd %ymm3, 64(%rdi)
-; AVX1-NEXT: vmovupd %ymm4, 32(%rdi)
-; AVX1-NEXT: vmovupd %ymm2, (%rdi)
+; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
+; AVX1-NEXT: vmovups %ymm3, 64(%rdi)
+; AVX1-NEXT: vmovups %ymm4, 32(%rdi)
+; AVX1-NEXT: vmovups %ymm2, (%rdi)
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: store_factori64_4:
 ; AVX2: # BB#0:
-; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm4
-; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm5
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm3[2,3]
-; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm2 = ymm4[0],ymm5[0],ymm4[2],ymm5[2]
-; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
-; AVX2-NEXT: vpunpckhqdq {{.*#+}} ymm4 = ymm4[1],ymm5[1],ymm4[3],ymm5[3]
-; AVX2-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
-; AVX2-NEXT: vmovdqu %ymm0, 96(%rdi)
-; AVX2-NEXT: vmovdqu %ymm3, 64(%rdi)
-; AVX2-NEXT: vmovdqu %ymm4, 32(%rdi)
-; AVX2-NEXT: vmovdqu %ymm2, (%rdi)
+; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm4
+; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm5
+; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
+; AVX2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3],ymm3[2,3]
+; AVX2-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm4[0],ymm5[0],ymm4[2],ymm5[2]
+; AVX2-NEXT: vunpcklpd {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; AVX2-NEXT: vunpckhpd {{.*#+}} ymm4 = ymm4[1],ymm5[1],ymm4[3],ymm5[3]
+; AVX2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; AVX2-NEXT: vmovups %ymm0, 96(%rdi)
+; AVX2-NEXT: vmovups %ymm3, 64(%rdi)
+; AVX2-NEXT: vmovups %ymm4, 32(%rdi)
+; AVX2-NEXT: vmovups %ymm2, (%rdi)
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
 ;
 ; AVX512-LABEL: store_factori64_4:
 ; AVX512: # BB#0:
-; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm4
-; AVX512-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm5
-; AVX512-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
-; AVX512-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm3[2,3]
-; AVX512-NEXT: vpunpcklqdq {{.*#+}} ymm2 = ymm4[0],ymm5[0],ymm4[2],ymm5[2]
-; AVX512-NEXT: vpunpcklqdq {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
-; AVX512-NEXT: vpunpckhqdq {{.*#+}} ymm4 = ymm4[1],ymm5[1],ymm4[3],ymm5[3]
-; AVX512-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
-; AVX512-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm1
-; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0
-; AVX512-NEXT: vmovdqu64 %zmm0, 64(%rdi)
-; AVX512-NEXT: vmovdqu64 %zmm1, (%rdi)
+; AVX512-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm4
+; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm5
+; AVX512-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
+; AVX512-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3],ymm3[2,3]
+; AVX512-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm4[0],ymm5[0],ymm4[2],ymm5[2]
+; AVX512-NEXT: vunpcklpd {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; AVX512-NEXT: vunpckhpd {{.*#+}} ymm4 = ymm4[1],ymm5[1],ymm4[3],ymm5[3]
+; AVX512-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; AVX512-NEXT: vinsertf64x4 $1, %ymm4, %zmm2, %zmm1
+; AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm3, %zmm0
+; AVX512-NEXT: vmovups %zmm0, 64(%rdi)
+; AVX512-NEXT: vmovups %zmm1, (%rdi)
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
   %s0 = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> 
diff --git a/llvm/test/CodeGen/X86/xop-mask-comments.ll b/llvm/test/CodeGen/X86/xop-mask-comments.ll
index 4ba47380f89a..dadb0103e13a 100644
--- a/llvm/test/CodeGen/X86/xop-mask-comments.ll
+++ b/llvm/test/CodeGen/X86/xop-mask-comments.ll
@@ -100,13 +100,13 @@ define <16 x i8> @vpperm_shuffle_general(<16 x i8> %a0, <16 x i8> %a1) {
 define <2 x double> @vpermil2pd_21(<2 x double> %a0, <2 x double> %a1) {
 ; X32-LABEL: vpermil2pd_21:
 ; X32: # BB#0:
-; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X32-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: vpermil2pd_21:
 ; X64: # BB#0:
-; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; X64-NEXT: retq
   %1 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> , i8 2)
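
Note on the test churn above: every hunk is the same mechanical substitution, an integer-domain instruction (movdqa/vmovdqu, pxor/vpxor, punpckl/hdq, punpckl/hqdq, vinserti128, vperm2i128) replaced by its float-domain twin (movaps/vmovups, xorps/vxorps, unpckl/hps, unpckl/hpd, vinsertf128, vperm2f128), once the unpack instructions participate in execution-domain switching. The C++ sketch below illustrates the three-column replaceable-opcode idiom in a self-contained way; the enum values and the equivalentOpcode helper are hypothetical stand-ins for illustration, not LLVM's actual API:

#include <cstdint>
#include <cstdio>

// Execution domains, one per table column.
enum Domain { PackedSingle = 0, PackedDouble = 1, PackedInt = 2 };

// Hypothetical opcode constants standing in for the X86:: enumerators.
enum Opcode : uint16_t { UNPCKLPSrr = 1, UNPCKLPDrr = 2, PUNPCKLDQrr = 3 };

// Each row lists the PS, PD, and integer encodings of one operation.
static const uint16_t Replaceable[][3] = {
    {UNPCKLPSrr, UNPCKLPDrr, PUNPCKLDQrr},
};

// Return the equivalent of `opcode` in domain `dom`, or `opcode` itself
// if no table row mentions it.
static uint16_t equivalentOpcode(uint16_t opcode, Domain dom) {
  for (const uint16_t (&row)[3] : Replaceable)
    for (uint16_t op : row)
      if (op == opcode)
        return row[dom]; // same shuffle semantics, different domain
  return opcode;
}

int main() {
  // Rewriting PUNPCKLDQrr into the float domain yields UNPCKLPSrr (1).
  std::printf("%u\n", equivalentOpcode(PUNPCKLDQrr, PackedSingle));
  return 0;
}

Because one row carries all three encodings of the same shuffle, a domain-fix pass built on such a table can move an opcode in either direction, which is why purely mechanical test updates like the ones above fall out as soon as a row is added.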