[X86] Teach execution domain fixing to convert between FP and int unpack instructions.

llvm-svn: 313508
This commit is contained in:
Craig Topper 2017-09-18 03:29:54 +00:00
parent d4341920d5
commit 87f7381edf
43 changed files with 621 additions and 743 deletions

View File

@ -9335,6 +9335,14 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::ORPSrr, X86::ORPDrr, X86::PORrr },
{ X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
{ X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
{ X86::UNPCKLPDrm, X86::UNPCKLPDrm, X86::PUNPCKLQDQrm },
{ X86::UNPCKLPDrr, X86::UNPCKLPDrr, X86::PUNPCKLQDQrr },
{ X86::UNPCKHPDrm, X86::UNPCKHPDrm, X86::PUNPCKHQDQrm },
{ X86::UNPCKHPDrr, X86::UNPCKHPDrr, X86::PUNPCKHQDQrr },
{ X86::UNPCKLPSrm, X86::UNPCKLPSrm, X86::PUNPCKLDQrm },
{ X86::UNPCKLPSrr, X86::UNPCKLPSrr, X86::PUNPCKLDQrr },
{ X86::UNPCKHPSrm, X86::UNPCKHPSrm, X86::PUNPCKHDQrm },
{ X86::UNPCKHPSrr, X86::UNPCKHPSrr, X86::PUNPCKHDQrr },
// AVX 128-bit support
{ X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr },
{ X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm },
@ -9355,6 +9363,14 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::VORPSrr, X86::VORPDrr, X86::VPORrr },
{ X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
{ X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
{ X86::VUNPCKLPDrm, X86::VUNPCKLPDrm, X86::VPUNPCKLQDQrm },
{ X86::VUNPCKLPDrr, X86::VUNPCKLPDrr, X86::VPUNPCKLQDQrr },
{ X86::VUNPCKHPDrm, X86::VUNPCKHPDrm, X86::VPUNPCKHQDQrm },
{ X86::VUNPCKHPDrr, X86::VUNPCKHPDrr, X86::VPUNPCKHQDQrr },
{ X86::VUNPCKLPSrm, X86::VUNPCKLPSrm, X86::VPUNPCKLDQrm },
{ X86::VUNPCKLPSrr, X86::VUNPCKLPSrr, X86::VPUNPCKLDQrr },
{ X86::VUNPCKHPSrm, X86::VUNPCKHPSrm, X86::VPUNPCKHDQrm },
{ X86::VUNPCKHPSrr, X86::VUNPCKHPSrr, X86::VPUNPCKHDQrr },
// AVX 256-bit support
{ X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr },
{ X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm },
@ -9413,6 +9429,30 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::VPERMILPSZ256ri, X86::VPERMILPSZ256ri, X86::VPSHUFDZ256ri },
{ X86::VPERMILPSZmi, X86::VPERMILPSZmi, X86::VPSHUFDZmi },
{ X86::VPERMILPSZri, X86::VPERMILPSZri, X86::VPSHUFDZri },
{ X86::VUNPCKLPDZ256rm, X86::VUNPCKLPDZ256rm, X86::VPUNPCKLQDQZ256rm },
{ X86::VUNPCKLPDZ256rr, X86::VUNPCKLPDZ256rr, X86::VPUNPCKLQDQZ256rr },
{ X86::VUNPCKHPDZ256rm, X86::VUNPCKHPDZ256rm, X86::VPUNPCKHQDQZ256rm },
{ X86::VUNPCKHPDZ256rr, X86::VUNPCKHPDZ256rr, X86::VPUNPCKHQDQZ256rr },
{ X86::VUNPCKLPSZ256rm, X86::VUNPCKLPSZ256rm, X86::VPUNPCKLDQZ256rm },
{ X86::VUNPCKLPSZ256rr, X86::VUNPCKLPSZ256rr, X86::VPUNPCKLDQZ256rr },
{ X86::VUNPCKHPSZ256rm, X86::VUNPCKHPSZ256rm, X86::VPUNPCKHDQZ256rm },
{ X86::VUNPCKHPSZ256rr, X86::VUNPCKHPSZ256rr, X86::VPUNPCKHDQZ256rr },
{ X86::VUNPCKLPDZ128rm, X86::VUNPCKLPDZ128rm, X86::VPUNPCKLQDQZ128rm },
{ X86::VUNPCKLPDZ128rr, X86::VUNPCKLPDZ128rr, X86::VPUNPCKLQDQZ128rr },
{ X86::VUNPCKHPDZ128rm, X86::VUNPCKHPDZ128rm, X86::VPUNPCKHQDQZ128rm },
{ X86::VUNPCKHPDZ128rr, X86::VUNPCKHPDZ128rr, X86::VPUNPCKHQDQZ128rr },
{ X86::VUNPCKLPSZ128rm, X86::VUNPCKLPSZ128rm, X86::VPUNPCKLDQZ128rm },
{ X86::VUNPCKLPSZ128rr, X86::VUNPCKLPSZ128rr, X86::VPUNPCKLDQZ128rr },
{ X86::VUNPCKHPSZ128rm, X86::VUNPCKHPSZ128rm, X86::VPUNPCKHDQZ128rm },
{ X86::VUNPCKHPSZ128rr, X86::VUNPCKHPSZ128rr, X86::VPUNPCKHDQZ128rr },
{ X86::VUNPCKLPDZrm, X86::VUNPCKLPDZrm, X86::VPUNPCKLQDQZrm },
{ X86::VUNPCKLPDZrr, X86::VUNPCKLPDZrr, X86::VPUNPCKLQDQZrr },
{ X86::VUNPCKHPDZrm, X86::VUNPCKHPDZrm, X86::VPUNPCKHQDQZrm },
{ X86::VUNPCKHPDZrr, X86::VUNPCKHPDZrr, X86::VPUNPCKHQDQZrr },
{ X86::VUNPCKLPSZrm, X86::VUNPCKLPSZrm, X86::VPUNPCKLDQZrm },
{ X86::VUNPCKLPSZrr, X86::VUNPCKLPSZrr, X86::VPUNPCKLDQZrr },
{ X86::VUNPCKHPSZrm, X86::VUNPCKHPSZrm, X86::VPUNPCKHDQZrm },
{ X86::VUNPCKHPSZrr, X86::VUNPCKHPSZrr, X86::VPUNPCKHDQZrr },
};
static const uint16_t ReplaceableInstrsAVX2[][3] = {
@ -9440,6 +9480,14 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = {
{ X86::VBLENDPSYrmi, X86::VBLENDPSYrmi, X86::VPBLENDDYrmi },
{ X86::VPERMILPSYmi, X86::VPERMILPSYmi, X86::VPSHUFDYmi },
{ X86::VPERMILPSYri, X86::VPERMILPSYri, X86::VPSHUFDYri },
{ X86::VUNPCKLPDYrm, X86::VUNPCKLPDYrm, X86::VPUNPCKLQDQYrm },
{ X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrr, X86::VPUNPCKLQDQYrr },
{ X86::VUNPCKHPDYrm, X86::VUNPCKHPDYrm, X86::VPUNPCKHQDQYrm },
{ X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrr, X86::VPUNPCKHQDQYrr },
{ X86::VUNPCKLPSYrm, X86::VUNPCKLPSYrm, X86::VPUNPCKLDQYrm },
{ X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrr, X86::VPUNPCKLDQYrr },
{ X86::VUNPCKHPSYrm, X86::VUNPCKHPSYrm, X86::VPUNPCKHDQYrm },
{ X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrr, X86::VPUNPCKHDQYrr },
};
static const uint16_t ReplaceableInstrsAVX2InsertExtract[][3] = {

View File

@ -6,7 +6,7 @@ define <4 x i64> @test1(<4 x i64> %a) nounwind {
; CHECK-LABEL: test1:
; CHECK: # BB#0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; CHECK-NEXT: retl
%b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
ret <4 x i64>%b

View File

@ -120,7 +120,7 @@ define <4 x i64> @unpackhipd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable
define <4 x i64> @unpackhipd2(<4 x i64>* %src1, <4 x i64>* %src2) nounwind uwtable readnone ssp {
; CHECK-LABEL: unpackhipd2:
; CHECK: # BB#0:
; CHECK-NEXT: vmovapd (%rdi), %ymm0
; CHECK-NEXT: vmovaps (%rdi), %ymm0
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT: retq
%a = load <4 x i64>, <4 x i64>* %src1
@ -162,7 +162,7 @@ define <4 x i64> @unpacklopd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable
define <4 x i64> @unpacklopd2(<4 x i64>* %src1, <4 x i64>* %src2) nounwind uwtable readnone ssp {
; CHECK-LABEL: unpacklopd2:
; CHECK: # BB#0:
; CHECK-NEXT: vmovapd (%rdi), %ymm0
; CHECK-NEXT: vmovaps (%rdi), %ymm0
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
; CHECK-NEXT: retq
%a = load <4 x i64>, <4 x i64>* %src1

View File

@ -3314,12 +3314,12 @@ define <4 x i64> @test_mm256_unpackhi_epi16(<4 x i64> %a0, <4 x i64> %a1) nounwi
define <4 x i64> @test_mm256_unpackhi_epi32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_unpackhi_epi32:
; X32: # BB#0:
; X32-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X32-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_unpackhi_epi32:
; X64: # BB#0:
; X64-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X64-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X64-NEXT: retq
%arg0 = bitcast <4 x i64> %a0 to <8 x i32>
%arg1 = bitcast <4 x i64> %a1 to <8 x i32>
@ -3331,12 +3331,12 @@ define <4 x i64> @test_mm256_unpackhi_epi32(<4 x i64> %a0, <4 x i64> %a1) nounwi
define <4 x i64> @test_mm256_unpackhi_epi64(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_unpackhi_epi64:
; X32: # BB#0:
; X32-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X32-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_unpackhi_epi64:
; X64: # BB#0:
; X64-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X64-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X64-NEXT: retq
%res = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
ret <4 x i64> %res
@ -3379,12 +3379,12 @@ define <4 x i64> @test_mm256_unpacklo_epi16(<4 x i64> %a0, <4 x i64> %a1) nounwi
define <4 x i64> @test_mm256_unpacklo_epi32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_unpacklo_epi32:
; X32: # BB#0:
; X32-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X32-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_unpacklo_epi32:
; X64: # BB#0:
; X64-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X64-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X64-NEXT: retq
%arg0 = bitcast <4 x i64> %a0 to <8 x i32>
%arg1 = bitcast <4 x i64> %a1 to <8 x i32>
@ -3396,12 +3396,12 @@ define <4 x i64> @test_mm256_unpacklo_epi32(<4 x i64> %a0, <4 x i64> %a1) nounwi
define <4 x i64> @test_mm256_unpacklo_epi64(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_unpacklo_epi64:
; X32: # BB#0:
; X32-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X32-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_unpacklo_epi64:
; X64: # BB#0:
; X64-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X64-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X64-NEXT: retq
%res = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
ret <4 x i64> %res

View File

@ -697,12 +697,12 @@ define <8 x double> @test_mm512_maskz_shuffle_pd(i8 %a0, <8 x double> %a1, <8 x
define <8 x i64> @test_mm512_unpackhi_epi32(<8 x i64> %a0, <8 x i64> %a1) {
; X32-LABEL: test_mm512_unpackhi_epi32:
; X32: # BB#0:
; X32-NEXT: vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X32-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_unpackhi_epi32:
; X64: # BB#0:
; X64-NEXT: vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X64-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X64-NEXT: retq
%arg0 = bitcast <8 x i64> %a0 to <16 x i32>
%arg1 = bitcast <8 x i64> %a1 to <16 x i32>
@ -759,12 +759,12 @@ define <8 x i64> @test_mm512_maskz_unpackhi_epi32(i16 %a0, <8 x i64> %a1, <8 x i
define <8 x i64> @test_mm512_unpackhi_epi64(<8 x i64> %a0, <8 x i64> %a1) {
; X32-LABEL: test_mm512_unpackhi_epi64:
; X32: # BB#0:
; X32-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X32-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_unpackhi_epi64:
; X64: # BB#0:
; X64-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X64-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X64-NEXT: retq
%res = shufflevector <8 x i64> %a0, <8 x i64> %a1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
ret <8 x i64> %res
@ -915,12 +915,12 @@ define <16 x float> @test_mm512_maskz_unpackhi_ps(i16 %a0, <16 x float> %a1, <16
define <8 x i64> @test_mm512_unpacklo_epi32(<8 x i64> %a0, <8 x i64> %a1) {
; X32-LABEL: test_mm512_unpacklo_epi32:
; X32: # BB#0:
; X32-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X32-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_unpacklo_epi32:
; X64: # BB#0:
; X64-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X64-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X64-NEXT: retq
%arg0 = bitcast <8 x i64> %a0 to <16 x i32>
%arg1 = bitcast <8 x i64> %a1 to <16 x i32>
@ -977,12 +977,12 @@ define <8 x i64> @test_mm512_maskz_unpacklo_epi32(i16 %a0, <8 x i64> %a1, <8 x i
define <8 x i64> @test_mm512_unpacklo_epi64(<8 x i64> %a0, <8 x i64> %a1) {
; X32-LABEL: test_mm512_unpacklo_epi64:
; X32: # BB#0:
; X32-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X32-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_unpacklo_epi64:
; X64: # BB#0:
; X64-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT: retq
%res = shufflevector <8 x i64> %a0, <8 x i64> %a1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
ret <8 x i64> %res

View File

@ -1905,8 +1905,8 @@ define <4 x i32> @test_masked_z_16xi32_to_4xi32_perm_mem_mask3(<16 x i32>* %vp)
define <2 x i64> @test_4xi64_to_2xi64_perm_mask0(<4 x i64> %vec) {
; CHECK-LABEL: test_4xi64_to_2xi64_perm_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = shufflevector <4 x i64> %vec, <4 x i64> undef, <2 x i32> <i32 2, i32 0>
@ -1972,9 +1972,9 @@ define <2 x i64> @test_masked_z_4xi64_to_2xi64_perm_mask1(<4 x i64> %vec) {
define <2 x i64> @test_4xi64_to_2xi64_perm_mem_mask0(<4 x i64>* %vp) {
; CHECK-LABEL: test_4xi64_to_2xi64_perm_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: vmovdqa (%rdi), %ymm0
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; CHECK-NEXT: vmovaps (%rdi), %ymm0
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%vec = load <4 x i64>, <4 x i64>* %vp
@ -2311,9 +2311,9 @@ define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mask7(<8 x i64> %vec) {
define <2 x i64> @test_8xi64_to_2xi64_perm_mask0(<8 x i64> %vec) {
; CHECK-LABEL: test_8xi64_to_2xi64_perm_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1]
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%res = shufflevector <8 x i64> %vec, <8 x i64> undef, <2 x i32> <i32 3, i32 0>
@ -2681,10 +2681,10 @@ define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mem_mask7(<8 x i64>* %vp) {
define <2 x i64> @test_8xi64_to_2xi64_perm_mem_mask0(<8 x i64>* %vp) {
; CHECK-LABEL: test_8xi64_to_2xi64_perm_mem_mask0:
; CHECK: # BB#0:
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0
; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm1
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT: vmovaps (%rdi), %zmm0
; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm1
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%vec = load <8 x i64>, <8 x i64>* %vp

View File

@ -853,9 +853,9 @@ define <16 x i8> @_clearupper16xi8b(<16 x i8>) nounwind {
; SSE-NEXT: andb $15, %r11b
; SSE-NEXT: movb %r11b, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movb $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: popq %rbx
; SSE-NEXT: popq %r14
; SSE-NEXT: retq
@ -1031,9 +1031,9 @@ define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
; SSE-NEXT: andb $15, %r11b
; SSE-NEXT: movb %r11b, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movb $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-NEXT: popq %rbx
; SSE-NEXT: popq %r14
; SSE-NEXT: retq

View File

@ -258,7 +258,7 @@ define <4 x double> @combine_vec_fcopysign_fpext_sgn(<4 x double> %x, <4 x float
; SSE-NEXT: andps %xmm8, %xmm0
; SSE-NEXT: orps %xmm0, %xmm1
; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_fpext_sgn:

View File

@ -182,7 +182,7 @@ define <4 x i32> @test13(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test14:
; CHECK: # BB#0:
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
%shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
%shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
@ -207,8 +207,8 @@ define <4 x i32> @test15(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test16:
; CHECK: # BB#0:
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
%shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>

View File

@ -9,7 +9,7 @@ define void @test(<2 x double>* %dst, <4 x double> %src) nounwind {
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: movapd %xmm0, (%eax)
; CHECK-NEXT: movaps %xmm0, (%eax)
; CHECK-NEXT: retl
entry:
%tmp7.i = shufflevector <4 x double> %src, <4 x double> undef, <2 x i32> <i32 0, i32 2>

View File

@ -172,7 +172,7 @@ define <4 x float> @test8_undef(<4 x float> %a, <4 x float> %b) {
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test8_undef:

View File

@ -902,16 +902,16 @@ define void @merge_4i32_i32_combine(<4 x i32>* %dst, i32* %src) {
define <2 x i64> @merge_2i64_i64_12_volatile(i64* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_2i64_i64_12_volatile:
; SSE: # BB#0:
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: merge_2i64_i64_12_volatile:
; AVX: # BB#0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
;
; X32-SSE1-LABEL: merge_2i64_i64_12_volatile:

View File

@ -308,8 +308,8 @@ define <4 x i64> @_mul4xi32toi64b(<4 x i32>, <4 x i32>) {
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm2[1],xmm0[1]
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm2[1],xmm0[1]
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;

View File

@ -22,9 +22,9 @@ define void @v3i64(<2 x i64> %a, <2 x i64> %b, <3 x i64>* %p) nounwind {
;
; AVX1-LABEL: v3i64:
; AVX1: # BB#0:
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm0[0],xmm1[0]
; AVX1-NEXT: vpextrq $1, %xmm0, 16(%rdi)
; AVX1-NEXT: vmovapd %xmm1, (%rdi)
; AVX1-NEXT: vmovdqa %xmm1, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: v3i64:

View File

@ -502,8 +502,8 @@ define double @u64_to_d(i64 %a) nounwind {
; AVX512_32-NEXT: movl %esp, %ebp
; AVX512_32-NEXT: andl $-8, %esp
; AVX512_32-NEXT: subl $8, %esp
; AVX512_32-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512_32-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; AVX512_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512_32-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; AVX512_32-NEXT: vsubpd {{\.LCPI.*}}, %xmm0, %xmm0
; AVX512_32-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
; AVX512_32-NEXT: vmovlpd %xmm0, (%esp)
@ -523,8 +523,8 @@ define double @u64_to_d(i64 %a) nounwind {
; SSE2_32-NEXT: movl %esp, %ebp
; SSE2_32-NEXT: andl $-8, %esp
; SSE2_32-NEXT: subl $8, %esp
; SSE2_32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE2_32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; SSE2_32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE2_32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; SSE2_32-NEXT: subpd {{\.LCPI.*}}, %xmm0
; SSE2_32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2_32-NEXT: addpd %xmm0, %xmm1

View File

@ -54,7 +54,7 @@ define <2 x double> @c(<2 x double>* %y) nounwind {
define <2 x double> @d(<2 x double>* %y, <2 x double> %z) nounwind {
; CHECK-LABEL: d:
; CHECK: # BB#0:
; CHECK-NEXT: movupd (%rdi), %xmm1
; CHECK-NEXT: movups (%rdi), %xmm1
; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; CHECK-NEXT: retq
%x = load <2 x double>, <2 x double>* %y, align 8

View File

@ -2256,13 +2256,13 @@ define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4,
define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
; X32-LABEL: test_mm_set_epi32:
; X32: # BB#0:
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set_epi32:
@ -2288,13 +2288,13 @@ define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind
define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind {
; X32-LABEL: test_mm_set_epi64x:
; X32: # BB#0:
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set_epi64x:
@ -2319,7 +2319,7 @@ define <2 x double> @test_mm_set_pd(double %a0, double %a1) nounwind {
; X64-LABEL: test_mm_set_pd:
; X64: # BB#0:
; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-NEXT: movapd %xmm1, %xmm0
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: retq
%res0 = insertelement <2 x double> undef, double %a1, i32 0
%res1 = insertelement <2 x double> %res0, double %a0, i32 1
@ -2665,13 +2665,13 @@ define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4
define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
; X32-LABEL: test_mm_setr_epi32:
; X32: # BB#0:
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_setr_epi32:
@ -2697,13 +2697,13 @@ define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwin
define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind {
; X32-LABEL: test_mm_setr_epi64x:
; X32: # BB#0:
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_setr_epi64x:
@ -3743,12 +3743,12 @@ define <2 x i64> @test_mm_unpackhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
define <2 x i64> @test_mm_unpackhi_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpackhi_epi32:
; X32: # BB#0:
; X32-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X32-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpackhi_epi32:
; X64: # BB#0:
; X64-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
@ -3760,12 +3760,12 @@ define <2 x i64> @test_mm_unpackhi_epi32(<2 x i64> %a0, <2 x i64> %a1) {
define <2 x i64> @test_mm_unpackhi_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpackhi_epi64:
; X32: # BB#0:
; X32-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpackhi_epi64:
; X64: # BB#0:
; X64-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X64-NEXT: retq
%res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
ret <2 x i64> %res
@ -3822,12 +3822,12 @@ define <2 x i64> @test_mm_unpacklo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
define <2 x i64> @test_mm_unpacklo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi32:
; X32: # BB#0:
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpacklo_epi32:
; X64: # BB#0:
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
@ -3839,12 +3839,12 @@ define <2 x i64> @test_mm_unpacklo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi64:
; X32: # BB#0:
; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpacklo_epi64:
; X64: # BB#0:
; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
%res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %res

View File

@ -2864,13 +2864,13 @@ define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) {
; GENERIC-LABEL: test_movsd_reg:
; GENERIC: # BB#0:
; GENERIC-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
; GENERIC-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movsd_reg:
; ATOM: # BB#0:
; ATOM-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
; ATOM-NEXT: nop # sched: [1:0.50]
@ -2880,7 +2880,7 @@ define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) {
; SLM-LABEL: test_movsd_reg:
; SLM: # BB#0:
; SLM-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_movsd_reg:

View File

@ -39,9 +39,9 @@ define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
;
; X64-LABEL: test2:
; X64: # BB#0:
; X64-NEXT: movapd (%rsi), %xmm1
; X64-NEXT: movaps (%rsi), %xmm1
; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-NEXT: movapd %xmm1, (%rdi)
; X64-NEXT: movaps %xmm1, (%rdi)
; X64-NEXT: retq
%tmp3 = load <2 x double>, <2 x double>* %A, align 16
%tmp7 = insertelement <2 x double> undef, double %B, i32 0
@ -340,13 +340,13 @@ define <4 x float> @test15(<4 x float>* %x, <4 x float>* %y) nounwind {
; X86: # BB#0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movapd (%ecx), %xmm0
; X86-NEXT: movaps (%ecx), %xmm0
; X86-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
; X86-NEXT: retl
;
; X64-LABEL: test15:
; X64: # BB#0: # %entry
; X64-NEXT: movapd (%rdi), %xmm0
; X64-NEXT: movaps (%rdi), %xmm0
; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
; X64-NEXT: retq
entry:
@ -362,13 +362,13 @@ define <2 x double> @test16(<4 x double> * nocapture %srcA, <2 x double>* nocap
; X86-LABEL: test16:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movapd 96(%eax), %xmm0
; X86-NEXT: movaps 96(%eax), %xmm0
; X86-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X86-NEXT: retl
;
; X64-LABEL: test16:
; X64: # BB#0:
; X64-NEXT: movapd 96(%rdi), %xmm0
; X64-NEXT: movaps 96(%rdi), %xmm0
; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X64-NEXT: retq
%i5 = getelementptr inbounds <4 x double>, <4 x double>* %srcA, i32 3

View File

@ -343,7 +343,7 @@ define <4 x float> @test14(<4 x float> %A, <4 x float> %B) {
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: subss %xmm1, %xmm0
; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test14:
@ -418,7 +418,7 @@ define <4 x float> @test16(<4 x float> %A, <4 x float> %B) {
; SSE-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test16:

View File

@ -128,14 +128,14 @@ define <8 x i16> @t4(<8 x i16> %A, <8 x i16> %B) nounwind {
define <8 x i16> @t5(<8 x i16> %A, <8 x i16> %B) nounwind {
; X86-LABEL: t5:
; X86: # BB#0:
; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-NEXT: movdqa %xmm1, %xmm0
; X86-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-NEXT: movaps %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: t5:
; X64: # BB#0:
; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: retq
%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 0, i32 1, i32 10, i32 11, i32 2, i32 3 >
ret <8 x i16> %tmp
@ -402,16 +402,16 @@ define <4 x i32> @t17() nounwind {
; X86: # BB#0: # %entry
; X86-NEXT: movaps (%eax), %xmm0
; X86-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
; X86-NEXT: pxor %xmm1, %xmm1
; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT: xorps %xmm1, %xmm1
; X86-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT: retl
;
; X64-LABEL: t17:
; X64: # BB#0: # %entry
; X64-NEXT: movaps (%rax), %xmm0
; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
; X64-NEXT: pxor %xmm1, %xmm1
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT: xorps %xmm1, %xmm1
; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT: retq
entry:
%tmp1 = load <4 x float>, <4 x float>* undef, align 16

View File

@ -936,14 +936,14 @@ define <4 x float> @insertps_with_undefs(<4 x float> %a, float* %b) {
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X32-NEXT: movapd %xmm1, %xmm0
; X32-NEXT: movaps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: insertps_with_undefs:
; X64: ## BB#0:
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-NEXT: movapd %xmm1, %xmm0
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: retq
%1 = load float, float* %b, align 4
%2 = insertelement <4 x float> undef, float %1, i32 0

View File

@ -110,7 +110,7 @@ define <4 x i32> @fptosi_4f64_to_2i32(<2 x double> %a) {
; SSE: # BB#0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm1
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_4f64_to_2i32:
@ -235,7 +235,7 @@ define <4 x i32> @fptosi_4f64_to_4i32(<4 x double> %a) {
; SSE: # BB#0:
; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_4f64_to_4i32:
@ -2373,10 +2373,10 @@ define <4 x i32> @fptosi_2f80_to_4i32(<2 x x86_fp80> %a) nounwind {
; SSE-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; SSE-NEXT: fistpll -{{[0-9]+}}(%rsp)
; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; SSE-NEXT: retq
;
@ -2386,9 +2386,9 @@ define <4 x i32> @fptosi_2f80_to_4i32(<2 x x86_fp80> %a) nounwind {
; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
; AVX-NEXT: fisttpll -{{[0-9]+}}(%rsp)
; AVX-NEXT: fisttpll -{{[0-9]+}}(%rsp)
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; AVX-NEXT: retq
%cvt = fptosi <2 x x86_fp80> %a to <2 x i32>

View File

@ -47,7 +47,7 @@ define <2 x double> @t3(double %s, <2 x double> %tmp) nounwind {
; X64-LABEL: t3:
; X64: # BB#0:
; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-NEXT: movapd %xmm1, %xmm0
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: retq
%tmp1 = insertelement <2 x double> %tmp, double %s, i32 1
ret <2 x double> %tmp1

View File

@ -26,7 +26,7 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) {
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2sdq %rax, %xmm0
; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; VEX-LABEL: sitofp_2i64_to_2f64:
@ -231,8 +231,8 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2sdq %rax, %xmm0
; SSE-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: movapd %xmm3, %xmm1
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: movaps %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: sitofp_4i64_to_4f64:
@ -462,12 +462,12 @@ define <2 x double> @uitofp_2i64_to_2f64(<2 x i64> %a) {
;
; VEX-LABEL: uitofp_2i64_to_2f64:
; VEX: # BB#0:
; VEX-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
; VEX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; VEX-NEXT: vmovapd {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
; VEX-NEXT: vunpcklps {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; VEX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
; VEX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; VEX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; VEX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; VEX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; VEX-NEXT: vsubpd %xmm3, %xmm0, %xmm0
; VEX-NEXT: vhaddpd %xmm0, %xmm2, %xmm0
; VEX-NEXT: retq
@ -769,45 +769,25 @@ define <4 x double> @uitofp_4i64_to_4f64(<4 x i64> %a) {
; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT: retq
;
; AVX1-LABEL: uitofp_4i64_to_4f64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; AVX1-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; AVX1-NEXT: vsubpd %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; AVX1-NEXT: vsubpd %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: uitofp_4i64_to_4f64:
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; AVX2-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; AVX2-NEXT: vsubpd %xmm4, %xmm1, %xmm1
; AVX2-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; AVX2-NEXT: vsubpd %xmm4, %xmm0, %xmm0
; AVX2-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
; VEX-LABEL: uitofp_4i64_to_4f64:
; VEX: # BB#0:
; VEX-NEXT: vextractf128 $1, %ymm0, %xmm1
; VEX-NEXT: vmovapd {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
; VEX-NEXT: vunpcklps {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; VEX-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
; VEX-NEXT: vsubpd %xmm4, %xmm3, %xmm3
; VEX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1]
; VEX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; VEX-NEXT: vsubpd %xmm4, %xmm1, %xmm1
; VEX-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
; VEX-NEXT: vunpcklps {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; VEX-NEXT: vsubpd %xmm4, %xmm3, %xmm3
; VEX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; VEX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; VEX-NEXT: vsubpd %xmm4, %xmm0, %xmm0
; VEX-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
; VEX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: uitofp_4i64_to_4f64:
; AVX512F: # BB#0:
@ -1117,7 +1097,7 @@ define <4 x float> @sitofp_2i64_to_4f32_zero(<2 x i64> %a) {
; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2ssq %rax, %xmm0
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT: retq
;
@ -1383,7 +1363,7 @@ define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) {
; SSE-NEXT: cvtsi2ssq %rax, %xmm0
; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: sitofp_4i64_to_4f32:
@ -2210,7 +2190,7 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
; SSE-NEXT: .LBB47_12:
; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: uitofp_4i64_to_4f32:
@ -2919,13 +2899,13 @@ define <2 x double> @uitofp_load_2i64_to_2f64(<2 x i64> *%a) {
;
; VEX-LABEL: uitofp_load_2i64_to_2f64:
; VEX: # BB#0:
; VEX-NEXT: vmovdqa (%rdi), %xmm0
; VEX-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
; VEX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; VEX-NEXT: vmovapd (%rdi), %xmm0
; VEX-NEXT: vmovapd {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
; VEX-NEXT: vunpcklps {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; VEX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
; VEX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; VEX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; VEX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; VEX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; VEX-NEXT: vsubpd %xmm3, %xmm0, %xmm0
; VEX-NEXT: vhaddpd %xmm0, %xmm2, %xmm0
; VEX-NEXT: retq
@ -3129,47 +3109,26 @@ define <4 x double> @uitofp_load_4i64_to_4f64(<4 x i64> *%a) {
; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT: retq
;
; AVX1-LABEL: uitofp_load_4i64_to_4f64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovdqa (%rdi), %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; AVX1-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; AVX1-NEXT: vsubpd %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; AVX1-NEXT: vsubpd %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: uitofp_load_4i64_to_4f64:
; AVX2: # BB#0:
; AVX2-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; AVX2-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; AVX2-NEXT: vsubpd %xmm4, %xmm1, %xmm1
; AVX2-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; AVX2-NEXT: vsubpd %xmm4, %xmm0, %xmm0
; AVX2-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
; VEX-LABEL: uitofp_load_4i64_to_4f64:
; VEX: # BB#0:
; VEX-NEXT: vmovapd (%rdi), %ymm0
; VEX-NEXT: vextractf128 $1, %ymm0, %xmm1
; VEX-NEXT: vmovapd {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
; VEX-NEXT: vunpcklps {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; VEX-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
; VEX-NEXT: vsubpd %xmm4, %xmm3, %xmm3
; VEX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1]
; VEX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; VEX-NEXT: vsubpd %xmm4, %xmm1, %xmm1
; VEX-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
; VEX-NEXT: vunpcklps {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; VEX-NEXT: vsubpd %xmm4, %xmm3, %xmm3
; VEX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; VEX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; VEX-NEXT: vsubpd %xmm4, %xmm0, %xmm0
; VEX-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
; VEX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: uitofp_load_4i64_to_4f64:
; AVX512F: # BB#0:

View File

@ -63,18 +63,18 @@ define <64 x i16> @interleave8x8(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
; AVX1-NEXT: vunpckhps {{.*#+}} xmm9 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; AVX1-NEXT: vunpcklps {{.*#+}} xmm2 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; AVX1-NEXT: vunpckhps {{.*#+}} xmm3 = xmm8[2],xmm1[2],xmm8[3],xmm1[3]
; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm8[0],xmm1[0],xmm8[1],xmm1[1]
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm9 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm3 = xmm8[2],xmm1[2],xmm8[3],xmm1[3]
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm8[0],xmm1[0],xmm8[1],xmm1[1]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm7[0],xmm6[0],xmm7[1],xmm6[1],xmm7[2],xmm6[2],xmm7[3],xmm6[3]
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm6 = xmm7[4],xmm6[4],xmm7[5],xmm6[5],xmm7[6],xmm6[6],xmm7[7],xmm6[7]
; AVX1-NEXT: vunpckhps {{.*#+}} xmm7 = xmm4[2],xmm6[2],xmm4[3],xmm6[3]
; AVX1-NEXT: vunpcklps {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1]
; AVX1-NEXT: vunpckhps {{.*#+}} xmm6 = xmm1[2],xmm5[2],xmm1[3],xmm5[3]
; AVX1-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1]
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm7 = xmm4[2],xmm6[2],xmm4[3],xmm6[3]
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1]
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm6 = xmm1[2],xmm5[2],xmm1[3],xmm5[3]
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1]
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0

View File

@ -108,7 +108,7 @@ define <4 x float> @qux(<4 x float> %t, <4 x float> %u) nounwind {
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: addq $72, %rsp
; CHECK-NEXT: retq
%m = frem <4 x float> %t, %u

View File

@ -303,12 +303,12 @@ define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_02:
; SSE: # BB#0:
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_02:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %shuffle
@ -316,13 +316,13 @@ define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_02_copy:
; SSE: # BB#0:
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_02_copy:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm2[0]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %shuffle
@ -475,12 +475,12 @@ define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_13:
; SSE: # BB#0:
; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_13:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i64> %shuffle
@ -488,13 +488,13 @@ define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_13_copy:
; SSE: # BB#0:
; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_13_copy:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm2[1]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i64> %shuffle
@ -502,13 +502,13 @@ define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_20:
; SSE: # BB#0:
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_20:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
ret <2 x i64> %shuffle
@ -516,13 +516,13 @@ define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_20_copy:
; SSE: # BB#0:
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_20_copy:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm1[0]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
ret <2 x i64> %shuffle
@ -672,13 +672,13 @@ define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_31:
; SSE: # BB#0:
; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_31:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
ret <2 x i64> %shuffle
@ -686,13 +686,13 @@ define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: shuffle_v2i64_31_copy:
; SSE: # BB#0:
; SSE-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_31_copy:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm2[1],xmm1[1]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
ret <2 x i64> %shuffle
@ -803,26 +803,26 @@ define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_1z:
; SSE: # BB#0:
; SSE-NEXT: xorpd %xmm1, %xmm1
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
; AVX1-LABEL: shuffle_v2f64_1z:
; AVX1: # BB#0:
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2f64_1z:
; AVX2: # BB#0:
; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2f64_1z:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
ret <2 x double> %shuffle
@ -831,27 +831,27 @@ define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_z0:
; SSE: # BB#0:
; SSE-NEXT: xorpd %xmm1, %xmm1
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: shuffle_v2f64_z0:
; AVX1: # BB#0:
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2f64_z0:
; AVX2: # BB#0:
; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2f64_z0:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
ret <2 x double> %shuffle
@ -906,26 +906,26 @@ define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_bitcast_1z:
; SSE: # BB#0:
; SSE-NEXT: xorpd %xmm1, %xmm1
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
; AVX1-LABEL: shuffle_v2f64_bitcast_1z:
; AVX1: # BB#0:
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v2f64_bitcast_1z:
; AVX2: # BB#0:
; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX512VL-NEXT: retq
%shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
%bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
@ -1154,14 +1154,14 @@ define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
; SSE-LABEL: insert_mem_hi_v2i64:
; SSE: # BB#0:
; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_hi_v2i64:
; AVX: # BB#0:
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
%a = load i64, i64* %ptr
%v = insertelement <2 x i64> undef, i64 %a, i32 0
@ -1232,7 +1232,7 @@ define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
; SSE-LABEL: insert_reg_hi_v2f64:
; SSE: # BB#0:
; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_reg_hi_v2f64:

View File

@ -499,12 +499,12 @@ define <4 x i32> @shuffle_v4i32_4012(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @shuffle_v4i32_0145(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: shuffle_v4i32_0145:
; SSE: # BB#0:
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4i32_0145:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x i32> %shuffle
@ -554,13 +554,13 @@ define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @shuffle_v4i32_4501(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: shuffle_v4i32_4501:
; SSE: # BB#0:
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4i32_4501:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
ret <4 x i32> %shuffle
@ -1525,13 +1525,13 @@ define <4 x i32> @shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @shuffle_v4i32_40u1(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: shuffle_v4i32_40u1:
; SSE: # BB#0:
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4i32_40u1:
; AVX: # BB#0:
; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 undef, i32 1>
ret <4 x i32> %shuffle
@ -1602,20 +1602,20 @@ define <4 x i32> @shuffle_v4i32_0u1u(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @shuffle_v4i32_0z1z(<4 x i32> %a) {
; SSE2-LABEL: shuffle_v4i32_0z1z:
; SSE2: # BB#0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4i32_0z1z:
; SSE3: # BB#0:
; SSE3-NEXT: pxor %xmm1, %xmm1
; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE3-NEXT: xorps %xmm1, %xmm1
; SSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4i32_0z1z:
; SSSE3: # BB#0:
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v4i32_0z1z:
@ -1796,12 +1796,12 @@ define <4 x i32> @shuffle_v4i32_0zz3(<4 x i32> %a) {
define <4 x i32> @shuffle_v4i32_bitcast_0415(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: shuffle_v4i32_bitcast_0415:
; SSE: # BB#0:
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v4i32_bitcast_0415:
; AVX: # BB#0:
; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX-NEXT: retq
%shuffle32 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 0, i32 4>
%bitcast64 = bitcast <4 x i32> %shuffle32 to <2 x double>
@ -2206,14 +2206,14 @@ define <4 x i32> @insert_reg_hi_v4i32(i64 %a, <4 x i32> %b) {
define <4 x i32> @insert_mem_hi_v4i32(<2 x i32>* %ptr, <4 x i32> %b) {
; SSE-LABEL: insert_mem_hi_v4i32:
; SSE: # BB#0:
; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX1OR2-LABEL: insert_mem_hi_v4i32:
; AVX1OR2: # BB#0:
; AVX1OR2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1OR2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1OR2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX1OR2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1OR2-NEXT: retq
;
; AVX512VL-LABEL: insert_mem_hi_v4i32:
@ -2286,7 +2286,7 @@ define <4 x float> @insert_reg_hi_v4f32(double %a, <4 x float> %b) {
; SSE-LABEL: insert_reg_hi_v4f32:
; SSE: # BB#0:
; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_reg_hi_v4f32:

View File

@ -1090,7 +1090,7 @@ define <4 x i64> @shuffle_v4i64_0415(<4 x i64> %a, <4 x i64> %b) {
define <4 x i64> @shuffle_v4i64_z4z6(<4 x i64> %a) {
; AVX1-LABEL: shuffle_v4i64_z4z6:
; AVX1: # BB#0:
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX1-NEXT: retq
;
@ -1110,7 +1110,7 @@ define <4 x i64> @shuffle_v4i64_z4z6(<4 x i64> %a) {
define <4 x i64> @shuffle_v4i64_5zuz(<4 x i64> %a) {
; AVX1-LABEL: shuffle_v4i64_5zuz:
; AVX1: # BB#0:
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; AVX1-NEXT: retq
;
@ -1128,20 +1128,10 @@ define <4 x i64> @shuffle_v4i64_5zuz(<4 x i64> %a) {
}
define <4 x i64> @shuffle_v4i64_40u2(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_40u2:
; AVX1: # BB#0:
; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_40u2:
; AVX2: # BB#0:
; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_40u2:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX512VL-NEXT: retq
; ALL-LABEL: shuffle_v4i64_40u2:
; ALL: # BB#0:
; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; ALL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 undef, i32 2>
ret <4 x i64> %shuffle
}
@ -1149,7 +1139,7 @@ define <4 x i64> @shuffle_v4i64_40u2(<4 x i64> %a, <4 x i64> %b) {
define <4 x i64> @shuffle_v4i64_15uu(<4 x i64> %a, <4 x i64> %b) {
; ALL-LABEL: shuffle_v4i64_15uu:
; ALL: # BB#0:
; ALL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; ALL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; ALL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
ret <4 x i64> %shuffle
@ -1423,20 +1413,10 @@ define <4 x double> @broadcast_v4f64_0000_from_v2i64(<2 x i64> %a0) {
}
define <4 x double> @bitcast_v4f64_0426(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: bitcast_v4f64_0426:
; AVX1: # BB#0:
; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: bitcast_v4f64_0426:
; AVX2: # BB#0:
; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: bitcast_v4f64_0426:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX512VL-NEXT: retq
; ALL-LABEL: bitcast_v4f64_0426:
; ALL: # BB#0:
; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; ALL-NEXT: retq
%shuffle64 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
%bitcast32 = bitcast <4 x double> %shuffle64 to <8 x float>
%shuffle32 = shufflevector <8 x float> %bitcast32, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>

View File

@ -1126,11 +1126,17 @@ define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) {
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_08080808:
; AVX2OR512VL: # BB#0:
; AVX2OR512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX2OR512VL-NEXT: vpbroadcastq %xmm0, %ymm0
; AVX2OR512VL-NEXT: retq
; AVX2-LABEL: shuffle_v8i32_08080808:
; AVX2: # BB#0:
; AVX2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v8i32_08080808:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX512VL-NEXT: vpbroadcastq %xmm0, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
ret <8 x i32> %shuffle
}
@ -1180,47 +1186,30 @@ define <8 x i32> @shuffle_v8i32_9810dc54(<8 x i32> %a, <8 x i32> %b) {
}
define <8 x i32> @shuffle_v8i32_08194c5d(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_08194c5d:
; AVX1: # BB#0:
; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; AVX1-NEXT: retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_08194c5d:
; AVX2OR512VL: # BB#0:
; AVX2OR512VL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; AVX2OR512VL-NEXT: retq
; ALL-LABEL: shuffle_v8i32_08194c5d:
; ALL: # BB#0:
; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
ret <8 x i32> %shuffle
}
define <8 x i32> @shuffle_v8i32_2a3b6e7f(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_2a3b6e7f:
; AVX1: # BB#0:
; AVX1-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; AVX1-NEXT: retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_2a3b6e7f:
; AVX2OR512VL: # BB#0:
; AVX2OR512VL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; AVX2OR512VL-NEXT: retq
; ALL-LABEL: shuffle_v8i32_2a3b6e7f:
; ALL: # BB#0:
; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
ret <8 x i32> %shuffle
}
define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_08192a3b:
; AVX1: # BB#0:
; AVX1-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v8i32_08192a3b:
; AVX2: # BB#0:
; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: retq
; AVX1OR2-LABEL: shuffle_v8i32_08192a3b:
; AVX1OR2: # BB#0:
; AVX1OR2-NEXT: vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1OR2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX1OR2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1OR2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v8i32_08192a3b:
; AVX512VL: # BB#0:
@ -1901,15 +1890,10 @@ define <8 x i32> @shuffle_v8i32_9ubzdefz(<8 x i32> %a) {
}
define <8 x i32> @shuffle_v8i32_80u1b4uu(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_80u1b4uu:
; AVX1: # BB#0:
; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
; AVX1-NEXT: retq
;
; AVX2OR512VL-LABEL: shuffle_v8i32_80u1b4uu:
; AVX2OR512VL: # BB#0:
; AVX2OR512VL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
; AVX2OR512VL-NEXT: retq
; ALL-LABEL: shuffle_v8i32_80u1b4uu:
; ALL: # BB#0:
; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 undef>
ret <8 x i32> %shuffle
}
@ -1936,7 +1920,7 @@ define <8 x i32> @shuffle_v8i32_2222uuuu(<8 x i32> %a, <8 x i32> %b) {
define <8 x i32> @shuffle_v8i32_2A3Buuuu(<8 x i32> %a, <8 x i32> %b) {
; ALL-LABEL: shuffle_v8i32_2A3Buuuu:
; ALL: # BB#0:
; ALL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; ALL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i32> %shuffle

View File

@ -76,7 +76,7 @@ define <16 x float> @shuffle_v16f32_01_01_03_00_06_04_05_07_08_08_09_09_15_14_14
define <16 x i32> @shuffle_v16i32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d:
; ALL: # BB#0:
; ALL-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; ALL-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; ALL-NEXT: retq
%shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
ret <16 x i32> %shuffle
@ -85,8 +85,8 @@ define <16 x i32> @shuffle_v16i32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1
define <16 x i32> @shuffle_v16i32_zz_10_zz_11_zz_14_zz_15_zz_18_zz_19_zz_1c_zz_1d(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_zz_10_zz_11_zz_14_zz_15_zz_18_zz_19_zz_1c_zz_1d:
; ALL: # BB#0:
; ALL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; ALL-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; ALL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; ALL-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; ALL-NEXT: retq
%shuffle = shufflevector <16 x i32> zeroinitializer, <16 x i32> %b, <16 x i32><i32 15, i32 16, i32 13, i32 17, i32 11, i32 20, i32 9, i32 21, i32 7, i32 24, i32 5, i32 25, i32 3, i32 28, i32 1, i32 29>
ret <16 x i32> %shuffle
@ -178,7 +178,7 @@ define <16 x i32> @shuffle_v16i32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_0
define <16 x i32> @shuffle_v16i32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f:
; ALL: # BB#0:
; ALL-NEXT: vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; ALL-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; ALL-NEXT: retq
%shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
ret <16 x i32> %shuffle
@ -187,8 +187,8 @@ define <16 x i32> @shuffle_v16i32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1
define <16 x i32> @shuffle_v16i32_02_zz_03_zz_06_zz_07_zz_0a_zz_0b_zz_0e_zz_0f_zz(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_02_zz_03_zz_06_zz_07_zz_0a_zz_0b_zz_0e_zz_0f_zz:
; ALL: # BB#0:
; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; ALL-NEXT: vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; ALL-NEXT: retq
%shuffle = shufflevector <16 x i32> %a, <16 x i32> zeroinitializer, <16 x i32><i32 2, i32 30, i32 3, i32 28, i32 6, i32 26, i32 7, i32 24, i32 10, i32 22, i32 11, i32 20, i32 14, i32 18, i32 15, i32 16>
ret <16 x i32> %shuffle

View File

@ -1983,13 +1983,13 @@ define <8 x double> @shuffle_v8f64_0z2z4z6z(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_0z2z4z6z:
; AVX512F: # BB#0:
; AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_0z2z4z6z:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX512F-32-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512F-32-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> zeroinitializer, <8 x i32><i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6, i32 8>
@ -2000,12 +2000,12 @@ define <8 x i64> @shuffle_v8i64_082a4c6e(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_082a4c6e:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; AVX512F-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_082a4c6e:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; AVX512F-32-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32><i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
ret <8 x i64> %shuffle
@ -2015,14 +2015,14 @@ define <8 x i64> @shuffle_v8i64_z8zazcze(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_z8zazcze:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_z8zazcze:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512F-32-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; AVX512F-32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512F-32-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> zeroinitializer, <8 x i64> %b, <8 x i32><i32 7, i32 8, i32 5, i32 10, i32 3, i32 12, i32 1, i32 14>
ret <8 x i64> %shuffle
@ -2047,13 +2047,13 @@ define <8 x double> @shuffle_v8f64_z9zbzdzf(<8 x double> %a, <8 x double> %b) {
;
; AVX512F-LABEL: shuffle_v8f64_z9zbzdzf:
; AVX512F: # BB#0:
; AVX512F-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_z9zbzdzf:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; AVX512F-32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512F-32-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> zeroinitializer, <8 x double> %b, <8 x i32><i32 0, i32 9, i32 0, i32 11, i32 0, i32 13, i32 0, i32 15>
@ -2064,12 +2064,12 @@ define <8 x i64> @shuffle_v8i64_193b5d7f(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_193b5d7f:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; AVX512F-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_193b5d7f:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; AVX512F-32-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32><i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
ret <8 x i64> %shuffle
@ -2079,14 +2079,14 @@ define <8 x i64> @shuffle_v8i64_1z3z5z7z(<8 x i64> %a, <8 x i64> %b) {
;
; AVX512F-LABEL: shuffle_v8i64_1z3z5z7z:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_1z3z5z7z:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-32-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; AVX512F-32-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512F-32-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> zeroinitializer, <8 x i32><i32 1, i32 8, i32 3, i32 15, i32 5, i32 8, i32 7, i32 15>
ret <8 x i64> %shuffle

View File

@ -914,12 +914,12 @@ define <16 x float> @combine_vpermt2var_vpermi2var_16f32_as_unpckhps(<16 x float
define <16 x i32> @vpermt2var_vpermi2var_16i32_as_unpckldq(<16 x i32> %a0, <16 x i32> %a1) {
; X32-LABEL: vpermt2var_vpermi2var_16i32_as_unpckldq:
; X32: # BB#0:
; X32-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X32-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X32-NEXT: retl
;
; X64-LABEL: vpermt2var_vpermi2var_16i32_as_unpckldq:
; X64: # BB#0:
; X64-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X64-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X64-NEXT: retq
%res0 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %a0, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>, <16 x i32> %a1, i16 -1)
ret <16 x i32> %res0

View File

@ -510,15 +510,15 @@ define <16 x i8> @combine_pshufb_as_unpackhi_undef(<16 x i8> %a0) {
define <16 x i8> @combine_pshufb_as_unpacklo_zero(<16 x i8> %a0) {
; SSE-LABEL: combine_pshufb_as_unpacklo_zero:
; SSE: # BB#0:
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshufb_as_unpacklo_zero:
; AVX: # BB#0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; AVX-NEXT: retq
%1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1, i8 4, i8 5, i8 6, i8 7>)
ret <16 x i8> %1

View File

@ -1381,12 +1381,12 @@ define <4 x i32> @combine_test7(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @combine_test8(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_test8:
; SSE: # BB#0:
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test8:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
%2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
@ -1396,13 +1396,13 @@ define <4 x i32> @combine_test8(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @combine_test9(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_test9:
; SSE: # BB#0:
; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test9:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
%2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
@ -1578,12 +1578,12 @@ define <4 x i32> @combine_test17(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @combine_test18(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_test18:
; SSE: # BB#0:
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test18:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
%2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
@ -1593,12 +1593,12 @@ define <4 x i32> @combine_test18(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @combine_test19(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_test19:
; SSE: # BB#0:
; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test19:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 5, i32 5>
%2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
@ -1640,29 +1640,20 @@ define <4 x i32> @combine_test20(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @combine_test21(<8 x i32> %a, <4 x i32>* %ptr) {
; SSE-LABEL: combine_test21:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: movdqa %xmm2, (%rdi)
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: movaps %xmm2, (%rdi)
; SSE-NEXT: retq
;
; AVX1-LABEL: combine_test21:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm0[0],xmm1[0]
; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX1-NEXT: vmovdqa %xmm2, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_test21:
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm0[0],xmm1[0]
; AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX2-NEXT: vmovdqa %xmm2, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; AVX-LABEL: combine_test21:
; AVX: # BB#0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm0[0],xmm1[0]
; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX-NEXT: vmovaps %xmm2, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
%1 = shufflevector <8 x i32> %a, <8 x i32> %a, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
%2 = shufflevector <8 x i32> %a, <8 x i32> %a, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
store <4 x i32> %1, <4 x i32>* %ptr, align 16
@ -2100,13 +2091,13 @@ define <4 x float> @combine_blend_123(<4 x float> %a, <4 x float> %b) {
define <4 x i32> @combine_test_movhl_1(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_test_movhl_1:
; SSE: # BB#0:
; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test_movhl_1:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 7, i32 5, i32 3>
%2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 1, i32 0, i32 3>
@ -2116,13 +2107,13 @@ define <4 x i32> @combine_test_movhl_1(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @combine_test_movhl_2(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_test_movhl_2:
; SSE: # BB#0:
; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test_movhl_2:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 0, i32 3, i32 6>
%2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 3, i32 7, i32 0, i32 2>
@ -2132,13 +2123,13 @@ define <4 x i32> @combine_test_movhl_2(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @combine_test_movhl_3(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_test_movhl_3:
; SSE: # BB#0:
; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test_movhl_3:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT: retq
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 6, i32 3, i32 2>
%2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 0, i32 3, i32 2>

View File

@ -212,14 +212,14 @@ define <8 x i16> @shuf_0zzz1zzz(<8 x i16> %a0) {
define <4 x i32> @shuf_0z1z(<4 x i32> %a0) {
; AMD10H-LABEL: shuf_0z1z:
; AMD10H: # BB#0:
; AMD10H-NEXT: pxor %xmm1, %xmm1
; AMD10H-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AMD10H-NEXT: xorps %xmm1, %xmm1
; AMD10H-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AMD10H-NEXT: retq
;
; BTVER1-LABEL: shuf_0z1z:
; BTVER1: # BB#0:
; BTVER1-NEXT: pxor %xmm1, %xmm1
; BTVER1-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; BTVER1-NEXT: xorps %xmm1, %xmm1
; BTVER1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; BTVER1-NEXT: retq
;
; BTVER2-LABEL: shuf_0z1z:

View File

@ -42,9 +42,9 @@ define <2 x i64> @var_shuffle_v2i64_v2i64_xx_i64(<2 x i64> %x, i32 %i0, i32 %i1)
; SSE-NEXT: andl $1, %edi
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: andl $1, %esi
; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: var_shuffle_v2i64_v2i64_xx_i64:
@ -54,9 +54,9 @@ define <2 x i64> @var_shuffle_v2i64_v2i64_xx_i64(<2 x i64> %x, i32 %i0, i32 %i1)
; AVX-NEXT: andl $1, %edi
; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT: andl $1, %esi
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: retq
%x0 = extractelement <2 x i64> %x, i32 %i0
%x1 = extractelement <2 x i64> %x, i32 %i1
@ -162,13 +162,13 @@ define <4 x i32> @var_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32 %i0, i32 %i
; SSE2-NEXT: andl $3, %edx
; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT: andl $3, %ecx
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: var_shuffle_v4i32_v4i32_xxxx_i32:
@ -182,13 +182,13 @@ define <4 x i32> @var_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32 %i0, i32 %i
; SSSE3-NEXT: andl $3, %edx
; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT: andl $3, %ecx
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSSE3-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: var_shuffle_v4i32_v4i32_xxxx_i32:
@ -733,13 +733,13 @@ define <4 x i32> @mem_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32* %i) nounwi
; SSE2-NEXT: andl $3, %edx
; SSE2-NEXT: movl 12(%rdi), %esi
; SSE2-NEXT: andl $3, %esi
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: mem_shuffle_v4i32_v4i32_xxxx_i32:
@ -753,13 +753,13 @@ define <4 x i32> @mem_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32* %i) nounwi
; SSSE3-NEXT: andl $3, %edx
; SSSE3-NEXT: movl 12(%rdi), %esi
; SSSE3-NEXT: andl $3, %esi
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSSE3-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: mem_shuffle_v4i32_v4i32_xxxx_i32:

View File

@ -91,49 +91,27 @@ define <4 x double> @var_shuffle_v4f64_v2f64_xxxx_i64(<2 x double> %x, i64 %i0,
}
define <4 x i64> @var_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; AVX1-LABEL: var_shuffle_v4i64_v4i64_xxxx_i64:
; AVX1: # BB#0:
; AVX1-NEXT: pushq %rbp
; AVX1-NEXT: movq %rsp, %rbp
; AVX1-NEXT: andq $-32, %rsp
; AVX1-NEXT: subq $64, %rsp
; AVX1-NEXT: andl $3, %edi
; AVX1-NEXT: andl $3, %esi
; AVX1-NEXT: andl $3, %edx
; AVX1-NEXT: andl $3, %ecx
; AVX1-NEXT: vmovaps %ymm0, (%rsp)
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: movq %rbp, %rsp
; AVX1-NEXT: popq %rbp
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shuffle_v4i64_v4i64_xxxx_i64:
; AVX2: # BB#0:
; AVX2-NEXT: pushq %rbp
; AVX2-NEXT: movq %rsp, %rbp
; AVX2-NEXT: andq $-32, %rsp
; AVX2-NEXT: subq $64, %rsp
; AVX2-NEXT: andl $3, %edi
; AVX2-NEXT: andl $3, %esi
; AVX2-NEXT: andl $3, %edx
; AVX2-NEXT: andl $3, %ecx
; AVX2-NEXT: vmovaps %ymm0, (%rsp)
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: movq %rbp, %rsp
; AVX2-NEXT: popq %rbp
; AVX2-NEXT: retq
; ALL-LABEL: var_shuffle_v4i64_v4i64_xxxx_i64:
; ALL: # BB#0:
; ALL-NEXT: pushq %rbp
; ALL-NEXT: movq %rsp, %rbp
; ALL-NEXT: andq $-32, %rsp
; ALL-NEXT: subq $64, %rsp
; ALL-NEXT: andl $3, %edi
; ALL-NEXT: andl $3, %esi
; ALL-NEXT: andl $3, %edx
; ALL-NEXT: andl $3, %ecx
; ALL-NEXT: vmovaps %ymm0, (%rsp)
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
; ALL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT: movq %rbp, %rsp
; ALL-NEXT: popq %rbp
; ALL-NEXT: retq
%x0 = extractelement <4 x i64> %x, i64 %i0
%x1 = extractelement <4 x i64> %x, i64 %i1
%x2 = extractelement <4 x i64> %x, i64 %i2
@ -155,10 +133,10 @@ define <4 x i64> @var_shuffle_v4i64_v4i64_xx00_i64(<4 x i64> %x, i64 %i0, i64 %i
; ALL-NEXT: andl $3, %edi
; ALL-NEXT: andl $3, %esi
; ALL-NEXT: vmovaps %ymm0, (%rsp)
; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; ALL-NEXT: vmovdqa %xmm0, %xmm0
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; ALL-NEXT: vmovaps %xmm0, %xmm0
; ALL-NEXT: movq %rbp, %rsp
; ALL-NEXT: popq %rbp
; ALL-NEXT: retq
@ -174,37 +152,21 @@ define <4 x i64> @var_shuffle_v4i64_v4i64_xx00_i64(<4 x i64> %x, i64 %i0, i64 %i
}
define <4 x i64> @var_shuffle_v4i64_v2i64_xxxx_i64(<2 x i64> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; AVX1-LABEL: var_shuffle_v4i64_v2i64_xxxx_i64:
; AVX1: # BB#0:
; AVX1-NEXT: andl $1, %edi
; AVX1-NEXT: andl $1, %esi
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shuffle_v4i64_v2i64_xxxx_i64:
; AVX2: # BB#0:
; AVX2-NEXT: andl $1, %edi
; AVX2-NEXT: andl $1, %esi
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
; ALL-LABEL: var_shuffle_v4i64_v2i64_xxxx_i64:
; ALL: # BB#0:
; ALL-NEXT: andl $1, %edi
; ALL-NEXT: andl $1, %esi
; ALL-NEXT: andl $1, %edx
; ALL-NEXT: andl $1, %ecx
; ALL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
; ALL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT: retq
%x0 = extractelement <2 x i64> %x, i64 %i0
%x1 = extractelement <2 x i64> %x, i64 %i1
%x2 = extractelement <2 x i64> %x, i64 %i2
@ -631,57 +593,31 @@ define <16 x i16> @var_shuffle_v16i16_v8i16_xxxxxxxxxxxxxxxx_i16(<8 x i16> %x, i
;
define <4 x i64> @mem_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64* %i) nounwind {
; AVX1-LABEL: mem_shuffle_v4i64_v4i64_xxxx_i64:
; AVX1: # BB#0:
; AVX1-NEXT: pushq %rbp
; AVX1-NEXT: movq %rsp, %rbp
; AVX1-NEXT: andq $-32, %rsp
; AVX1-NEXT: subq $64, %rsp
; AVX1-NEXT: movq (%rdi), %rax
; AVX1-NEXT: movq 8(%rdi), %rcx
; AVX1-NEXT: andl $3, %eax
; AVX1-NEXT: andl $3, %ecx
; AVX1-NEXT: movq 16(%rdi), %rdx
; AVX1-NEXT: andl $3, %edx
; AVX1-NEXT: movq 24(%rdi), %rsi
; AVX1-NEXT: andl $3, %esi
; AVX1-NEXT: vmovaps %ymm0, (%rsp)
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: movq %rbp, %rsp
; AVX1-NEXT: popq %rbp
; AVX1-NEXT: retq
;
; AVX2-LABEL: mem_shuffle_v4i64_v4i64_xxxx_i64:
; AVX2: # BB#0:
; AVX2-NEXT: pushq %rbp
; AVX2-NEXT: movq %rsp, %rbp
; AVX2-NEXT: andq $-32, %rsp
; AVX2-NEXT: subq $64, %rsp
; AVX2-NEXT: movq (%rdi), %rax
; AVX2-NEXT: movq 8(%rdi), %rcx
; AVX2-NEXT: andl $3, %eax
; AVX2-NEXT: andl $3, %ecx
; AVX2-NEXT: movq 16(%rdi), %rdx
; AVX2-NEXT: andl $3, %edx
; AVX2-NEXT: movq 24(%rdi), %rsi
; AVX2-NEXT: andl $3, %esi
; AVX2-NEXT: vmovaps %ymm0, (%rsp)
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: movq %rbp, %rsp
; AVX2-NEXT: popq %rbp
; AVX2-NEXT: retq
; ALL-LABEL: mem_shuffle_v4i64_v4i64_xxxx_i64:
; ALL: # BB#0:
; ALL-NEXT: pushq %rbp
; ALL-NEXT: movq %rsp, %rbp
; ALL-NEXT: andq $-32, %rsp
; ALL-NEXT: subq $64, %rsp
; ALL-NEXT: movq (%rdi), %rax
; ALL-NEXT: movq 8(%rdi), %rcx
; ALL-NEXT: andl $3, %eax
; ALL-NEXT: andl $3, %ecx
; ALL-NEXT: movq 16(%rdi), %rdx
; ALL-NEXT: andl $3, %edx
; ALL-NEXT: movq 24(%rdi), %rsi
; ALL-NEXT: andl $3, %esi
; ALL-NEXT: vmovaps %ymm0, (%rsp)
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
; ALL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT: movq %rbp, %rsp
; ALL-NEXT: popq %rbp
; ALL-NEXT: retq
%p0 = getelementptr inbounds i64, i64* %i, i32 0
%p1 = getelementptr inbounds i64, i64* %i, i32 1
%p2 = getelementptr inbounds i64, i64* %i, i32 2
@ -702,45 +638,25 @@ define <4 x i64> @mem_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64* %i) nounwi
}
define <4 x i64> @mem_shuffle_v4i64_v2i64_xxxx_i64(<2 x i64> %x, i64* %i) nounwind {
; AVX1-LABEL: mem_shuffle_v4i64_v2i64_xxxx_i64:
; AVX1: # BB#0:
; AVX1-NEXT: movq (%rdi), %rax
; AVX1-NEXT: movq 8(%rdi), %rcx
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: movq 16(%rdi), %rdx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: movq 24(%rdi), %rsi
; AVX1-NEXT: andl $1, %esi
; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: mem_shuffle_v4i64_v2i64_xxxx_i64:
; AVX2: # BB#0:
; AVX2-NEXT: movq (%rdi), %rax
; AVX2-NEXT: movq 8(%rdi), %rcx
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: movq 16(%rdi), %rdx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: movq 24(%rdi), %rsi
; AVX2-NEXT: andl $1, %esi
; AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
; ALL-LABEL: mem_shuffle_v4i64_v2i64_xxxx_i64:
; ALL: # BB#0:
; ALL-NEXT: movq (%rdi), %rax
; ALL-NEXT: movq 8(%rdi), %rcx
; ALL-NEXT: andl $1, %eax
; ALL-NEXT: andl $1, %ecx
; ALL-NEXT: movq 16(%rdi), %rdx
; ALL-NEXT: andl $1, %edx
; ALL-NEXT: movq 24(%rdi), %rsi
; ALL-NEXT: andl $1, %esi
; ALL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
; ALL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT: retq
%p0 = getelementptr inbounds i64, i64* %i, i32 0
%p1 = getelementptr inbounds i64, i64* %i, i32 1
%p2 = getelementptr inbounds i64, i64* %i, i32 2

View File

@ -772,14 +772,14 @@ entry:
define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_4i32_to_2i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_4i32_to_2i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_4i32_to_2i64:
@ -800,18 +800,18 @@ entry:
define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_4i32_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: xorps %xmm2, %xmm2
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_4i32_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: movaps %xmm0, %xmm1
; SSSE3-NEXT: xorps %xmm2, %xmm2
; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_4i32_to_4i64:
@ -847,26 +847,26 @@ entry:
define <8 x i64> @zext_8i32_to_8i64(<8 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i32_to_8i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT: movdqa %xmm3, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSE2-NEXT: movaps %xmm1, %xmm3
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: xorps %xmm4, %xmm4
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT: movaps %xmm3, %xmm2
; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE2-NEXT: unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_8i32_to_8i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm1, %xmm3
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm4, %xmm4
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSSE3-NEXT: movaps %xmm1, %xmm3
; SSSE3-NEXT: movaps %xmm0, %xmm1
; SSSE3-NEXT: xorps %xmm4, %xmm4
; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSSE3-NEXT: movaps %xmm3, %xmm2
; SSSE3-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSSE3-NEXT: unpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_8i32_to_8i64:
@ -1432,16 +1432,16 @@ entry:
define <2 x i64> @load_zext_2i32_to_2i64(<2 x i32> *%ptr) {
; SSE2-LABEL: load_zext_2i32_to_2i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_2i32_to_2i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_2i32_to_2i64:
@ -1462,20 +1462,20 @@ entry:
define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {
; SSE2-LABEL: load_zext_4i32_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa (%rdi), %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: movaps (%rdi), %xmm1
; SSE2-NEXT: xorps %xmm2, %xmm2
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_4i32_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa (%rdi), %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: movaps (%rdi), %xmm1
; SSSE3-NEXT: xorps %xmm2, %xmm2
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_4i32_to_4i64:
@ -1612,18 +1612,18 @@ entry:
define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_4i32_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: xorps %xmm2, %xmm2
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuf_zext_4i32_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: movaps %xmm0, %xmm1
; SSSE3-NEXT: xorps %xmm2, %xmm2
; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_4i32_to_4i64:
@ -2032,14 +2032,14 @@ entry:
define <2 x i64> @shuf_zext_4i32_to_2i64_offset2(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE-LABEL: shuf_zext_4i32_to_2i64_offset2:
; SSE: # BB#0: # %entry
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: retq
;
; AVX-LABEL: shuf_zext_4i32_to_2i64_offset2:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT: retq
entry:
%B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 3, i32 4>

View File

@ -15,7 +15,7 @@ define void @convert(<2 x double>* %dst.addr, <3 x double> %src) {
; X64-LABEL: convert:
; X64: # BB#0: # %entry
; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: movapd %xmm0, (%rdi)
; X64-NEXT: movaps %xmm0, (%rdi)
; X64-NEXT: retq
entry:
%val = shufflevector <3 x double> %src, <3 x double> undef, <2 x i32> <i32 0, i32 1>

View File

@ -115,10 +115,10 @@ define <4 x double> @load_factorf64_1(<16 x double>* %ptr) {
define <4 x i64> @load_factori64_4(<16 x i64>* %ptr) {
; AVX1-LABEL: load_factori64_4:
; AVX1: # BB#0:
; AVX1-NEXT: vmovupd (%rdi), %ymm0
; AVX1-NEXT: vmovupd 32(%rdi), %ymm1
; AVX1-NEXT: vmovupd 64(%rdi), %ymm2
; AVX1-NEXT: vmovupd 96(%rdi), %ymm3
; AVX1-NEXT: vmovups (%rdi), %ymm0
; AVX1-NEXT: vmovups 32(%rdi), %ymm1
; AVX1-NEXT: vmovups 64(%rdi), %ymm2
; AVX1-NEXT: vmovups 96(%rdi), %ymm3
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm0[0,1],ymm2[0,1]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm1[0,1],ymm3[0,1]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
@ -180,10 +180,10 @@ define void @store_factorf64_4(<16 x double>* %ptr, <4 x double> %v0, <4 x doubl
; AVX1-NEXT: vunpcklpd {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX1-NEXT: vunpckhpd {{.*#+}} ymm4 = ymm4[1],ymm5[1],ymm4[3],ymm5[3]
; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; AVX1-NEXT: vmovupd %ymm0, 96(%rdi)
; AVX1-NEXT: vmovupd %ymm3, 64(%rdi)
; AVX1-NEXT: vmovupd %ymm4, 32(%rdi)
; AVX1-NEXT: vmovupd %ymm2, (%rdi)
; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
; AVX1-NEXT: vmovups %ymm3, 64(%rdi)
; AVX1-NEXT: vmovups %ymm4, 32(%rdi)
; AVX1-NEXT: vmovups %ymm2, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@ -197,10 +197,10 @@ define void @store_factorf64_4(<16 x double>* %ptr, <4 x double> %v0, <4 x doubl
; AVX2-NEXT: vunpcklpd {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX2-NEXT: vunpckhpd {{.*#+}} ymm4 = ymm4[1],ymm5[1],ymm4[3],ymm5[3]
; AVX2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; AVX2-NEXT: vmovupd %ymm0, 96(%rdi)
; AVX2-NEXT: vmovupd %ymm3, 64(%rdi)
; AVX2-NEXT: vmovupd %ymm4, 32(%rdi)
; AVX2-NEXT: vmovupd %ymm2, (%rdi)
; AVX2-NEXT: vmovups %ymm0, 96(%rdi)
; AVX2-NEXT: vmovups %ymm3, 64(%rdi)
; AVX2-NEXT: vmovups %ymm4, 32(%rdi)
; AVX2-NEXT: vmovups %ymm2, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -216,8 +216,8 @@ define void @store_factorf64_4(<16 x double>* %ptr, <4 x double> %v0, <4 x doubl
; AVX512-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; AVX512-NEXT: vinsertf64x4 $1, %ymm4, %zmm2, %zmm1
; AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm3, %zmm0
; AVX512-NEXT: vmovupd %zmm0, 64(%rdi)
; AVX512-NEXT: vmovupd %zmm1, (%rdi)
; AVX512-NEXT: vmovups %zmm0, 64(%rdi)
; AVX512-NEXT: vmovups %zmm1, (%rdi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@ -238,44 +238,44 @@ define void @store_factori64_4(<16 x i64>* %ptr, <4 x i64> %v0, <4 x i64> %v1, <
; AVX1-NEXT: vunpcklpd {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX1-NEXT: vunpckhpd {{.*#+}} ymm4 = ymm4[1],ymm5[1],ymm4[3],ymm5[3]
; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; AVX1-NEXT: vmovupd %ymm0, 96(%rdi)
; AVX1-NEXT: vmovupd %ymm3, 64(%rdi)
; AVX1-NEXT: vmovupd %ymm4, 32(%rdi)
; AVX1-NEXT: vmovupd %ymm2, (%rdi)
; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
; AVX1-NEXT: vmovups %ymm3, 64(%rdi)
; AVX1-NEXT: vmovups %ymm4, 32(%rdi)
; AVX1-NEXT: vmovups %ymm2, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: store_factori64_4:
; AVX2: # BB#0:
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm4
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm5
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm3[2,3]
; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm2 = ymm4[0],ymm5[0],ymm4[2],ymm5[2]
; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX2-NEXT: vpunpckhqdq {{.*#+}} ymm4 = ymm4[1],ymm5[1],ymm4[3],ymm5[3]
; AVX2-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; AVX2-NEXT: vmovdqu %ymm0, 96(%rdi)
; AVX2-NEXT: vmovdqu %ymm3, 64(%rdi)
; AVX2-NEXT: vmovdqu %ymm4, 32(%rdi)
; AVX2-NEXT: vmovdqu %ymm2, (%rdi)
; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm4
; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm5
; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
; AVX2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3],ymm3[2,3]
; AVX2-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm4[0],ymm5[0],ymm4[2],ymm5[2]
; AVX2-NEXT: vunpcklpd {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX2-NEXT: vunpckhpd {{.*#+}} ymm4 = ymm4[1],ymm5[1],ymm4[3],ymm5[3]
; AVX2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; AVX2-NEXT: vmovups %ymm0, 96(%rdi)
; AVX2-NEXT: vmovups %ymm3, 64(%rdi)
; AVX2-NEXT: vmovups %ymm4, 32(%rdi)
; AVX2-NEXT: vmovups %ymm2, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: store_factori64_4:
; AVX512: # BB#0:
; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm4
; AVX512-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm5
; AVX512-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
; AVX512-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm3[2,3]
; AVX512-NEXT: vpunpcklqdq {{.*#+}} ymm2 = ymm4[0],ymm5[0],ymm4[2],ymm5[2]
; AVX512-NEXT: vpunpcklqdq {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX512-NEXT: vpunpckhqdq {{.*#+}} ymm4 = ymm4[1],ymm5[1],ymm4[3],ymm5[3]
; AVX512-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; AVX512-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm1
; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0
; AVX512-NEXT: vmovdqu64 %zmm0, 64(%rdi)
; AVX512-NEXT: vmovdqu64 %zmm1, (%rdi)
; AVX512-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm4
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm5
; AVX512-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
; AVX512-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3],ymm3[2,3]
; AVX512-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm4[0],ymm5[0],ymm4[2],ymm5[2]
; AVX512-NEXT: vunpcklpd {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX512-NEXT: vunpckhpd {{.*#+}} ymm4 = ymm4[1],ymm5[1],ymm4[3],ymm5[3]
; AVX512-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; AVX512-NEXT: vinsertf64x4 $1, %ymm4, %zmm2, %zmm1
; AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm3, %zmm0
; AVX512-NEXT: vmovups %zmm0, 64(%rdi)
; AVX512-NEXT: vmovups %zmm1, (%rdi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%s0 = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>

View File

@ -100,13 +100,13 @@ define <16 x i8> @vpperm_shuffle_general(<16 x i8> %a0, <16 x i8> %a1) {
define <2 x double> @vpermil2pd_21(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: vpermil2pd_21:
; X32: # BB#0:
; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X32-NEXT: retl
;
; X64-LABEL: vpermil2pd_21:
; X64: # BB#0:
; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-NEXT: retq
%1 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> <i64 10, i64 1>, i8 2)