[X86] Allow execution domain fixing to turn SHUFPD into SHUFPS.

This can help with code size on SSE targets where SHUFPD requires
a 0x66 prefix and SHUFPS doesn't.

llvm-svn: 365293
This commit is contained in:
Craig Topper 2019-07-08 06:52:49 +00:00
parent d8261f0288
commit 1deca50ab1
19 changed files with 134 additions and 120 deletions

View File

@ -6438,6 +6438,8 @@ uint16_t X86InstrInfo::getExecutionDomainCustom(const MachineInstr &MI) const {
MI.getOperand(2).getSubReg() == 0)
return 0x6;
return 0;
case X86::SHUFPDrri:
// SHUFPDrri gets custom domain handling: setExecutionDomainCustom can rewrite
// it into SHUFPSrri with an adjusted immediate.
// NOTE(review): 0x6 has bits 1 and 2 set -- presumably the mask of execution
// domains this instruction may be switched between; confirm against the
// domain numbering used by the caller.
return 0x6;
}
return 0;
}
@ -6558,6 +6560,18 @@ bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI,
// We must always return true for MOVHLPSrr.
if (Opcode == X86::MOVHLPSrr)
return true;
break;
case X86::SHUFPDrri: {
// When domain 1 is requested, rewrite SHUFPD as the SHUFPS that moves the
// same 64-bit halves; SHUFPS needs no 0x66 prefix, so the encoding is one
// byte shorter. For any other domain the instruction is left untouched.
if (Domain == 1) {
unsigned Imm = MI.getOperand(3).getImm();
// 0x44 is the SHUFPS immediate selecting elements [0,1] from the first
// source and [0,1] from the second, i.e. the low 64 bits of each -- the
// equivalent of a SHUFPD immediate of 0.
unsigned NewImm = 0x44;
// SHUFPD bit 0 set: take the high 64 bits of the first source, so the low
// nibble becomes 0xe (elements [2,3]).
if (Imm & 1) NewImm |= 0x0a;
// SHUFPD bit 1 set: take the high 64 bits of the second source, so the
// high nibble becomes 0xe (elements [2,3]).
if (Imm & 2) NewImm |= 0xa0;
MI.getOperand(3).setImm(NewImm);
MI.setDesc(get(X86::SHUFPSrri));
}
// Report the instruction as handled regardless of the requested domain.
return true;
}
}
return false;
}

View File

@ -9,7 +9,7 @@
define <2 x double> @insert_f64(double %a0, <2 x double> %a1) {
; SSE2-LABEL: insert_f64:
; SSE2: # %bb.0:
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: insert_f64:

View File

@ -1529,7 +1529,7 @@ define <2 x i64> @combine_vec_sdiv_by_pow2b_v2i64(<2 x i64> %x) {
; SSE2-NEXT: psrlq $2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: combine_vec_sdiv_by_pow2b_v2i64:
@ -1615,7 +1615,7 @@ define <4 x i64> @combine_vec_sdiv_by_pow2b_v4i64(<4 x i64> %x) {
; SSE2-NEXT: psrlq $2, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
@ -1753,7 +1753,7 @@ define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) {
; SSE2-NEXT: psrlq $2, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: psrlq $62, %xmm4
@ -1764,7 +1764,7 @@ define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) {
; SSE2-NEXT: psrlq $2, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm4[1]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3]
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]

View File

@ -61,8 +61,8 @@ define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {
define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK-SSE2-LABEL: test4:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; CHECK-SSE2-NEXT: movapd %xmm1, %xmm0
; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
; CHECK-SSE2-NEXT: movaps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retl
;
; CHECK-SSSE3-LABEL: test4:
@ -81,8 +81,8 @@ define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
define <4 x float> @test5(<4 x float> %A, <4 x float> %B) nounwind {
; CHECK-SSE-LABEL: test5:
; CHECK-SSE: # %bb.0:
; CHECK-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; CHECK-SSE-NEXT: movapd %xmm1, %xmm0
; CHECK-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
; CHECK-SSE-NEXT: movaps %xmm1, %xmm0
; CHECK-SSE-NEXT: retl
;
; CHECK-AVX-LABEL: test5:

View File

@ -697,7 +697,7 @@ define <16 x i8> @test14(<16 x i8> %x, <16 x i32> %y) nounwind {
; SSSE3-NEXT: pcmpgtd %xmm6, %xmm7
; SSSE3-NEXT: pshufb %xmm5, %xmm7
; SSSE3-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm11[0],xmm7[1],xmm11[1]
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm7[1]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm7[2,3]
; SSSE3-NEXT: psubd %xmm8, %xmm3
; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSSE3-NEXT: pand %xmm5, %xmm4
@ -707,7 +707,7 @@ define <16 x i8> @test14(<16 x i8> %x, <16 x i32> %y) nounwind {
; SSSE3-NEXT: pand %xmm5, %xmm1
; SSSE3-NEXT: packuswb %xmm2, %xmm1
; SSSE3-NEXT: packuswb %xmm3, %xmm1
; SSSE3-NEXT: andnpd %xmm1, %xmm0
; SSSE3-NEXT: pandn %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test14:

View File

@ -82,9 +82,9 @@ define <4 x i32> @test5(<4 x i32> %x) {
; X86: # %bb.0:
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: psrad $3, %xmm1
; X86-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; X86-NEXT: movdqa {{.*#+}} xmm2 = [2863311531,2863311531,3264175145,3264175145]
; X86-NEXT: movapd %xmm1, %xmm0
; X86-NEXT: movaps %xmm1, %xmm0
; X86-NEXT: pmuludq %xmm2, %xmm0
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,3,3]

View File

@ -40,8 +40,8 @@ define <4 x float> @b(<4 x float>* %y, <4 x float> %z) nounwind {
define <2 x double> @c(<2 x double>* %y) nounwind {
; CHECK-LABEL: c:
; CHECK: # %bb.0:
; CHECK-NEXT: movupd (%rdi), %xmm0
; CHECK-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; CHECK-NEXT: movups (%rdi), %xmm0
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT: retq
%x = load <2 x double>, <2 x double>* %y, align 8
%a = extractelement <2 x double> %x, i32 0

View File

@ -2179,9 +2179,9 @@ define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind {
; X86-SSE-LABEL: test_mm_loadr_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: movapd (%eax), %xmm0 # encoding: [0x66,0x0f,0x28,0x00]
; X86-SSE-NEXT: shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01]
; X86-SSE-NEXT: # xmm0 = xmm0[1,0]
; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00]
; X86-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e]
; X86-SSE-NEXT: # xmm0 = xmm0[2,3,0,1]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_loadr_pd:
@ -2200,9 +2200,9 @@ define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind {
;
; X64-SSE-LABEL: test_mm_loadr_pd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movapd (%rdi), %xmm0 # encoding: [0x66,0x0f,0x28,0x07]
; X64-SSE-NEXT: shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01]
; X64-SSE-NEXT: # xmm0 = xmm0[1,0]
; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07]
; X64-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e]
; X64-SSE-NEXT: # xmm0 = xmm0[2,3,0,1]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_loadr_pd:
@ -4728,8 +4728,8 @@ define <2 x i64> @test_mm_shuffle_epi32(<2 x i64> %a0) {
define <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_shuffle_pd:
; SSE: # %bb.0:
; SSE-NEXT: shufpd $1, %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc1,0x01]
; SSE-NEXT: # xmm0 = xmm0[1],xmm1[0]
; SSE-NEXT: shufps $78, %xmm1, %xmm0 # encoding: [0x0f,0xc6,0xc1,0x4e]
; SSE-NEXT: # xmm0 = xmm0[2,3],xmm1[0,1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_shuffle_pd:
@ -5650,9 +5650,9 @@ define void @test_mm_storer_pd(double *%a0, <2 x double> %a1) {
; X86-SSE-LABEL: test_mm_storer_pd:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT: shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01]
; X86-SSE-NEXT: # xmm0 = xmm0[1,0]
; X86-SSE-NEXT: movapd %xmm0, (%eax) # encoding: [0x66,0x0f,0x29,0x00]
; X86-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e]
; X86-SSE-NEXT: # xmm0 = xmm0[2,3,0,1]
; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_storer_pd:
@ -5673,9 +5673,9 @@ define void @test_mm_storer_pd(double *%a0, <2 x double> %a1) {
;
; X64-SSE-LABEL: test_mm_storer_pd:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01]
; X64-SSE-NEXT: # xmm0 = xmm0[1,0]
; X64-SSE-NEXT: movapd %xmm0, (%rdi) # encoding: [0x66,0x0f,0x29,0x07]
; X64-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e]
; X64-SSE-NEXT: # xmm0 = xmm0[2,3,0,1]
; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_storer_pd:

View File

@ -192,7 +192,7 @@ define <4 x float> @swizzle_18(<4 x float> %v) {
define <4 x float> @swizzle_19(<4 x float> %v) {
; CHECK-LABEL: swizzle_19:
; CHECK: # %bb.0:
; CHECK-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT: retq
%1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
%2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
@ -232,7 +232,7 @@ define <4 x float> @swizzle_22(<4 x float> %v) {
define <4 x float> @swizzle_23(<4 x float> %v) {
; CHECK-LABEL: swizzle_23:
; CHECK: # %bb.0:
; CHECK-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT: retq
%1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
%2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 2>

View File

@ -101,7 +101,7 @@ define <2 x i32> @test5(<8 x i32> %v) {
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
; SSE2-NEXT: retq
;
; AVX2-LABEL: test5:
@ -216,10 +216,10 @@ define <2 x i32> @test9(<8 x i32> %v) {
define <2 x i32> @test10(<8 x i32> %v) {
; SSE2-LABEL: test10:
; SSE2: # %bb.0:
; SSE2-NEXT: xorpd %xmm2, %xmm2
; SSE2-NEXT: xorps %xmm2, %xmm2
; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
; SSE2-NEXT: retq
;
; AVX2-LABEL: test10:

View File

@ -149,12 +149,12 @@ entry:
define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) {
; SSE2-LABEL: vsel_double:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_double:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_double:
@ -174,12 +174,12 @@ entry:
define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) {
; SSE2-LABEL: vsel_i64:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_i64:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_i64:
@ -336,16 +336,16 @@ define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movaps %xmm7, %xmm3
; SSE2-NEXT: movaps %xmm5, %xmm1
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_double8:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movaps %xmm7, %xmm3
; SSSE3-NEXT: movaps %xmm5, %xmm1
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
; SSSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_double8:
@ -371,16 +371,16 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movaps %xmm7, %xmm3
; SSE2-NEXT: movaps %xmm5, %xmm1
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_i648:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movaps %xmm7, %xmm3
; SSSE3-NEXT: movaps %xmm5, %xmm1
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
; SSSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_i648:
@ -404,14 +404,14 @@ entry:
define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
; SSE2-LABEL: vsel_double4:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_double4:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_double4:
@ -513,13 +513,13 @@ define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
; SSE2-LABEL: constant_blendvpd_avx:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: constant_blendvpd_avx:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: constant_blendvpd_avx:
@ -695,12 +695,12 @@ entry:
define <4 x double> @blend_shufflevector_4xdouble(<4 x double> %a, <4 x double> %b) {
; SSE2-LABEL: blend_shufflevector_4xdouble:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: blend_shufflevector_4xdouble:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: blend_shufflevector_4xdouble:

View File

@ -137,7 +137,7 @@ define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_10:
; SSE: # %bb.0:
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_10:
@ -193,8 +193,8 @@ define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: shuffle_v2f64_32:
; SSE: # %bb.0:
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_32:
@ -222,17 +222,17 @@ define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_03:
; SSE2: # %bb.0:
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_03:
; SSE3: # %bb.0:
; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_03:
; SSSE3: # %bb.0:
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_03:
@ -348,17 +348,17 @@ define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03:
; SSE2: # %bb.0:
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_03:
; SSE3: # %bb.0:
; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_03:
; SSSE3: # %bb.0:
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_03:
@ -376,20 +376,20 @@ define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03_copy:
; SSE2: # %bb.0:
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_03_copy:
; SSE3: # %bb.0:
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_03_copy:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_03_copy:
@ -408,12 +408,12 @@ define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_12:
; SSE2: # %bb.0:
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_12:
; SSE3: # %bb.0:
; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_12:
@ -438,14 +438,14 @@ define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_12_copy:
; SSE2: # %bb.0:
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm2[0]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm2[0,1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_12_copy:
; SSE3: # %bb.0:
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm2[0]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm2[0,1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_12_copy:
@ -585,14 +585,14 @@ define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_30:
; SSE2: # %bb.0:
; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_30:
; SSE3: # %bb.0:
; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_30:
@ -615,14 +615,14 @@ define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_30_copy:
; SSE2: # %bb.0:
; SSE2-NEXT: movapd %xmm2, %xmm0
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_30_copy:
; SSE3: # %bb.0:
; SSE3-NEXT: movapd %xmm2, %xmm0
; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE3-NEXT: movaps %xmm2, %xmm0
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_30_copy:
@ -1079,17 +1079,17 @@ define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
; SSE2-LABEL: insert_reg_lo_v2f64:
; SSE2: # %bb.0:
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_reg_lo_v2f64:
; SSE3: # %bb.0:
; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_reg_lo_v2f64:
; SSSE3: # %bb.0:
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_reg_lo_v2f64:
@ -1268,8 +1268,8 @@ define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) {
define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
; SSE-LABEL: shuffle_mem_v2f64_10:
; SSE: # %bb.0:
; SSE-NEXT: movapd (%rdi), %xmm0
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: movaps (%rdi), %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_mem_v2f64_10:

View File

@ -1443,14 +1443,14 @@ define <4 x i32> @shuffle_v4i32_7012(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @shuffle_v4i32_6701(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: shuffle_v4i32_6701:
; SSE2: # %bb.0:
; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4i32_6701:
; SSE3: # %bb.0:
; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4i32_6701:
@ -1540,12 +1540,12 @@ define <4 x i32> @shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: shuffle_v4i32_2345:
; SSE2: # %bb.0:
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4i32_2345:
; SSE3: # %bb.0:
; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4i32_2345:
@ -2255,17 +2255,17 @@ define <4 x i32> @insert_mem_hi_v4i32(<2 x i32>* %ptr, <4 x i32> %b) {
define <4 x float> @insert_reg_lo_v4f32(double %a, <4 x float> %b) {
; SSE2-LABEL: insert_reg_lo_v4f32:
; SSE2: # %bb.0:
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_reg_lo_v4f32:
; SSE3: # %bb.0:
; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_reg_lo_v4f32:
; SSSE3: # %bb.0:
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_reg_lo_v4f32:

View File

@ -37,7 +37,7 @@ define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_456789AB:
; SSE2: # %bb.0:
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_456789AB:
@ -1260,7 +1260,7 @@ define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_032dXXXX:
; SSE2: # %bb.0:
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,0]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
@ -1459,7 +1459,7 @@ define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_012dcde3:
; SSE2: # %bb.0:
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3,2,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]

View File

@ -43,7 +43,7 @@ define <16 x i8> @combine_vpshufb_as_movq(<16 x i8> %a0) {
define <2 x double> @combine_pshufb_as_movsd(<2 x double> %a0, <2 x double> %a1) {
; SSSE3-LABEL: combine_pshufb_as_movsd:
; SSSE3: # %bb.0:
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_pshufb_as_movsd:
@ -668,7 +668,7 @@ declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind rea
define <16 x i8> @combine_pshufb_pshufb_or_as_blend(<16 x i8> %a0, <16 x i8> %a1) {
; SSSE3-LABEL: combine_pshufb_pshufb_or_as_blend:
; SSSE3: # %bb.0:
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_pshufb_pshufb_or_as_blend:

View File

@ -2132,12 +2132,12 @@ define <4 x float> @combine_undef_input_test4(<4 x float> %a, <4 x float> %b) {
define <4 x float> @combine_undef_input_test5(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_undef_input_test5:
; SSE2: # %bb.0:
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_undef_input_test5:
; SSSE3: # %bb.0:
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test5:
@ -2314,12 +2314,12 @@ define <4 x float> @combine_undef_input_test14(<4 x float> %a, <4 x float> %b) {
define <4 x float> @combine_undef_input_test15(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_undef_input_test15:
; SSE2: # %bb.0:
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_undef_input_test15:
; SSSE3: # %bb.0:
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test15:

View File

@ -7,7 +7,7 @@
define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test1:
; SSE2: # %bb.0:
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test1:
@ -45,7 +45,7 @@ define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) {
define <4 x float> @test3(<4 x float> %A, <4 x float> %B) {
; SSE2-LABEL: test3:
; SSE2: # %bb.0:
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test3:

View File

@ -30,7 +30,7 @@ define <4 x float> @test1(<4 x float> %a, <4 x float> %b) {
define <4 x float> @test2(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: test2:
; SSE2: # %bb.0:
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test2:
@ -106,7 +106,7 @@ define <8 x i16> @test6(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @test7(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: test7:
; SSE2: # %bb.0:
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test7:
@ -390,7 +390,7 @@ define <4 x i32> @test23(<4 x i32> %a, <4 x i32> %b) {
define <2 x double> @test24(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: test24:
; SSE2: # %bb.0:
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test24:
@ -409,7 +409,7 @@ define <2 x double> @test24(<2 x double> %a, <2 x double> %b) {
define <2 x i64> @test25(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: test25:
; SSE2: # %bb.0:
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test25:

View File

@ -223,10 +223,10 @@ define <2 x i64> @shr2_nosplat(<2 x i64> %A) nounwind {
; X32-NEXT: psrlq $8, %xmm2
; X32-NEXT: movdqa %xmm0, %xmm1
; X32-NEXT: psrlq $1, %xmm1
; X32-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
; X32-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
; X32-NEXT: xorpd %xmm2, %xmm1
; X32-NEXT: movapd %xmm1, %xmm0
; X32-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3]
; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; X32-NEXT: xorps %xmm2, %xmm1
; X32-NEXT: movaps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: shr2_nosplat:
@ -235,10 +235,10 @@ define <2 x i64> @shr2_nosplat(<2 x i64> %A) nounwind {
; X64-NEXT: psrlq $8, %xmm2
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrlq $1, %xmm1
; X64-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
; X64-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
; X64-NEXT: xorpd %xmm2, %xmm1
; X64-NEXT: movapd %xmm1, %xmm0
; X64-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3]
; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; X64-NEXT: xorps %xmm2, %xmm1
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: retq
entry:
%B = lshr <2 x i64> %A, < i64 8, i64 1>