diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index e39819f4ac6f..e5da23cf29d4 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -6438,6 +6438,8 @@ uint16_t X86InstrInfo::getExecutionDomainCustom(const MachineInstr &MI) const { MI.getOperand(2).getSubReg() == 0) return 0x6; return 0; + case X86::SHUFPDrri: + return 0x6; } return 0; } @@ -6558,6 +6560,18 @@ bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI, // We must always return true for MOVHLPSrr. if (Opcode == X86::MOVHLPSrr) return true; + break; + case X86::SHUFPDrri: { + if (Domain == 1) { + unsigned Imm = MI.getOperand(3).getImm(); + unsigned NewImm = 0x44; + if (Imm & 1) NewImm |= 0x0a; + if (Imm & 2) NewImm |= 0xa0; + MI.getOperand(3).setImm(NewImm); + MI.setDesc(get(X86::SHUFPSrri)); + } + return true; + } } return false; } diff --git a/llvm/test/CodeGen/X86/coalesce_commute_movsd.ll b/llvm/test/CodeGen/X86/coalesce_commute_movsd.ll index 31537b581311..b42fd957d7f4 100644 --- a/llvm/test/CodeGen/X86/coalesce_commute_movsd.ll +++ b/llvm/test/CodeGen/X86/coalesce_commute_movsd.ll @@ -9,7 +9,7 @@ define <2 x double> @insert_f64(double %a0, <2 x double> %a1) { ; SSE2-LABEL: insert_f64: ; SSE2: # %bb.0: -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE2-NEXT: retq ; ; SSE41-LABEL: insert_f64: diff --git a/llvm/test/CodeGen/X86/combine-sdiv.ll b/llvm/test/CodeGen/X86/combine-sdiv.ll index 27d49cad9667..156cf3753546 100644 --- a/llvm/test/CodeGen/X86/combine-sdiv.ll +++ b/llvm/test/CodeGen/X86/combine-sdiv.ll @@ -1529,7 +1529,7 @@ define <2 x i64> @combine_vec_sdiv_by_pow2b_v2i64(<2 x i64> %x) { ; SSE2-NEXT: psrlq $2, %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE2-NEXT: retq ; ; SSE41-LABEL: combine_vec_sdiv_by_pow2b_v2i64: @@ -1615,7 +1615,7 @@ define <4 x i64> @combine_vec_sdiv_by_pow2b_v4i64(<4 x i64> %x) { ; SSE2-NEXT: psrlq $2, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: psrad $31, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] @@ -1753,7 +1753,7 @@ define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) { ; SSE2-NEXT: psrlq $2, %xmm4 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1] -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3] ; SSE2-NEXT: movdqa %xmm2, %xmm4 ; SSE2-NEXT: psrad $31, %xmm4 ; SSE2-NEXT: psrlq $62, %xmm4 @@ -1764,7 +1764,7 @@ define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) { ; SSE2-NEXT: psrlq $2, %xmm4 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1] -; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm4[1] +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3] ; SSE2-NEXT: movdqa %xmm1, %xmm4 ; SSE2-NEXT: psrad $31, %xmm4 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] diff --git a/llvm/test/CodeGen/X86/palignr.ll b/llvm/test/CodeGen/X86/palignr.ll index 19d493b0c776..aedd013bf251 100644 --- a/llvm/test/CodeGen/X86/palignr.ll +++ b/llvm/test/CodeGen/X86/palignr.ll @@ -61,8 +61,8 @@ define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind { define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind { ; CHECK-SSE2-LABEL: test4: ; CHECK-SSE2: # %bb.0: -; CHECK-SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0] -; CHECK-SSE2-NEXT: movapd %xmm1, %xmm0 +; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1] +; CHECK-SSE2-NEXT: movaps %xmm1, %xmm0 ; CHECK-SSE2-NEXT: retl ; ; CHECK-SSSE3-LABEL: test4: @@ -81,8 +81,8 @@ define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind { define <4 x float> @test5(<4 x float> %A, <4 x float> %B) nounwind { ; CHECK-SSE-LABEL: test5: ; CHECK-SSE: # %bb.0: -; CHECK-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0] -; CHECK-SSE-NEXT: movapd %xmm1, %xmm0 +; CHECK-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1] +; CHECK-SSE-NEXT: movaps %xmm1, %xmm0 ; CHECK-SSE-NEXT: retl ; ; CHECK-AVX-LABEL: test5: diff --git a/llvm/test/CodeGen/X86/psubus.ll b/llvm/test/CodeGen/X86/psubus.ll index 34f4bf23ced7..438dce60511f 100644 --- a/llvm/test/CodeGen/X86/psubus.ll +++ b/llvm/test/CodeGen/X86/psubus.ll @@ -697,7 +697,7 @@ define <16 x i8> @test14(<16 x i8> %x, <16 x i32> %y) nounwind { ; SSSE3-NEXT: pcmpgtd %xmm6, %xmm7 ; SSSE3-NEXT: pshufb %xmm5, %xmm7 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm11[0],xmm7[1],xmm11[1] -; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm7[1] +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm7[2,3] ; SSSE3-NEXT: psubd %xmm8, %xmm3 ; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] ; SSSE3-NEXT: pand %xmm5, %xmm4 @@ -707,7 +707,7 @@ define <16 x i8> @test14(<16 x i8> %x, <16 x i32> %y) nounwind { ; SSSE3-NEXT: pand %xmm5, %xmm1 ; SSSE3-NEXT: packuswb %xmm2, %xmm1 ; SSSE3-NEXT: packuswb %xmm3, %xmm1 -; SSSE3-NEXT: andnpd %xmm1, %xmm0 +; SSSE3-NEXT: pandn %xmm1, %xmm0 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: test14: diff --git a/llvm/test/CodeGen/X86/sdiv-exact.ll b/llvm/test/CodeGen/X86/sdiv-exact.ll index 3caaf40e7183..3c238d7f1bff 100644 --- a/llvm/test/CodeGen/X86/sdiv-exact.ll +++ b/llvm/test/CodeGen/X86/sdiv-exact.ll @@ -82,9 +82,9 @@ define <4 x i32> @test5(<4 x i32> %x) { ; X86: # %bb.0: ; X86-NEXT: movdqa %xmm0, %xmm1 ; X86-NEXT: psrad $3, %xmm1 -; X86-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1] +; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3] ; X86-NEXT: movdqa {{.*#+}} xmm2 = [2863311531,2863311531,3264175145,3264175145] -; X86-NEXT: movapd %xmm1, %xmm0 +; X86-NEXT: movaps %xmm1, %xmm0 ; X86-NEXT: pmuludq %xmm2, %xmm0 ; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,3,3] diff --git a/llvm/test/CodeGen/X86/sse-align-12.ll b/llvm/test/CodeGen/X86/sse-align-12.ll index 15c3cb014aba..36c30e9eb827 100644 --- a/llvm/test/CodeGen/X86/sse-align-12.ll +++ b/llvm/test/CodeGen/X86/sse-align-12.ll @@ -40,8 +40,8 @@ define <4 x float> @b(<4 x float>* %y, <4 x float> %z) nounwind { define <2 x double> @c(<2 x double>* %y) nounwind { ; CHECK-LABEL: c: ; CHECK: # %bb.0: -; CHECK-NEXT: movupd (%rdi), %xmm0 -; CHECK-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] +; CHECK-NEXT: movups (%rdi), %xmm0 +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3,0,1] ; CHECK-NEXT: retq %x = load <2 x double>, <2 x double>* %y, align 8 %a = extractelement <2 x double> %x, i32 0 diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll index eb1190151eb6..af591a6849f3 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll @@ -2179,9 +2179,9 @@ define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind { ; X86-SSE-LABEL: test_mm_loadr_pd: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-SSE-NEXT: movapd (%eax), %xmm0 # encoding: [0x66,0x0f,0x28,0x00] -; X86-SSE-NEXT: shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01] -; X86-SSE-NEXT: # xmm0 = xmm0[1,0] +; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00] +; X86-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e] +; X86-SSE-NEXT: # xmm0 = xmm0[2,3,0,1] ; X86-SSE-NEXT: retl # encoding: [0xc3] ; ; X86-AVX1-LABEL: test_mm_loadr_pd: @@ -2200,9 +2200,9 @@ define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind { ; ; X64-SSE-LABEL: test_mm_loadr_pd: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movapd (%rdi), %xmm0 # encoding: [0x66,0x0f,0x28,0x07] -; X64-SSE-NEXT: shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01] -; X64-SSE-NEXT: # xmm0 = xmm0[1,0] +; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07] +; X64-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e] +; X64-SSE-NEXT: # xmm0 = xmm0[2,3,0,1] ; X64-SSE-NEXT: retq # encoding: [0xc3] ; ; X64-AVX1-LABEL: test_mm_loadr_pd: @@ -4728,8 +4728,8 @@ define <2 x i64> @test_mm_shuffle_epi32(<2 x i64> %a0) { define <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) { ; SSE-LABEL: test_mm_shuffle_pd: ; SSE: # %bb.0: -; SSE-NEXT: shufpd $1, %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc1,0x01] -; SSE-NEXT: # xmm0 = xmm0[1],xmm1[0] +; SSE-NEXT: shufps $78, %xmm1, %xmm0 # encoding: [0x0f,0xc6,0xc1,0x4e] +; SSE-NEXT: # xmm0 = xmm0[2,3],xmm1[0,1] ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; ; AVX1-LABEL: test_mm_shuffle_pd: @@ -5650,9 +5650,9 @@ define void @test_mm_storer_pd(double *%a0, <2 x double> %a1) { ; X86-SSE-LABEL: test_mm_storer_pd: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-SSE-NEXT: shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01] -; X86-SSE-NEXT: # xmm0 = xmm0[1,0] -; X86-SSE-NEXT: movapd %xmm0, (%eax) # encoding: [0x66,0x0f,0x29,0x00] +; X86-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e] +; X86-SSE-NEXT: # xmm0 = xmm0[2,3,0,1] +; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] ; X86-SSE-NEXT: retl # encoding: [0xc3] ; ; X86-AVX1-LABEL: test_mm_storer_pd: @@ -5673,9 +5673,9 @@ define void @test_mm_storer_pd(double *%a0, <2 x double> %a1) { ; ; X64-SSE-LABEL: test_mm_storer_pd: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01] -; X64-SSE-NEXT: # xmm0 = xmm0[1,0] -; X64-SSE-NEXT: movapd %xmm0, (%rdi) # encoding: [0x66,0x0f,0x29,0x07] +; X64-SSE-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e] +; X64-SSE-NEXT: # xmm0 = xmm0[2,3,0,1] +; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] ; X64-SSE-NEXT: retq # encoding: [0xc3] ; ; X64-AVX1-LABEL: test_mm_storer_pd: diff --git a/llvm/test/CodeGen/X86/swizzle-2.ll b/llvm/test/CodeGen/X86/swizzle-2.ll index dad6a4d7d4fc..91c18bebe6bc 100644 --- a/llvm/test/CodeGen/X86/swizzle-2.ll +++ b/llvm/test/CodeGen/X86/swizzle-2.ll @@ -192,7 +192,7 @@ define <4 x float> @swizzle_18(<4 x float> %v) { define <4 x float> @swizzle_19(<4 x float> %v) { ; CHECK-LABEL: swizzle_19: ; CHECK: # %bb.0: -; CHECK-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3,0,1] ; CHECK-NEXT: retq %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> @@ -232,7 +232,7 @@ define <4 x float> @swizzle_22(<4 x float> %v) { define <4 x float> @swizzle_23(<4 x float> %v) { ; CHECK-LABEL: swizzle_23: ; CHECK: # %bb.0: -; CHECK-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3,0,1] ; CHECK-NEXT: retq %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> diff --git a/llvm/test/CodeGen/X86/trunc-subvector.ll b/llvm/test/CodeGen/X86/trunc-subvector.ll index abd4fb45ea9d..d52fe0f8ff8e 100644 --- a/llvm/test/CodeGen/X86/trunc-subvector.ll +++ b/llvm/test/CodeGen/X86/trunc-subvector.ll @@ -101,7 +101,7 @@ define <2 x i32> @test5(<8 x i32> %v) { ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1] ; SSE2-NEXT: retq ; ; AVX2-LABEL: test5: @@ -216,10 +216,10 @@ define <2 x i32> @test9(<8 x i32> %v) { define <2 x i32> @test10(<8 x i32> %v) { ; SSE2-LABEL: test10: ; SSE2: # %bb.0: -; SSE2-NEXT: xorpd %xmm2, %xmm2 +; SSE2-NEXT: xorps %xmm2, %xmm2 ; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; SSE2-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1] ; SSE2-NEXT: retq ; ; AVX2-LABEL: test10: diff --git a/llvm/test/CodeGen/X86/vector-blend.ll b/llvm/test/CodeGen/X86/vector-blend.ll index 549e44471d6d..c6bcd299d5f7 100644 --- a/llvm/test/CodeGen/X86/vector-blend.ll +++ b/llvm/test/CodeGen/X86/vector-blend.ll @@ -149,12 +149,12 @@ entry: define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) { ; SSE2-LABEL: vsel_double: ; SSE2: # %bb.0: # %entry -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: vsel_double: ; SSSE3: # %bb.0: # %entry -; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: vsel_double: @@ -174,12 +174,12 @@ entry: define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) { ; SSE2-LABEL: vsel_i64: ; SSE2: # %bb.0: # %entry -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: vsel_i64: ; SSSE3: # %bb.0: # %entry -; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: vsel_i64: @@ -336,16 +336,16 @@ define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) { ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movaps %xmm7, %xmm3 ; SSE2-NEXT: movaps %xmm5, %xmm1 -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1] -; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm6[1] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3] +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: vsel_double8: ; SSSE3: # %bb.0: # %entry ; SSSE3-NEXT: movaps %xmm7, %xmm3 ; SSSE3-NEXT: movaps %xmm5, %xmm1 -; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1] -; SSSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm6[1] +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3] +; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: vsel_double8: @@ -371,16 +371,16 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) { ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movaps %xmm7, %xmm3 ; SSE2-NEXT: movaps %xmm5, %xmm1 -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1] -; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm6[1] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3] +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: vsel_i648: ; SSSE3: # %bb.0: # %entry ; SSSE3-NEXT: movaps %xmm7, %xmm3 ; SSSE3-NEXT: movaps %xmm5, %xmm1 -; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm4[1] -; SSSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm6[1] +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3] +; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: vsel_i648: @@ -404,14 +404,14 @@ entry: define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) { ; SSE2-LABEL: vsel_double4: ; SSE2: # %bb.0: # %entry -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1] -; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm3[1] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: vsel_double4: ; SSSE3: # %bb.0: # %entry -; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1] -; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm3[1] +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] +; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: vsel_double4: @@ -513,13 +513,13 @@ define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) { ; SSE2-LABEL: constant_blendvpd_avx: ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movaps %xmm2, %xmm0 -; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm3[1] +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: constant_blendvpd_avx: ; SSSE3: # %bb.0: # %entry ; SSSE3-NEXT: movaps %xmm2, %xmm0 -; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm3[1] +; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: constant_blendvpd_avx: @@ -695,12 +695,12 @@ entry: define <4 x double> @blend_shufflevector_4xdouble(<4 x double> %a, <4 x double> %b) { ; SSE2-LABEL: blend_shufflevector_4xdouble: ; SSE2: # %bb.0: # %entry -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: blend_shufflevector_4xdouble: ; SSSE3: # %bb.0: # %entry -; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1] +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: blend_shufflevector_4xdouble: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll index 4318e40ec662..2a5ac3ebc295 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll @@ -137,7 +137,7 @@ define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) { define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) { ; SSE-LABEL: shuffle_v2f64_10: ; SSE: # %bb.0: -; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3,0,1] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2f64_10: @@ -193,8 +193,8 @@ define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) { define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) { ; SSE-LABEL: shuffle_v2f64_32: ; SSE: # %bb.0: -; SSE-NEXT: movapd %xmm1, %xmm0 -; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2f64_32: @@ -222,17 +222,17 @@ define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) { define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) { ; SSE2-LABEL: shuffle_v2f64_03: ; SSE2: # %bb.0: -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2f64_03: ; SSE3: # %bb.0: -; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2f64_03: ; SSSE3: # %bb.0: -; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_v2f64_03: @@ -348,17 +348,17 @@ define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64 define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) { ; SSE2-LABEL: shuffle_v2i64_03: ; SSE2: # %bb.0: -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_03: ; SSE3: # %bb.0: -; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_03: ; SSSE3: # %bb.0: -; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_v2i64_03: @@ -376,20 +376,20 @@ define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) { define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE2-LABEL: shuffle_v2i64_03_copy: ; SSE2: # %bb.0: -; SSE2-NEXT: movapd %xmm1, %xmm0 -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1] +; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_03_copy: ; SSE3: # %bb.0: -; SSE3-NEXT: movapd %xmm1, %xmm0 -; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1] +; SSE3-NEXT: movaps %xmm1, %xmm0 +; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_03_copy: ; SSSE3: # %bb.0: -; SSSE3-NEXT: movapd %xmm1, %xmm0 -; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm2[1] +; SSSE3-NEXT: movaps %xmm1, %xmm0 +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_v2i64_03_copy: @@ -408,12 +408,12 @@ define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64 define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) { ; SSE2-LABEL: shuffle_v2i64_12: ; SSE2: # %bb.0: -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_12: ; SSE3: # %bb.0: -; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] +; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_12: @@ -438,14 +438,14 @@ define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) { define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE2-LABEL: shuffle_v2i64_12_copy: ; SSE2: # %bb.0: -; SSE2-NEXT: movapd %xmm1, %xmm0 -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm2[0] +; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm2[0,1] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_12_copy: ; SSE3: # %bb.0: -; SSE3-NEXT: movapd %xmm1, %xmm0 -; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm2[0] +; SSE3-NEXT: movaps %xmm1, %xmm0 +; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm2[0,1] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_12_copy: @@ -585,14 +585,14 @@ define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64 define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) { ; SSE2-LABEL: shuffle_v2i64_30: ; SSE2: # %bb.0: -; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0] -; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1] +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_30: ; SSE3: # %bb.0: -; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0] -; SSE3-NEXT: movapd %xmm1, %xmm0 +; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1] +; SSE3-NEXT: movaps %xmm1, %xmm0 ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_30: @@ -615,14 +615,14 @@ define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) { define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE2-LABEL: shuffle_v2i64_30_copy: ; SSE2: # %bb.0: -; SSE2-NEXT: movapd %xmm2, %xmm0 -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v2i64_30_copy: ; SSE3: # %bb.0: -; SSE3-NEXT: movapd %xmm2, %xmm0 -; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] +; SSE3-NEXT: movaps %xmm2, %xmm0 +; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v2i64_30_copy: @@ -1079,17 +1079,17 @@ define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) { define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) { ; SSE2-LABEL: insert_reg_lo_v2f64: ; SSE2: # %bb.0: -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE2-NEXT: retq ; ; SSE3-LABEL: insert_reg_lo_v2f64: ; SSE3: # %bb.0: -; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: insert_reg_lo_v2f64: ; SSSE3: # %bb.0: -; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: insert_reg_lo_v2f64: @@ -1268,8 +1268,8 @@ define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) { define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) { ; SSE-LABEL: shuffle_mem_v2f64_10: ; SSE: # %bb.0: -; SSE-NEXT: movapd (%rdi), %xmm0 -; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] +; SSE-NEXT: movaps (%rdi), %xmm0 +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3,0,1] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_mem_v2f64_10: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll index 21b04ccd200d..f37cd88101be 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll @@ -1443,14 +1443,14 @@ define <4 x i32> @shuffle_v4i32_7012(<4 x i32> %a, <4 x i32> %b) { define <4 x i32> @shuffle_v4i32_6701(<4 x i32> %a, <4 x i32> %b) { ; SSE2-LABEL: shuffle_v4i32_6701: ; SSE2: # %bb.0: -; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0] -; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1] +; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v4i32_6701: ; SSE3: # %bb.0: -; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0] -; SSE3-NEXT: movapd %xmm1, %xmm0 +; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,3],xmm0[0,1] +; SSE3-NEXT: movaps %xmm1, %xmm0 ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v4i32_6701: @@ -1540,12 +1540,12 @@ define <4 x i32> @shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b) { define <4 x i32> @shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b) { ; SSE2-LABEL: shuffle_v4i32_2345: ; SSE2: # %bb.0: -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1] ; SSE2-NEXT: retq ; ; SSE3-LABEL: shuffle_v4i32_2345: ; SSE3: # %bb.0: -; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] +; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: shuffle_v4i32_2345: @@ -2255,17 +2255,17 @@ define <4 x i32> @insert_mem_hi_v4i32(<2 x i32>* %ptr, <4 x i32> %b) { define <4 x float> @insert_reg_lo_v4f32(double %a, <4 x float> %b) { ; SSE2-LABEL: insert_reg_lo_v4f32: ; SSE2: # %bb.0: -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE2-NEXT: retq ; ; SSE3-LABEL: insert_reg_lo_v4f32: ; SSE3: # %bb.0: -; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: insert_reg_lo_v4f32: ; SSSE3: # %bb.0: -; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: insert_reg_lo_v4f32: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll index 87520abb060f..c756fe7d197f 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll @@ -37,7 +37,7 @@ define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) { define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) { ; SSE2-LABEL: shuffle_v8i16_456789AB: ; SSE2: # %bb.0: -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,1] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: shuffle_v8i16_456789AB: @@ -1260,7 +1260,7 @@ define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) { define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) { ; SSE2-LABEL: shuffle_v8i16_032dXXXX: ; SSE2: # %bb.0: -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,0] ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,7] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3] @@ -1459,7 +1459,7 @@ define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) { define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) { ; SSE2-LABEL: shuffle_v8i16_012dcde3: ; SSE2: # %bb.0: -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3,2,1] ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0] diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll index e01f5b4f576b..b5696e33b7fb 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll @@ -43,7 +43,7 @@ define <16 x i8> @combine_vpshufb_as_movq(<16 x i8> %a0) { define <2 x double> @combine_pshufb_as_movsd(<2 x double> %a0, <2 x double> %a1) { ; SSSE3-LABEL: combine_pshufb_as_movsd: ; SSSE3: # %bb.0: -; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: combine_pshufb_as_movsd: @@ -668,7 +668,7 @@ declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind rea define <16 x i8> @combine_pshufb_pshufb_or_as_blend(<16 x i8> %a0, <16 x i8> %a1) { ; SSSE3-LABEL: combine_pshufb_pshufb_or_as_blend: ; SSSE3: # %bb.0: -; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: combine_pshufb_pshufb_or_as_blend: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll index 2594a0a9111d..58b7be1ec802 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll @@ -2132,12 +2132,12 @@ define <4 x float> @combine_undef_input_test4(<4 x float> %a, <4 x float> %b) { define <4 x float> @combine_undef_input_test5(<4 x float> %a, <4 x float> %b) { ; SSE2-LABEL: combine_undef_input_test5: ; SSE2: # %bb.0: -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: combine_undef_input_test5: ; SSSE3: # %bb.0: -; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: combine_undef_input_test5: @@ -2314,12 +2314,12 @@ define <4 x float> @combine_undef_input_test14(<4 x float> %a, <4 x float> %b) { define <4 x float> @combine_undef_input_test15(<4 x float> %a, <4 x float> %b) { ; SSE2-LABEL: combine_undef_input_test15: ; SSE2: # %bb.0: -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: combine_undef_input_test15: ; SSSE3: # %bb.0: -; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: combine_undef_input_test15: diff --git a/llvm/test/CodeGen/X86/vselect-2.ll b/llvm/test/CodeGen/X86/vselect-2.ll index 040f1d8b6f4a..c751b7a7c87e 100644 --- a/llvm/test/CodeGen/X86/vselect-2.ll +++ b/llvm/test/CodeGen/X86/vselect-2.ll @@ -7,7 +7,7 @@ define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) { ; SSE2-LABEL: test1: ; SSE2: # %bb.0: -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE2-NEXT: retq ; ; SSE41-LABEL: test1: @@ -45,7 +45,7 @@ define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) { define <4 x float> @test3(<4 x float> %A, <4 x float> %B) { ; SSE2-LABEL: test3: ; SSE2: # %bb.0: -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE2-NEXT: retq ; ; SSE41-LABEL: test3: diff --git a/llvm/test/CodeGen/X86/vselect.ll b/llvm/test/CodeGen/X86/vselect.ll index 46adb4cc3cab..d89b1352aeef 100644 --- a/llvm/test/CodeGen/X86/vselect.ll +++ b/llvm/test/CodeGen/X86/vselect.ll @@ -30,7 +30,7 @@ define <4 x float> @test1(<4 x float> %a, <4 x float> %b) { define <4 x float> @test2(<4 x float> %a, <4 x float> %b) { ; SSE2-LABEL: test2: ; SSE2: # %bb.0: -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE2-NEXT: retq ; ; SSE41-LABEL: test2: @@ -106,7 +106,7 @@ define <8 x i16> @test6(<8 x i16> %a, <8 x i16> %b) { define <8 x i16> @test7(<8 x i16> %a, <8 x i16> %b) { ; SSE2-LABEL: test7: ; SSE2: # %bb.0: -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE2-NEXT: retq ; ; SSE41-LABEL: test7: @@ -390,7 +390,7 @@ define <4 x i32> @test23(<4 x i32> %a, <4 x i32> %b) { define <2 x double> @test24(<2 x double> %a, <2 x double> %b) { ; SSE2-LABEL: test24: ; SSE2: # %bb.0: -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE2-NEXT: retq ; ; SSE41-LABEL: test24: @@ -409,7 +409,7 @@ define <2 x double> @test24(<2 x double> %a, <2 x double> %b) { define <2 x i64> @test25(<2 x i64> %a, <2 x i64> %b) { ; SSE2-LABEL: test25: ; SSE2: # %bb.0: -; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] ; SSE2-NEXT: retq ; ; SSE41-LABEL: test25: diff --git a/llvm/test/CodeGen/X86/x86-shifts.ll b/llvm/test/CodeGen/X86/x86-shifts.ll index 73dbb30a8c75..5688d1d28e0b 100644 --- a/llvm/test/CodeGen/X86/x86-shifts.ll +++ b/llvm/test/CodeGen/X86/x86-shifts.ll @@ -223,10 +223,10 @@ define <2 x i64> @shr2_nosplat(<2 x i64> %A) nounwind { ; X32-NEXT: psrlq $8, %xmm2 ; X32-NEXT: movdqa %xmm0, %xmm1 ; X32-NEXT: psrlq $1, %xmm1 -; X32-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1] -; X32-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1] -; X32-NEXT: xorpd %xmm2, %xmm1 -; X32-NEXT: movapd %xmm1, %xmm0 +; X32-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3] +; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3] +; X32-NEXT: xorps %xmm2, %xmm1 +; X32-NEXT: movaps %xmm1, %xmm0 ; X32-NEXT: retl ; ; X64-LABEL: shr2_nosplat: @@ -235,10 +235,10 @@ define <2 x i64> @shr2_nosplat(<2 x i64> %A) nounwind { ; X64-NEXT: psrlq $8, %xmm2 ; X64-NEXT: movdqa %xmm0, %xmm1 ; X64-NEXT: psrlq $1, %xmm1 -; X64-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1] -; X64-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1] -; X64-NEXT: xorpd %xmm2, %xmm1 -; X64-NEXT: movapd %xmm1, %xmm0 +; X64-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3] +; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3] +; X64-NEXT: xorps %xmm2, %xmm1 +; X64-NEXT: movaps %xmm1, %xmm0 ; X64-NEXT: retq entry: %B = lshr <2 x i64> %A, < i64 8, i64 1>