diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index e761183974c0..502c138089b9 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -4838,6 +4838,13 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum, bool ForLoadFold = false) { // Set the OpNum parameter to the first source operand. switch (Opcode) { + case X86::MMX_PUNPCKHBWirr: + case X86::MMX_PUNPCKHWDirr: + case X86::MMX_PUNPCKHDQirr: + case X86::MMX_PUNPCKLBWirr: + case X86::MMX_PUNPCKLWDirr: + case X86::MMX_PUNPCKLDQirr: + case X86::MOVHLPSrr: case X86::PACKSSWBrr: case X86::PACKUSWBrr: case X86::PACKSSDWrr: @@ -4850,6 +4857,8 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum, case X86::PUNPCKLDQrr: case X86::PUNPCKHQDQrr: case X86::PUNPCKLQDQrr: + case X86::SHUFPDrri: + case X86::SHUFPSrri: // These instructions are sometimes used with an undef first or second // source. Return true here so BreakFalseDeps will assign this source to the // same register as the first source to avoid a false dependency. @@ -4857,6 +4866,8 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum, // VEX counterparts. return OpNum == 2 && !ForLoadFold; + case X86::VMOVLHPSrr: + case X86::VMOVLHPSZrr: case X86::VPACKSSWBrr: case X86::VPACKUSWBrr: case X86::VPACKSSDWrr: @@ -4865,6 +4876,16 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum, case X86::VPACKUSWBZ128rr: case X86::VPACKSSDWZ128rr: case X86::VPACKUSDWZ128rr: + case X86::VPERM2F128rr: + case X86::VPERM2I128rr: + case X86::VSHUFF32X4Z256rri: + case X86::VSHUFF32X4Zrri: + case X86::VSHUFF64X2Z256rri: + case X86::VSHUFF64X2Zrri: + case X86::VSHUFI32X4Z256rri: + case X86::VSHUFI32X4Zrri: + case X86::VSHUFI64X2Z256rri: + case X86::VSHUFI64X2Zrri: case X86::VPUNPCKHBWrr: case X86::VPUNPCKLBWrr: case X86::VPUNPCKHBWYrr: diff --git a/llvm/test/CodeGen/X86/mmx-build-vector.ll b/llvm/test/CodeGen/X86/mmx-build-vector.ll index dace3cdc7b23..2ce69a626126 100644 --- a/llvm/test/CodeGen/X86/mmx-build-vector.ll +++ b/llvm/test/CodeGen/X86/mmx-build-vector.ll @@ -455,7 +455,7 @@ define void @build_v8i8_0u2345z7(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i ; X64-NEXT: movd %ecx, %mm2 ; X64-NEXT: punpcklbw %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1],mm2[2],mm1[2],mm2[3],mm1[3] ; X64-NEXT: movd %esi, %mm1 -; X64-NEXT: punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3] +; X64-NEXT: punpcklbw %mm1, %mm1 # mm1 = mm1[0,0,1,1,2,2,3,3] ; X64-NEXT: punpcklwd %mm2, %mm1 # mm1 = mm1[0],mm2[0],mm1[1],mm2[1] ; X64-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0] ; X64-NEXT: paddd %mm1, %mm1 @@ -488,7 +488,7 @@ define void @build_v8i8_0123zzzu(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i ; X86-NEXT: punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1] ; X86-NEXT: pxor %mm0, %mm0 ; X86-NEXT: pxor %mm1, %mm1 -; X86-NEXT: punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3] +; X86-NEXT: punpcklbw %mm1, %mm1 # mm1 = mm1[0,0,1,1,2,2,3,3] ; X86-NEXT: punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3] ; X86-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] ; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0] @@ -507,7 +507,7 @@ define void @build_v8i8_0123zzzu(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i ; X64-NEXT: punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1] ; X64-NEXT: pxor %mm0, %mm0 ; X64-NEXT: pxor %mm1, %mm1 -; X64-NEXT: punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3] +; X64-NEXT: punpcklbw %mm1, %mm1 # mm1 = mm1[0,0,1,1,2,2,3,3] ; X64-NEXT: punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3] ; X64-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] ; X64-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0] diff --git a/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll b/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll index b3accf80744f..032c9e293b0a 100644 --- a/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll +++ b/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll @@ -31,7 +31,7 @@ define void @test_udiv7_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind { ; X86-NEXT: pmuludq %xmm1, %xmm2 ; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] ; X86-NEXT: movdqa %xmm0, %xmm3 -; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3] +; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3] ; X86-NEXT: pmuludq %xmm1, %xmm3 ; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3] ; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] @@ -81,7 +81,7 @@ define void @test_urem7_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind { ; X86-NEXT: pmuludq %xmm1, %xmm2 ; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] ; X86-NEXT: movdqa %xmm0, %xmm3 -; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3] +; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3] ; X86-NEXT: pmuludq %xmm1, %xmm3 ; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3] ; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] @@ -137,7 +137,7 @@ define void @test_sdiv7_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind { ; X86-NEXT: pmuludq %xmm1, %xmm2 ; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] ; X86-NEXT: movdqa %xmm0, %xmm3 -; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3] +; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3] ; X86-NEXT: pmuludq %xmm1, %xmm3 ; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3] ; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] @@ -198,7 +198,7 @@ define void @test_srem7_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind { ; X86-NEXT: pmuludq %xmm1, %xmm2 ; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] ; X86-NEXT: movdqa %xmm0, %xmm3 -; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3] +; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3] ; X86-NEXT: pmuludq %xmm1, %xmm3 ; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3] ; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] @@ -358,7 +358,7 @@ define void @test_udiv_v2i32(<2 x i32>* %x, <2 x i32>* %y, <2 x i32>* %z) nounwi ; X86-NEXT: movd %eax, %xmm2 ; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] ; X86-NEXT: movd %xmm0, %eax -; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] +; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] ; X86-NEXT: movd %xmm1, %esi ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: divl %esi @@ -412,7 +412,7 @@ define void @test_urem_v2i32(<2 x i32>* %x, <2 x i32>* %y, <2 x i32>* %z) nounwi ; X86-NEXT: movd %edx, %xmm2 ; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] ; X86-NEXT: movd %xmm0, %eax -; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] +; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] ; X86-NEXT: movd %xmm1, %esi ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: divl %esi @@ -465,7 +465,7 @@ define void @test_sdiv_v2i32(<2 x i32>* %x, <2 x i32>* %y, <2 x i32>* %z) nounwi ; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] ; X86-NEXT: movd %xmm0, %eax ; X86-NEXT: movd %xmm1, %edi -; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] +; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] ; X86-NEXT: movd %xmm1, %ebx ; X86-NEXT: cltd ; X86-NEXT: idivl %ebx @@ -524,7 +524,7 @@ define void @test_srem_v2i32(<2 x i32>* %x, <2 x i32>* %y, <2 x i32>* %z) nounwi ; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] ; X86-NEXT: movd %xmm0, %eax ; X86-NEXT: movd %xmm1, %edi -; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] +; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] ; X86-NEXT: movd %xmm1, %ebx ; X86-NEXT: cltd ; X86-NEXT: idivl %ebx diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll index 1638cb7f4c58..739ea6bae535 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll @@ -1869,7 +1869,7 @@ define <8 x double> @shuffle_v8f64_2301uuuu(<8 x double> %a0, <8 x double> %a1) define <8 x double> @shuffle_v8f64_uuu2301(<8 x double> %a0, <8 x double> %a1) { ; ALL-LABEL: shuffle_v8f64_uuu2301: ; ALL: # %bb.0: -; ALL-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1],zmm1[2,3,0,1] +; ALL-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm1[0,1,0,1,2,3,0,1] ; ALL-NEXT: ret{{[l|q]}} %1 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> ret <8 x double> %1