diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index cb1067a06239..d628cdfc1fdf 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -36144,15 +36144,16 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, } // vbroadcast(vector load X) -> vbroadcast_load - if (SrcVT == MVT::v2f64 && Src.hasOneUse() && - ISD::isNormalLoad(Src.getNode())) { + if ((SrcVT == MVT::v2f64 || SrcVT == MVT::v4f32 || SrcVT == MVT::v2i64 || + SrcVT == MVT::v4i32) && + Src.hasOneUse() && ISD::isNormalLoad(Src.getNode())) { LoadSDNode *LN = cast(Src); // Unless the load is volatile or atomic. if (LN->isSimple()) { SDVTList Tys = DAG.getVTList(VT, MVT::Other); - SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; + SDValue Ops[] = {LN->getChain(), LN->getBasePtr()}; SDValue BcastLd = DAG.getMemIntrinsicNode( - X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::f64, + X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, SrcVT.getScalarType(), LN->getPointerInfo(), LN->getOriginalAlign(), LN->getMemOperand()->getFlags()); DCI.CombineTo(N.getNode(), BcastLd); diff --git a/llvm/test/CodeGen/X86/sse41.ll b/llvm/test/CodeGen/X86/sse41.ll index 99cd686f50bd..ef503760890f 100644 --- a/llvm/test/CodeGen/X86/sse41.ll +++ b/llvm/test/CodeGen/X86/sse41.ll @@ -654,8 +654,7 @@ define <4 x i32> @pinsrd_from_shufflevector_i32(<4 x i32> %a, <4 x i32>* nocaptu ; X86-AVX512-LABEL: pinsrd_from_shufflevector_i32: ; X86-AVX512: ## %bb.0: ## %entry ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; X86-AVX512-NEXT: vmovaps (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x08] -; X86-AVX512-NEXT: vbroadcastss %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc9] +; X86-AVX512-NEXT: vbroadcastss (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x08] ; X86-AVX512-NEXT: vblendps $8, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x08] ; X86-AVX512-NEXT: ## xmm0 = xmm0[0,1,2],xmm1[3] ; X86-AVX512-NEXT: retl ## encoding: [0xc3] @@ -678,8 +677,7 @@ define <4 x i32> @pinsrd_from_shufflevector_i32(<4 x i32> %a, <4 x i32>* nocaptu ; ; X64-AVX512-LABEL: pinsrd_from_shufflevector_i32: ; X64-AVX512: ## %bb.0: ## %entry -; X64-AVX512-NEXT: vmovaps (%rdi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x0f] -; X64-AVX512-NEXT: vbroadcastss %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc9] +; X64-AVX512-NEXT: vbroadcastss (%rdi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x0f] ; X64-AVX512-NEXT: vblendps $8, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x08] ; X64-AVX512-NEXT: ## xmm0 = xmm0[0,1,2],xmm1[3] ; X64-AVX512-NEXT: retq ## encoding: [0xc3]