diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 606df5762b99..d0696c24a110 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -12980,14 +12980,6 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
   // is expected to be a win for code size, register pressure, and possibly
   // uops even if the original vector load is not eliminated.
 
-  // 32-bit targets need to load i64 as a f64 and then bitcast the result.
-  if (!Subtarget.is64Bit() && VT.getScalarType() == MVT::i64) {
-    BroadcastVT = MVT::getVectorVT(MVT::f64, VT.getVectorNumElements());
-    Opcode = (BroadcastVT.is128BitVector() && !Subtarget.hasAVX2())
-                 ? X86ISD::MOVDDUP
-                 : Opcode;
-  }
-
   // Reduce the vector load and shuffle to a broadcasted scalar load.
   LoadSDNode *Ld = cast<LoadSDNode>(V);
   SDValue BaseAddr = Ld->getOperand(1);
@@ -12995,6 +12987,21 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
   unsigned Offset = BroadcastIdx * SVT.getStoreSize();
   assert((int)(Offset * 8) == BitOffset && "Unexpected bit-offset");
   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
+
+  // Directly form VBROADCAST_LOAD if we're using VBROADCAST opcode rather
+  // than MOVDDUP.
+  // FIXME: Should we add VBROADCAST_LOAD isel patterns for pre-AVX?
+  if (Opcode == X86ISD::VBROADCAST) {
+    SDVTList Tys = DAG.getVTList(BroadcastVT, MVT::Other);
+    SDValue Ops[] = {Ld->getChain(), NewAddr};
+    V = DAG.getMemIntrinsicNode(
+        X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, SVT,
+        DAG.getMachineFunction().getMachineMemOperand(
+            Ld->getMemOperand(), Offset, SVT.getStoreSize()));
+    DAG.makeEquivalentMemoryOrdering(Ld, V);
+    return DAG.getBitcast(VT, V);
+  }
+  assert(SVT == MVT::f64 && "Unexpected VT!");
   V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
                   DAG.getMachineFunction().getMachineMemOperand(
                       Ld->getMemOperand(), Offset, SVT.getStoreSize()));