diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 58b4e906bfab..dfcfa83d9dba 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -169,16 +169,6 @@ namespace { bool CombineToPostIndexedLoadStore(SDNode *N); bool SliceUpLoad(SDNode *N); - /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed - /// load. - /// - /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced. - /// \param InVecVT type of the input vector to EVE with bitcasts resolved. - /// \param EltNo index of the vector element to load. - /// \param OriginalLoad load that EVE came from to be replaced. - /// \returns EVE on success SDValue() on failure. - SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad( - SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad); void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); SDValue SExtPromoteOperand(SDValue Op, EVT PVT); @@ -9792,86 +9782,6 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } -SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( - SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) { - EVT ResultVT = EVE->getValueType(0); - EVT VecEltVT = InVecVT.getVectorElementType(); - unsigned Align = OriginalLoad->getAlignment(); - unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment( - VecEltVT.getTypeForEVT(*DAG.getContext())); - - if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)) - return SDValue(); - - Align = NewAlign; - - SDValue NewPtr = OriginalLoad->getBasePtr(); - SDValue Offset; - EVT PtrType = NewPtr.getValueType(); - MachinePointerInfo MPI; - if (auto *ConstEltNo = dyn_cast(EltNo)) { - int Elt = ConstEltNo->getZExtValue(); - unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8; - if (TLI.isBigEndian()) - PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff; - Offset = DAG.getConstant(PtrOff, PtrType); - MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff); - } else { - Offset = DAG.getNode( - ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo, - DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType())); - if (TLI.isBigEndian()) - Offset = DAG.getNode( - ISD::SUB, SDLoc(EVE), EltNo.getValueType(), - DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset); - MPI = OriginalLoad->getPointerInfo(); - } - NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset); - - // The replacement we need to do here is a little tricky: we need to - // replace an extractelement of a load with a load. - // Use ReplaceAllUsesOfValuesWith to do the replacement. - // Note that this replacement assumes that the extractvalue is the only - // use of the load; that's okay because we don't want to perform this - // transformation in other cases anyway. - SDValue Load; - SDValue Chain; - if (ResultVT.bitsGT(VecEltVT)) { - // If the result type of vextract is wider than the load, then issue an - // extending load instead. - ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, VecEltVT) - ? ISD::ZEXTLOAD - : ISD::EXTLOAD; - Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), - NewPtr, MPI, VecEltVT, OriginalLoad->isVolatile(), - OriginalLoad->isNonTemporal(), Align, - OriginalLoad->getTBAAInfo()); - Chain = Load.getValue(1); - } else { - Load = DAG.getLoad( - VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, - OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(), - OriginalLoad->isInvariant(), Align, OriginalLoad->getTBAAInfo()); - Chain = Load.getValue(1); - if (ResultVT.bitsLT(VecEltVT)) - Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load); - else - Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load); - } - WorkListRemover DeadNodes(*this); - SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) }; - SDValue To[] = { Load, Chain }; - DAG.ReplaceAllUsesOfValuesWith(From, To, 2); - // Since we're explicitly calling ReplaceAllUses, add the new node to the - // worklist explicitly as well. - AddToWorkList(Load.getNode()); - AddUsersToWorkList(Load.getNode()); // Add users too - // Make sure to revisit this node to clean it up; it will usually be dead. - AddToWorkList(EVE); - ++OpsNarrowed; - return SDValue(EVE, 0); -} - SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (scalar_to_vector val, 0) -> val SDValue InVec = N->getOperand(0); @@ -9940,38 +9850,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } } - bool BCNumEltsChanged = false; - EVT ExtVT = VT.getVectorElementType(); - EVT LVT = ExtVT; - - // If the result of load has to be truncated, then it's not necessarily - // profitable. - if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) - return SDValue(); - - if (InVec.getOpcode() == ISD::BITCAST) { - // Don't duplicate a load with other uses. - if (!InVec.hasOneUse()) - return SDValue(); - - EVT BCVT = InVec.getOperand(0).getValueType(); - if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) - return SDValue(); - if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) - BCNumEltsChanged = true; - InVec = InVec.getOperand(0); - ExtVT = BCVT.getVectorElementType(); - } - - // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size) - if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() && - ISD::isNormalLoad(InVec.getNode())) { - SDValue Index = N->getOperand(1); - if (LoadSDNode *OrigLoad = dyn_cast(InVec)) - return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index, - OrigLoad); - } - // Perform only after legalization to ensure build_vector / vector_shuffle // optimizations have already been done. if (!LegalOperations) return SDValue(); @@ -9982,6 +9860,30 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (ConstEltNo) { int Elt = cast(EltNo)->getZExtValue(); + bool NewLoad = false; + bool BCNumEltsChanged = false; + EVT ExtVT = VT.getVectorElementType(); + EVT LVT = ExtVT; + + // If the result of load has to be truncated, then it's not necessarily + // profitable. + if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) + return SDValue(); + + if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + + EVT BCVT = InVec.getOperand(0).getValueType(); + if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) + return SDValue(); + if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) + BCNumEltsChanged = true; + InVec = InVec.getOperand(0); + ExtVT = BCVT.getVectorElementType(); + NewLoad = true; + } LoadSDNode *LN0 = nullptr; const ShuffleVectorSDNode *SVN = nullptr; @@ -10024,7 +9926,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast(InVec); Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; - EltNo = DAG.getConstant(Elt, EltNo.getValueType()); } } @@ -10037,7 +9938,72 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (Elt == -1) return DAG.getUNDEF(LVT); - return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0); + unsigned Align = LN0->getAlignment(); + if (NewLoad) { + // Check the resultant load doesn't need a higher alignment than the + // original load. + unsigned NewAlign = + TLI.getDataLayout() + ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext())); + + if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT)) + return SDValue(); + + Align = NewAlign; + } + + SDValue NewPtr = LN0->getBasePtr(); + unsigned PtrOff = 0; + + if (Elt) { + PtrOff = LVT.getSizeInBits() * Elt / 8; + EVT PtrType = NewPtr.getValueType(); + if (TLI.isBigEndian()) + PtrOff = VT.getSizeInBits() / 8 - PtrOff; + NewPtr = DAG.getNode(ISD::ADD, SDLoc(N), PtrType, NewPtr, + DAG.getConstant(PtrOff, PtrType)); + } + + // The replacement we need to do here is a little tricky: we need to + // replace an extractelement of a load with a load. + // Use ReplaceAllUsesOfValuesWith to do the replacement. + // Note that this replacement assumes that the extractvalue is the only + // use of the load; that's okay because we don't want to perform this + // transformation in other cases anyway. + SDValue Load; + SDValue Chain; + if (NVT.bitsGT(LVT)) { + // If the result type of vextract is wider than the load, then issue an + // extending load instead. + ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT) + ? ISD::ZEXTLOAD : ISD::EXTLOAD; + Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(), + NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), + LVT, LN0->isVolatile(), LN0->isNonTemporal(), + Align, LN0->getTBAAInfo()); + Chain = Load.getValue(1); + } else { + Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->isInvariant(), Align, LN0->getTBAAInfo()); + Chain = Load.getValue(1); + if (NVT.bitsLT(LVT)) + Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load); + else + Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load); + } + WorkListRemover DeadNodes(*this); + SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) }; + SDValue To[] = { Load, Chain }; + DAG.ReplaceAllUsesOfValuesWith(From, To, 2); + // Since we're explcitly calling ReplaceAllUses, add the new node to the + // worklist explicitly as well. + AddToWorkList(Load.getNode()); + AddUsersToWorkList(Load.getNode()); // Add users too + // Make sure to revisit this node to clean it up; it will usually be dead. + AddToWorkList(N); + return SDValue(N, 0); } return SDValue(); diff --git a/llvm/test/CodeGen/X86/vec_splat.ll b/llvm/test/CodeGen/X86/vec_splat.ll index 28f2a9074cb8..a02e3836078c 100644 --- a/llvm/test/CodeGen/X86/vec_splat.ll +++ b/llvm/test/CodeGen/X86/vec_splat.ll @@ -1,6 +1,5 @@ ; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse2 | FileCheck %s -check-prefix=SSE2 ; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse3 | FileCheck %s -check-prefix=SSE3 -; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s -check-prefix=AVX define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X) nounwind { %tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0 ; <<4 x float>> [#uses=1] @@ -36,23 +35,6 @@ define void @test_v2sd(<2 x double>* %P, <2 x double>* %Q, double %X) nounwind { ; Fold extract of a load into the load's address computation. This avoids spilling to the stack. define <4 x float> @load_extract_splat(<4 x float>* nocapture readonly %ptr, i64 %i, i64 %j) nounwind { - %1 = getelementptr inbounds <4 x float>* %ptr, i64 %i - %2 = load <4 x float>* %1, align 16 - %3 = trunc i64 %j to i32 - %4 = extractelement <4 x float> %2, i32 %3 - %5 = insertelement <4 x float> undef, float %4, i32 0 - %6 = insertelement <4 x float> %5, float %4, i32 1 - %7 = insertelement <4 x float> %6, float %4, i32 2 - %8 = insertelement <4 x float> %7, float %4, i32 3 - ret <4 x float> %8 - -; AVX-LABEL: load_extract_splat -; AVX-NOT: rsp -; AVX: vbroadcastss -} - -; Fold extract of a load into the load's address computation. This avoids spilling to the stack. -define <4 x float> @load_extract_splat1(<4 x float>* nocapture readonly %ptr, i64 %i, i64 %j) nounwind { %1 = getelementptr inbounds <4 x float>* %ptr, i64 %i %2 = load <4 x float>* %1, align 16 %3 = extractelement <4 x float> %2, i64 %j @@ -62,7 +44,7 @@ define <4 x float> @load_extract_splat1(<4 x float>* nocapture readonly %ptr, i6 %7 = insertelement <4 x float> %6, float %3, i32 3 ret <4 x float> %7 -; AVX-LABEL: load_extract_splat1 +; AVX-LABEL: load_extract_splat ; AVX-NOT: movs ; AVX: vbroadcastss }