diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index dc6197d5d958..d25c9e684716 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7549,16 +7549,12 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
   return SDValue();
 }
 
-// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
-// not enforce equality of the chain operands.
-static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base,
+static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
                             unsigned Bytes, int Dist,
                             SelectionDAG &DAG) {
-  EVT VT = LS->getMemoryVT();
   if (VT.getSizeInBits() / 8 != Bytes)
     return false;
 
-  SDValue Loc = LS->getBasePtr();
   SDValue BaseLoc = Base->getBasePtr();
   if (Loc.getOpcode() == ISD::FrameIndex) {
     if (BaseLoc.getOpcode() != ISD::FrameIndex)
@@ -7589,6 +7585,64 @@ static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base,
   return false;
 }
 
+// Like SelectionDAG::isConsecutiveLoad, but also works for stores and AltiVec
+// load/store intrinsics, and does not enforce equality of the chain operands.
+static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
+                            unsigned Bytes, int Dist,
+                            SelectionDAG &DAG) {
+  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) { // plain load/store
+    EVT VT = LS->getMemoryVT();
+    SDValue Loc = LS->getBasePtr();
+    return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
+  }
+
+  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { // AltiVec load intrinsics
+    EVT VT; // memory type accessed by the intrinsic
+    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+    default: return false; // not a recognized load intrinsic
+    case Intrinsic::ppc_altivec_lvx:
+    case Intrinsic::ppc_altivec_lvxl:
+      VT = MVT::v4i32;
+      break;
+    case Intrinsic::ppc_altivec_lvebx:
+      VT = MVT::i8;
+      break;
+    case Intrinsic::ppc_altivec_lvehx:
+      VT = MVT::i16;
+      break;
+    case Intrinsic::ppc_altivec_lvewx:
+      VT = MVT::i32;
+      break;
+    }
+
+    return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
+  }
+
+  if (N->getOpcode() == ISD::INTRINSIC_VOID) { // AltiVec store intrinsics
+    EVT VT; // memory type accessed by the intrinsic
+    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+    default: return false; // not a recognized store intrinsic
+    case Intrinsic::ppc_altivec_stvx:
+    case Intrinsic::ppc_altivec_stvxl:
+      VT = MVT::v4i32;
+      break;
+    case Intrinsic::ppc_altivec_stvebx:
+      VT = MVT::i8;
+      break;
+    case Intrinsic::ppc_altivec_stvehx:
+      VT = MVT::i16;
+      break;
+    case Intrinsic::ppc_altivec_stvewx:
+      VT = MVT::i32;
+      break;
+    }
+
+    return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
+  }
+
+  return false;
+}
+
 // Return true is there is a nearyby consecutive load to the one provided
 // (regardless of alignment). We search up and down the chain, looking though
 // token factors and other loads (but nothing else). As a result, a true result
@@ -7610,7 +7664,7 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
     if (!Visited.insert(ChainNext))
       continue;
 
-    if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) {
+    if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
       if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
         return true;
 
@@ -7641,14 +7695,14 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
       if (!Visited.insert(LoadRoot))
         continue;
 
-      if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(LoadRoot))
+      if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
         if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
           return true;
 
       for (SDNode::use_iterator UI = LoadRoot->use_begin(),
            UE = LoadRoot->use_end(); UI != UE; ++UI)
-        if (((isa<LoadSDNode>(*UI) &&
-            cast<LoadSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
+        if (((isa<MemSDNode>(*UI) &&
+            cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
             UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
           Queue.push_back(*UI);
     }
diff --git a/llvm/test/CodeGen/PowerPC/unal-altivec-wint.ll b/llvm/test/CodeGen/PowerPC/unal-altivec-wint.ll
new file mode 100644
index 000000000000..7e0963f54b33
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/unal-altivec-wint.ll
@@ -0,0 +1,48 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare <4 x i32> @llvm.ppc.altivec.lvx(i8*) #1
+
+define <4 x i32> @test1(<4 x i32>* %h) #0 {
+entry:
+  %h1 = getelementptr <4 x i32>* %h, i64 1
+  %hv = bitcast <4 x i32>* %h1 to i8*
+  %vl = call <4 x i32> @llvm.ppc.altivec.lvx(i8* %hv)
+
+  %v0 = load <4 x i32>* %h, align 8
+
+  %a = add <4 x i32> %v0, %vl
+  ret <4 x i32> %a
+
+; CHECK-LABEL: @test1
+; CHECK: li [[REG:[0-9]+]], 16
+; CHECK-NOT: li {{[0-9]+}}, 15
+; CHECK-DAG: lvx {{[0-9]+}}, 0, 3
+; CHECK-DAG: lvx {{[0-9]+}}, 3, [[REG]]
+; CHECK: blr
+}
+
+declare void @llvm.ppc.altivec.stvx(<4 x i32>, i8*) #0
+
+define <4 x i32> @test2(<4 x i32>* %h, <4 x i32> %d) #0 {
+entry:
+  %h1 = getelementptr <4 x i32>* %h, i64 1
+  %hv = bitcast <4 x i32>* %h1 to i8*
+  call void @llvm.ppc.altivec.stvx(<4 x i32> %d, i8* %hv)
+
+  %v0 = load <4 x i32>* %h, align 8
+
+  ret <4 x i32> %v0
+
+; CHECK-LABEL: @test2
+; CHECK: li [[REG:[0-9]+]], 16
+; CHECK-NOT: li {{[0-9]+}}, 15
+; CHECK-DAG: lvx {{[0-9]+}}, 0, 3
+; CHECK-DAG: lvx {{[0-9]+}}, 3, [[REG]]
+; CHECK: blr
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+