[PowerPC] Recognize consecutive memory accesses from intrinsics
When generating unaligned vector loads, we need to search for other loads or stores nearby offset by one vector width. If we find one, then we know that we can safely generate another aligned load at that address. Otherwise, we must generate the next load using an offset of the vector width minus one byte (so we don't read off the end of the allocation if the base unaligned address happened to be aligned at runtime).

We had previously done this using only other vector loads and stores, but did not consider the PowerPC-specific vector load/store intrinsics. Now we'll also consider vector intrinsics. By itself, this change is a feature enhancement, but is a necessary step toward fixing the underlying problem behind PR19991.

llvm-svn: 214469
commit 3604bf7fe7 (parent 71ff3f223f)
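For reference, here is a minimal sketch of the kind of IR this change affects (not taken from the patch; the function name is made up, and it mirrors the new regression test at the end of this commit): an unaligned vector load whose neighbor, one vector width away, is accessed through the AltiVec lvx intrinsic rather than through an ordinary load. With this change, the combine can treat the intrinsic access as proof that a second aligned load at that address is safe, so it can use a full 16-byte offset instead of the conservative 15-byte one.

  declare <4 x i32> @llvm.ppc.altivec.lvx(i8*) nounwind readonly

  ; Hypothetical example: %p is loaded unaligned, and the next vector
  ; (one vector width away) is read with the lvx intrinsic.
  define <4 x i32> @sum_adjacent(<4 x i32>* %p) nounwind {
  entry:
    %lo = load <4 x i32>* %p, align 8              ; unaligned vector load
    %q  = getelementptr <4 x i32>* %p, i64 1       ; %p + one vector width
    %qb = bitcast <4 x i32>* %q to i8*
    %hi = call <4 x i32> @llvm.ppc.altivec.lvx(i8* %qb)
    %s  = add <4 x i32> %lo, %hi
    ret <4 x i32> %s
  }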
@@ -7549,16 +7549,12 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
   return SDValue();
 }

-// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
-// not enforce equality of the chain operands.
-static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base,
-                            unsigned Bytes, int Dist,
-                            SelectionDAG &DAG) {
-  EVT VT = LS->getMemoryVT();
+static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
+                               unsigned Bytes, int Dist,
+                               SelectionDAG &DAG) {
   if (VT.getSizeInBits() / 8 != Bytes)
     return false;

-  SDValue Loc = LS->getBasePtr();
   SDValue BaseLoc = Base->getBasePtr();
   if (Loc.getOpcode() == ISD::FrameIndex) {
     if (BaseLoc.getOpcode() != ISD::FrameIndex)
@@ -7589,6 +7585,64 @@ static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base,
   return false;
 }

+// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
+// not enforce equality of the chain operands.
+static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
+                            unsigned Bytes, int Dist,
+                            SelectionDAG &DAG) {
+  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
+    EVT VT = LS->getMemoryVT();
+    SDValue Loc = LS->getBasePtr();
+    return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
+  }
+
+  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
+    EVT VT;
+    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+    default: return false;
+    case Intrinsic::ppc_altivec_lvx:
+    case Intrinsic::ppc_altivec_lvxl:
+      VT = MVT::v4i32;
+      break;
+    case Intrinsic::ppc_altivec_lvebx:
+      VT = MVT::i8;
+      break;
+    case Intrinsic::ppc_altivec_lvehx:
+      VT = MVT::i16;
+      break;
+    case Intrinsic::ppc_altivec_lvewx:
+      VT = MVT::i32;
+      break;
+    }
+
+    return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
+  }
+
+  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
+    EVT VT;
+    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+    default: return false;
+    case Intrinsic::ppc_altivec_stvx:
+    case Intrinsic::ppc_altivec_stvxl:
+      VT = MVT::v4i32;
+      break;
+    case Intrinsic::ppc_altivec_stvebx:
+      VT = MVT::i8;
+      break;
+    case Intrinsic::ppc_altivec_stvehx:
+      VT = MVT::i16;
+      break;
+    case Intrinsic::ppc_altivec_stvewx:
+      VT = MVT::i32;
+      break;
+    }
+
+    return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
+  }
+
+  return false;
+}
+
 // Return true is there is a nearyby consecutive load to the one provided
 // (regardless of alignment). We search up and down the chain, looking though
 // token factors and other loads (but nothing else). As a result, a true result
@@ -7610,7 +7664,7 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
     if (!Visited.insert(ChainNext))
       continue;

-    if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) {
+    if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
       if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
         return true;

@@ -7641,14 +7695,14 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
       if (!Visited.insert(LoadRoot))
         continue;

-      if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(LoadRoot))
+      if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
         if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
           return true;

       for (SDNode::use_iterator UI = LoadRoot->use_begin(),
            UE = LoadRoot->use_end(); UI != UE; ++UI)
-        if (((isa<LoadSDNode>(*UI) &&
-            cast<LoadSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
+        if (((isa<MemSDNode>(*UI) &&
+            cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
             UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
           Queue.push_back(*UI);
     }
@@ -0,0 +1,48 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare <4 x i32> @llvm.ppc.altivec.lvx(i8*) #1
+
+define <4 x i32> @test1(<4 x i32>* %h) #0 {
+entry:
+  %h1 = getelementptr <4 x i32>* %h, i64 1
+  %hv = bitcast <4 x i32>* %h1 to i8*
+  %vl = call <4 x i32> @llvm.ppc.altivec.lvx(i8* %hv)
+
+  %v0 = load <4 x i32>* %h, align 8
+
+  %a = add <4 x i32> %v0, %vl
+  ret <4 x i32> %a
+
+; CHECK-LABEL: @test1
+; CHECK: li [[REG:[0-9]+]], 16
+; CHECK-NOT: li {{[0-9]+}}, 15
+; CHECK-DAG: lvx {{[0-9]+}}, 0, 3
+; CHECK-DAG: lvx {{[0-9]+}}, 3, [[REG]]
+; CHECK: blr
+}
+
+declare void @llvm.ppc.altivec.stvx(<4 x i32>, i8*) #0
+
+define <4 x i32> @test2(<4 x i32>* %h, <4 x i32> %d) #0 {
+entry:
+  %h1 = getelementptr <4 x i32>* %h, i64 1
+  %hv = bitcast <4 x i32>* %h1 to i8*
+  call void @llvm.ppc.altivec.stvx(<4 x i32> %d, i8* %hv)
+
+  %v0 = load <4 x i32>* %h, align 8
+
+  ret <4 x i32> %v0
+
+; CHECK-LABEL: @test2
+; CHECK: li [[REG:[0-9]+]], 16
+; CHECK-NOT: li {{[0-9]+}}, 15
+; CHECK-DAG: lvx {{[0-9]+}}, 0, 3
+; CHECK-DAG: lvx {{[0-9]+}}, 3, [[REG]]
+; CHECK: blr
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+