forked from OSchip/llvm-project
[LoopReroll] Make root-finding more aggressive.
Allow using an instruction other than a mul or phi as the base for root-finding. For example, the included testcase includes a loop which requires using a getelementptr as the base for root-finding. Differential Revision: https://reviews.llvm.org/D26529 llvm-svn: 287588
This commit is contained in:
parent
6cad0115e1
commit
c0bba1a96d
|
@ -371,11 +371,12 @@ namespace {
|
||||||
protected:
|
protected:
|
||||||
typedef MapVector<Instruction*, BitVector> UsesTy;
|
typedef MapVector<Instruction*, BitVector> UsesTy;
|
||||||
|
|
||||||
bool findRootsRecursive(Instruction *IVU,
|
void findRootsRecursive(Instruction *IVU,
|
||||||
SmallInstructionSet SubsumedInsts);
|
SmallInstructionSet SubsumedInsts);
|
||||||
bool findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts);
|
bool findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts);
|
||||||
bool collectPossibleRoots(Instruction *Base,
|
bool collectPossibleRoots(Instruction *Base,
|
||||||
std::map<int64_t,Instruction*> &Roots);
|
std::map<int64_t,Instruction*> &Roots);
|
||||||
|
bool validateRootSet(DAGRootSet &DRS);
|
||||||
|
|
||||||
bool collectUsedInstructions(SmallInstructionSet &PossibleRedSet);
|
bool collectUsedInstructions(SmallInstructionSet &PossibleRedSet);
|
||||||
void collectInLoopUserSet(const SmallInstructionVector &Roots,
|
void collectInLoopUserSet(const SmallInstructionVector &Roots,
|
||||||
|
@ -827,7 +828,8 @@ collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
|
||||||
Roots[V] = cast<Instruction>(I);
|
Roots[V] = cast<Instruction>(I);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Roots.empty())
|
// Make sure we have at least two roots.
|
||||||
|
if (Roots.empty() || (Roots.size() == 1 && BaseUsers.empty()))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// If we found non-loop-inc, non-root users of Base, assume they are
|
// If we found non-loop-inc, non-root users of Base, assume they are
|
||||||
|
@ -861,40 +863,61 @@ collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool LoopReroll::DAGRootTracker::
|
void LoopReroll::DAGRootTracker::
|
||||||
findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) {
|
findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) {
|
||||||
// Does the user look like it could be part of a root set?
|
// Does the user look like it could be part of a root set?
|
||||||
// All its users must be simple arithmetic ops.
|
// All its users must be simple arithmetic ops.
|
||||||
if (I->getNumUses() > IL_MaxRerollIterations)
|
if (I->getNumUses() > IL_MaxRerollIterations)
|
||||||
return false;
|
return;
|
||||||
|
|
||||||
if ((I->getOpcode() == Instruction::Mul ||
|
if (I != IV && findRootsBase(I, SubsumedInsts))
|
||||||
I->getOpcode() == Instruction::PHI) &&
|
return;
|
||||||
I != IV &&
|
|
||||||
findRootsBase(I, SubsumedInsts))
|
|
||||||
return true;
|
|
||||||
|
|
||||||
SubsumedInsts.insert(I);
|
SubsumedInsts.insert(I);
|
||||||
|
|
||||||
for (User *V : I->users()) {
|
for (User *V : I->users()) {
|
||||||
Instruction *I = dyn_cast<Instruction>(V);
|
Instruction *I = cast<Instruction>(V);
|
||||||
if (is_contained(LoopIncs, I))
|
if (is_contained(LoopIncs, I))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (!I || !isSimpleArithmeticOp(I) ||
|
if (!isSimpleArithmeticOp(I))
|
||||||
!findRootsRecursive(I, SubsumedInsts))
|
continue;
|
||||||
return false;
|
|
||||||
|
// The recursive call makes a copy of SubsumedInsts.
|
||||||
|
findRootsRecursive(I, SubsumedInsts);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool LoopReroll::DAGRootTracker::validateRootSet(DAGRootSet &DRS) {
|
||||||
|
if (DRS.Roots.empty())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Consider a DAGRootSet with N-1 roots (so N different values including
|
||||||
|
// BaseInst).
|
||||||
|
// Define d = Roots[0] - BaseInst, which should be the same as
|
||||||
|
// Roots[I] - Roots[I-1] for all I in [1..N).
|
||||||
|
// Define D = BaseInst@J - BaseInst@J-1, where "@J" means the value at the
|
||||||
|
// loop iteration J.
|
||||||
|
//
|
||||||
|
// Now, For the loop iterations to be consecutive:
|
||||||
|
// D = d * N
|
||||||
|
const auto *ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
|
||||||
|
if (!ADR)
|
||||||
|
return false;
|
||||||
|
unsigned N = DRS.Roots.size() + 1;
|
||||||
|
const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), ADR);
|
||||||
|
const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N);
|
||||||
|
if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV))
|
||||||
|
return false;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool LoopReroll::DAGRootTracker::
|
bool LoopReroll::DAGRootTracker::
|
||||||
findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
|
findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
|
||||||
|
// The base of a RootSet must be an AddRec, so it can be erased.
|
||||||
// The base instruction needs to be a multiply so
|
const auto *IVU_ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IVU));
|
||||||
// that we can erase it.
|
if (!IVU_ADR || IVU_ADR->getLoop() != L)
|
||||||
if (IVU->getOpcode() != Instruction::Mul &&
|
|
||||||
IVU->getOpcode() != Instruction::PHI)
|
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
std::map<int64_t, Instruction*> V;
|
std::map<int64_t, Instruction*> V;
|
||||||
|
@ -910,6 +933,8 @@ findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
|
||||||
DAGRootSet DRS;
|
DAGRootSet DRS;
|
||||||
DRS.BaseInst = nullptr;
|
DRS.BaseInst = nullptr;
|
||||||
|
|
||||||
|
SmallVector<DAGRootSet, 16> PotentialRootSets;
|
||||||
|
|
||||||
for (auto &KV : V) {
|
for (auto &KV : V) {
|
||||||
if (!DRS.BaseInst) {
|
if (!DRS.BaseInst) {
|
||||||
DRS.BaseInst = KV.second;
|
DRS.BaseInst = KV.second;
|
||||||
|
@ -920,13 +945,22 @@ findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
|
||||||
DRS.Roots.push_back(KV.second);
|
DRS.Roots.push_back(KV.second);
|
||||||
} else {
|
} else {
|
||||||
// Linear sequence terminated.
|
// Linear sequence terminated.
|
||||||
RootSets.push_back(DRS);
|
if (!validateRootSet(DRS))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Construct a new DAGRootSet with the next sequence.
|
||||||
|
PotentialRootSets.push_back(DRS);
|
||||||
DRS.BaseInst = KV.second;
|
DRS.BaseInst = KV.second;
|
||||||
DRS.SubsumedInsts = SubsumedInsts;
|
|
||||||
DRS.Roots.clear();
|
DRS.Roots.clear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
RootSets.push_back(DRS);
|
|
||||||
|
if (!validateRootSet(DRS))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
PotentialRootSets.push_back(DRS);
|
||||||
|
|
||||||
|
RootSets.append(PotentialRootSets.begin(), PotentialRootSets.end());
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -940,8 +974,7 @@ bool LoopReroll::DAGRootTracker::findRoots() {
|
||||||
if (isLoopIncrement(IVU, IV))
|
if (isLoopIncrement(IVU, IV))
|
||||||
LoopIncs.push_back(cast<Instruction>(IVU));
|
LoopIncs.push_back(cast<Instruction>(IVU));
|
||||||
}
|
}
|
||||||
if (!findRootsRecursive(IV, SmallInstructionSet()))
|
findRootsRecursive(IV, SmallInstructionSet());
|
||||||
return false;
|
|
||||||
LoopIncs.push_back(IV);
|
LoopIncs.push_back(IV);
|
||||||
} else {
|
} else {
|
||||||
if (!findRootsBase(IV, SmallInstructionSet()))
|
if (!findRootsBase(IV, SmallInstructionSet()))
|
||||||
|
@ -961,31 +994,6 @@ bool LoopReroll::DAGRootTracker::findRoots() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// And ensure all loop iterations are consecutive. We rely on std::map
|
|
||||||
// providing ordered traversal.
|
|
||||||
for (auto &V : RootSets) {
|
|
||||||
const auto *ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(V.BaseInst));
|
|
||||||
if (!ADR)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
// Consider a DAGRootSet with N-1 roots (so N different values including
|
|
||||||
// BaseInst).
|
|
||||||
// Define d = Roots[0] - BaseInst, which should be the same as
|
|
||||||
// Roots[I] - Roots[I-1] for all I in [1..N).
|
|
||||||
// Define D = BaseInst@J - BaseInst@J-1, where "@J" means the value at the
|
|
||||||
// loop iteration J.
|
|
||||||
//
|
|
||||||
// Now, For the loop iterations to be consecutive:
|
|
||||||
// D = d * N
|
|
||||||
|
|
||||||
unsigned N = V.Roots.size() + 1;
|
|
||||||
const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(V.Roots[0]), ADR);
|
|
||||||
const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N);
|
|
||||||
if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV)) {
|
|
||||||
DEBUG(dbgs() << "LRR: Aborting because iterations are not consecutive\n");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Scale = RootSets[0].Roots.size() + 1;
|
Scale = RootSets[0].Roots.size() + 1;
|
||||||
|
|
||||||
if (Scale > IL_MaxRerollIterations) {
|
if (Scale > IL_MaxRerollIterations) {
|
||||||
|
@ -1498,8 +1506,8 @@ void LoopReroll::DAGRootTracker::replaceIV(Instruction *Inst,
|
||||||
{ // Limit the lifetime of SCEVExpander.
|
{ // Limit the lifetime of SCEVExpander.
|
||||||
const DataLayout &DL = Header->getModule()->getDataLayout();
|
const DataLayout &DL = Header->getModule()->getDataLayout();
|
||||||
SCEVExpander Expander(*SE, DL, "reroll");
|
SCEVExpander Expander(*SE, DL, "reroll");
|
||||||
Value *NewIV =
|
Value *NewIV = Expander.expandCodeFor(NewIVSCEV, Inst->getType(),
|
||||||
Expander.expandCodeFor(NewIVSCEV, InstIV->getType(), &Header->front());
|
Header->getFirstNonPHIOrDbg());
|
||||||
|
|
||||||
for (auto &KV : Uses)
|
for (auto &KV : Uses)
|
||||||
if (KV.second.find_first() == 0)
|
if (KV.second.find_first() == 0)
|
||||||
|
|
|
@ -575,6 +575,37 @@ for.end: ; preds = %for.body
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define void @gep-indexing(i32* nocapture %x) {
|
||||||
|
entry:
|
||||||
|
%call = tail call i32 @foo(i32 0) #1
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body: ; preds = %for.body, %entry
|
||||||
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||||
|
%0 = mul nsw i64 %indvars.iv, 3
|
||||||
|
%arrayidx = getelementptr inbounds i32, i32* %x, i64 %0
|
||||||
|
store i32 %call, i32* %arrayidx, align 4
|
||||||
|
%arrayidx4 = getelementptr inbounds i32, i32* %arrayidx, i64 1
|
||||||
|
store i32 %call, i32* %arrayidx4, align 4
|
||||||
|
%arrayidx9 = getelementptr inbounds i32, i32* %arrayidx, i64 2
|
||||||
|
store i32 %call, i32* %arrayidx9, align 4
|
||||||
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||||
|
%exitcond = icmp eq i64 %indvars.iv.next, 500
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
; CHECK-LABEL: @gep-indexing
|
||||||
|
; CHECK: for.body:
|
||||||
|
; CHECK-NEXT: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||||
|
; CHECK-NEXT: %scevgep = getelementptr i32, i32* %x, i64 %indvars.iv
|
||||||
|
; CHECK-NEXT: store i32 %call, i32* %scevgep, align 4
|
||||||
|
; CHECK-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||||
|
; CHECK-NEXT: %exitcond2 = icmp eq i32* %scevgep, %scevgep1
|
||||||
|
; CHECK-NEXT: br i1 %exitcond2, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end: ; preds = %for.body
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
define void @unordered_atomic_ops(i32* noalias %buf_0, i32* noalias %buf_1) {
|
define void @unordered_atomic_ops(i32* noalias %buf_0, i32* noalias %buf_1) {
|
||||||
; CHECK-LABEL: @unordered_atomic_ops(
|
; CHECK-LABEL: @unordered_atomic_ops(
|
||||||
|
|
Loading…
Reference in New Issue