forked from OSchip/llvm-project
[SCEVExpander] Make findExistingExpansion smarter
Summary: Extending findExistingExpansion can use existing value in ExprValueMap. This patch gives 0.3~0.5% performance improvements on benchmarks(test-suite, spec2000, spec2006, commercial benchmark) Reviewers: mzolotukhin, sanjoy, zzheng Differential Revision: http://reviews.llvm.org/D15559 llvm-svn: 260938
This commit is contained in:
parent
e89a179598
commit
6ebdc14cf1
|
@ -263,6 +263,9 @@ namespace llvm {
|
||||||
const SCEV *const *op_end,
|
const SCEV *const *op_end,
|
||||||
PointerType *PTy, Type *Ty, Value *V);
|
PointerType *PTy, Type *Ty, Value *V);
|
||||||
|
|
||||||
|
/// \brief Find a previous Value in ExprValueMap for expand.
|
||||||
|
Value *FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt);
|
||||||
|
|
||||||
Value *expand(const SCEV *S);
|
Value *expand(const SCEV *S);
|
||||||
|
|
||||||
/// \brief Insert code to directly compute the specified SCEV expression
|
/// \brief Insert code to directly compute the specified SCEV expression
|
||||||
|
|
|
@ -1598,6 +1598,34 @@ Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
|
||||||
return V;
|
return V;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S,
|
||||||
|
const Instruction *InsertPt) {
|
||||||
|
SetVector<Value *> *Set = SE.getSCEVValues(S);
|
||||||
|
// If the expansion is not in CanonicalMode, and the SCEV contains any
|
||||||
|
// sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
|
||||||
|
if (CanonicalMode || !SE.containsAddRecurrence(S)) {
|
||||||
|
// If S is scConstant, it may be worse to reuse an existing Value.
|
||||||
|
if (S->getSCEVType() != scConstant && Set) {
|
||||||
|
// Choose a Value from the set which dominates the insertPt.
|
||||||
|
// insertPt should be inside the Value's parent loop so as not to break
|
||||||
|
// the LCSSA form.
|
||||||
|
for (auto const &Ent : *Set) {
|
||||||
|
Instruction *EntInst = nullptr;
|
||||||
|
if (Ent && isa<Instruction>(Ent) &&
|
||||||
|
(EntInst = cast<Instruction>(Ent)) &&
|
||||||
|
S->getType() == Ent->getType() &&
|
||||||
|
EntInst->getFunction() == InsertPt->getFunction() &&
|
||||||
|
SE.DT.dominates(EntInst, InsertPt) &&
|
||||||
|
(SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
|
||||||
|
SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt))) {
|
||||||
|
return Ent;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
// The expansion of SCEV will either reuse a previous Value in ExprValueMap,
|
// The expansion of SCEV will either reuse a previous Value in ExprValueMap,
|
||||||
// or expand the SCEV literally. Specifically, if the expansion is in LSRMode,
|
// or expand the SCEV literally. Specifically, if the expansion is in LSRMode,
|
||||||
// and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded
|
// and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded
|
||||||
|
@ -1643,31 +1671,8 @@ Value *SCEVExpander::expand(const SCEV *S) {
|
||||||
Builder.SetInsertPoint(InsertPt);
|
Builder.SetInsertPoint(InsertPt);
|
||||||
|
|
||||||
// Expand the expression into instructions.
|
// Expand the expression into instructions.
|
||||||
SetVector<Value *> *Set = SE.getSCEVValues(S);
|
Value *V = FindValueInExprValueMap(S, InsertPt);
|
||||||
Value *V = nullptr;
|
|
||||||
// If the expansion is not in CanonicalMode, and the SCEV contains any
|
|
||||||
// sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
|
|
||||||
if (CanonicalMode || !SE.containsAddRecurrence(S)) {
|
|
||||||
// If S is scConstant, it may be worse to reuse an existing Value.
|
|
||||||
if (S->getSCEVType() != scConstant && Set) {
|
|
||||||
// Choose a Value from the set which dominates the insertPt.
|
|
||||||
// insertPt should be inside the Value's parent loop so as not to break
|
|
||||||
// the LCSSA form.
|
|
||||||
for (auto const &Ent : *Set) {
|
|
||||||
Instruction *EntInst = nullptr;
|
|
||||||
if (Ent && isa<Instruction>(Ent) &&
|
|
||||||
(EntInst = cast<Instruction>(Ent)) &&
|
|
||||||
S->getType() == Ent->getType() &&
|
|
||||||
EntInst->getFunction() == InsertPt->getFunction() &&
|
|
||||||
SE.DT.dominates(EntInst, InsertPt) &&
|
|
||||||
(SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
|
|
||||||
SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt))) {
|
|
||||||
V = Ent;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!V)
|
if (!V)
|
||||||
V = visit(S);
|
V = visit(S);
|
||||||
|
|
||||||
|
@ -1877,6 +1882,11 @@ Value *SCEVExpander::findExistingExpansion(const SCEV *S,
|
||||||
return RHS;
|
return RHS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Use expand's logic which is used for reusing a previous Value in
|
||||||
|
// ExprValueMap.
|
||||||
|
if (Value *Val = FindValueInExprValueMap(S, At))
|
||||||
|
return Val;
|
||||||
|
|
||||||
// There is potential to make this significantly smarter, but this simple
|
// There is potential to make this significantly smarter, but this simple
|
||||||
// heuristic already gets some interesting cases.
|
// heuristic already gets some interesting cases.
|
||||||
|
|
||||||
|
|
|
@ -311,9 +311,12 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
BasicBlock *Header = L->getHeader();
|
BasicBlock *Header = L->getHeader();
|
||||||
|
BasicBlock *PH = L->getLoopPreheader();
|
||||||
|
BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
|
||||||
const DataLayout &DL = Header->getModule()->getDataLayout();
|
const DataLayout &DL = Header->getModule()->getDataLayout();
|
||||||
SCEVExpander Expander(*SE, DL, "loop-unroll");
|
SCEVExpander Expander(*SE, DL, "loop-unroll");
|
||||||
if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L))
|
if (!AllowExpensiveTripCount &&
|
||||||
|
Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// We only handle cases when the unroll factor is a power of 2.
|
// We only handle cases when the unroll factor is a power of 2.
|
||||||
|
@ -331,13 +334,12 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
|
||||||
if (Loop *ParentLoop = L->getParentLoop())
|
if (Loop *ParentLoop = L->getParentLoop())
|
||||||
SE->forgetLoop(ParentLoop);
|
SE->forgetLoop(ParentLoop);
|
||||||
|
|
||||||
BasicBlock *PH = L->getLoopPreheader();
|
|
||||||
BasicBlock *Latch = L->getLoopLatch();
|
BasicBlock *Latch = L->getLoopLatch();
|
||||||
// It helps to split the original preheader twice, one for the end of the
|
// It helps to split the original preheader twice, one for the end of the
|
||||||
// prolog code and one for a new loop preheader.
|
// prolog code and one for a new loop preheader.
|
||||||
BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI);
|
BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI);
|
||||||
BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI);
|
BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI);
|
||||||
BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
|
PreHeaderBR = cast<BranchInst>(PH->getTerminator());
|
||||||
|
|
||||||
// Compute the number of extra iterations required, which is:
|
// Compute the number of extra iterations required, which is:
|
||||||
// extra iterations = run-time trip count % (loop unroll factor + 1)
|
// extra iterations = run-time trip count % (loop unroll factor + 1)
|
||||||
|
|
|
@ -24,4 +24,38 @@ loopexit: ; preds = %loop
|
||||||
ret i32 0
|
ret i32 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
;; Though SCEV for loop tripcount contains division,
|
||||||
|
;; it shouldn't be considered expensive, since the division already
|
||||||
|
;; exists in the code and we don't need to expand it once more.
|
||||||
|
;; Thus, it shouldn't prevent us from unrolling the loop.
|
||||||
|
|
||||||
|
define i32 @test2(i64* %loc, i64 %conv7) {
|
||||||
|
; CHECK-LABEL: @test2(
|
||||||
|
; CHECK: udiv
|
||||||
|
; CHECK: udiv
|
||||||
|
; CHECK-NOT: udiv
|
||||||
|
; CHECK-LABEL: for.body.prol
|
||||||
|
entry:
|
||||||
|
%rem0 = load i64, i64* %loc, align 8
|
||||||
|
%ExpensiveComputation = udiv i64 %rem0, 42 ; <<< Extra computations are added to the trip-count expression
|
||||||
|
br label %bb1
|
||||||
|
bb1:
|
||||||
|
%div11 = udiv i64 %ExpensiveComputation, %conv7
|
||||||
|
%cmp.i38 = icmp ugt i64 %div11, 1
|
||||||
|
%div12 = select i1 %cmp.i38, i64 %div11, i64 1
|
||||||
|
br label %for.body
|
||||||
|
for.body:
|
||||||
|
%rem1 = phi i64 [ %rem0, %bb1 ], [ %rem2, %for.body ]
|
||||||
|
%k1 = phi i64 [ %div12, %bb1 ], [ %dec, %for.body ]
|
||||||
|
%mul1 = mul i64 %rem1, 48271
|
||||||
|
%rem2 = urem i64 %mul1, 2147483647
|
||||||
|
%dec = add i64 %k1, -1
|
||||||
|
%cmp = icmp eq i64 %dec, 0
|
||||||
|
br i1 %cmp, label %exit, label %for.body
|
||||||
|
exit:
|
||||||
|
%rem3 = phi i64 [ %rem2, %for.body ]
|
||||||
|
store i64 %rem3, i64* %loc, align 8
|
||||||
|
ret i32 0
|
||||||
|
}
|
||||||
|
|
||||||
!0 = !{i64 1, i64 100}
|
!0 = !{i64 1, i64 100}
|
||||||
|
|
Loading…
Reference in New Issue