forked from OSchip/llvm-project
Revert "[SimplifyCFG] accumulate bonus insts cost"
This reverts commit e5581df60a.
This causes major compile-time regressions, about 2-3% end-to-end
on CTMark.
This commit is contained in:
parent
4973eee122
commit
dd61726d5b
|
@ -16,7 +16,6 @@
|
|||
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/IR/Dominators.h"
|
||||
#include "llvm/IR/ValueMap.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
|
||||
#include <cstdint>
|
||||
|
@ -165,26 +164,6 @@ bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
|
|||
/// values, but instcombine orders them so it usually won't matter.
|
||||
bool EliminateDuplicatePHINodes(BasicBlock *BB);
|
||||
|
||||
/// Class to track cost of simplify CFG transformations.
|
||||
class SimplifyCFGCostTracker {
|
||||
/// Number of bonus instructions due to folding branches into predecessors.
|
||||
/// E.g. folding
|
||||
/// if (cond1) return false;
|
||||
/// if (cond2) return false;
|
||||
/// return true;
|
||||
/// into
|
||||
/// if (cond1 | cond2) return false;
|
||||
/// return true;
|
||||
/// In this case cond2 is always executed whereas originally it may be
|
||||
/// skipped due to early exit of cond1. 'cond2' is called a bonus instruction,
|
||||
/// and such bonus instructions could accumulate for unrolled loops, therefore
|
||||
/// use a value map to accumulate their costs across transformations.
|
||||
ValueMap<BasicBlock *, unsigned> NumBonusInsts;
|
||||
|
||||
public:
|
||||
void updateNumBonusInsts(BasicBlock *Parent, unsigned InstCount);
|
||||
unsigned getNumBonusInsts(BasicBlock *Parent);
|
||||
};
|
||||
/// This function is used to do simplification of a CFG. For example, it
|
||||
/// adjusts branches to branches to eliminate the extra hop, it eliminates
|
||||
/// unreachable basic blocks, and does other peephole optimization of the CFG.
|
||||
|
@ -195,8 +174,7 @@ extern cl::opt<bool> RequireAndPreserveDomTree;
|
|||
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
|
||||
DomTreeUpdater *DTU = nullptr,
|
||||
const SimplifyCFGOptions &Options = {},
|
||||
ArrayRef<WeakVH> LoopHeaders = {},
|
||||
SimplifyCFGCostTracker *CostTracker = nullptr);
|
||||
ArrayRef<WeakVH> LoopHeaders = {});
|
||||
|
||||
/// This function is used to flatten a CFG. For example, it uses parallel-and
|
||||
/// and parallel-or mode to collapse if-conditions and merge if-regions with
|
||||
|
@ -206,8 +184,7 @@ bool FlattenCFG(BasicBlock *BB, AAResults *AA = nullptr);
|
|||
/// If this basic block is ONLY a setcc and a branch, and if a predecessor
|
||||
/// branches to us and one of our successors, fold the setcc into the
|
||||
/// predecessor and use logical operations to pick the right destination.
|
||||
bool FoldBranchToCommonDest(BranchInst *BI, SimplifyCFGCostTracker &CostTracker,
|
||||
DomTreeUpdater *DTU = nullptr,
|
||||
bool FoldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU = nullptr,
|
||||
MemorySSAUpdater *MSSAU = nullptr,
|
||||
const TargetTransformInfo *TTI = nullptr,
|
||||
unsigned BonusInstThreshold = 1);
|
||||
|
|
|
@ -221,8 +221,7 @@ static bool tailMergeBlocksWithSimilarFunctionTerminators(Function &F,
|
|||
/// iterating until no more changes are made.
|
||||
static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
|
||||
DomTreeUpdater *DTU,
|
||||
const SimplifyCFGOptions &Options,
|
||||
SimplifyCFGCostTracker &CostTracker) {
|
||||
const SimplifyCFGOptions &Options) {
|
||||
bool Changed = false;
|
||||
bool LocalChange = true;
|
||||
|
||||
|
@ -253,7 +252,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
|
|||
while (BBIt != F.end() && DTU->isBBPendingDeletion(&*BBIt))
|
||||
++BBIt;
|
||||
}
|
||||
if (simplifyCFG(&BB, TTI, DTU, Options, LoopHeaders, &CostTracker)) {
|
||||
if (simplifyCFG(&BB, TTI, DTU, Options, LoopHeaders)) {
|
||||
LocalChange = true;
|
||||
++NumSimpl;
|
||||
}
|
||||
|
@ -267,13 +266,11 @@ static bool simplifyFunctionCFGImpl(Function &F, const TargetTransformInfo &TTI,
|
|||
DominatorTree *DT,
|
||||
const SimplifyCFGOptions &Options) {
|
||||
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
|
||||
SimplifyCFGCostTracker CostTracker;
|
||||
|
||||
bool EverChanged = removeUnreachableBlocks(F, DT ? &DTU : nullptr);
|
||||
EverChanged |=
|
||||
tailMergeBlocksWithSimilarFunctionTerminators(F, DT ? &DTU : nullptr);
|
||||
EverChanged |=
|
||||
iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options, CostTracker);
|
||||
EverChanged |= iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options);
|
||||
|
||||
// If neither pass changed anything, we're done.
|
||||
if (!EverChanged) return false;
|
||||
|
@ -287,8 +284,7 @@ static bool simplifyFunctionCFGImpl(Function &F, const TargetTransformInfo &TTI,
|
|||
return true;
|
||||
|
||||
do {
|
||||
EverChanged = iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options,
|
||||
CostTracker);
|
||||
EverChanged = iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options);
|
||||
EverChanged |= removeUnreachableBlocks(F, DT ? &DTU : nullptr);
|
||||
} while (EverChanged);
|
||||
|
||||
|
|
|
@ -480,7 +480,6 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
|
|||
DominatorTree *DT, LoopInfo *LI,
|
||||
ScalarEvolution *SE, AssumptionCache *AC,
|
||||
MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
|
||||
SimplifyCFGCostTracker CostTracker;
|
||||
bool Changed = false;
|
||||
if (MSSAU && VerifyMemorySSA)
|
||||
MSSAU->getMemorySSA()->verifyMemorySSA();
|
||||
|
@ -667,7 +666,7 @@ ReprocessLoop:
|
|||
// The block has now been cleared of all instructions except for
|
||||
// a comparison and a conditional branch. SimplifyCFG may be able
|
||||
// to fold it now.
|
||||
if (!FoldBranchToCommonDest(BI, CostTracker, /*DTU=*/nullptr, MSSAU))
|
||||
if (!FoldBranchToCommonDest(BI, /*DTU=*/nullptr, MSSAU))
|
||||
continue;
|
||||
|
||||
// Success. The block is now dead, so remove it from the loop,
|
||||
|
|
|
@ -207,21 +207,6 @@ STATISTIC(NumInvokes,
|
|||
STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
|
||||
STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
|
||||
|
||||
namespace llvm {
|
||||
|
||||
void SimplifyCFGCostTracker::updateNumBonusInsts(BasicBlock *BB,
|
||||
unsigned InstCount) {
|
||||
auto Loc = NumBonusInsts.find(BB);
|
||||
if (Loc == NumBonusInsts.end())
|
||||
Loc = NumBonusInsts.insert({BB, 0}).first;
|
||||
Loc->second = Loc->second + InstCount;
|
||||
}
|
||||
unsigned SimplifyCFGCostTracker::getNumBonusInsts(BasicBlock *BB) {
|
||||
return NumBonusInsts.lookup(BB);
|
||||
}
|
||||
|
||||
} // namespace llvm
|
||||
|
||||
namespace {
|
||||
|
||||
// The first field contains the value that the switch produces when a certain
|
||||
|
@ -258,10 +243,6 @@ class SimplifyCFGOpt {
|
|||
ArrayRef<WeakVH> LoopHeaders;
|
||||
const SimplifyCFGOptions &Options;
|
||||
bool Resimplify;
|
||||
// Accumulates number of bonus instructions due to merging basic blocks
|
||||
// of common destination.
|
||||
SimplifyCFGCostTracker *CostTracker;
|
||||
SimplifyCFGCostTracker LocalCostTracker;
|
||||
|
||||
Value *isValueEqualityComparison(Instruction *TI);
|
||||
BasicBlock *GetValueEqualityComparisonCases(
|
||||
|
@ -305,10 +286,8 @@ class SimplifyCFGOpt {
|
|||
public:
|
||||
SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
|
||||
const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
|
||||
const SimplifyCFGOptions &Opts,
|
||||
SimplifyCFGCostTracker *CostTracker_)
|
||||
: TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts),
|
||||
CostTracker(CostTracker_ ? CostTracker_ : &LocalCostTracker) {
|
||||
const SimplifyCFGOptions &Opts)
|
||||
: TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
|
||||
assert((!DTU || !DTU->hasPostDomTree()) &&
|
||||
"SimplifyCFG is not yet capable of maintaining validity of a "
|
||||
"PostDomTree, so don't ask for it.");
|
||||
|
@ -3645,9 +3624,8 @@ static bool isVectorOp(Instruction &I) {
|
|||
/// If this basic block is simple enough, and if a predecessor branches to us
|
||||
/// and one of our successors, fold the block into the predecessor and use
|
||||
/// logical operations to pick the right destination.
|
||||
bool llvm::FoldBranchToCommonDest(BranchInst *BI,
|
||||
SimplifyCFGCostTracker &CostTracker,
|
||||
DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU,
|
||||
bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
|
||||
MemorySSAUpdater *MSSAU,
|
||||
const TargetTransformInfo *TTI,
|
||||
unsigned BonusInstThreshold) {
|
||||
// If this block ends with an unconditional branch,
|
||||
|
@ -3719,6 +3697,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI,
|
|||
// as "bonus instructions", and only allow this transformation when the
|
||||
// number of the bonus instructions we'll need to create when cloning into
|
||||
// each predecessor does not exceed a certain threshold.
|
||||
unsigned NumBonusInsts = 0;
|
||||
bool SawVectorOp = false;
|
||||
const unsigned PredCount = Preds.size();
|
||||
for (Instruction &I : *BB) {
|
||||
|
@ -3737,13 +3716,12 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI,
|
|||
// predecessor. Ignore free instructions.
|
||||
if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
|
||||
TargetTransformInfo::TCC_Free) {
|
||||
for (auto PredBB : Preds) {
|
||||
CostTracker.updateNumBonusInsts(PredBB, PredCount);
|
||||
// Early exits once we reach the limit.
|
||||
if (CostTracker.getNumBonusInsts(PredBB) >
|
||||
BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
|
||||
return false;
|
||||
}
|
||||
NumBonusInsts += PredCount;
|
||||
|
||||
// Early exits once we reach the limit.
|
||||
if (NumBonusInsts >
|
||||
BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
|
||||
return false;
|
||||
}
|
||||
|
||||
auto IsBCSSAUse = [BB, &I](Use &U) {
|
||||
|
@ -3757,12 +3735,10 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI,
|
|||
if (!all_of(I.uses(), IsBCSSAUse))
|
||||
return false;
|
||||
}
|
||||
for (auto PredBB : Preds) {
|
||||
if (CostTracker.getNumBonusInsts(PredBB) >
|
||||
BonusInstThreshold *
|
||||
(SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
|
||||
return false;
|
||||
}
|
||||
if (NumBonusInsts >
|
||||
BonusInstThreshold *
|
||||
(SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
|
||||
return false;
|
||||
|
||||
// Ok, we have the budget. Perform the transformation.
|
||||
for (BasicBlock *PredBlock : Preds) {
|
||||
|
@ -6913,7 +6889,7 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
|
|||
// branches to us and our successor, fold the comparison into the
|
||||
// predecessor and use logical operations to update the incoming value
|
||||
// for PHI nodes in common successor.
|
||||
if (FoldBranchToCommonDest(BI, *CostTracker, DTU, /*MSSAU=*/nullptr, &TTI,
|
||||
if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
|
||||
Options.BonusInstThreshold))
|
||||
return requestResimplify();
|
||||
return false;
|
||||
|
@ -6982,7 +6958,7 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
|
|||
// If this basic block is ONLY a compare and a branch, and if a predecessor
|
||||
// branches to us and one of our successors, fold the comparison into the
|
||||
// predecessor and use logical operations to pick the right destination.
|
||||
if (FoldBranchToCommonDest(BI, *CostTracker, DTU, /*MSSAU=*/nullptr, &TTI,
|
||||
if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
|
||||
Options.BonusInstThreshold))
|
||||
return requestResimplify();
|
||||
|
||||
|
@ -7281,9 +7257,8 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
|
|||
|
||||
bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
|
||||
DomTreeUpdater *DTU, const SimplifyCFGOptions &Options,
|
||||
ArrayRef<WeakVH> LoopHeaders,
|
||||
SimplifyCFGCostTracker *CostTracker) {
|
||||
ArrayRef<WeakVH> LoopHeaders) {
|
||||
return SimplifyCFGOpt(TTI, DTU, BB->getModule()->getDataLayout(), LoopHeaders,
|
||||
Options, CostTracker)
|
||||
Options)
|
||||
.run(BB);
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop-unroll<peeling;no-runtime>,simplifycfg<bonus-inst-threshold=3>,instcombine' -unroll-force-peel-count=3 -verify-dom-info | FileCheck %s
|
||||
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop-unroll<peeling;no-runtime>,simplifycfg,instcombine' -unroll-force-peel-count=3 -verify-dom-info | FileCheck %s
|
||||
|
||||
define void @basic(i32 %K, i32 %N) {
|
||||
; CHECK-LABEL: @basic(
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -bonus-inst-threshold=4 -O2 -S < %s | FileCheck %s
|
||||
; RUN: opt -O2 -S < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64--"
|
||||
|
|
|
@ -3,12 +3,9 @@
|
|||
|
||||
%struct.S = type { [4 x i32] }
|
||||
|
||||
; Check the second basic block is folded into the first basic block
|
||||
; since it has one bonus instruction. The third basic block is not
|
||||
; folded into the first basic block since the accumulated bonus
|
||||
; instructions will exceed the default threshold of 1. The fourth basic
|
||||
; block is folded into the third basic block since the accumulated
|
||||
; bonus instruction cost is 1.
|
||||
; Check the second, third, and fourth basic blocks are folded into
|
||||
; the first basic block since each has one bonus instruction, which
|
||||
; does not exceed the default bonus instruction threshold of 1.
|
||||
|
||||
define i1 @test1(i32 %0, i32 %1, i32 %2, i32 %3) {
|
||||
; CHECK-LABEL: @test1(
|
||||
|
@ -18,18 +15,14 @@ define i1 @test1(i32 %0, i32 %1, i32 %2, i32 %3) {
|
|||
; CHECK-NEXT: [[MUL1:%.*]] = mul i32 [[TMP1:%.*]], [[TMP1]]
|
||||
; CHECK-NEXT: [[CMP2_1:%.*]] = icmp sgt i32 [[MUL1]], 0
|
||||
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP2]], i1 true, i1 [[CMP2_1]]
|
||||
; CHECK-NEXT: br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[FOR_COND_1:%.*]]
|
||||
; CHECK: for.cond.1:
|
||||
; CHECK-NEXT: [[MUL2:%.*]] = mul i32 [[TMP2:%.*]], [[TMP2]]
|
||||
; CHECK-NEXT: [[CMP2_2:%.*]] = icmp sgt i32 [[MUL2]], 0
|
||||
; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 true, i1 [[CMP2_2]]
|
||||
; CHECK-NEXT: [[MUL3:%.*]] = mul i32 [[TMP3:%.*]], [[TMP3]]
|
||||
; CHECK-NEXT: [[CMP2_3:%.*]] = icmp sgt i32 [[MUL3]], 0
|
||||
; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[CMP2_2]], i1 true, i1 [[CMP2_3]]
|
||||
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[OR_COND1]], i1 false, i1 true
|
||||
; CHECK-NEXT: br label [[CLEANUP]]
|
||||
; CHECK: cleanup:
|
||||
; CHECK-NEXT: [[CMP:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[FOR_COND_1]] ]
|
||||
; CHECK-NEXT: ret i1 [[CMP]]
|
||||
; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 true, i1 [[CMP2_3]]
|
||||
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[OR_COND2]], i1 false, i1 true
|
||||
; CHECK-NEXT: ret i1 [[SPEC_SELECT]]
|
||||
;
|
||||
entry:
|
||||
%mul0 = mul i32 %0, %0
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s --check-prefix=NORMAL
|
||||
; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=3 | FileCheck %s --check-prefix=AGGRESSIVE
|
||||
; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=6 | FileCheck %s --check-prefix=WAYAGGRESSIVE
|
||||
; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=2 | FileCheck %s --check-prefix=AGGRESSIVE
|
||||
; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=4 | FileCheck %s --check-prefix=WAYAGGRESSIVE
|
||||
; RUN: opt %s -passes=simplifycfg -S | FileCheck %s --check-prefix=NORMAL
|
||||
; RUN: opt %s -passes='simplifycfg<bonus-inst-threshold=3>' -S | FileCheck %s --check-prefix=AGGRESSIVE
|
||||
; RUN: opt %s -passes='simplifycfg<bonus-inst-threshold=6>' -S | FileCheck %s --check-prefix=WAYAGGRESSIVE
|
||||
; RUN: opt %s -passes='simplifycfg<bonus-inst-threshold=2>' -S | FileCheck %s --check-prefix=AGGRESSIVE
|
||||
; RUN: opt %s -passes='simplifycfg<bonus-inst-threshold=4>' -S | FileCheck %s --check-prefix=WAYAGGRESSIVE
|
||||
|
||||
define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) {
|
||||
; NORMAL-LABEL: @foo(
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -S -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -bonus-inst-threshold=1 | FileCheck --check-prefixes=ALL,THR1 %s
|
||||
; RUN: opt < %s -S -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -bonus-inst-threshold=3 | FileCheck --check-prefixes=ALL,THR2 %s
|
||||
; RUN: opt < %s -S -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -bonus-inst-threshold=2 | FileCheck --check-prefixes=ALL,THR2 %s
|
||||
|
||||
declare void @sideeffect0()
|
||||
declare void @sideeffect1()
|
||||
|
@ -10,7 +10,7 @@ declare i1 @gen1()
|
|||
|
||||
; Here we'd want to duplicate %v3_adj into two predecessors,
|
||||
; but -bonus-inst-threshold=1 says that we can only clone it into one.
|
||||
; With -bonus-inst-threshold=3 we can clone it into both though.
|
||||
; With -bonus-inst-threshold=2 we can clone it into both though.
|
||||
define void @two_preds_with_extra_op(i8 %v0, i8 %v1, i8 %v2, i8 %v3) {
|
||||
; THR1-LABEL: @two_preds_with_extra_op(
|
||||
; THR1-NEXT: entry:
|
||||
|
|
Loading…
Reference in New Issue