Revert "[SimplifyCFG] accumulate bonus insts cost"

This reverts commit e5581df60a.

This causes major compile-time regressions, about 2-3% end-to-end
on CTMark.
This commit is contained in:
Nikita Popov 2022-09-19 14:44:39 +02:00
parent 4973eee122
commit dd61726d5b
9 changed files with 41 additions and 101 deletions

View File

@ -16,7 +16,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
#include <cstdint>
@ -165,26 +164,6 @@ bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
/// values, but instcombine orders them so it usually won't matter.
bool EliminateDuplicatePHINodes(BasicBlock *BB);
/// Class to track cost of simplify CFG transformations.
class SimplifyCFGCostTracker {
/// Number of bonus instructions due to folding branches into predecessors.
/// E.g. folding
/// if (cond1) return false;
/// if (cond2) return false;
/// return true;
/// into
/// if (cond1 | cond2) return false;
/// return true;
/// In this case cond2 is always executed whereas originally it may be
/// evicted due to early exit of cond1. 'cond2' is called bonus instructions
/// and such bonus instructions could accumulate for unrolled loops, therefore
/// use a value map to accumulate their costs across transformations.
ValueMap<BasicBlock *, unsigned> NumBonusInsts;
public:
void updateNumBonusInsts(BasicBlock *Parent, unsigned InstCount);
unsigned getNumBonusInsts(BasicBlock *Parent);
};
/// This function is used to do simplification of a CFG. For example, it
/// adjusts branches to branches to eliminate the extra hop, it eliminates
/// unreachable basic blocks, and does other peephole optimization of the CFG.
@ -195,8 +174,7 @@ extern cl::opt<bool> RequireAndPreserveDomTree;
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
DomTreeUpdater *DTU = nullptr,
const SimplifyCFGOptions &Options = {},
ArrayRef<WeakVH> LoopHeaders = {},
SimplifyCFGCostTracker *CostTracker = nullptr);
ArrayRef<WeakVH> LoopHeaders = {});
/// This function is used to flatten a CFG. For example, it uses parallel-and
/// and parallel-or mode to collapse if-conditions and merge if-regions with
@ -206,8 +184,7 @@ bool FlattenCFG(BasicBlock *BB, AAResults *AA = nullptr);
/// If this basic block is ONLY a setcc and a branch, and if a predecessor
/// branches to us and one of our successors, fold the setcc into the
/// predecessor and use logical operations to pick the right destination.
bool FoldBranchToCommonDest(BranchInst *BI, SimplifyCFGCostTracker &CostTracker,
DomTreeUpdater *DTU = nullptr,
bool FoldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU = nullptr,
MemorySSAUpdater *MSSAU = nullptr,
const TargetTransformInfo *TTI = nullptr,
unsigned BonusInstThreshold = 1);

View File

@ -221,8 +221,7 @@ static bool tailMergeBlocksWithSimilarFunctionTerminators(Function &F,
/// iterating until no more changes are made.
static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
DomTreeUpdater *DTU,
const SimplifyCFGOptions &Options,
SimplifyCFGCostTracker &CostTracker) {
const SimplifyCFGOptions &Options) {
bool Changed = false;
bool LocalChange = true;
@ -253,7 +252,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
while (BBIt != F.end() && DTU->isBBPendingDeletion(&*BBIt))
++BBIt;
}
if (simplifyCFG(&BB, TTI, DTU, Options, LoopHeaders, &CostTracker)) {
if (simplifyCFG(&BB, TTI, DTU, Options, LoopHeaders)) {
LocalChange = true;
++NumSimpl;
}
@ -267,13 +266,11 @@ static bool simplifyFunctionCFGImpl(Function &F, const TargetTransformInfo &TTI,
DominatorTree *DT,
const SimplifyCFGOptions &Options) {
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
SimplifyCFGCostTracker CostTracker;
bool EverChanged = removeUnreachableBlocks(F, DT ? &DTU : nullptr);
EverChanged |=
tailMergeBlocksWithSimilarFunctionTerminators(F, DT ? &DTU : nullptr);
EverChanged |=
iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options, CostTracker);
EverChanged |= iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options);
// If neither pass changed anything, we're done.
if (!EverChanged) return false;
@ -287,8 +284,7 @@ static bool simplifyFunctionCFGImpl(Function &F, const TargetTransformInfo &TTI,
return true;
do {
EverChanged = iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options,
CostTracker);
EverChanged = iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options);
EverChanged |= removeUnreachableBlocks(F, DT ? &DTU : nullptr);
} while (EverChanged);

View File

@ -480,7 +480,6 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
DominatorTree *DT, LoopInfo *LI,
ScalarEvolution *SE, AssumptionCache *AC,
MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
SimplifyCFGCostTracker CostTracker;
bool Changed = false;
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
@ -667,7 +666,7 @@ ReprocessLoop:
// The block has now been cleared of all instructions except for
// a comparison and a conditional branch. SimplifyCFG may be able
// to fold it now.
if (!FoldBranchToCommonDest(BI, CostTracker, /*DTU=*/nullptr, MSSAU))
if (!FoldBranchToCommonDest(BI, /*DTU=*/nullptr, MSSAU))
continue;
// Success. The block is now dead, so remove it from the loop,

View File

@ -207,21 +207,6 @@ STATISTIC(NumInvokes,
STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
namespace llvm {
void SimplifyCFGCostTracker::updateNumBonusInsts(BasicBlock *BB,
unsigned InstCount) {
auto Loc = NumBonusInsts.find(BB);
if (Loc == NumBonusInsts.end())
Loc = NumBonusInsts.insert({BB, 0}).first;
Loc->second = Loc->second + InstCount;
}
unsigned SimplifyCFGCostTracker::getNumBonusInsts(BasicBlock *BB) {
return NumBonusInsts.lookup(BB);
}
} // namespace llvm
namespace {
// The first field contains the value that the switch produces when a certain
@ -258,10 +243,6 @@ class SimplifyCFGOpt {
ArrayRef<WeakVH> LoopHeaders;
const SimplifyCFGOptions &Options;
bool Resimplify;
// Accumulates number of bonus instructions due to merging basic blocks
// of common destination.
SimplifyCFGCostTracker *CostTracker;
SimplifyCFGCostTracker LocalCostTracker;
Value *isValueEqualityComparison(Instruction *TI);
BasicBlock *GetValueEqualityComparisonCases(
@ -305,10 +286,8 @@ class SimplifyCFGOpt {
public:
SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
const SimplifyCFGOptions &Opts,
SimplifyCFGCostTracker *CostTracker_)
: TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts),
CostTracker(CostTracker_ ? CostTracker_ : &LocalCostTracker) {
const SimplifyCFGOptions &Opts)
: TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
assert((!DTU || !DTU->hasPostDomTree()) &&
"SimplifyCFG is not yet capable of maintaining validity of a "
"PostDomTree, so don't ask for it.");
@ -3645,9 +3624,8 @@ static bool isVectorOp(Instruction &I) {
/// If this basic block is simple enough, and if a predecessor branches to us
/// and one of our successors, fold the block into the predecessor and use
/// logical operations to pick the right destination.
bool llvm::FoldBranchToCommonDest(BranchInst *BI,
SimplifyCFGCostTracker &CostTracker,
DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU,
bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
MemorySSAUpdater *MSSAU,
const TargetTransformInfo *TTI,
unsigned BonusInstThreshold) {
// If this block ends with an unconditional branch,
@ -3719,6 +3697,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI,
// as "bonus instructions", and only allow this transformation when the
// number of the bonus instructions we'll need to create when cloning into
// each predecessor does not exceed a certain threshold.
unsigned NumBonusInsts = 0;
bool SawVectorOp = false;
const unsigned PredCount = Preds.size();
for (Instruction &I : *BB) {
@ -3737,13 +3716,12 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI,
// predecessor. Ignore free instructions.
if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
TargetTransformInfo::TCC_Free) {
for (auto PredBB : Preds) {
CostTracker.updateNumBonusInsts(PredBB, PredCount);
// Early exits once we reach the limit.
if (CostTracker.getNumBonusInsts(PredBB) >
BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
return false;
}
NumBonusInsts += PredCount;
// Early exits once we reach the limit.
if (NumBonusInsts >
BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
return false;
}
auto IsBCSSAUse = [BB, &I](Use &U) {
@ -3757,12 +3735,10 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI,
if (!all_of(I.uses(), IsBCSSAUse))
return false;
}
for (auto PredBB : Preds) {
if (CostTracker.getNumBonusInsts(PredBB) >
BonusInstThreshold *
(SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
return false;
}
if (NumBonusInsts >
BonusInstThreshold *
(SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
return false;
// Ok, we have the budget. Perform the transformation.
for (BasicBlock *PredBlock : Preds) {
@ -6913,7 +6889,7 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
// branches to us and our successor, fold the comparison into the
// predecessor and use logical operations to update the incoming value
// for PHI nodes in common successor.
if (FoldBranchToCommonDest(BI, *CostTracker, DTU, /*MSSAU=*/nullptr, &TTI,
if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
Options.BonusInstThreshold))
return requestResimplify();
return false;
@ -6982,7 +6958,7 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
if (FoldBranchToCommonDest(BI, *CostTracker, DTU, /*MSSAU=*/nullptr, &TTI,
if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
Options.BonusInstThreshold))
return requestResimplify();
@ -7281,9 +7257,8 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
DomTreeUpdater *DTU, const SimplifyCFGOptions &Options,
ArrayRef<WeakVH> LoopHeaders,
SimplifyCFGCostTracker *CostTracker) {
ArrayRef<WeakVH> LoopHeaders) {
return SimplifyCFGOpt(TTI, DTU, BB->getModule()->getDataLayout(), LoopHeaders,
Options, CostTracker)
Options)
.run(BB);
}

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop-unroll<peeling;no-runtime>,simplifycfg<bonus-inst-threshold=3>,instcombine' -unroll-force-peel-count=3 -verify-dom-info | FileCheck %s
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop-unroll<peeling;no-runtime>,simplifycfg,instcombine' -unroll-force-peel-count=3 -verify-dom-info | FileCheck %s
define void @basic(i32 %K, i32 %N) {
; CHECK-LABEL: @basic(

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -bonus-inst-threshold=4 -O2 -S < %s | FileCheck %s
; RUN: opt -O2 -S < %s | FileCheck %s
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64--"

View File

@ -3,12 +3,9 @@
%struct.S = type { [4 x i32] }
; Check the second basic block is folded into the first basic block
; since it has one bonus intruction. The third basic block is not
; folded into the first basic block since the accumulated bonus
; instructions will exceed the default threshold of 1. The fourth basic
; block is foled into the third basic block since the accumulated
; bonus instruction cost is 1.
; Check the second, third, and fourth basic blocks are folded into
; the first basic block since each has one bonus intruction, which
; does not exceed the default bouns instruction threshold of 1.
define i1 @test1(i32 %0, i32 %1, i32 %2, i32 %3) {
; CHECK-LABEL: @test1(
@ -18,18 +15,14 @@ define i1 @test1(i32 %0, i32 %1, i32 %2, i32 %3) {
; CHECK-NEXT: [[MUL1:%.*]] = mul i32 [[TMP1:%.*]], [[TMP1]]
; CHECK-NEXT: [[CMP2_1:%.*]] = icmp sgt i32 [[MUL1]], 0
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP2]], i1 true, i1 [[CMP2_1]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[FOR_COND_1:%.*]]
; CHECK: for.cond.1:
; CHECK-NEXT: [[MUL2:%.*]] = mul i32 [[TMP2:%.*]], [[TMP2]]
; CHECK-NEXT: [[CMP2_2:%.*]] = icmp sgt i32 [[MUL2]], 0
; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 true, i1 [[CMP2_2]]
; CHECK-NEXT: [[MUL3:%.*]] = mul i32 [[TMP3:%.*]], [[TMP3]]
; CHECK-NEXT: [[CMP2_3:%.*]] = icmp sgt i32 [[MUL3]], 0
; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[CMP2_2]], i1 true, i1 [[CMP2_3]]
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[OR_COND1]], i1 false, i1 true
; CHECK-NEXT: br label [[CLEANUP]]
; CHECK: cleanup:
; CHECK-NEXT: [[CMP:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[FOR_COND_1]] ]
; CHECK-NEXT: ret i1 [[CMP]]
; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 true, i1 [[CMP2_3]]
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[OR_COND2]], i1 false, i1 true
; CHECK-NEXT: ret i1 [[SPEC_SELECT]]
;
entry:
%mul0 = mul i32 %0, %0

View File

@ -1,9 +1,9 @@
; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s --check-prefix=NORMAL
; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=3 | FileCheck %s --check-prefix=AGGRESSIVE
; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=6 | FileCheck %s --check-prefix=WAYAGGRESSIVE
; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=2 | FileCheck %s --check-prefix=AGGRESSIVE
; RUN: opt %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S -bonus-inst-threshold=4 | FileCheck %s --check-prefix=WAYAGGRESSIVE
; RUN: opt %s -passes=simplifycfg -S | FileCheck %s --check-prefix=NORMAL
; RUN: opt %s -passes='simplifycfg<bonus-inst-threshold=3>' -S | FileCheck %s --check-prefix=AGGRESSIVE
; RUN: opt %s -passes='simplifycfg<bonus-inst-threshold=6>' -S | FileCheck %s --check-prefix=WAYAGGRESSIVE
; RUN: opt %s -passes='simplifycfg<bonus-inst-threshold=2>' -S | FileCheck %s --check-prefix=AGGRESSIVE
; RUN: opt %s -passes='simplifycfg<bonus-inst-threshold=4>' -S | FileCheck %s --check-prefix=WAYAGGRESSIVE
define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) {
; NORMAL-LABEL: @foo(

View File

@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -bonus-inst-threshold=1 | FileCheck --check-prefixes=ALL,THR1 %s
; RUN: opt < %s -S -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -bonus-inst-threshold=3 | FileCheck --check-prefixes=ALL,THR2 %s
; RUN: opt < %s -S -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -bonus-inst-threshold=2 | FileCheck --check-prefixes=ALL,THR2 %s
declare void @sideeffect0()
declare void @sideeffect1()
@ -10,7 +10,7 @@ declare i1 @gen1()
; Here we'd want to duplicate %v3_adj into two predecessors,
; but -bonus-inst-threshold=1 says that we can only clone it into one.
; With -bonus-inst-threshold=3 we can clone it into both though.
; With -bonus-inst-threshold=2 we can clone it into both though.
define void @two_preds_with_extra_op(i8 %v0, i8 %v1, i8 %v2, i8 %v3) {
; THR1-LABEL: @two_preds_with_extra_op(
; THR1-NEXT: entry: