forked from OSchip/llvm-project
[HotColdSplit] Split more aggressively before/after cold invokes
Although a cold invoke and its unwind destination cannot themselves be extracted, code that unconditionally executes before or after the invoke may still be profitable to extract. With the cost model changes from D57125 applied, this gives a 3.5% increase in split text across LNT+externals on arm64 at -Os.

llvm-svn: 352160
This commit is contained in:
parent
5cf6665373
commit
65de025d64
|
@ -119,7 +119,11 @@ bool unlikelyExecuted(BasicBlock &BB) {
|
||||||
|
|
||||||
/// Check whether it's safe to outline \p BB.
|
/// Check whether it's safe to outline \p BB.
|
||||||
static bool mayExtractBlock(const BasicBlock &BB) {
|
static bool mayExtractBlock(const BasicBlock &BB) {
|
||||||
return !BB.hasAddressTaken() && !BB.isEHPad();
|
// EH pads are unsafe to outline because doing so breaks EH type tables. It
|
||||||
|
// follows that invoke instructions cannot be extracted, because CodeExtractor
|
||||||
|
// requires unwind destinations to be within the extraction region.
|
||||||
|
return !BB.hasAddressTaken() && !BB.isEHPad() &&
|
||||||
|
!isa<InvokeInst>(BB.getTerminator());
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check whether \p Region is profitable to outline.
|
/// Check whether \p Region is profitable to outline.
|
||||||
|
@ -283,6 +287,8 @@ using BlockTy = std::pair<BasicBlock *, unsigned>;
|
||||||
namespace {
|
namespace {
|
||||||
/// A maximal outlining region. This contains all blocks post-dominated by a
|
/// A maximal outlining region. This contains all blocks post-dominated by a
|
||||||
/// sink block, the sink block itself, and all blocks dominated by the sink.
|
/// sink block, the sink block itself, and all blocks dominated by the sink.
|
||||||
|
/// If sink-predecessors and sink-successors cannot be extracted in one region,
|
||||||
|
/// the static constructor returns a list of suitable extraction regions.
|
||||||
class OutliningRegion {
|
class OutliningRegion {
|
||||||
/// A list of (block, score) pairs. A block's score is non-zero iff it's a
|
/// A list of (block, score) pairs. A block's score is non-zero iff it's a
|
||||||
/// viable sub-region entry point. Blocks with higher scores are better entry
|
/// viable sub-region entry point. Blocks with higher scores are better entry
|
||||||
|
@ -297,12 +303,9 @@ class OutliningRegion {
|
||||||
/// Whether the entire function is cold.
|
/// Whether the entire function is cold.
|
||||||
bool EntireFunctionCold = false;
|
bool EntireFunctionCold = false;
|
||||||
|
|
||||||
/// Whether or not \p BB could be the entry point of an extracted region.
|
|
||||||
static bool isViableEntryPoint(BasicBlock &BB) { return !BB.isEHPad(); }
|
|
||||||
|
|
||||||
/// If \p BB is a viable entry point, return \p Score. Return 0 otherwise.
|
/// If \p BB is a viable entry point, return \p Score. Return 0 otherwise.
|
||||||
static unsigned getEntryPointScore(BasicBlock &BB, unsigned Score) {
|
static unsigned getEntryPointScore(BasicBlock &BB, unsigned Score) {
|
||||||
return isViableEntryPoint(BB) ? Score : 0;
|
return mayExtractBlock(BB) ? Score : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// These scores should be lower than the score for predecessor blocks,
|
/// These scores should be lower than the score for predecessor blocks,
|
||||||
|
@ -318,21 +321,23 @@ public:
|
||||||
OutliningRegion(OutliningRegion &&) = default;
|
OutliningRegion(OutliningRegion &&) = default;
|
||||||
OutliningRegion &operator=(OutliningRegion &&) = default;
|
OutliningRegion &operator=(OutliningRegion &&) = default;
|
||||||
|
|
||||||
static OutliningRegion create(BasicBlock &SinkBB, const DominatorTree &DT,
|
static std::vector<OutliningRegion> create(BasicBlock &SinkBB,
|
||||||
const PostDominatorTree &PDT) {
|
const DominatorTree &DT,
|
||||||
OutliningRegion ColdRegion;
|
const PostDominatorTree &PDT) {
|
||||||
|
std::vector<OutliningRegion> Regions;
|
||||||
SmallPtrSet<BasicBlock *, 4> RegionBlocks;
|
SmallPtrSet<BasicBlock *, 4> RegionBlocks;
|
||||||
|
|
||||||
|
Regions.emplace_back();
|
||||||
|
OutliningRegion *ColdRegion = &Regions.back();
|
||||||
|
|
||||||
auto addBlockToRegion = [&](BasicBlock *BB, unsigned Score) {
|
auto addBlockToRegion = [&](BasicBlock *BB, unsigned Score) {
|
||||||
RegionBlocks.insert(BB);
|
RegionBlocks.insert(BB);
|
||||||
ColdRegion.Blocks.emplace_back(BB, Score);
|
ColdRegion->Blocks.emplace_back(BB, Score);
|
||||||
assert(RegionBlocks.size() == ColdRegion.Blocks.size() && "Duplicate BB");
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// The ancestor farthest-away from SinkBB, and also post-dominated by it.
|
// The ancestor farthest-away from SinkBB, and also post-dominated by it.
|
||||||
unsigned SinkScore = getEntryPointScore(SinkBB, ScoreForSinkBlock);
|
unsigned SinkScore = getEntryPointScore(SinkBB, ScoreForSinkBlock);
|
||||||
ColdRegion.SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB : nullptr;
|
ColdRegion->SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB : nullptr;
|
||||||
unsigned BestScore = SinkScore;
|
unsigned BestScore = SinkScore;
|
||||||
|
|
||||||
// Visit SinkBB's ancestors using inverse DFS.
|
// Visit SinkBB's ancestors using inverse DFS.
|
||||||
|
@ -345,8 +350,8 @@ public:
|
||||||
// If the predecessor is cold and has no predecessors, the entire
|
// If the predecessor is cold and has no predecessors, the entire
|
||||||
// function must be cold.
|
// function must be cold.
|
||||||
if (SinkPostDom && pred_empty(&PredBB)) {
|
if (SinkPostDom && pred_empty(&PredBB)) {
|
||||||
ColdRegion.EntireFunctionCold = true;
|
ColdRegion->EntireFunctionCold = true;
|
||||||
return ColdRegion;
|
return Regions;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If SinkBB does not post-dominate a predecessor, do not mark the
|
// If SinkBB does not post-dominate a predecessor, do not mark the
|
||||||
|
@ -361,7 +366,7 @@ public:
|
||||||
// considered as entry points before the sink block.
|
// considered as entry points before the sink block.
|
||||||
unsigned PredScore = getEntryPointScore(PredBB, PredIt.getPathLength());
|
unsigned PredScore = getEntryPointScore(PredBB, PredIt.getPathLength());
|
||||||
if (PredScore > BestScore) {
|
if (PredScore > BestScore) {
|
||||||
ColdRegion.SuggestedEntryPoint = &PredBB;
|
ColdRegion->SuggestedEntryPoint = &PredBB;
|
||||||
BestScore = PredScore;
|
BestScore = PredScore;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -369,10 +374,19 @@ public:
|
||||||
++PredIt;
|
++PredIt;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add SinkBB to the cold region. It's considered as an entry point before
|
// If the sink can be added to the cold region, do so. It's considered as
|
||||||
// any sink-successor blocks.
|
// an entry point before any sink-successor blocks.
|
||||||
if (mayExtractBlock(SinkBB))
|
//
|
||||||
|
// Otherwise, split cold sink-successor blocks using a separate region.
|
||||||
|
// This satisfies the requirement that all extraction blocks other than the
|
||||||
|
// first have predecessors within the extraction region.
|
||||||
|
if (mayExtractBlock(SinkBB)) {
|
||||||
addBlockToRegion(&SinkBB, SinkScore);
|
addBlockToRegion(&SinkBB, SinkScore);
|
||||||
|
} else {
|
||||||
|
Regions.emplace_back();
|
||||||
|
ColdRegion = &Regions.back();
|
||||||
|
BestScore = 0;
|
||||||
|
}
|
||||||
|
|
||||||
// Find all successors of SinkBB dominated by SinkBB using DFS.
|
// Find all successors of SinkBB dominated by SinkBB using DFS.
|
||||||
auto SuccIt = ++df_begin(&SinkBB);
|
auto SuccIt = ++df_begin(&SinkBB);
|
||||||
|
@ -393,7 +407,7 @@ public:
|
||||||
|
|
||||||
unsigned SuccScore = getEntryPointScore(SuccBB, ScoreForSuccBlock);
|
unsigned SuccScore = getEntryPointScore(SuccBB, ScoreForSuccBlock);
|
||||||
if (SuccScore > BestScore) {
|
if (SuccScore > BestScore) {
|
||||||
ColdRegion.SuggestedEntryPoint = &SuccBB;
|
ColdRegion->SuggestedEntryPoint = &SuccBB;
|
||||||
BestScore = SuccScore;
|
BestScore = SuccScore;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -401,7 +415,7 @@ public:
|
||||||
++SuccIt;
|
++SuccIt;
|
||||||
}
|
}
|
||||||
|
|
||||||
return ColdRegion;
|
return Regions;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Whether this region has nothing to extract.
|
/// Whether this region has nothing to extract.
|
||||||
|
@ -496,28 +510,30 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
|
||||||
if (!PDT)
|
if (!PDT)
|
||||||
PDT = make_unique<PostDominatorTree>(F);
|
PDT = make_unique<PostDominatorTree>(F);
|
||||||
|
|
||||||
auto Region = OutliningRegion::create(*BB, *DT, *PDT);
|
auto Regions = OutliningRegion::create(*BB, *DT, *PDT);
|
||||||
if (Region.empty())
|
for (OutliningRegion &Region : Regions) {
|
||||||
continue;
|
if (Region.empty())
|
||||||
|
continue;
|
||||||
|
|
||||||
if (Region.isEntireFunctionCold()) {
|
if (Region.isEntireFunctionCold()) {
|
||||||
LLVM_DEBUG(dbgs() << "Entire function is cold\n");
|
LLVM_DEBUG(dbgs() << "Entire function is cold\n");
|
||||||
return markFunctionCold(F);
|
return markFunctionCold(F);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If this outlining region intersects with another, drop the new region.
|
||||||
|
//
|
||||||
|
// TODO: It's theoretically possible to outline more by only keeping the
|
||||||
|
// largest region which contains a block, but the extra bookkeeping to do
|
||||||
|
// this is tricky/expensive.
|
||||||
|
bool RegionsOverlap = any_of(Region.blocks(), [&](const BlockTy &Block) {
|
||||||
|
return !ColdBlocks.insert(Block.first).second;
|
||||||
|
});
|
||||||
|
if (RegionsOverlap)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
OutliningWorklist.emplace_back(std::move(Region));
|
||||||
|
++NumColdRegionsFound;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If this outlining region intersects with another, drop the new region.
|
|
||||||
//
|
|
||||||
// TODO: It's theoretically possible to outline more by only keeping the
|
|
||||||
// largest region which contains a block, but the extra bookkeeping to do
|
|
||||||
// this is tricky/expensive.
|
|
||||||
bool RegionsOverlap = any_of(Region.blocks(), [&](const BlockTy &Block) {
|
|
||||||
return !ColdBlocks.insert(Block.first).second;
|
|
||||||
});
|
|
||||||
if (RegionsOverlap)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
OutliningWorklist.emplace_back(std::move(Region));
|
|
||||||
++NumColdRegionsFound;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Outline single-entry cold regions, splitting up larger regions as needed.
|
// Outline single-entry cold regions, splitting up larger regions as needed.
|
||||||
|
|
|
@ -54,6 +54,31 @@ normal:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define void @baz() personality i8 0 {
|
||||||
|
entry:
|
||||||
|
br i1 undef, label %exit, label %cold1
|
||||||
|
|
||||||
|
exit:
|
||||||
|
ret void
|
||||||
|
|
||||||
|
cold1:
|
||||||
|
; The predecessor of a cold invoke may still be extracted (see baz.cold.2).
|
||||||
|
call void @sideeffect(i32 0)
|
||||||
|
br label %cold2
|
||||||
|
|
||||||
|
cold2:
|
||||||
|
invoke void @sink() to label %cold3 unwind label %cold4
|
||||||
|
|
||||||
|
cold3:
|
||||||
|
; The successor of a cold invoke may still be extracted (see baz.cold.1).
|
||||||
|
call void @sideeffect(i32 1)
|
||||||
|
ret void
|
||||||
|
|
||||||
|
cold4:
|
||||||
|
landingpad i8 cleanup
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
; CHECK-LABEL: define {{.*}}@foo.cold.1(
|
; CHECK-LABEL: define {{.*}}@foo.cold.1(
|
||||||
; CHECK: sideeffect(i32 0)
|
; CHECK: sideeffect(i32 0)
|
||||||
; CHECK: sink
|
; CHECK: sink
|
||||||
|
@ -61,6 +86,12 @@ normal:
|
||||||
; CHECK-LABEL: define {{.*}}@bar.cold.1(
|
; CHECK-LABEL: define {{.*}}@bar.cold.1(
|
||||||
; CHECK: sideeffect(i32 1)
|
; CHECK: sideeffect(i32 1)
|
||||||
|
|
||||||
|
; CHECK-LABEL: define {{.*}}@baz.cold.1(
|
||||||
|
; CHECK: sideeffect(i32 1)
|
||||||
|
|
||||||
|
; CHECK-LABEL: define {{.*}}@baz.cold.2(
|
||||||
|
; CHECK: sideeffect(i32 0)
|
||||||
|
|
||||||
declare void @sideeffect(i32)
|
declare void @sideeffect(i32)
|
||||||
|
|
||||||
declare void @sink() cold
|
declare void @sink() cold
|
||||||
|
|
Loading…
Reference in New Issue