[HotColdSplit] Split more aggressively before/after cold invokes

While a cold invoke itself and its unwind destination can't be
extracted, code which unconditionally executes before/after the invoke
may still be profitable to extract.

With cost model changes from D57125 applied, this gives a 3.5% increase
in split text across LNT+externals on arm64 at -Os.

llvm-svn: 352160
This commit is contained in:
Vedant Kumar 2019-01-25 03:22:23 +00:00
parent 5cf6665373
commit 65de025d64
2 changed files with 87 additions and 40 deletions

View File

@ -119,7 +119,11 @@ bool unlikelyExecuted(BasicBlock &BB) {
/// Check whether it's safe to outline \p BB. /// Check whether it's safe to outline \p BB.
static bool mayExtractBlock(const BasicBlock &BB) { static bool mayExtractBlock(const BasicBlock &BB) {
return !BB.hasAddressTaken() && !BB.isEHPad(); // EH pads are unsafe to outline because doing so breaks EH type tables. It
// follows that invoke instructions cannot be extracted, because CodeExtractor
// requires unwind destinations to be within the extraction region.
return !BB.hasAddressTaken() && !BB.isEHPad() &&
!isa<InvokeInst>(BB.getTerminator());
} }
/// Check whether \p Region is profitable to outline. /// Check whether \p Region is profitable to outline.
@ -283,6 +287,8 @@ using BlockTy = std::pair<BasicBlock *, unsigned>;
namespace { namespace {
/// A maximal outlining region. This contains all blocks post-dominated by a /// A maximal outlining region. This contains all blocks post-dominated by a
/// sink block, the sink block itself, and all blocks dominated by the sink. /// sink block, the sink block itself, and all blocks dominated by the sink.
/// If sink-predecessors and sink-successors cannot be extracted in one region,
/// the static constructor returns a list of suitable extraction regions.
class OutliningRegion { class OutliningRegion {
/// A list of (block, score) pairs. A block's score is non-zero iff it's a /// A list of (block, score) pairs. A block's score is non-zero iff it's a
/// viable sub-region entry point. Blocks with higher scores are better entry /// viable sub-region entry point. Blocks with higher scores are better entry
@ -297,12 +303,9 @@ class OutliningRegion {
/// Whether the entire function is cold. /// Whether the entire function is cold.
bool EntireFunctionCold = false; bool EntireFunctionCold = false;
/// Whether or not \p BB could be the entry point of an extracted region.
static bool isViableEntryPoint(BasicBlock &BB) { return !BB.isEHPad(); }
/// If \p BB is a viable entry point, return \p Score. Return 0 otherwise. /// If \p BB is a viable entry point, return \p Score. Return 0 otherwise.
static unsigned getEntryPointScore(BasicBlock &BB, unsigned Score) { static unsigned getEntryPointScore(BasicBlock &BB, unsigned Score) {
return isViableEntryPoint(BB) ? Score : 0; return mayExtractBlock(BB) ? Score : 0;
} }
/// These scores should be lower than the score for predecessor blocks, /// These scores should be lower than the score for predecessor blocks,
@ -318,21 +321,23 @@ public:
OutliningRegion(OutliningRegion &&) = default; OutliningRegion(OutliningRegion &&) = default;
OutliningRegion &operator=(OutliningRegion &&) = default; OutliningRegion &operator=(OutliningRegion &&) = default;
static OutliningRegion create(BasicBlock &SinkBB, const DominatorTree &DT, static std::vector<OutliningRegion> create(BasicBlock &SinkBB,
const PostDominatorTree &PDT) { const DominatorTree &DT,
OutliningRegion ColdRegion; const PostDominatorTree &PDT) {
std::vector<OutliningRegion> Regions;
SmallPtrSet<BasicBlock *, 4> RegionBlocks; SmallPtrSet<BasicBlock *, 4> RegionBlocks;
Regions.emplace_back();
OutliningRegion *ColdRegion = &Regions.back();
auto addBlockToRegion = [&](BasicBlock *BB, unsigned Score) { auto addBlockToRegion = [&](BasicBlock *BB, unsigned Score) {
RegionBlocks.insert(BB); RegionBlocks.insert(BB);
ColdRegion.Blocks.emplace_back(BB, Score); ColdRegion->Blocks.emplace_back(BB, Score);
assert(RegionBlocks.size() == ColdRegion.Blocks.size() && "Duplicate BB");
}; };
// The ancestor farthest-away from SinkBB, and also post-dominated by it. // The ancestor farthest-away from SinkBB, and also post-dominated by it.
unsigned SinkScore = getEntryPointScore(SinkBB, ScoreForSinkBlock); unsigned SinkScore = getEntryPointScore(SinkBB, ScoreForSinkBlock);
ColdRegion.SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB : nullptr; ColdRegion->SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB : nullptr;
unsigned BestScore = SinkScore; unsigned BestScore = SinkScore;
// Visit SinkBB's ancestors using inverse DFS. // Visit SinkBB's ancestors using inverse DFS.
@ -345,8 +350,8 @@ public:
// If the predecessor is cold and has no predecessors, the entire // If the predecessor is cold and has no predecessors, the entire
// function must be cold. // function must be cold.
if (SinkPostDom && pred_empty(&PredBB)) { if (SinkPostDom && pred_empty(&PredBB)) {
ColdRegion.EntireFunctionCold = true; ColdRegion->EntireFunctionCold = true;
return ColdRegion; return Regions;
} }
// If SinkBB does not post-dominate a predecessor, do not mark the // If SinkBB does not post-dominate a predecessor, do not mark the
@ -361,7 +366,7 @@ public:
// considered as entry points before the sink block. // considered as entry points before the sink block.
unsigned PredScore = getEntryPointScore(PredBB, PredIt.getPathLength()); unsigned PredScore = getEntryPointScore(PredBB, PredIt.getPathLength());
if (PredScore > BestScore) { if (PredScore > BestScore) {
ColdRegion.SuggestedEntryPoint = &PredBB; ColdRegion->SuggestedEntryPoint = &PredBB;
BestScore = PredScore; BestScore = PredScore;
} }
@ -369,10 +374,19 @@ public:
++PredIt; ++PredIt;
} }
// Add SinkBB to the cold region. It's considered as an entry point before // If the sink can be added to the cold region, do so. It's considered as
// any sink-successor blocks. // an entry point before any sink-successor blocks.
if (mayExtractBlock(SinkBB)) //
// Otherwise, split cold sink-successor blocks using a separate region.
// This satisfies the requirement that all extraction blocks other than the
// first have predecessors within the extraction region.
if (mayExtractBlock(SinkBB)) {
addBlockToRegion(&SinkBB, SinkScore); addBlockToRegion(&SinkBB, SinkScore);
} else {
Regions.emplace_back();
ColdRegion = &Regions.back();
BestScore = 0;
}
// Find all successors of SinkBB dominated by SinkBB using DFS. // Find all successors of SinkBB dominated by SinkBB using DFS.
auto SuccIt = ++df_begin(&SinkBB); auto SuccIt = ++df_begin(&SinkBB);
@ -393,7 +407,7 @@ public:
unsigned SuccScore = getEntryPointScore(SuccBB, ScoreForSuccBlock); unsigned SuccScore = getEntryPointScore(SuccBB, ScoreForSuccBlock);
if (SuccScore > BestScore) { if (SuccScore > BestScore) {
ColdRegion.SuggestedEntryPoint = &SuccBB; ColdRegion->SuggestedEntryPoint = &SuccBB;
BestScore = SuccScore; BestScore = SuccScore;
} }
@ -401,7 +415,7 @@ public:
++SuccIt; ++SuccIt;
} }
return ColdRegion; return Regions;
} }
/// Whether this region has nothing to extract. /// Whether this region has nothing to extract.
@ -496,28 +510,30 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
if (!PDT) if (!PDT)
PDT = make_unique<PostDominatorTree>(F); PDT = make_unique<PostDominatorTree>(F);
auto Region = OutliningRegion::create(*BB, *DT, *PDT); auto Regions = OutliningRegion::create(*BB, *DT, *PDT);
if (Region.empty()) for (OutliningRegion &Region : Regions) {
continue; if (Region.empty())
continue;
if (Region.isEntireFunctionCold()) { if (Region.isEntireFunctionCold()) {
LLVM_DEBUG(dbgs() << "Entire function is cold\n"); LLVM_DEBUG(dbgs() << "Entire function is cold\n");
return markFunctionCold(F); return markFunctionCold(F);
}
// If this outlining region intersects with another, drop the new region.
//
// TODO: It's theoretically possible to outline more by only keeping the
// largest region which contains a block, but the extra bookkeeping to do
// this is tricky/expensive.
bool RegionsOverlap = any_of(Region.blocks(), [&](const BlockTy &Block) {
return !ColdBlocks.insert(Block.first).second;
});
if (RegionsOverlap)
continue;
OutliningWorklist.emplace_back(std::move(Region));
++NumColdRegionsFound;
} }
// If this outlining region intersects with another, drop the new region.
//
// TODO: It's theoretically possible to outline more by only keeping the
// largest region which contains a block, but the extra bookkeeping to do
// this is tricky/expensive.
bool RegionsOverlap = any_of(Region.blocks(), [&](const BlockTy &Block) {
return !ColdBlocks.insert(Block.first).second;
});
if (RegionsOverlap)
continue;
OutliningWorklist.emplace_back(std::move(Region));
++NumColdRegionsFound;
} }
// Outline single-entry cold regions, splitting up larger regions as needed. // Outline single-entry cold regions, splitting up larger regions as needed.

View File

@ -54,6 +54,31 @@ normal:
ret void ret void
} }
define void @baz() personality i8 0 {
entry:
br i1 undef, label %exit, label %cold1
exit:
ret void
cold1:
; The predecessor of a cold invoke may still be extracted (see baz.cold.2).
call void @sideeffect(i32 0)
br label %cold2
cold2:
invoke void @sink() to label %cold3 unwind label %cold4
cold3:
; The successor of a cold invoke may still be extracted (see baz.cold.1).
call void @sideeffect(i32 1)
ret void
cold4:
landingpad i8 cleanup
ret void
}
; CHECK-LABEL: define {{.*}}@foo.cold.1( ; CHECK-LABEL: define {{.*}}@foo.cold.1(
; CHECK: sideeffect(i32 0) ; CHECK: sideeffect(i32 0)
; CHECK: sink ; CHECK: sink
@ -61,6 +86,12 @@ normal:
; CHECK-LABEL: define {{.*}}@bar.cold.1( ; CHECK-LABEL: define {{.*}}@bar.cold.1(
; CHECK: sideeffect(i32 1) ; CHECK: sideeffect(i32 1)
; CHECK-LABEL: define {{.*}}@baz.cold.1(
; CHECK: sideeffect(i32 1)
; CHECK-LABEL: define {{.*}}@baz.cold.2(
; CHECK: sideeffect(i32 0)
declare void @sideeffect(i32) declare void @sideeffect(i32)
declare void @sink() cold declare void @sink() cold