Hot cold splitting pass

Find cold blocks based on profile information (or optionally with static analysis).
Forward propagate profile information to all cold-blocks.
Outline a cold region.
Set calling conv and prof hint for the callsite of the outlined function.

Worked in collaboration with: Sebastian Pop <s.pop@samsung.com>
Differential Revision: https://reviews.llvm.org/D50658

llvm-svn: 341669
This commit is contained in:
Aditya Kumar 2018-09-07 15:03:49 +00:00
parent 47b2ed2e16
commit 801394a3d7
7 changed files with 419 additions and 0 deletions

View File

@ -161,6 +161,7 @@ void initializeGlobalOptLegacyPassPass(PassRegistry&);
void initializeGlobalSplitPass(PassRegistry&);
void initializeGlobalsAAWrapperPassPass(PassRegistry&);
void initializeGuardWideningLegacyPassPass(PassRegistry&);
void initializeHotColdSplittingLegacyPassPass(PassRegistry&);
void initializeHWAddressSanitizerPass(PassRegistry&);
void initializeIPCPPass(PassRegistry&);
void initializeIPSCCPLegacyPassPass(PassRegistry&);

View File

@ -201,6 +201,11 @@ Pass *createReversePostOrderFunctionAttrsPass();
///
ModulePass *createMergeFunctionsPass();
//===----------------------------------------------------------------------===//
/// createHotColdSplittingPass - This pass outlines cold blocks into a separate
/// function(s).
ModulePass *createHotColdSplittingPass();
//===----------------------------------------------------------------------===//
/// createPartialInliningPass - This pass inlines parts of functions.
///

View File

@ -15,6 +15,7 @@ add_llvm_library(LLVMipo
GlobalDCE.cpp
GlobalOpt.cpp
GlobalSplit.cpp
HotColdSplitting.cpp
IPConstantPropagation.cpp
IPO.cpp
InferFunctionAttrs.cpp

View File

@ -0,0 +1,370 @@
//===- HotColdSplitting.cpp -- Outline Cold Regions -------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Outline cold regions to a separate function.
// TODO: Update BFI and BPI
// TODO: Add all the outlined functions to a separate section.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
#define DEBUG_TYPE "hotcoldsplit"
STATISTIC(NumColdSESEFound,
"Number of cold single entry single exit (SESE) regions found.");
STATISTIC(NumColdSESEOutlined,
"Number of cold single entry single exit (SESE) regions outlined.");
using namespace llvm;
static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",
cl::init(true), cl::Hidden);
namespace {
struct PostDomTree : PostDomTreeBase<BasicBlock> {
PostDomTree(Function &F) { recalculate(F); }
};
typedef DenseSet<const BasicBlock *> DenseSetBB;
// From: https://reviews.llvm.org/D22558
// Exit is not part of the region.
static bool isSingleEntrySingleExit(BasicBlock *Entry, const BasicBlock *Exit,
DominatorTree *DT, PostDomTree *PDT,
SmallVectorImpl<BasicBlock *> &Region) {
if (!DT->dominates(Entry, Exit))
return false;
if (!PDT->dominates(Exit, Entry))
return false;
Region.push_back(Entry);
for (auto I = df_begin(Entry), E = df_end(Entry); I != E;) {
if (*I == Exit) {
I.skipChildren();
continue;
}
if (!DT->dominates(Entry, *I))
return false;
Region.push_back(*I);
++I;
}
return true;
}
bool blockEndsInUnreachable(const BasicBlock &BB) {
if (BB.empty())
return true;
const TerminatorInst *I = BB.getTerminator();
if (isa<ReturnInst>(I) || isa<IndirectBrInst>(I))
return true;
// Unreachable blocks do not have any successor.
return succ_empty(&BB);
}
static
bool unlikelyExecuted(const BasicBlock &BB) {
if (blockEndsInUnreachable(BB))
return true;
// Exception handling blocks are unlikely executed.
if (BB.isEHPad())
return true;
for (const Instruction &I : BB)
if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
// The block is cold if it calls functions tagged as cold or noreturn.
if (CI->hasFnAttr(Attribute::Cold) ||
CI->hasFnAttr(Attribute::NoReturn))
return true;
// Assume that inline assembly is hot code.
if (isa<InlineAsm>(CI->getCalledValue()))
return false;
}
return false;
}
static DenseSetBB getHotBlocks(Function &F) {
// First mark all function basic blocks as hot or cold.
DenseSet<const BasicBlock *> ColdBlocks;
for (BasicBlock &BB : F)
if (unlikelyExecuted(BB))
ColdBlocks.insert(&BB);
// Forward propagation.
DenseSetBB AllColdBlocks;
SmallVector<const BasicBlock *, 8> WL;
DenseSetBB Visited; // Track hot blocks.
const BasicBlock *It = &F.front();
const TerminatorInst *TI = It->getTerminator();
if (!ColdBlocks.count(It)) {
Visited.insert(It);
// Breadth First Search to mark edges not reachable from cold.
WL.push_back(It);
while (WL.size() > 0) {
It = WL.pop_back_val();
for (const BasicBlock *Succ : successors(TI)) {
// Do not visit blocks that are cold.
if (!ColdBlocks.count(Succ) && !Visited.count(Succ)) {
Visited.insert(Succ);
WL.push_back(Succ);
}
}
}
}
return Visited;
}
class HotColdSplitting {
public:
HotColdSplitting(ProfileSummaryInfo *ProfSI,
function_ref<BlockFrequencyInfo *(Function &)> GBFI,
std::function<OptimizationRemarkEmitter &(Function &)> *GORE)
: PSI(ProfSI), GetBFI(GBFI), GetORE(GORE) {}
bool run(Module &M);
private:
bool shouldOutlineFrom(const Function &F) const;
Function *outlineColdBlocks(Function &F,
const DenseSet<const BasicBlock *> &ColdBlock,
DominatorTree *DT, PostDomTree *PDT);
Function *extractColdRegion(const SmallVectorImpl<BasicBlock *> &Region,
DominatorTree *DT, BlockFrequencyInfo *BFI,
OptimizationRemarkEmitter &ORE);
bool isOutlineCandidate(const SmallVectorImpl<BasicBlock *> &Region,
const BasicBlock *Exit) const {
if (!Exit)
return false;
// TODO: Find a better metric to compute the size of region being outlined.
if (Region.size() == 1)
return false;
// Regions with landing pads etc.
for (const BasicBlock *BB : Region) {
if (BB->isEHPad() || BB->hasAddressTaken())
return false;
}
return true;
}
SmallPtrSet<const Function *, 2> OutlinedFunctions;
ProfileSummaryInfo *PSI;
function_ref<BlockFrequencyInfo *(Function &)> GetBFI;
std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
};
class HotColdSplittingLegacyPass : public ModulePass {
public:
static char ID;
HotColdSplittingLegacyPass() : ModulePass(ID) {
initializeHotColdSplittingLegacyPassPass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<BlockFrequencyInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
}
bool runOnModule(Module &M) override;
};
} // end anonymous namespace
// Returns false if the function should not be considered for hot-cold split
// optimization. Already outlined functions have coldcc so no need to check
// for them here.
bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
if (F.size() <= 2)
return false;
if (F.hasAddressTaken())
return false;
if (F.hasFnAttribute(Attribute::AlwaysInline))
return false;
if (F.hasFnAttribute(Attribute::NoInline))
return false;
if (F.getCallingConv() == CallingConv::Cold)
return false;
if (PSI->isFunctionEntryCold(&F))
return false;
return true;
}
Function *
HotColdSplitting::extractColdRegion(const SmallVectorImpl<BasicBlock *> &Region,
DominatorTree *DT, BlockFrequencyInfo *BFI,
OptimizationRemarkEmitter &ORE) {
LLVM_DEBUG(for (auto *BB : Region)
llvm::dbgs() << "\nExtracting: " << *BB;);
// TODO: Pass BFI and BPI to update profile information.
CodeExtractor CE(Region, DT, /*AggregateArgs*/ false, nullptr, nullptr,
/* AllowVarargs */ false);
SetVector<Value *> Inputs, Outputs, Sinks;
CE.findInputsOutputs(Inputs, Outputs, Sinks);
// Do not extract regions that have live exit variables.
if (Outputs.size() > 0)
return nullptr;
if (Function *OutF = CE.extractCodeRegion()) {
User *U = *OutF->user_begin();
CallInst *CI = cast<CallInst>(U);
CallSite CS(CI);
NumColdSESEOutlined++;
OutF->setCallingConv(CallingConv::Cold);
CS.setCallingConv(CallingConv::Cold);
CI->setIsNoInline();
LLVM_DEBUG(llvm::dbgs() << "Outlined Region at block: " << Region.front());
return OutF;
}
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
&*Region[0]->begin())
<< "Failed to extract region at block "
<< ore::NV("Block", Region.front());
});
return nullptr;
}
// Return the function created after outlining, nullptr otherwise.
Function *HotColdSplitting::outlineColdBlocks(Function &F,
const DenseSetBB &HotBlock,
DominatorTree *DT,
PostDomTree *PDT) {
auto BFI = GetBFI(F);
auto &ORE = (*GetORE)(F);
// Walking the dominator tree allows us to find the largest
// cold region.
BasicBlock *Begin = DT->getRootNode()->getBlock();
for (auto I = df_begin(Begin), E = df_end(Begin); I != E; ++I) {
BasicBlock *BB = *I;
if (PSI->isColdBB(BB, BFI) || !HotBlock.count(BB)) {
SmallVector<BasicBlock *, 4> ValidColdRegion, Region;
auto *BBNode = (*PDT)[BB];
auto Exit = BBNode->getIDom()->getBlock();
// We might need a virtual exit which post-dominates all basic blocks.
if (!Exit)
continue;
BasicBlock *ExitColdRegion = nullptr;
// Estimated cold region between a BB and its dom-frontier.
while (isSingleEntrySingleExit(BB, Exit, DT, PDT, Region) &&
isOutlineCandidate(Region, Exit)) {
ExitColdRegion = Exit;
ValidColdRegion = Region;
Region.clear();
// Update Exit recursively to its dom-frontier.
Exit = (*PDT)[Exit]->getIDom()->getBlock();
}
if (ExitColdRegion) {
++NumColdSESEFound;
// Candidate for outlining. FIXME: Continue outlining.
// FIXME: Shouldn't need uniquing, debug isSingleEntrySingleExit
//std::sort(ValidColdRegion.begin(), ValidColdRegion.end());
auto last = std::unique(ValidColdRegion.begin(), ValidColdRegion.end());
ValidColdRegion.erase(last, ValidColdRegion.end());
return extractColdRegion(ValidColdRegion, DT, BFI, ORE);
}
}
}
return nullptr;
}
bool HotColdSplitting::run(Module &M) {
for (auto &F : M) {
if (!shouldOutlineFrom(F))
continue;
DominatorTree DT(F);
PostDomTree PDT(F);
PDT.recalculate(F);
DenseSetBB HotBlocks;
if (EnableStaticAnalyis) // Static analysis of cold blocks.
HotBlocks = getHotBlocks(F);
auto Outlined = outlineColdBlocks(F, HotBlocks, &DT, &PDT);
if (Outlined)
OutlinedFunctions.insert(Outlined);
}
return true;
}
bool HotColdSplittingLegacyPass::runOnModule(Module &M) {
if (skipModule(M))
return false;
ProfileSummaryInfo *PSI =
getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
auto GBFI = [this](Function &F) {
return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
};
std::unique_ptr<OptimizationRemarkEmitter> ORE;
std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
[&ORE](Function &F) -> OptimizationRemarkEmitter & {
ORE.reset(new OptimizationRemarkEmitter(&F));
return *ORE.get();
};
return HotColdSplitting(PSI, GBFI, &GetORE).run(M);
}
char HotColdSplittingLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(HotColdSplittingLegacyPass, "hotcoldsplit",
"Hot Cold Splitting", false, false)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_END(HotColdSplittingLegacyPass, "hotcoldsplit",
"Hot Cold Splitting", false, false)
ModulePass *llvm::createHotColdSplittingPass() {
return new HotColdSplittingLegacyPass();
}

View File

@ -34,6 +34,7 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeGlobalDCELegacyPassPass(Registry);
initializeGlobalOptLegacyPassPass(Registry);
initializeGlobalSplitPass(Registry);
initializeHotColdSplittingLegacyPassPass(Registry);
initializeIPCPPass(Registry);
initializeAlwaysInlinerLegacyPassPass(Registry);
initializeSimpleInlinerPass(Registry);

View File

@ -104,6 +104,10 @@ static cl::opt<bool>
EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden,
cl::desc("Enable preparation for ThinLTO."));
cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::init(false), cl::Hidden,
cl::desc("Enable hot-cold splitting pass"));
static cl::opt<bool> RunPGOInstrGen(
"profile-generate", cl::init(false), cl::Hidden,
cl::desc("Enable PGO instrumentation."));
@ -497,6 +501,9 @@ void PassManagerBuilder::populateModulePassManager(
// Infer attributes about declarations if possible.
MPM.add(createInferFunctionAttrsLegacyPass());
if (EnableHotColdSplit)
MPM.add(createHotColdSplittingPass());
addExtensionsToPM(EP_ModuleOptimizerEarly, MPM);
if (OptLevel > 2)

View File

@ -0,0 +1,34 @@
; RUN: opt -hotcoldsplit -S < %s | FileCheck %s
source_filename = "bugpoint-output-054409e.bc"
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.13.0"
declare i32 @__gxx_personality_v0(...)
; Outlined function is called from a basic block named codeRepl
; CHECK: codeRepl:
; CHECK-NEXT: call coldcc void @foo
; Check that no recursive outlining is done.
; CHECK-NOT: codeRepl:
; Function Attrs: ssp uwtable
define hidden void @foo() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
entry:
br i1 undef, label %if.then, label %if.end
if.then: ; preds = %entry
unreachable
if.end: ; preds = %entry
br label %if.then12
if.then12: ; preds = %if.end
br label %cleanup40
cleanup40: ; preds = %if.then12
br label %return
return: ; preds = %cleanup40
ret void
}