diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp index ce8a5060a3a1..621ac7dc8ab8 100644 --- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp +++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp @@ -66,10 +66,10 @@ using namespace llvm; static cl::opt EnableStaticAnalyis("hot-cold-static-analysis", cl::init(true), cl::Hidden); -static cl::opt MinOutliningInstCount( - "min-outlining-inst-count", cl::init(3), cl::Hidden, - cl::desc("Minimum number of instructions needed for a single-block region " - "to be an outlining candidate")); +static cl::opt + MinOutliningThreshold("min-outlining-thresh", cl::init(3), cl::Hidden, + cl::desc("Code size threshold for outlining within a " + "single BB (as a multiple of TCC_Basic)")); namespace { @@ -135,14 +135,18 @@ static bool mayExtractBlock(const BasicBlock &BB) { return !BB.hasAddressTaken(); } -/// Check whether \p BB has at least \p Min non-debug, non-terminator -/// instructions. -static bool hasMinimumInstCount(const BasicBlock &BB, unsigned Min) { - unsigned Count = 0; +/// Check whether \p BB is profitable to outline (i.e. its code size cost meets +/// the threshold set in \p MinOutliningThreshold). +static bool isProfitableToOutline(const BasicBlock &BB, + TargetTransformInfo &TTI) { + int Cost = 0; for (const Instruction &I : BB) { if (isa(&I) || &I == BB.getTerminator()) continue; - if (++Count >= Min) + + Cost += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize); + + if (Cost >= (MinOutliningThreshold * TargetTransformInfo::TCC_Basic)) return true; } return false; @@ -156,8 +160,10 @@ static bool hasMinimumInstCount(const BasicBlock &BB, unsigned Min) { /// /// Return an empty sequence if the cold region is too small to outline, or if /// the cold region has no warm predecessors. -static BlockSequence -findMaximalColdRegion(BasicBlock &SinkBB, DominatorTree &DT, PostDomTree &PDT) { +static BlockSequence findMaximalColdRegion(BasicBlock &SinkBB, + TargetTransformInfo &TTI, + DominatorTree &DT, + PostDomTree &PDT) { // The maximal cold region. BlockSequence ColdRegion = {}; @@ -241,8 +247,7 @@ findMaximalColdRegion(BasicBlock &SinkBB, DominatorTree &DT, PostDomTree &PDT) { ++SuccIt; } - if (ColdRegion.size() == 1 && - !hasMinimumInstCount(*ColdRegion[0], MinOutliningInstCount)) + if (ColdRegion.size() == 1 && !isProfitableToOutline(*ColdRegion[0], TTI)) return {}; return ColdRegion; @@ -251,6 +256,7 @@ findMaximalColdRegion(BasicBlock &SinkBB, DominatorTree &DT, PostDomTree &PDT) { /// Get the largest cold region in \p F. static BlockSequence getLargestColdRegion(Function &F, ProfileSummaryInfo &PSI, BlockFrequencyInfo *BFI, + TargetTransformInfo &TTI, DominatorTree &DT, PostDomTree &PDT) { // Keep track of the largest cold region. BlockSequence LargestColdRegion = {}; @@ -270,7 +276,7 @@ static BlockSequence getLargestColdRegion(Function &F, ProfileSummaryInfo &PSI, }); // Find a maximal cold region we can outline. - BlockSequence ColdRegion = findMaximalColdRegion(BB, DT, PDT); + BlockSequence ColdRegion = findMaximalColdRegion(BB, TTI, DT, PDT); if (ColdRegion.empty()) { LLVM_DEBUG(dbgs() << " Skipping (block not profitable to extract)\n"); continue; @@ -305,7 +311,7 @@ public: private: bool shouldOutlineFrom(const Function &F) const; Function *extractColdRegion(const BlockSequence &Region, DominatorTree &DT, - BlockFrequencyInfo *BFI, + BlockFrequencyInfo *BFI, TargetTransformInfo &TTI, OptimizationRemarkEmitter &ORE, unsigned Count); SmallPtrSet OutlinedFunctions; ProfileSummaryInfo *PSI; @@ -365,6 +371,7 @@ bool HotColdSplitting::shouldOutlineFrom(const Function &F) const { Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region, DominatorTree &DT, BlockFrequencyInfo *BFI, + TargetTransformInfo &TTI, OptimizationRemarkEmitter &ORE, unsigned Count) { assert(!Region.empty()); @@ -393,7 +400,7 @@ Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region, CallInst *CI = cast(U); CallSite CS(CI); NumColdRegionsOutlined++; - if (GetTTI(*OutF).useColdCCForColdCall(*OutF)) { + if (TTI.useColdCCForColdCall(*OutF)) { OutF->setCallingConv(CallingConv::Cold); CS.setCallingConv(CallingConv::Cold); } @@ -437,14 +444,15 @@ bool HotColdSplitting::run(Module &M) { PostDomTree PDT(F); PDT.recalculate(F); BlockFrequencyInfo *BFI = GetBFI(F); + TargetTransformInfo &TTI = GetTTI(F); - BlockSequence ColdRegion = getLargestColdRegion(F, *PSI, BFI, DT, PDT); + BlockSequence ColdRegion = getLargestColdRegion(F, *PSI, BFI, TTI, DT, PDT); if (ColdRegion.empty()) continue; OptimizationRemarkEmitter &ORE = (*GetORE)(F); Function *Outlined = - extractColdRegion(ColdRegion, DT, BFI, ORE, /*Count=*/1); + extractColdRegion(ColdRegion, DT, BFI, TTI, ORE, /*Count=*/1); if (Outlined) { OutlinedFunctions.insert(Outlined); Changed = true; diff --git a/llvm/test/Transforms/HotColdSplit/X86/lit.local.cfg b/llvm/test/Transforms/HotColdSplit/X86/lit.local.cfg new file mode 100644 index 000000000000..e71f3cc4c41e --- /dev/null +++ b/llvm/test/Transforms/HotColdSplit/X86/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'X86' in config.root.targets: + config.unsupported = True + diff --git a/llvm/test/Transforms/HotColdSplit/X86/outline-expensive.ll b/llvm/test/Transforms/HotColdSplit/X86/outline-expensive.ll new file mode 100644 index 000000000000..5b0cceae2af3 --- /dev/null +++ b/llvm/test/Transforms/HotColdSplit/X86/outline-expensive.ll @@ -0,0 +1,25 @@ +; The magic number 6 comes from (1 * TCC_Expensive) + (1 * CostOfCallX86). +; RUN: opt -hotcoldsplit -min-outlining-thresh=6 -S < %s | FileCheck %s + +; Test that we outline even though there are only two cold instructions. TTI +; should determine that they are expensive in terms of code size. + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + +; CHECK-LABEL: @fun +; CHECK: call void @fun.cold.1 +define void @fun(i32 %x) { +entry: + br i1 undef, label %if.then, label %if.else + +if.then: + ret void + +if.else: + %y = sdiv i32 %x, 111 + call void @sink(i32 %y) + ret void +} + +declare void @sink(i32 %x) cold diff --git a/llvm/test/Transforms/HotColdSplit/do-not-split.ll b/llvm/test/Transforms/HotColdSplit/do-not-split.ll index 213681383ea1..d5a8c44cc04f 100644 --- a/llvm/test/Transforms/HotColdSplit/do-not-split.ll +++ b/llvm/test/Transforms/HotColdSplit/do-not-split.ll @@ -1,6 +1,9 @@ ; RUN: opt -hotcoldsplit -S < %s | FileCheck %s ; RUN: opt -passes=hotcoldsplit -S < %s | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + ; Check that these functions are not split. Outlined functions are called from a ; basic block named codeRepl. diff --git a/llvm/test/Transforms/HotColdSplit/minsize.ll b/llvm/test/Transforms/HotColdSplit/minsize.ll index eb42ad14af20..69cd0979b945 100644 --- a/llvm/test/Transforms/HotColdSplit/minsize.ll +++ b/llvm/test/Transforms/HotColdSplit/minsize.ll @@ -1,8 +1,10 @@ ; RUN: opt -hotcoldsplit -S < %s | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + ; CHECK-LABEL: @fun ; CHECK: call void @fun.cold.1 - define void @fun() { entry: br i1 undef, label %if.then, label %if.else diff --git a/llvm/test/Transforms/HotColdSplit/split-out-dbg-val-of-arg.ll b/llvm/test/Transforms/HotColdSplit/split-out-dbg-val-of-arg.ll index b77201fe0d38..becfaf8e63d3 100644 --- a/llvm/test/Transforms/HotColdSplit/split-out-dbg-val-of-arg.ll +++ b/llvm/test/Transforms/HotColdSplit/split-out-dbg-val-of-arg.ll @@ -1,5 +1,8 @@ ; RUN: opt -hotcoldsplit -S < %s | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.14.0" + ; CHECK-LABEL: define {{.*}}@foo.cold ; CHECK-NOT: llvm.dbg.value