forked from OSchip/llvm-project
[llvm][CodeGen] Machine Function Splitter
We introduce a codegen optimization pass which splits functions into hot and cold parts. This pass leverages the basic block sections feature recently introduced in LLVM from the Propeller project. The pass targets functions with profile coverage, identifies cold blocks and moves them to a separate section. The linker groups all cold blocks across functions together, decreasing fragmentation and improving icache and itlb utilization. We evaluated the Machine Function Splitter pass on clang bootstrap and SPECInt 2017. For clang bootstrap we observe a mean 2.33% runtime improvement with a ~32% reduction in itlb and stlb misses. Additionally, L1 icache misses were reduced by 9.5% while L2 instruction misses were reduced by 20%. For SPECInt we report the change in IntRate for the C/C++ benchmarks. All benchmarks apart from mcf and x264 improve, on average by 0.6% with the max for deepsjeng at 1.6%. Benchmark (% change): 500.perlbench_r 0.78, 502.gcc_r 0.82, 505.mcf_r -0.30, 520.omnetpp_r 0.18, 523.xalancbmk_r 0.37, 525.x264_r -0.46, 531.deepsjeng_r 1.61, 541.leela_r 0.83, 557.xz_r 0.15. Differential Revision: https://reviews.llvm.org/D85368
This commit is contained in:
parent
064981f0ce
commit
94faadaca4
|
@ -0,0 +1,27 @@
|
|||
//===- BasicBlockSectionUtils.h - Utilities for basic block sections --===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H
|
||||
#define LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H
|
||||
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class MachineFunction;
|
||||
class MachineBasicBlock;
|
||||
|
||||
/// Callable taking two const MachineBasicBlock references and returning bool.
/// It is the ordering predicate accepted by sortBasicBlocksAndUpdateBranches.
using MachineBasicBlockComparator =
|
||||
function_ref<bool(const MachineBasicBlock &, const MachineBasicBlock &)>;
|
||||
|
||||
/// Reorders the basic blocks of \p MF using \p MBBCmp as the sort predicate.
/// The fallthrough successor of every block is recorded before sorting; after
/// sorting, section begin/end markers are assigned and branches are updated
/// (explicit fallthrough branches are inserted when required).
void sortBasicBlocksAndUpdateBranches(MachineFunction &MF,
|
||||
MachineBasicBlockComparator MBBCmp);
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H
|
|
@ -114,6 +114,8 @@ bool getEnableAddrsig();
|
|||
|
||||
bool getEmitCallSiteInfo();
|
||||
|
||||
bool getEnableMachineFunctionSplitter();
|
||||
|
||||
bool getEnableDebugEntryValues();
|
||||
|
||||
bool getValueTrackingVariableLocations();
|
||||
|
|
|
@ -494,7 +494,8 @@ public:
|
|||
/// Returns true if this function has basic block sections enabled.
|
||||
bool hasBBSections() const {
|
||||
return (BBSectionsType == BasicBlockSection::All ||
|
||||
BBSectionsType == BasicBlockSection::List);
|
||||
BBSectionsType == BasicBlockSection::List ||
|
||||
BBSectionsType == BasicBlockSection::Preset);
|
||||
}
|
||||
|
||||
/// Returns true if basic block labels are to be generated for this function.
|
||||
|
|
|
@ -50,6 +50,10 @@ namespace llvm {
|
|||
/// selectively enable basic block sections.
|
||||
MachineFunctionPass *createBasicBlockSectionsPass(const MemoryBuffer *Buf);
|
||||
|
||||
/// createMachineFunctionSplitterPass - This pass splits machine functions
|
||||
/// using profile information.
|
||||
MachineFunctionPass *createMachineFunctionSplitterPass();
|
||||
|
||||
/// MachineFunctionPrinter pass - This pass prints out the machine function to
|
||||
/// the given stream as a debugging tool.
|
||||
MachineFunctionPass *
|
||||
|
|
|
@ -280,6 +280,7 @@ void initializeMachineCopyPropagationPass(PassRegistry&);
|
|||
void initializeMachineDominanceFrontierPass(PassRegistry&);
|
||||
void initializeMachineDominatorTreePass(PassRegistry&);
|
||||
void initializeMachineFunctionPrinterPassPass(PassRegistry&);
|
||||
void initializeMachineFunctionSplitterPass(PassRegistry &);
|
||||
void initializeMachineLICMPass(PassRegistry&);
|
||||
void initializeMachineLoopInfoPass(PassRegistry&);
|
||||
void initializeMachineModuleInfoWrapperPassPass(PassRegistry &);
|
||||
|
|
|
@ -67,6 +67,9 @@ namespace llvm {
|
|||
Labels, // Do not use Basic Block Sections but label basic blocks. This
|
||||
// is useful when associating profile counts from virtual addresses
|
||||
// to basic blocks.
|
||||
Preset, // Similar to list but the blocks are identified by passes which
|
||||
// seek to use Basic Block Sections, e.g. MachineFunctionSplitter.
|
||||
// This option cannot be set via the command line.
|
||||
None // Do not use Basic Block Sections.
|
||||
};
|
||||
|
||||
|
@ -124,10 +127,11 @@ namespace llvm {
|
|||
TrapUnreachable(false), NoTrapAfterNoreturn(false), TLSSize(0),
|
||||
EmulatedTLS(false), ExplicitEmulatedTLS(false), EnableIPRA(false),
|
||||
EmitStackSizeSection(false), EnableMachineOutliner(false),
|
||||
SupportsDefaultOutlining(false), EmitAddrsig(false),
|
||||
EmitCallSiteInfo(false), SupportsDebugEntryValues(false),
|
||||
EnableDebugEntryValues(false), ValueTrackingVariableLocations(false),
|
||||
ForceDwarfFrameSection(false), XRayOmitFunctionIndex(false),
|
||||
EnableMachineFunctionSplitter(false), SupportsDefaultOutlining(false),
|
||||
EmitAddrsig(false), EmitCallSiteInfo(false),
|
||||
SupportsDebugEntryValues(false), EnableDebugEntryValues(false),
|
||||
ValueTrackingVariableLocations(false), ForceDwarfFrameSection(false),
|
||||
XRayOmitFunctionIndex(false),
|
||||
FPDenormalMode(DenormalMode::IEEE, DenormalMode::IEEE) {}
|
||||
|
||||
/// DisableFramePointerElim - This returns true if frame pointer elimination
|
||||
|
@ -257,6 +261,9 @@ namespace llvm {
|
|||
/// Enables the MachineOutliner pass.
|
||||
unsigned EnableMachineOutliner : 1;
|
||||
|
||||
/// Enables the MachineFunctionSplitter pass.
|
||||
unsigned EnableMachineFunctionSplitter : 1;
|
||||
|
||||
/// Set if the target supports default outlining behaviour.
|
||||
unsigned SupportsDefaultOutlining : 1;
|
||||
|
||||
|
|
|
@ -69,6 +69,7 @@
|
|||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/StringMap.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/CodeGen/BasicBlockSectionUtils.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineModuleInfo.h"
|
||||
|
@ -226,9 +227,9 @@ static bool getBBClusterInfoForFunction(
|
|||
// and "Cold" succeeding all other clusters.
|
||||
// FuncBBClusterInfo represent the cluster information for basic blocks. If this
|
||||
// is empty, it means unique sections for all basic blocks in the function.
|
||||
static bool assignSectionsAndSortBasicBlocks(
|
||||
MachineFunction &MF,
|
||||
const std::vector<Optional<BBClusterInfo>> &FuncBBClusterInfo) {
|
||||
static void
|
||||
assignSections(MachineFunction &MF,
|
||||
const std::vector<Optional<BBClusterInfo>> &FuncBBClusterInfo) {
|
||||
assert(MF.hasBBSections() && "BB Sections is not set for function.");
|
||||
// This variable stores the section ID of the cluster containing eh_pads (if
|
||||
// all eh_pads are one cluster). If more than one cluster contain eh_pads, we
|
||||
|
@ -271,47 +272,16 @@ static bool assignSectionsAndSortBasicBlocks(
|
|||
for (auto &MBB : MF)
|
||||
if (MBB.isEHPad())
|
||||
MBB.setSectionID(EHPadsSectionID.getValue());
|
||||
}
|
||||
|
||||
void llvm::sortBasicBlocksAndUpdateBranches(
|
||||
MachineFunction &MF, MachineBasicBlockComparator MBBCmp) {
|
||||
SmallVector<MachineBasicBlock *, 4> PreLayoutFallThroughs(
|
||||
MF.getNumBlockIDs());
|
||||
for (auto &MBB : MF)
|
||||
PreLayoutFallThroughs[MBB.getNumber()] = MBB.getFallThrough();
|
||||
|
||||
// We make sure that the cluster including the entry basic block precedes all
|
||||
// other clusters.
|
||||
auto EntryBBSectionID = MF.front().getSectionID();
|
||||
|
||||
// Helper function for ordering BB sections as follows:
|
||||
// * Entry section (section including the entry block).
|
||||
// * Regular sections (in increasing order of their Number).
|
||||
// ...
|
||||
// * Exception section
|
||||
// * Cold section
|
||||
auto MBBSectionOrder = [EntryBBSectionID](const MBBSectionID &LHS,
|
||||
const MBBSectionID &RHS) {
|
||||
// We make sure that the section containing the entry block precedes all the
|
||||
// other sections.
|
||||
if (LHS == EntryBBSectionID || RHS == EntryBBSectionID)
|
||||
return LHS == EntryBBSectionID;
|
||||
return LHS.Type == RHS.Type ? LHS.Number < RHS.Number : LHS.Type < RHS.Type;
|
||||
};
|
||||
|
||||
// We sort all basic blocks to make sure the basic blocks of every cluster are
|
||||
// contiguous and ordered accordingly. Furthermore, clusters are ordered in
|
||||
// increasing order of their section IDs, with the exception and the
|
||||
// cold section placed at the end of the function.
|
||||
MF.sort([&](MachineBasicBlock &X, MachineBasicBlock &Y) {
|
||||
auto XSectionID = X.getSectionID();
|
||||
auto YSectionID = Y.getSectionID();
|
||||
if (XSectionID != YSectionID)
|
||||
return MBBSectionOrder(XSectionID, YSectionID);
|
||||
// If the two basic block are in the same section, the order is decided by
|
||||
// their position within the section.
|
||||
if (XSectionID.Type == MBBSectionID::SectionType::Default)
|
||||
return FuncBBClusterInfo[X.getNumber()]->PositionInCluster <
|
||||
FuncBBClusterInfo[Y.getNumber()]->PositionInCluster;
|
||||
return X.getNumber() < Y.getNumber();
|
||||
});
|
||||
MF.sort(MBBCmp);
|
||||
|
||||
// Set IsBeginSection and IsEndSection according to the assigned section IDs.
|
||||
MF.assignBeginEndSections();
|
||||
|
@ -320,8 +290,6 @@ static bool assignSectionsAndSortBasicBlocks(
|
|||
// insert explicit fallthrough branches when required and optimize branches
|
||||
// when possible.
|
||||
updateBranches(MF, PreLayoutFallThroughs);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
|
||||
|
@ -347,7 +315,46 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
|
|||
return true;
|
||||
MF.setBBSectionsType(BBSectionsType);
|
||||
MF.createBBLabels();
|
||||
assignSectionsAndSortBasicBlocks(MF, FuncBBClusterInfo);
|
||||
assignSections(MF, FuncBBClusterInfo);
|
||||
|
||||
// We make sure that the cluster including the entry basic block precedes all
|
||||
// other clusters.
|
||||
auto EntryBBSectionID = MF.front().getSectionID();
|
||||
|
||||
// Helper function for ordering BB sections as follows:
|
||||
// * Entry section (section including the entry block).
|
||||
// * Regular sections (in increasing order of their Number).
|
||||
// ...
|
||||
// * Exception section
|
||||
// * Cold section
|
||||
auto MBBSectionOrder = [EntryBBSectionID](const MBBSectionID &LHS,
|
||||
const MBBSectionID &RHS) {
|
||||
// We make sure that the section containing the entry block precedes all the
|
||||
// other sections.
|
||||
if (LHS == EntryBBSectionID || RHS == EntryBBSectionID)
|
||||
return LHS == EntryBBSectionID;
|
||||
return LHS.Type == RHS.Type ? LHS.Number < RHS.Number : LHS.Type < RHS.Type;
|
||||
};
|
||||
|
||||
// We sort all basic blocks to make sure the basic blocks of every cluster are
|
||||
// contiguous and ordered accordingly. Furthermore, clusters are ordered in
|
||||
// increasing order of their section IDs, with the exception and the
|
||||
// cold section placed at the end of the function.
|
||||
auto Comparator = [&](const MachineBasicBlock &X,
|
||||
const MachineBasicBlock &Y) {
|
||||
auto XSectionID = X.getSectionID();
|
||||
auto YSectionID = Y.getSectionID();
|
||||
if (XSectionID != YSectionID)
|
||||
return MBBSectionOrder(XSectionID, YSectionID);
|
||||
// If the two basic block are in the same section, the order is decided by
|
||||
// their position within the section.
|
||||
if (XSectionID.Type == MBBSectionID::SectionType::Default)
|
||||
return FuncBBClusterInfo[X.getNumber()]->PositionInCluster <
|
||||
FuncBBClusterInfo[Y.getNumber()]->PositionInCluster;
|
||||
return X.getNumber() < Y.getNumber();
|
||||
};
|
||||
|
||||
sortBasicBlocksAndUpdateBranches(MF, Comparator);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -81,6 +81,7 @@ add_llvm_component_library(LLVMCodeGen
|
|||
MachineFunction.cpp
|
||||
MachineFunctionPass.cpp
|
||||
MachineFunctionPrinterPass.cpp
|
||||
MachineFunctionSplitter.cpp
|
||||
MachineInstrBundle.cpp
|
||||
MachineInstr.cpp
|
||||
MachineLICM.cpp
|
||||
|
|
|
@ -84,6 +84,7 @@ CGOPT(DebuggerKind, DebuggerTuningOpt)
|
|||
CGOPT(bool, EnableStackSizeSection)
|
||||
CGOPT(bool, EnableAddrsig)
|
||||
CGOPT(bool, EmitCallSiteInfo)
|
||||
CGOPT(bool, EnableMachineFunctionSplitter)
|
||||
CGOPT(bool, EnableDebugEntryValues)
|
||||
CGOPT(bool, ValueTrackingVariableLocations)
|
||||
CGOPT(bool, ForceDwarfFrameSection)
|
||||
|
@ -407,6 +408,13 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
|
|||
cl::init(false));
|
||||
CGBINDOPT(ValueTrackingVariableLocations);
|
||||
|
||||
static cl::opt<bool> EnableMachineFunctionSplitter(
|
||||
"split-machine-functions",
|
||||
cl::desc("Split out cold basic blocks from machine functions based on "
|
||||
"profile information"),
|
||||
cl::init(false));
|
||||
CGBINDOPT(EnableMachineFunctionSplitter);
|
||||
|
||||
static cl::opt<bool> ForceDwarfFrameSection(
|
||||
"force-dwarf-frame-section",
|
||||
cl::desc("Always emit a debug frame section."), cl::init(false));
|
||||
|
@ -479,6 +487,7 @@ TargetOptions codegen::InitTargetOptionsFromCodeGenFlags() {
|
|||
Options.ExplicitEmulatedTLS = EmulatedTLSView->getNumOccurrences() > 0;
|
||||
Options.ExceptionModel = getExceptionModel();
|
||||
Options.EmitStackSizeSection = getEnableStackSizeSection();
|
||||
Options.EnableMachineFunctionSplitter = getEnableMachineFunctionSplitter();
|
||||
Options.EmitAddrsig = getEnableAddrsig();
|
||||
Options.EmitCallSiteInfo = getEmitCallSiteInfo();
|
||||
Options.EnableDebugEntryValues = getEnableDebugEntryValues();
|
||||
|
|
|
@ -0,0 +1,148 @@
|
|||
//===-- MachineFunctionSplitter.cpp - Split machine functions -------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// \file
|
||||
// Uses profile information to split out cold blocks.
|
||||
//
|
||||
// This pass splits out cold machine basic blocks from the parent function. This
|
||||
// implementation leverages the basic block section framework. Blocks marked
|
||||
// cold by this pass are grouped together in a separate section prefixed with
|
||||
// ".text.unlikely.*". The linker can then group these together as a cold
|
||||
// section. The split part of the function is a contiguous region identified by
|
||||
// the symbol "foo.cold". Grouping all cold blocks across functions together
|
||||
// decreases fragmentation and improves icache and itlb utilization. Note that
|
||||
// the overall changes to the binary size are negligible; only a small number of
|
||||
// additional jump instructions may be introduced.
|
||||
//
|
||||
// For the original RFC of this pass please see
|
||||
// https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/CodeGen/BasicBlockSectionUtils.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineModuleInfo.h"
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/InitializePasses.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// Hidden option "mfs-psi-cutoff" (default 0). When greater than zero,
// isColdBlock classifies a block through
// ProfileSummaryInfo::isColdCountNthPercentile with this value instead of
// comparing the block count against ColdCountThreshold.
static cl::opt<unsigned>
|
||||
PercentileCutoff("mfs-psi-cutoff",
|
||||
cl::desc("Percentile profile summary cutoff used to "
|
||||
"determine cold blocks. Unused if set to zero."),
|
||||
cl::init(0), cl::Hidden);
|
||||
|
||||
// Hidden option "mfs-count-threshold" (default 1). When PercentileCutoff is
// zero, isColdBlock treats a block as cold if its profile count is strictly
// less than this value; with the default, only blocks with a count of zero
// qualify.
static cl::opt<unsigned> ColdCountThreshold(
|
||||
"mfs-count-threshold",
|
||||
cl::desc(
|
||||
"Minimum number of times a block must be executed to be retained."),
|
||||
cl::init(1), cl::Hidden);
|
||||
|
||||
namespace {
|
||||
|
||||
class MachineFunctionSplitter : public MachineFunctionPass {
|
||||
public:
|
||||
static char ID;
|
||||
MachineFunctionSplitter() : MachineFunctionPass(ID) {
|
||||
initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
StringRef getPassName() const override {
|
||||
return "Machine Function Splitter Transformation";
|
||||
}
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override;
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &F) override;
|
||||
};
|
||||
} // end anonymous namespace
|
||||
|
||||
static bool isColdBlock(MachineBasicBlock &MBB,
|
||||
const MachineBlockFrequencyInfo *MBFI,
|
||||
ProfileSummaryInfo *PSI) {
|
||||
Optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
|
||||
if (!Count.hasValue())
|
||||
return true;
|
||||
|
||||
if (PercentileCutoff > 0) {
|
||||
return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
|
||||
}
|
||||
return (*Count < ColdCountThreshold);
|
||||
}
|
||||
|
||||
/// Tags the cold blocks of \p MF with the cold section ID and reorders the
/// function so that blocks are grouped by section type.
///
/// \returns false, leaving \p MF untouched, when the function has no profile
///          data, carries an explicit section attribute, or has the section
///          prefix ".unlikely" or ".unknown"; true otherwise.
bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
  const Function &Fn = MF.getFunction();

  // TODO: Only functions with profile data are targeted. Static information
  // could be considered as well, but no performance improvement has been
  // observed from it so far.
  if (!Fn.hasProfileData())
    return false;

  // TODO: Functions with a section attribute are not split, because the split
  // part may not end up in a contiguous region. Teaching the linker to lay out
  // split functions contiguously within the attribute's section may be the
  // more beneficial long-term option.
  if (!Fn.getSection().empty())
    return false;

  // Cold functions and functions of unknown hotness are not processed any
  // further. Lukewarm functions have no prefix.
  Optional<StringRef> SectionPrefix = Fn.getSectionPrefix();
  if (SectionPrefix.hasValue()) {
    const StringRef Prefix = SectionPrefix.getValue();
    if (Prefix.equals(".unlikely") || Prefix.equals(".unknown"))
      return false;
  }

  // sortBasicBlocksAndUpdateBranches relies on the numeric identifiers of the
  // blocks, so renumber them first to keep the current block order. Keeping
  // that order is essential for retaining decisions made by earlier passes
  // such as MachineBlockPlacement.
  MF.RenumberBlocks();
  MF.setBBSectionsType(BasicBlockSection::Preset);
  auto *MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();

  for (auto &MBB : MF) {
    // FIXME: The entry block is retained and all landing pad blocks are
    // conservatively kept in the original function. Handling of ehpads can be
    // improved once D73739 is submitted.
    const bool KeepInPlace = MBB.pred_empty() || MBB.isEHPad();
    if (!KeepInPlace && isColdBlock(MBB, MBFI, PSI))
      MBB.setSectionID(MBBSectionID::ColdSectionID);
  }

  // Order blocks by the type of the section they were assigned to.
  auto BySectionType = [](const MachineBasicBlock &X,
                          const MachineBasicBlock &Y) {
    return X.getSectionID().Type < Y.getSectionID().Type;
  };
  llvm::sortBasicBlocksAndUpdateBranches(MF, BySectionType);

  return true;
}
|
||||
|
||||
// Declares the analyses this pass requires. MachineBlockFrequencyInfo and
// ProfileSummaryInfoWrapperPass are the two fetched through getAnalysis in
// runOnMachineFunction.
// NOTE(review): the MachineFunctionPass base implementation is not called
// here - confirm that this is intentional.
void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.addRequired<MachineModuleInfoWrapperPass>();
|
||||
AU.addRequired<MachineBlockFrequencyInfo>();
|
||||
AU.addRequired<ProfileSummaryInfoWrapperPass>();
|
||||
}
|
||||
|
||||
// Storage for the pass identifier, followed by the pass registration under
// the argument string "machine-function-splitter".
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags of INITIALIZE_PASS - confirm against the
// macro definition.
char MachineFunctionSplitter::ID = 0;
|
||||
INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
|
||||
"Split machine functions using profile information", false,
|
||||
false)
|
||||
|
||||
// Factory for the splitter pass: returns a newly allocated
// MachineFunctionSplitter as a MachineFunctionPass pointer.
MachineFunctionPass *llvm::createMachineFunctionSplitterPass() {
|
||||
return new MachineFunctionSplitter();
|
||||
}
|
|
@ -212,6 +212,12 @@ static cl::opt<std::string>
|
|||
cl::desc("Stop compilation before a specific pass"),
|
||||
cl::value_desc("pass-name"), cl::init(""), cl::Hidden);
|
||||
|
||||
/// Enable the machine function splitter pass.
|
||||
static cl::opt<bool> EnableMachineFunctionSplitter(
|
||||
"enable-split-machine-functions", cl::Hidden,
|
||||
cl::desc("Split out cold blocks from machine functions based on profile "
|
||||
"information."));
|
||||
|
||||
/// Allow standard passes to be disabled by command line options. This supports
|
||||
/// simple binary flags that either suppress the pass or do nothing.
|
||||
/// i.e. -disable-mypass=false has no effect.
|
||||
|
@ -1014,8 +1020,14 @@ void TargetPassConfig::addMachinePasses() {
|
|||
addPass(createMachineOutlinerPass(RunOnAllFunctions));
|
||||
}
|
||||
|
||||
if (TM->getBBSectionsType() != llvm::BasicBlockSection::None)
|
||||
// Machine function splitter uses the basic block sections feature. Both
|
||||
// cannot be enabled at the same time.
|
||||
if (TM->Options.EnableMachineFunctionSplitter ||
|
||||
EnableMachineFunctionSplitter) {
|
||||
addPass(createMachineFunctionSplitterPass());
|
||||
} else if (TM->getBBSectionsType() != llvm::BasicBlockSection::None) {
|
||||
addPass(llvm::createBasicBlockSectionsPass(TM->getBBSectionsFuncListBuf()));
|
||||
}
|
||||
|
||||
// Add passes that directly emit MI after all other MI passes.
|
||||
addPreEmitPass2();
|
||||
|
|
|
@ -0,0 +1,218 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions | FileCheck %s -check-prefix=MFS-DEFAULTS
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefix=MFS-OPTS1
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefix=MFS-OPTS2
|
||||
|
||||
define void @foo1(i1 zeroext %0) nounwind !prof !14 !section_prefix !15 {
|
||||
;; Check that cold block is moved to .text.unlikely.
|
||||
; MFS-DEFAULTS-LABEL: foo1
|
||||
; MFS-DEFAULTS: .section .text.unlikely.foo1
|
||||
; MFS-DEFAULTS-NEXT: foo1.cold:
|
||||
; MFS-DEFAULTS-NOT: callq bar
|
||||
; MFS-DEFAULTS-NEXT: callq baz
|
||||
br i1 %0, label %2, label %4, !prof !17
|
||||
|
||||
2: ; preds = %1
|
||||
%3 = call i32 @bar()
|
||||
br label %6
|
||||
|
||||
4: ; preds = %1
|
||||
%5 = call i32 @baz()
|
||||
br label %6
|
||||
|
||||
6: ; preds = %4, %2
|
||||
%7 = tail call i32 @qux()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @foo2(i1 zeroext %0) nounwind !prof !23 !section_prefix !16 {
|
||||
;; Check that function marked unlikely is not split.
|
||||
; MFS-DEFAULTS-LABEL: foo2
|
||||
; MFS-DEFAULTS-NOT: foo2.cold:
|
||||
br i1 %0, label %2, label %4, !prof !17
|
||||
|
||||
2: ; preds = %1
|
||||
%3 = call i32 @bar()
|
||||
br label %6
|
||||
|
||||
4: ; preds = %1
|
||||
%5 = call i32 @baz()
|
||||
br label %6
|
||||
|
||||
6: ; preds = %4, %2
|
||||
%7 = tail call i32 @qux()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @foo3(i1 zeroext %0) nounwind !section_prefix !15 {
|
||||
;; Check that function without profile data is not split.
|
||||
; MFS-DEFAULTS-LABEL: foo3
|
||||
; MFS-DEFAULTS-NOT: foo3.cold:
|
||||
br i1 %0, label %2, label %4
|
||||
|
||||
2: ; preds = %1
|
||||
%3 = call i32 @bar()
|
||||
br label %6
|
||||
|
||||
4: ; preds = %1
|
||||
%5 = call i32 @baz()
|
||||
br label %6
|
||||
|
||||
6: ; preds = %4, %2
|
||||
%7 = tail call i32 @qux()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @foo4(i1 zeroext %0, i1 zeroext %1) nounwind !prof !20 {
|
||||
;; Check that count threshold works.
|
||||
; MFS-OPTS1-LABEL: foo4
|
||||
; MFS-OPTS1: .section .text.unlikely.foo4
|
||||
; MFS-OPTS1-NEXT: foo4.cold:
|
||||
; MFS-OPTS1-NOT: callq bar
|
||||
; MFS-OPTS1-NOT: callq baz
|
||||
; MFS-OPTS1-NEXT: callq bam
|
||||
br i1 %0, label %3, label %7, !prof !18
|
||||
|
||||
3:
|
||||
%4 = call i32 @bar()
|
||||
br label %7
|
||||
|
||||
5:
|
||||
%6 = call i32 @baz()
|
||||
br label %7
|
||||
|
||||
7:
|
||||
br i1 %1, label %8, label %10, !prof !19
|
||||
|
||||
8:
|
||||
%9 = call i32 @bam()
|
||||
br label %12
|
||||
|
||||
10:
|
||||
%11 = call i32 @baz()
|
||||
br label %12
|
||||
|
||||
12:
|
||||
%13 = tail call i32 @qux()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @foo5(i1 zeroext %0, i1 zeroext %1) nounwind !prof !20 {
|
||||
;; Check that profile summary info cutoff works.
|
||||
; MFS-OPTS2-LABEL: foo5
|
||||
; MFS-OPTS2: .section .text.unlikely.foo5
|
||||
; MFS-OPTS2-NEXT: foo5.cold:
|
||||
; MFS-OPTS2-NOT: callq bar
|
||||
; MFS-OPTS2-NOT: callq baz
|
||||
; MFS-OPTS2-NEXT: callq bam
|
||||
br i1 %0, label %3, label %7, !prof !21
|
||||
|
||||
3:
|
||||
%4 = call i32 @bar()
|
||||
br label %7
|
||||
|
||||
5:
|
||||
%6 = call i32 @baz()
|
||||
br label %7
|
||||
|
||||
7:
|
||||
br i1 %1, label %8, label %10, !prof !22
|
||||
|
||||
8:
|
||||
%9 = call i32 @bam()
|
||||
br label %12
|
||||
|
||||
10:
|
||||
%11 = call i32 @baz()
|
||||
br label %12
|
||||
|
||||
12:
|
||||
%13 = call i32 @qux()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @foo6(i1 zeroext %0) nounwind section "nosplit" !prof !14 {
|
||||
;; Check that function with section attribute is not split.
|
||||
; MFS-DEFAULTS-LABEL: foo6
|
||||
; MFS-DEFAULTS-NOT: foo6.cold:
|
||||
br i1 %0, label %2, label %4, !prof !17
|
||||
|
||||
2: ; preds = %1
|
||||
%3 = call i32 @bar()
|
||||
br label %6
|
||||
|
||||
4: ; preds = %1
|
||||
%5 = call i32 @baz()
|
||||
br label %6
|
||||
|
||||
6: ; preds = %4, %2
|
||||
%7 = tail call i32 @qux()
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @foo7(i1 zeroext %0) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !prof !14 {
|
||||
;; Check that cold ehpads are not split out.
|
||||
; MFS-DEFAULTS-LABEL: foo7
|
||||
; MFS-DEFAULTS: .section .text.unlikely.foo7,"ax",@progbits
|
||||
; MFS-DEFAULTS-NEXT: foo7.cold:
|
||||
; MFS-DEFAULTS-NOT: callq _Unwind_Resume
|
||||
; MFS-DEFAULTS: callq baz
|
||||
entry:
|
||||
invoke void @_Z1fv()
|
||||
to label %try.cont unwind label %lpad
|
||||
|
||||
lpad:
|
||||
%1 = landingpad { i8*, i32 }
|
||||
cleanup
|
||||
catch i8* bitcast (i8** @_ZTIi to i8*)
|
||||
resume { i8*, i32 } %1
|
||||
|
||||
try.cont:
|
||||
br i1 %0, label %2, label %4, !prof !17
|
||||
|
||||
2: ; preds = try.cont
|
||||
%3 = call i32 @bar()
|
||||
br label %6
|
||||
|
||||
4: ; preds = try.cont
|
||||
%5 = call i32 @baz()
|
||||
br label %6
|
||||
|
||||
6: ; preds = %4, %2
|
||||
%7 = tail call i32 @qux()
|
||||
ret i32 %7
|
||||
}
|
||||
|
||||
declare i32 @bar()
|
||||
declare i32 @baz()
|
||||
declare i32 @bam()
|
||||
declare i32 @qux()
|
||||
declare void @_Z1fv()
|
||||
declare i32 @__gxx_personality_v0(...)
|
||||
|
||||
@_ZTIi = external constant i8*
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 5}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999900, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 7000}
|
||||
!15 = !{!"function_section_prefix", !".hot"}
|
||||
!16 = !{!"function_section_prefix", !".unlikely"}
|
||||
!17 = !{!"branch_weights", i32 7000, i32 0}
|
||||
!18 = !{!"branch_weights", i32 3000, i32 4000}
|
||||
!19 = !{!"branch_weights", i32 1000, i32 6000}
|
||||
!20 = !{!"function_entry_count", i64 10000}
|
||||
!21 = !{!"branch_weights", i32 6000, i32 4000}
|
||||
!22 = !{!"branch_weights", i32 80, i32 9920}
|
||||
!23 = !{!"function_entry_count", i64 7}
|
Loading…
Reference in New Issue