diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h b/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h
new file mode 100644
index 000000000000..52a8617cd879
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h
@@ -0,0 +1,32 @@
+//===- BasicBlockSectionUtils.h - Utilities for basic block sections --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H
+#define LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H
+
+#include "llvm/ADT/STLExtras.h"
+
+namespace llvm {
+
+class MachineFunction;
+class MachineBasicBlock;
+
+/// Ordering predicate over machine basic blocks: returns true if the first
+/// block should be placed before the second in the function layout.
+using MachineBasicBlockComparator =
+    function_ref<bool(const MachineBasicBlock &, const MachineBasicBlock &)>;
+
+/// Sorts the basic blocks of \p MF using \p MBBCmp, then reassigns the section
+/// begin/end markers and updates branches so that fallthroughs which existed
+/// before the reordering are preserved.
+void sortBasicBlocksAndUpdateBranches(MachineFunction &MF,
+                                      MachineBasicBlockComparator MBBCmp);
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H
diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h
index 39df106b97ef..2061d6a6cbd2 100644
--- a/llvm/include/llvm/CodeGen/CommandFlags.h
+++ b/llvm/include/llvm/CodeGen/CommandFlags.h
@@ -114,6 +114,8 @@ bool getEnableAddrsig();
 
 bool getEmitCallSiteInfo();
 
+bool getEnableMachineFunctionSplitter();
+
 bool getEnableDebugEntryValues();
 
 bool getValueTrackingVariableLocations();
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index d5100d8c7b71..0ea2da9910f3 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -494,7 +494,8 @@ public:
   /// Returns true if this function has basic block sections enabled.
   bool hasBBSections() const {
     return (BBSectionsType == BasicBlockSection::All ||
-            BBSectionsType == BasicBlockSection::List);
+            BBSectionsType == BasicBlockSection::List ||
+            BBSectionsType == BasicBlockSection::Preset);
   }
 
   /// Returns true if basic block labels are to be generated for this function.
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index ccec44edeabb..6bd1b553c506 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -50,6 +50,10 @@ namespace llvm {
   /// selectively enable basic block sections.
   MachineFunctionPass *createBasicBlockSectionsPass(const MemoryBuffer *Buf);
 
+  /// createMachineFunctionSplitterPass - This pass splits machine functions
+  /// using profile information.
+  MachineFunctionPass *createMachineFunctionSplitterPass();
+
   /// MachineFunctionPrinter pass - This pass prints out the machine function to
   /// the given stream as a debugging tool.
   MachineFunctionPass *
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index e8637b5cd454..48ee4c2fcc84 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -280,6 +280,7 @@ void initializeMachineCopyPropagationPass(PassRegistry&);
 void initializeMachineDominanceFrontierPass(PassRegistry&);
 void initializeMachineDominatorTreePass(PassRegistry&);
 void initializeMachineFunctionPrinterPassPass(PassRegistry&);
+void initializeMachineFunctionSplitterPass(PassRegistry &);
 void initializeMachineLICMPass(PassRegistry&);
 void initializeMachineLoopInfoPass(PassRegistry&);
 void initializeMachineModuleInfoWrapperPassPass(PassRegistry &);
diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h
index 063fa9375114..96c7e8b42cdd 100644
--- a/llvm/include/llvm/Target/TargetOptions.h
+++ b/llvm/include/llvm/Target/TargetOptions.h
@@ -67,6 +67,9 @@ namespace llvm {
     Labels, // Do not use Basic Block Sections but label basic blocks. This
             // is useful when associating profile counts from virtual addresses
             // to basic blocks.
+    Preset, // Similar to list but the blocks are identified by passes which
+            // seek to use Basic Block Sections, e.g. MachineFunctionSplitter.
+            // This option cannot be set via the command line.
     None    // Do not use Basic Block Sections.
   };
 
@@ -124,10 +127,11 @@ namespace llvm {
           TrapUnreachable(false), NoTrapAfterNoreturn(false), TLSSize(0),
           EmulatedTLS(false), ExplicitEmulatedTLS(false), EnableIPRA(false),
           EmitStackSizeSection(false), EnableMachineOutliner(false),
-          SupportsDefaultOutlining(false), EmitAddrsig(false),
-          EmitCallSiteInfo(false), SupportsDebugEntryValues(false),
-          EnableDebugEntryValues(false), ValueTrackingVariableLocations(false),
-          ForceDwarfFrameSection(false), XRayOmitFunctionIndex(false),
+          EnableMachineFunctionSplitter(false), SupportsDefaultOutlining(false),
+          EmitAddrsig(false), EmitCallSiteInfo(false),
+          SupportsDebugEntryValues(false), EnableDebugEntryValues(false),
+          ValueTrackingVariableLocations(false), ForceDwarfFrameSection(false),
+          XRayOmitFunctionIndex(false),
           FPDenormalMode(DenormalMode::IEEE, DenormalMode::IEEE) {}
 
     /// DisableFramePointerElim - This returns true if frame pointer elimination
@@ -257,6 +261,9 @@ namespace llvm {
     /// Enables the MachineOutliner pass.
     unsigned EnableMachineOutliner : 1;
 
+    /// Enables the MachineFunctionSplitter pass.
+    unsigned EnableMachineFunctionSplitter : 1;
+
     /// Set if the target supports default outlining behaviour.
     unsigned SupportsDefaultOutlining : 1;
 
diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp
index cd8173eb753d..a3c366004c7f 100644
--- a/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -69,6 +69,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/BasicBlockSectionUtils.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
@@ -226,9 +227,9 @@ static bool getBBClusterInfoForFunction(
 // and "Cold" succeeding all other clusters.
 // FuncBBClusterInfo represent the cluster information for basic blocks. If this
 // is empty, it means unique sections for all basic blocks in the function.
-static bool assignSectionsAndSortBasicBlocks(
-    MachineFunction &MF,
-    const std::vector<Optional<BBClusterInfo>> &FuncBBClusterInfo) {
+static void
+assignSections(MachineFunction &MF,
+               const std::vector<Optional<BBClusterInfo>> &FuncBBClusterInfo) {
   assert(MF.hasBBSections() && "BB Sections is not set for function.");
   // This variable stores the section ID of the cluster containing eh_pads (if
   // all eh_pads are one cluster). If more than one cluster contain eh_pads, we
@@ -271,47 +272,16 @@ static bool assignSectionsAndSortBasicBlocks(
   for (auto &MBB : MF)
     if (MBB.isEHPad())
       MBB.setSectionID(EHPadsSectionID.getValue());
+}
 
+void llvm::sortBasicBlocksAndUpdateBranches(
+    MachineFunction &MF, MachineBasicBlockComparator MBBCmp) {
   SmallVector<MachineBasicBlock *, 4> PreLayoutFallThroughs(
       MF.getNumBlockIDs());
   for (auto &MBB : MF)
     PreLayoutFallThroughs[MBB.getNumber()] = MBB.getFallThrough();
 
-  // We make sure that the cluster including the entry basic block precedes all
-  // other clusters.
-  auto EntryBBSectionID = MF.front().getSectionID();
-
-  // Helper function for ordering BB sections as follows:
-  //   * Entry section (section including the entry block).
-  //   * Regular sections (in increasing order of their Number).
-  //     ...
-  //   * Exception section
-  //   * Cold section
-  auto MBBSectionOrder = [EntryBBSectionID](const MBBSectionID &LHS,
-                                            const MBBSectionID &RHS) {
-    // We make sure that the section containing the entry block precedes all the
-    // other sections.
-    if (LHS == EntryBBSectionID || RHS == EntryBBSectionID)
-      return LHS == EntryBBSectionID;
-    return LHS.Type == RHS.Type ? LHS.Number < RHS.Number : LHS.Type < RHS.Type;
-  };
-
-  // We sort all basic blocks to make sure the basic blocks of every cluster are
-  // contiguous and ordered accordingly. Furthermore, clusters are ordered in
-  // increasing order of their section IDs, with the exception and the
-  // cold section placed at the end of the function.
-  MF.sort([&](MachineBasicBlock &X, MachineBasicBlock &Y) {
-    auto XSectionID = X.getSectionID();
-    auto YSectionID = Y.getSectionID();
-    if (XSectionID != YSectionID)
-      return MBBSectionOrder(XSectionID, YSectionID);
-    // If the two basic block are in the same section, the order is decided by
-    // their position within the section.
-    if (XSectionID.Type == MBBSectionID::SectionType::Default)
-      return FuncBBClusterInfo[X.getNumber()]->PositionInCluster <
-             FuncBBClusterInfo[Y.getNumber()]->PositionInCluster;
-    return X.getNumber() < Y.getNumber();
-  });
+  MF.sort(MBBCmp);
 
   // Set IsBeginSection and IsEndSection according to the assigned section IDs.
   MF.assignBeginEndSections();
@@ -320,8 +290,6 @@ static bool assignSectionsAndSortBasicBlocks(
   // insert explicit fallthrough branches when required and optimize branches
   // when possible.
   updateBranches(MF, PreLayoutFallThroughs);
-
-  return true;
 }
 
 bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
@@ -347,7 +315,46 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
     return true;
   MF.setBBSectionsType(BBSectionsType);
   MF.createBBLabels();
-  assignSectionsAndSortBasicBlocks(MF, FuncBBClusterInfo);
+  assignSections(MF, FuncBBClusterInfo);
+
+  // We make sure that the cluster including the entry basic block precedes all
+  // other clusters.
+  auto EntryBBSectionID = MF.front().getSectionID();
+
+  // Helper function for ordering BB sections as follows:
+  //   * Entry section (section including the entry block).
+  //   * Regular sections (in increasing order of their Number).
+  //     ...
+  //   * Exception section
+  //   * Cold section
+  auto MBBSectionOrder = [EntryBBSectionID](const MBBSectionID &LHS,
+                                            const MBBSectionID &RHS) {
+    // We make sure that the section containing the entry block precedes all the
+    // other sections.
+    if (LHS == EntryBBSectionID || RHS == EntryBBSectionID)
+      return LHS == EntryBBSectionID;
+    return LHS.Type == RHS.Type ? LHS.Number < RHS.Number : LHS.Type < RHS.Type;
+  };
+
+  // We sort all basic blocks to make sure the basic blocks of every cluster are
+  // contiguous and ordered accordingly. Furthermore, clusters are ordered in
+  // increasing order of their section IDs, with the exception and the
+  // cold section placed at the end of the function.
+  auto Comparator = [&](const MachineBasicBlock &X,
+                        const MachineBasicBlock &Y) {
+    auto XSectionID = X.getSectionID();
+    auto YSectionID = Y.getSectionID();
+    if (XSectionID != YSectionID)
+      return MBBSectionOrder(XSectionID, YSectionID);
+    // If the two basic blocks are in the same section, the order is decided by
+    // their position within the section.
+    if (XSectionID.Type == MBBSectionID::SectionType::Default)
+      return FuncBBClusterInfo[X.getNumber()]->PositionInCluster <
+             FuncBBClusterInfo[Y.getNumber()]->PositionInCluster;
+    return X.getNumber() < Y.getNumber();
+  };
+
+  sortBasicBlocksAndUpdateBranches(MF, Comparator);
   return true;
 }
 
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index a90cb455858a..40512013e4e1 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -81,6 +81,7 @@ add_llvm_component_library(LLVMCodeGen
   MachineFunction.cpp
   MachineFunctionPass.cpp
   MachineFunctionPrinterPass.cpp
+  MachineFunctionSplitter.cpp
   MachineInstrBundle.cpp
   MachineInstr.cpp
   MachineLICM.cpp
diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp
index a47608cf6dd9..0a7c7a9ad025 100644
--- a/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/llvm/lib/CodeGen/CommandFlags.cpp
@@ -84,6 +84,7 @@ CGOPT(DebuggerKind, DebuggerTuningOpt)
 CGOPT(bool, EnableStackSizeSection)
 CGOPT(bool, EnableAddrsig)
 CGOPT(bool, EmitCallSiteInfo)
+CGOPT(bool, EnableMachineFunctionSplitter)
 CGOPT(bool, EnableDebugEntryValues)
 CGOPT(bool, ValueTrackingVariableLocations)
 CGOPT(bool, ForceDwarfFrameSection)
@@ -407,6 +408,13 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
       cl::init(false));
   CGBINDOPT(ValueTrackingVariableLocations);
 
+  static cl::opt<bool> EnableMachineFunctionSplitter(
+      "split-machine-functions",
+      cl::desc("Split out cold basic blocks from machine functions based on "
+               "profile information"),
+      cl::init(false));
+  CGBINDOPT(EnableMachineFunctionSplitter);
+
   static cl::opt<bool> ForceDwarfFrameSection(
       "force-dwarf-frame-section",
       cl::desc("Always emit a debug frame section."), cl::init(false));
@@ -479,6 +487,7 @@ TargetOptions codegen::InitTargetOptionsFromCodeGenFlags() {
   Options.ExplicitEmulatedTLS = EmulatedTLSView->getNumOccurrences() > 0;
   Options.ExceptionModel = getExceptionModel();
   Options.EmitStackSizeSection = getEnableStackSizeSection();
+  Options.EnableMachineFunctionSplitter = getEnableMachineFunctionSplitter();
   Options.EmitAddrsig = getEnableAddrsig();
   Options.EmitCallSiteInfo = getEmitCallSiteInfo();
   Options.EnableDebugEntryValues = getEnableDebugEntryValues();
diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
new file mode 100644
index 000000000000..1a1bd901c92d
--- /dev/null
+++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
@@ -0,0 +1,148 @@
+//===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// Uses profile information to split out cold blocks.
+//
+// This pass splits out cold machine basic blocks from the parent function. This
+// implementation leverages the basic block section framework. Blocks marked
+// cold by this pass are grouped together in a separate section prefixed with
+// ".text.unlikely.*". The linker can then group these together as a cold
+// section. The split part of the function is a contiguous region identified by
+// the symbol "foo.cold". Grouping all cold blocks across functions together
+// decreases fragmentation and improves icache and itlb utilization. Note that
+// the overall changes to the binary size are negligible; only a small number of
+// additional jump instructions may be introduced.
+//
+// For the original RFC of this pass please see
+// https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/BasicBlockSectionUtils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+static cl::opt<unsigned>
+    PercentileCutoff("mfs-psi-cutoff",
+                     cl::desc("Percentile profile summary cutoff used to "
+                              "determine cold blocks. Unused if set to zero."),
+                     cl::init(0), cl::Hidden);
+
+static cl::opt<unsigned> ColdCountThreshold(
+    "mfs-count-threshold",
+    cl::desc(
+        "Minimum number of times a block must be executed to be retained."),
+    cl::init(1), cl::Hidden);
+
+namespace {
+
+class MachineFunctionSplitter : public MachineFunctionPass {
+public:
+  static char ID;
+  MachineFunctionSplitter() : MachineFunctionPass(ID) {
+    initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override {
+    return "Machine Function Splitter Transformation";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // end anonymous namespace
+
+static bool isColdBlock(MachineBasicBlock &MBB,
+                        const MachineBlockFrequencyInfo *MBFI,
+                        ProfileSummaryInfo *PSI) {
+  Optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
+  if (!Count.hasValue())
+    return true;
+
+  if (PercentileCutoff > 0) {
+    return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
+  }
+  return (*Count < ColdCountThreshold);
+}
+
+bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
+  // TODO: We only target functions with profile data. Static information may
+  // also be considered but we don't see performance improvements yet.
+  if (!MF.getFunction().hasProfileData())
+    return false;
+
+  // TODO: We don't split functions where a section attribute has been set
+  // since the split part may not be placed in a contiguous region. It may also
+  // be more beneficial to augment the linker to ensure contiguous layout of
+  // split functions within the same section as specified by the attribute.
+  if (!MF.getFunction().getSection().empty())
+    return false;
+
+  // We don't want to proceed further for cold functions
+  // or functions of unknown hotness. Lukewarm functions have no prefix.
+  Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
+  if (SectionPrefix.hasValue() &&
+      (SectionPrefix.getValue().equals(".unlikely") ||
+       SectionPrefix.getValue().equals(".unknown"))) {
+    return false;
+  }
+
+  // Renumbering blocks here preserves the order of the blocks as
+  // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
+  // blocks. Preserving the order of blocks is essential to retaining decisions
+  // made by prior passes such as MachineBlockPlacement.
+  MF.RenumberBlocks();
+  MF.setBBSectionsType(BasicBlockSection::Preset);
+  auto *MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+
+  for (auto &MBB : MF) {
+    // FIXME: We retain the entry block and conservatively keep all landing pad
+    // blocks as part of the original function. Once D73739 is submitted, we can
+    // improve the handling of ehpads.
+    if (MBB.pred_empty() || MBB.isEHPad())
+      continue;
+    if (isColdBlock(MBB, MBFI, PSI))
+      MBB.setSectionID(MBBSectionID::ColdSectionID);
+  }
+
+  auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
+    return X.getSectionID().Type < Y.getSectionID().Type;
+  };
+  llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator);
+
+  return true;
+}
+
+void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<MachineModuleInfoWrapperPass>();
+  AU.addRequired<MachineBlockFrequencyInfo>();
+  AU.addRequired<ProfileSummaryInfoWrapperPass>();
+}
+
+char MachineFunctionSplitter::ID = 0;
+INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
+                "Split machine functions using profile information", false,
+                false)
+
+MachineFunctionPass *llvm::createMachineFunctionSplitterPass() {
+  return new MachineFunctionSplitter();
+}
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 9cdc828c7fdd..19db8eb480ca 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -212,6 +212,12 @@ static cl::opt<std::string>
                   cl::desc("Stop compilation before a specific pass"),
                   cl::value_desc("pass-name"), cl::init(""), cl::Hidden);
 
+/// Enable the machine function splitter pass.
+static cl::opt<bool> EnableMachineFunctionSplitter(
+    "enable-split-machine-functions", cl::Hidden,
+    cl::desc("Split out cold blocks from machine functions based on profile "
+             "information."));
+
 /// Allow standard passes to be disabled by command line options. This supports
 /// simple binary flags that either suppress the pass or do nothing.
 /// i.e. -disable-mypass=false has no effect.
@@ -1014,8 +1020,14 @@ void TargetPassConfig::addMachinePasses() {
     addPass(createMachineOutlinerPass(RunOnAllFunctions));
   }
 
-  if (TM->getBBSectionsType() != llvm::BasicBlockSection::None)
+  // Machine function splitter uses the basic block sections feature. Both
+  // cannot be enabled at the same time.
+  if (TM->Options.EnableMachineFunctionSplitter ||
+      EnableMachineFunctionSplitter) {
+    addPass(createMachineFunctionSplitterPass());
+  } else if (TM->getBBSectionsType() != llvm::BasicBlockSection::None) {
     addPass(llvm::createBasicBlockSectionsPass(TM->getBBSectionsFuncListBuf()));
+  }
 
   // Add passes that directly emit MI after all other MI passes.
   addPreEmitPass2();
diff --git a/llvm/test/CodeGen/X86/machine-function-splitter.ll b/llvm/test/CodeGen/X86/machine-function-splitter.ll
new file mode 100644
index 000000000000..ff95dafc9bcc
--- /dev/null
+++ b/llvm/test/CodeGen/X86/machine-function-splitter.ll
@@ -0,0 +1,218 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions | FileCheck %s -check-prefix=MFS-DEFAULTS
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-count-threshold=2000 | FileCheck %s -check-prefix=MFS-OPTS1
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefix=MFS-OPTS2
+
+define void @foo1(i1 zeroext %0) nounwind !prof !14 !section_prefix !15 {
+;; Check that cold block is moved to .text.unlikely.
+; MFS-DEFAULTS-LABEL: foo1
+; MFS-DEFAULTS: .section .text.unlikely.foo1
+; MFS-DEFAULTS-NEXT: foo1.cold:
+; MFS-DEFAULTS-NOT: callq bar
+; MFS-DEFAULTS-NEXT: callq baz
+  br i1 %0, label %2, label %4, !prof !17
+
+2:                                                ; preds = %1
+  %3 = call i32 @bar()
+  br label %6
+
+4:                                                ; preds = %1
+  %5 = call i32 @baz()
+  br label %6
+
+6:                                                ; preds = %4, %2
+  %7 = tail call i32 @qux()
+  ret void
+}
+
+define void @foo2(i1 zeroext %0) nounwind !prof !23 !section_prefix !16 {
+;; Check that function marked unlikely is not split.
+; MFS-DEFAULTS-LABEL: foo2
+; MFS-DEFAULTS-NOT: foo2.cold:
+  br i1 %0, label %2, label %4, !prof !17
+
+2:                                                ; preds = %1
+  %3 = call i32 @bar()
+  br label %6
+
+4:                                                ; preds = %1
+  %5 = call i32 @baz()
+  br label %6
+
+6:                                                ; preds = %4, %2
+  %7 = tail call i32 @qux()
+  ret void
+}
+
+define void @foo3(i1 zeroext %0) nounwind !section_prefix !15 {
+;; Check that function without profile data is not split.
+; MFS-DEFAULTS-LABEL: foo3
+; MFS-DEFAULTS-NOT: foo3.cold:
+  br i1 %0, label %2, label %4
+
+2:                                                ; preds = %1
+  %3 = call i32 @bar()
+  br label %6
+
+4:                                                ; preds = %1
+  %5 = call i32 @baz()
+  br label %6
+
+6:                                                ; preds = %4, %2
+  %7 = tail call i32 @qux()
+  ret void
+}
+
+define void @foo4(i1 zeroext %0, i1 zeroext %1) nounwind !prof !20 {
+;; Check that count threshold works.
+; MFS-OPTS1-LABEL: foo4
+; MFS-OPTS1: .section .text.unlikely.foo4
+; MFS-OPTS1-NEXT: foo4.cold:
+; MFS-OPTS1-NOT: callq bar
+; MFS-OPTS1-NOT: callq baz
+; MFS-OPTS1-NEXT: callq bam
+  br i1 %0, label %3, label %7, !prof !18
+
+3:
+  %4 = call i32 @bar()
+  br label %7
+
+5:
+  %6 = call i32 @baz()
+  br label %7
+
+7:
+  br i1 %1, label %8, label %10, !prof !19
+
+8:
+  %9 = call i32 @bam()
+  br label %12
+
+10:
+  %11 = call i32 @baz()
+  br label %12
+
+12:
+  %13 = tail call i32 @qux()
+  ret void
+}
+
+define void @foo5(i1 zeroext %0, i1 zeroext %1) nounwind !prof !20 {
+;; Check that profile summary info cutoff works.
+; MFS-OPTS2-LABEL: foo5
+; MFS-OPTS2: .section .text.unlikely.foo5
+; MFS-OPTS2-NEXT: foo5.cold:
+; MFS-OPTS2-NOT: callq bar
+; MFS-OPTS2-NOT: callq baz
+; MFS-OPTS2-NEXT: callq bam
+  br i1 %0, label %3, label %7, !prof !21
+
+3:
+  %4 = call i32 @bar()
+  br label %7
+
+5:
+  %6 = call i32 @baz()
+  br label %7
+
+7:
+  br i1 %1, label %8, label %10, !prof !22
+
+8:
+  %9 = call i32 @bam()
+  br label %12
+
+10:
+  %11 = call i32 @baz()
+  br label %12
+
+12:
+  %13 = call i32 @qux()
+  ret void
+}
+
+define void @foo6(i1 zeroext %0) nounwind section "nosplit" !prof !14 {
+;; Check that function with section attribute is not split.
+; MFS-DEFAULTS-LABEL: foo6
+; MFS-DEFAULTS-NOT: foo6.cold:
+  br i1 %0, label %2, label %4, !prof !17
+
+2:                                                ; preds = %1
+  %3 = call i32 @bar()
+  br label %6
+
+4:                                                ; preds = %1
+  %5 = call i32 @baz()
+  br label %6
+
+6:                                                ; preds = %4, %2
+  %7 = tail call i32 @qux()
+  ret void
+}
+
+define i32 @foo7(i1 zeroext %0) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !prof !14 {
+;; Check that cold ehpads are not split out.
+; MFS-DEFAULTS-LABEL: foo7
+; MFS-DEFAULTS: .section .text.unlikely.foo7,"ax",@progbits
+; MFS-DEFAULTS-NEXT: foo7.cold:
+; MFS-DEFAULTS-NOT: callq _Unwind_Resume
+; MFS-DEFAULTS: callq baz
+entry:
+  invoke void @_Z1fv()
+          to label %try.cont unwind label %lpad
+
+lpad:
+  %1 = landingpad { i8*, i32 }
+          cleanup
+          catch i8* bitcast (i8** @_ZTIi to i8*)
+  resume { i8*, i32 } %1
+
+try.cont:
+  br i1 %0, label %2, label %4, !prof !17
+
+2:                                                ; preds = try.cont
+  %3 = call i32 @bar()
+  br label %6
+
+4:                                                ; preds = try.cont
+  %5 = call i32 @baz()
+  br label %6
+
+6:                                                ; preds = %4, %2
+  %7 = tail call i32 @qux()
+  ret i32 %7
+}
+
+declare i32 @bar()
+declare i32 @baz()
+declare i32 @bam()
+declare i32 @qux()
+declare void @_Z1fv()
+declare i32 @__gxx_personality_v0(...)
+
+@_ZTIi = external constant i8*
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 5}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999900, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+!14 = !{!"function_entry_count", i64 7000}
+!15 = !{!"function_section_prefix", !".hot"}
+!16 = !{!"function_section_prefix", !".unlikely"}
+!17 = !{!"branch_weights", i32 7000, i32 0}
+!18 = !{!"branch_weights", i32 3000, i32 4000}
+!19 = !{!"branch_weights", i32 1000, i32 6000}
+!20 = !{!"function_entry_count", i64 10000}
+!21 = !{!"branch_weights", i32 6000, i32 4000}
+!22 = !{!"branch_weights", i32 80, i32 9920}
+!23 = !{!"function_entry_count", i64 7}