From bf899e891387d07dfd12de195ce2a16f62afd5e0 Mon Sep 17 00:00:00 2001 From: Andrew Litteken Date: Tue, 15 Sep 2020 18:05:38 -0500 Subject: [PATCH] [IRSim][IROutliner] Adding the extraction basics for the IROutliner. Extracting the similar regions is the first step in the IROutliner. Using the IRSimilarityIdentifier, we collect the SimilarityGroups and sort them by how many instructions will be removed. Each IRSimilarityCandidate is used to define an OutlinableRegion. Each region is ordered by their occurrence in the Module and the regions that are not compatible with previously outlined regions are discarded. Each region is then extracted with the CodeExtractor into its own function. We test that correctly extract in: test/Transforms/IROutliner/extraction.ll test/Transforms/IROutliner/address-taken.ll test/Transforms/IROutliner/outlining-same-globals.ll test/Transforms/IROutliner/outlining-same-constants.ll test/Transforms/IROutliner/outlining-different-structure.ll Reviewers: paquette, jroelofs, yroux Differential Revision: https://reviews.llvm.org/D86975 --- llvm/include/llvm/InitializePasses.h | 1 + llvm/include/llvm/Transforms/IPO.h | 5 + llvm/include/llvm/Transforms/IPO/IROutliner.h | 186 +++++++++ llvm/lib/Passes/PassBuilder.cpp | 9 + llvm/lib/Passes/PassRegistry.def | 1 + llvm/lib/Transforms/IPO/CMakeLists.txt | 1 + llvm/lib/Transforms/IPO/IPO.cpp | 1 + llvm/lib/Transforms/IPO/IROutliner.cpp | 369 ++++++++++++++++++ .../lib/Transforms/IPO/PassManagerBuilder.cpp | 6 + llvm/test/Transforms/IROutliner/extraction.ll | 127 ++++++ .../IROutliner/outlining-address-taken.ll | 91 +++++ .../outlining-different-structure.ll | 68 ++++ .../IROutliner/outlining-same-constants.ll | 67 ++++ .../IROutliner/outlining-same-globals.ll | 48 +++ 14 files changed, 980 insertions(+) create mode 100644 llvm/include/llvm/Transforms/IPO/IROutliner.h create mode 100644 llvm/lib/Transforms/IPO/IROutliner.cpp create mode 100644 llvm/test/Transforms/IROutliner/extraction.ll create mode 100644 llvm/test/Transforms/IROutliner/outlining-address-taken.ll create mode 100644 llvm/test/Transforms/IROutliner/outlining-different-structure.ll create mode 100644 llvm/test/Transforms/IROutliner/outlining-same-constants.ll create mode 100644 llvm/test/Transforms/IROutliner/outlining-same-globals.ll diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 0d0eaf0bca83..0ec87fb09e49 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -183,6 +183,7 @@ void initializeHotColdSplittingLegacyPassPass(PassRegistry&); void initializeHWAddressSanitizerLegacyPassPass(PassRegistry &); void initializeIPSCCPLegacyPassPass(PassRegistry&); void initializeIRCELegacyPassPass(PassRegistry&); +void initializeIROutlinerLegacyPassPass(PassRegistry&); void initializeIRSimilarityIdentifierWrapperPassPass(PassRegistry&); void initializeIRTranslatorPass(PassRegistry&); void initializeIVUsersWrapperPassPass(PassRegistry&); diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h index 1918ad76a270..af357181597a 100644 --- a/llvm/include/llvm/Transforms/IPO.h +++ b/llvm/include/llvm/Transforms/IPO.h @@ -215,6 +215,11 @@ ModulePass *createMergeFunctionsPass(); /// function(s). ModulePass *createHotColdSplittingPass(); +//===----------------------------------------------------------------------===// +/// createIROutlinerPass - This pass finds similar code regions and factors +/// those regions out into functions. +ModulePass *createIROutlinerPass(); + //===----------------------------------------------------------------------===// /// createPartialInliningPass - This pass inlines parts of functions. /// diff --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h new file mode 100644 index 000000000000..dd169602f004 --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h @@ -0,0 +1,186 @@ +//===- IROutliner.h - Extract similar IR regions into functions ------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// \file +// The interface file for the IROutliner which is used by the IROutliner Pass. +// +// The outliner uses the IRSimilarityIdentifier to identify the similar regions +// of code. It evaluates each set of IRSimilarityCandidates with an estimate of +// whether it will provide code size reduction. Each region is extracted using +// the code extractor. These extracted functions are consolidated into a single +// function and called from the extracted call site. +// +// For example: +// \code +// %1 = add i32 %a, %b +// %2 = add i32 %b, %a +// %3 = add i32 %b, %a +// %4 = add i32 %a, %b +// \endcode +// would become function +// \code +// define internal void outlined_ir_function(i32 %0, i32 %1) { +// %1 = add i32 %0, %1 +// %2 = add i32 %1, %0 +// ret void +// } +// \endcode +// with calls: +// \code +// call void outlined_ir_function(i32 %a, i32 %b) +// call void outlined_ir_function(i32 %b, i32 %a) +// \endcode +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_IROUTLINER_H +#define LLVM_TRANSFORMS_IPO_IROUTLINER_H + +#include "llvm/Analysis/IRSimilarityIdentifier.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/ValueMap.h" +#include "llvm/Transforms/Utils/CodeExtractor.h" +#include + +struct OutlinableGroup; + +namespace llvm { +using namespace IRSimilarity; + +class Module; +class TargetTransformInfo; +class OptimizationRemarkEmitter; + +/// The OutlinableRegion holds all the information for a specific region, or +/// sequence of instructions. This includes what values need to be hoisted to +/// arguments from the extracted function, inputs and outputs to the region, and +/// mapping from the extracted function arguments to overall function arguments. +struct OutlinableRegion { + /// Describes the region of code. + IRSimilarityCandidate *Candidate; + + /// If this region is outlined, the front and back IRInstructionData could + /// potentially become invalidated if the only new instruction is a call. + /// This ensures that we replace in the instruction in the IRInstructionData. + IRInstructionData *NewFront = nullptr; + IRInstructionData *NewBack = nullptr; + + /// Used to create an outlined function. + CodeExtractor *CE = nullptr; + + /// The call site of the extracted region. + CallInst *Call = nullptr; + + /// The function for the extracted region. + Function *ExtractedFunction = nullptr; + + /// Flag for whether we have split out the IRSimilarityCanidate. That is, + /// make the region contained the IRSimilarityCandidate its own BasicBlock. + bool CandidateSplit = false; + + /// Flag for whether we should not consider this region for extraction. + bool IgnoreRegion = false; + + /// The BasicBlock that is before the start of the region BasicBlock, + /// only defined when the region has been split. + BasicBlock *PrevBB = nullptr; + + /// The BasicBlock that contains the starting instruction of the region. + BasicBlock *StartBB = nullptr; + + /// The BasicBlock that contains the ending instruction of the region. + BasicBlock *EndBB = nullptr; + + /// The BasicBlock that is after the start of the region BasicBlock, + /// only defined when the region has been split. + BasicBlock *FollowBB = nullptr; + + /// The Outlinable Group that contains this region and structurally similar + /// regions to this region. + OutlinableGroup *Parent = nullptr; + + OutlinableRegion(IRSimilarityCandidate &C, OutlinableGroup &Group) + : Candidate(&C), Parent(&Group) { + StartBB = C.getStartBB(); + EndBB = C.getEndBB(); + } + + /// For the contained region, split the parent BasicBlock at the starting and + /// ending instructions of the contained IRSimilarityCandidate. + void splitCandidate(); + + /// For the contained region, reattach the BasicBlock at the starting and + /// ending instructions of the contained IRSimilarityCandidate, or if the + /// function has been extracted, the start and end of the BasicBlock + /// containing the called function. + void reattachCandidate(); +}; + +/// This class is a pass that identifies similarity in a Module, extracts +/// instances of the similarity, and then consolidating the similar regions +/// in an effort to reduce code size. It uses the IRSimilarityIdentifier pass +/// to identify the similar regions of code, and then extracts the similar +/// sections into a single function. See the above for an example as to +/// how code is extracted and consolidated into a single function. +class IROutliner { +public: + IROutliner(function_ref GTTI, + function_ref GIRSI) + : getTTI(GTTI), getIRSI(GIRSI) {} + bool run(Module &M); + +private: + /// Find repeated similar code sequences in \p M and outline them into new + /// Functions. + /// + /// \param [in] M - The module to outline from. + /// \returns The number of Functions created. + unsigned doOutline(Module &M); + + /// Remove all the IRSimilarityCandidates from \p CandidateVec that have + /// instructions contained in a previously outlined region and put the + /// remaining regions in \p CurrentGroup. + /// + /// \param [in] CandidateVec - List of similarity candidates for regions with + /// the same similarity structure. + /// \param [in,out] CurrentGroup - Contains the potential sections to + /// be outlined. + void + pruneIncompatibleRegions(std::vector &CandidateVec, + OutlinableGroup &CurrentGroup); + + /// Extract \p Region into its own function. + /// + /// \param [in] Region - The region to be extracted into its own function. + /// \returns True if it was successfully outlined. + bool extractSection(OutlinableRegion &Region); + + /// The set of outlined Instructions, identified by their location in the + /// sequential ordering of instructions in a Module. + DenseSet Outlined; + + /// TargetTransformInfo lambda for target specific information. + function_ref getTTI; + + /// IRSimilarityIdentifier lambda to retrieve IRSimilarityIdentifier. + function_ref getIRSI; + + /// The memory allocator used to allocate the OutlinableRegions and + /// CodeExtractors. + BumpPtrAllocator OutlinerAllocator; +}; + +/// Pass to outline similar regions. +class IROutlinerPass : public PassInfoMixin { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_IPO_IROUTLINER_H diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 2a9b0c1e4f6b..528f40b1de85 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -103,6 +103,7 @@ #include "llvm/Transforms/IPO/InferFunctionAttrs.h" #include "llvm/Transforms/IPO/Inliner.h" #include "llvm/Transforms/IPO/Internalize.h" +#include "llvm/Transforms/IPO/IROutliner.h" #include "llvm/Transforms/IPO/LoopExtractor.h" #include "llvm/Transforms/IPO/LowerTypeTests.h" #include "llvm/Transforms/IPO/MergeFunctions.h" @@ -279,6 +280,7 @@ extern cl::opt EnableConstraintElimination; extern cl::opt EnableGVNHoist; extern cl::opt EnableGVNSink; extern cl::opt EnableHotColdSplit; +extern cl::opt EnableIROutliner; extern cl::opt EnableOrderFileInstrumentation; extern cl::opt EnableCHR; extern cl::opt EnableUnrollAndJam; @@ -1256,6 +1258,13 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, if (EnableHotColdSplit && !LTOPreLink) MPM.addPass(HotColdSplittingPass()); + // Search the code for similar regions of code. If enough similar regions can + // be found where extracting the regions into their own function will decrease + // the size of the program, we extract the regions, a deduplicate the + // structurally similar regions. + if (EnableIROutliner) + MPM.addPass(IROutlinerPass()); + // LoopSink pass sinks instructions hoisted by LICM, which serves as a // canonicalization pass that enables other optimizations. As a result, // LoopSink pass needs to be a very late IR pass to avoid undoing LICM diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 7f0f51ad0977..c9bc0553ebcb 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -68,6 +68,7 @@ MODULE_PASS("instrprof", InstrProfiling()) MODULE_PASS("internalize", InternalizePass()) MODULE_PASS("invalidate", InvalidateAllAnalysesPass()) MODULE_PASS("ipsccp", IPSCCPPass()) +MODULE_PASS("iroutliner", IROutlinerPass()) MODULE_PASS("print-ir-similarity", IRSimilarityAnalysisPrinterPass(dbgs())) MODULE_PASS("loop-extract", LoopExtractorPass()) MODULE_PASS("lowertypetests", LowerTypeTestsPass()) diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt index 66e772c61b0a..840c1aabecbb 100644 --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_component_library(LLVMipo GlobalSplit.cpp HotColdSplitting.cpp IPO.cpp + IROutliner.cpp InferFunctionAttrs.cpp InlineSimple.cpp Inliner.cpp diff --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp index 52c4d3229bb7..f4c12dd7f4cd 100644 --- a/llvm/lib/Transforms/IPO/IPO.cpp +++ b/llvm/lib/Transforms/IPO/IPO.cpp @@ -36,6 +36,7 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeGlobalOptLegacyPassPass(Registry); initializeGlobalSplitPass(Registry); initializeHotColdSplittingLegacyPassPass(Registry); + initializeIROutlinerLegacyPassPass(Registry); initializeAlwaysInlinerLegacyPassPass(Registry); initializeSimpleInlinerPass(Registry); initializeInferFunctionAttrsLegacyPassPass(Registry); diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp new file mode 100644 index 000000000000..f5fdad76df9b --- /dev/null +++ b/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -0,0 +1,369 @@ +//===- IROutliner.cpp -- Outline Similar Regions ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +// Implementation for the IROutliner which is used by the IROutliner Pass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/IROutliner.h" +#include "llvm/Analysis/IRSimilarityIdentifier.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/IPO.h" +#include +#include +#include + +#define DEBUG_TYPE "iroutliner" + +using namespace llvm; +using namespace IRSimilarity; + +/// The OutlinableGroup holds all the overarching information for outlining +/// a set of regions that are structurally similar to one another, such as the +/// types of the overall function, the output blocks, the sets of stores needed +/// and a list of the different regions. This information is used in the +/// deduplication of extracted regions with the same structure. +struct OutlinableGroup { + /// The sections that could be outlined + std::vector Regions; + + /// Flag for whether we should not consider this group of OutlinableRegions + /// for extraction. + bool IgnoreGroup = false; +}; + +/// Move the contents of \p SourceBB to before the last instruction of \p +/// TargetBB. +/// \param SourceBB - the BasicBlock to pull Instructions from. +/// \param TargetBB - the BasicBlock to put Instruction into. +static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) { + BasicBlock::iterator BBCurr, BBEnd, BBNext; + for (BBCurr = SourceBB.begin(), BBEnd = SourceBB.end(); BBCurr != BBEnd; + BBCurr = BBNext) { + BBNext = std::next(BBCurr); + BBCurr->moveBefore(TargetBB, TargetBB.end()); + } +} + +void OutlinableRegion::splitCandidate() { + assert(!CandidateSplit && "Candidate already split!"); + + Instruction *StartInst = (*Candidate->begin()).Inst; + Instruction *EndInst = (*Candidate->end()).Inst; + assert(StartInst && EndInst && "Expected a start and end instruction?"); + StartBB = StartInst->getParent(); + PrevBB = StartBB; + + // The basic block gets split like so: + // block: block: + // inst1 inst1 + // inst2 inst2 + // region1 br block_to_outline + // region2 block_to_outline: + // region3 -> region1 + // region4 region2 + // inst3 region3 + // inst4 region4 + // br block_after_outline + // block_after_outline: + // inst3 + // inst4 + + std::string OriginalName = PrevBB->getName().str(); + + StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline"); + + // This is the case for the inner block since we do not have to include + // multiple blocks. + EndBB = StartBB; + FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline"); + + CandidateSplit = true; +} + +void OutlinableRegion::reattachCandidate() { + assert(CandidateSplit && "Candidate is not split!"); + + // The basic block gets reattached like so: + // block: block: + // inst1 inst1 + // inst2 inst2 + // br block_to_outline region1 + // block_to_outline: -> region2 + // region1 region3 + // region2 region4 + // region3 inst3 + // region4 inst4 + // br block_after_outline + // block_after_outline: + // inst3 + // inst4 + assert(StartBB != nullptr && "StartBB for Candidate is not defined!"); + assert(FollowBB != nullptr && "StartBB for Candidate is not defined!"); + + // StartBB should only have one predecessor since we put an unconditional + // branch at the end of PrevBB when we split the BasicBlock. + PrevBB = StartBB->getSinglePredecessor(); + assert(PrevBB != nullptr && + "No Predecessor for the region start basic block!"); + + assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!"); + assert(EndBB->getTerminator() && "Terminator removed from EndBB!"); + PrevBB->getTerminator()->eraseFromParent(); + EndBB->getTerminator()->eraseFromParent(); + + moveBBContents(*StartBB, *PrevBB); + + BasicBlock *PlacementBB = PrevBB; + if (StartBB != EndBB) + PlacementBB = EndBB; + moveBBContents(*FollowBB, *PlacementBB); + + PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB); + PrevBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB); + StartBB->eraseFromParent(); + FollowBB->eraseFromParent(); + + // Make sure to save changes back to the StartBB. + StartBB = PrevBB; + EndBB = nullptr; + PrevBB = nullptr; + FollowBB = nullptr; + + CandidateSplit = false; +} + +void IROutliner::pruneIncompatibleRegions( + std::vector &CandidateVec, + OutlinableGroup &CurrentGroup) { + bool PreviouslyOutlined; + + // Sort from beginning to end, so the IRSimilarityCandidates are in order. + stable_sort(CandidateVec, [](const IRSimilarityCandidate &LHS, + const IRSimilarityCandidate &RHS) { + return LHS.getStartIdx() < RHS.getStartIdx(); + }); + + unsigned CurrentEndIdx = 0; + for (IRSimilarityCandidate &IRSC : CandidateVec) { + PreviouslyOutlined = false; + unsigned StartIdx = IRSC.getStartIdx(); + unsigned EndIdx = IRSC.getEndIdx(); + + for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++) + if (Outlined.find(Idx) != Outlined.end()) { + PreviouslyOutlined = true; + break; + } + + if (PreviouslyOutlined) + continue; + + // TODO: If in the future we can outline across BasicBlocks, we will need to + // check all BasicBlocks contained in the region. + if (IRSC.getStartBB()->hasAddressTaken()) + continue; + + // Greedily prune out any regions that will overlap with already chosen + // regions. + if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx) + continue; + + OutlinableRegion *OS = new (OutlinerAllocator.Allocate()) + OutlinableRegion(IRSC, CurrentGroup); + CurrentGroup.Regions.push_back(OS); + + CurrentEndIdx = EndIdx; + } +} + +bool IROutliner::extractSection(OutlinableRegion &Region) { + assert(Region.StartBB != nullptr && + "StartBB for the OutlinableRegion is nullptr!"); + assert(Region.FollowBB != nullptr && + "StartBB for the OutlinableRegion is nullptr!"); + Function *OrigF = Region.StartBB->getParent(); + CodeExtractorAnalysisCache CEAC(*OrigF); + Region.ExtractedFunction = Region.CE->extractCodeRegion(CEAC); + + // If the extraction was successful, find the BasicBlock, and reassign the + // OutlinableRegion blocks + if (!Region.ExtractedFunction) { + LLVM_DEBUG(dbgs() << "CodeExtractor failed to outline " << Region.StartBB + << "\n"); + Region.reattachCandidate(); + return false; + } + + BasicBlock *RewrittenBB = Region.FollowBB->getSinglePredecessor(); + Region.StartBB = RewrittenBB; + Region.EndBB = RewrittenBB; + + // The sequences of outlinable regions has now changed. We must fix the + // IRInstructionDataList for consistency. Although they may not be illegal + // instructions, they should not be compared with anything else as they + // should not be outlined in this round. So marking these as illegal is + // allowed. + IRInstructionDataList *IDL = Region.Candidate->front()->IDL; + Region.NewFront = new (OutlinerAllocator.Allocate()) + IRInstructionData(*RewrittenBB->begin(), false, *IDL); + Region.NewBack = new (OutlinerAllocator.Allocate()) + IRInstructionData(*std::prev(std::prev(RewrittenBB->end())), false, *IDL); + + // Insert the first IRInstructionData of the new region in front of the + // first IRInstructionData of the IRSimilarityCandidate. + IDL->insert(Region.Candidate->begin(), *Region.NewFront); + // Insert the first IRInstructionData of the new region after the + // last IRInstructionData of the IRSimilarityCandidate. + IDL->insert(Region.Candidate->end(), *Region.NewBack); + // Remove the IRInstructionData from the IRSimilarityCandidate. + IDL->erase(Region.Candidate->begin(), std::prev(Region.Candidate->end())); + + assert(RewrittenBB != nullptr && + "Could not find a predecessor after extraction!"); + + // Iterate over the new set of instructions to find the new call + // instruction. + for (Instruction &I : *RewrittenBB) + if (CallInst *CI = dyn_cast(&I)) + if (Region.ExtractedFunction == CI->getCalledFunction()) { + Region.Call = CI; + break; + } + Region.reattachCandidate(); + return true; +} + +unsigned IROutliner::doOutline(Module &M) { + // Find the possibile similarity sections. + IRSimilarityIdentifier &Identifier = getIRSI(M); + SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity(); + + // Sort them by size of extracted sections + unsigned OutlinedFunctionNum = 0; + // If we only have one SimilarityGroup in SimilarityCandidates, we do not have + // to sort them by the potential number of instructions to be outlined + if (SimilarityCandidates.size() > 1) + llvm::stable_sort(SimilarityCandidates, + [](const std::vector &LHS, + const std::vector &RHS) { + return LHS[0].getLength() * LHS.size() > + RHS[0].getLength() * RHS.size(); + }); + + // Iterate over the possible sets of similarity. + for (SimilarityGroup &CandidateVec : SimilarityCandidates) { + OutlinableGroup CurrentGroup; + + // Remove entries that were previously outlined + pruneIncompatibleRegions(CandidateVec, CurrentGroup); + + // We pruned the number of regions to 0 to 1, meaning that it's not worth + // trying to outlined since there is no compatible similar instance of this + // code. + if (CurrentGroup.Regions.size() < 2) + continue; + + // Create a CodeExtractor for each outlinable region. + for (OutlinableRegion *OS : CurrentGroup.Regions) { + // Break the outlinable region out of its parent BasicBlock into its own + // BasicBlocks (see function implementation). + OS->splitCandidate(); + std::vector BE = {OS->StartBB}; + OS->CE = new (OutlinerAllocator.Allocate()) + CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, + false, "outlined"); + } + + // Create functions out of all the sections, and mark them as outlined + std::vector OutlinedRegions; + for (OutlinableRegion *OS : CurrentGroup.Regions) { + OutlinedFunctionNum++; + bool FunctionOutlined = extractSection(*OS); + if (FunctionOutlined) { + unsigned StartIdx = OS->Candidate->getStartIdx(); + unsigned EndIdx = OS->Candidate->getEndIdx(); + for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++) + Outlined.insert(Idx); + + OutlinedRegions.push_back(OS); + } + } + + CurrentGroup.Regions = std::move(OutlinedRegions); + } + + return OutlinedFunctionNum; +} + +bool IROutliner::run(Module &M) { return doOutline(M) > 0; } + +// Pass Manager Boilerplate +class IROutlinerLegacyPass : public ModulePass { +public: + static char ID; + IROutlinerLegacyPass() : ModulePass(ID) { + initializeIROutlinerLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + } + + bool runOnModule(Module &M) override; +}; + +bool IROutlinerLegacyPass::runOnModule(Module &M) { + if (skipModule(M)) + return false; + + auto GTTI = [this](Function &F) -> TargetTransformInfo & { + return this->getAnalysis().getTTI(F); + }; + + auto GIRSI = [this](Module &) -> IRSimilarityIdentifier & { + return this->getAnalysis().getIRSI(); + }; + + return IROutliner(GTTI, GIRSI).run(M); +} + +PreservedAnalyses IROutlinerPass::run(Module &M, ModuleAnalysisManager &AM) { + auto &FAM = AM.getResult(M).getManager(); + + std::function GTTI = + [&FAM](Function &F) -> TargetTransformInfo & { + return FAM.getResult(F); + }; + + std::function GIRSI = + [&AM](Module &M) -> IRSimilarityIdentifier & { + return AM.getResult(M); + }; + + if (IROutliner(GTTI, GIRSI).run(M)) + return PreservedAnalyses::none(); + return PreservedAnalyses::all(); +} + +char IROutlinerLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false, + false) +INITIALIZE_PASS_DEPENDENCY(IRSimilarityIdentifierWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_END(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false, + false) + +ModulePass *llvm::createIROutlinerPass() { return new IROutlinerLegacyPass(); } diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index e03015f8b0aa..14a265215e2f 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -107,6 +107,9 @@ static cl::opt cl::opt EnableHotColdSplit("hot-cold-split", cl::init(false), cl::ZeroOrMore, cl::desc("Enable hot-cold splitting pass")); +cl::opt EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden, + cl::desc("Enable ir outliner pass")); + static cl::opt UseLoopVersioningLICM( "enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass")); @@ -873,6 +876,9 @@ void PassManagerBuilder::populateModulePassManager( if (EnableHotColdSplit && !(PrepareForLTO || PrepareForThinLTO)) MPM.add(createHotColdSplittingPass()); + if (EnableIROutliner) + MPM.add(createIROutlinerPass()); + if (MergeFunctions) MPM.add(createMergeFunctionsPass()); diff --git a/llvm/test/Transforms/IROutliner/extraction.ll b/llvm/test/Transforms/IROutliner/extraction.ll new file mode 100644 index 000000000000..ec0f0ea4854e --- /dev/null +++ b/llvm/test/Transforms/IROutliner/extraction.ll @@ -0,0 +1,127 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test makes sure we are extracting the found similarity sections +; correctly at the call site. + +define void @extract1() { +; CHECK-LABEL: @extract1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @extract1.outlined(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +define void @extract2() { +; CHECK-LABEL: @extract2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @extract2.outlined(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +define void @extract_outs1() #0 { +; CHECK-LABEL: @extract_outs1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DOTLOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[ADD_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: call void @extract_outs1.outlined(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]]) +; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4 +; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[OUTPUT]], align 4 +; CHECK-NEXT: call void @extract_outs1.outlined.1(i32 [[DOTRELOAD]], i32 [[ADD_RELOAD]], i32* [[RESULT]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %output = alloca i32, align 4 + %result = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + %0 = load i32, i32* %a, align 4 + %1 = load i32, i32* %b, align 4 + %add = add i32 %0, %1 + store i32 %add, i32* %output, align 4 + %2 = load i32, i32* %output, align 4 + %3 = load i32, i32* %output, align 4 + %mul = mul i32 %2, %add + store i32 %mul, i32* %result, align 4 + ret void +} + +define void @extract_outs2() #0 { +; CHECK-LABEL: @extract_outs2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DOTLOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[ADD_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: call void @extract_outs2.outlined(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]]) +; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4 +; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: call void @extract_outs2.outlined.2(i32 [[DOTRELOAD]], i32 [[ADD_RELOAD]], i32* [[RESULT]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %output = alloca i32, align 4 + %result = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + %0 = load i32, i32* %a, align 4 + %1 = load i32, i32* %b, align 4 + %add = add i32 %0, %1 + store i32 %add, i32* %output, align 4 + %2 = load i32, i32* %output, align 4 + %mul = mul i32 %2, %add + store i32 %mul, i32* %result, align 4 + ret void +} diff --git a/llvm/test/Transforms/IROutliner/outlining-address-taken.ll b/llvm/test/Transforms/IROutliner/outlining-address-taken.ll new file mode 100644 index 000000000000..dbae8e8230d6 --- /dev/null +++ b/llvm/test/Transforms/IROutliner/outlining-address-taken.ll @@ -0,0 +1,91 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test shows that we do not outline from basic blocks with their address +; taken. + +@ba1 = constant i8* blockaddress (@dontoutline, %new_block) + +define void @outline_1() { +; CHECK-LABEL: @outline_1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @[[FUNCTION_1:.*]](i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +define void @outline_2() { +; CHECK-LABEL: @outline_2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @[[FUNCTION_2:.*]](i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +define void @dontoutline() { +; CHECK-LABEL: @dontoutline( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[NEW_BLOCK:%.*]] +; CHECK: new_block: +; CHECK-NEXT: store i32 2, i32* [[A]], align 4 +; CHECK-NEXT: store i32 3, i32* [[B]], align 4 +; CHECK-NEXT: store i32 4, i32* [[C]], align 4 +; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[C]], align 4 +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + br label %new_block +new_block: + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +; CHECK: define internal void @[[FUNCTION_2]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]]) +; CHECK: entry_to_outline: +; CHECK-NEXT: store i32 2, i32* [[ARG0]], align 4 +; CHECK-NEXT: store i32 3, i32* [[ARG1]], align 4 +; CHECK-NEXT: store i32 4, i32* [[ARG2]], align 4 +; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 +; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 +; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 diff --git a/llvm/test/Transforms/IROutliner/outlining-different-structure.ll b/llvm/test/Transforms/IROutliner/outlining-different-structure.ll new file mode 100644 index 000000000000..bb4af26bf990 --- /dev/null +++ b/llvm/test/Transforms/IROutliner/outlining-different-structure.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This is a negative case to show that when we have the same set of +; instructions, but in a different order, they are not outlined in the same way. +; In this case, the arguments passed into the function are in a different order. + +define void @outline_constants1() { +; CHECK-LABEL: @outline_constants1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 2, i32* [[A]], align 4 +; CHECK-NEXT: store i32 3, i32* [[B]], align 4 +; CHECK-NEXT: store i32 4, i32* [[C]], align 4 +; CHECK-NEXT: call void @[[FUNCTION_0:.*]](i32* [[A]], i32* [[C]], i32* [[B]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %cl = load i32, i32* %c + %bl = load i32, i32* %b + ret void +} + +define void @outline_constants2() { +; CHECK-LABEL: @outline_constants2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 2, i32* [[A]], align 4 +; CHECK-NEXT: store i32 3, i32* [[B]], align 4 +; CHECK-NEXT: store i32 4, i32* [[C]], align 4 +; CHECK-NEXT: call void @[[FUNCTION_1:.*]](i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +; CHECK: define internal void @[[FUNCTION_0]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]]) +; CHECK: entry_to_outline: +; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 +; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 +; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 + +; CHECK: define internal void @[[FUNCTION_1]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]]) +; CHECK: entry_to_outline: +; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 +; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 +; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 diff --git a/llvm/test/Transforms/IROutliner/outlining-same-constants.ll b/llvm/test/Transforms/IROutliner/outlining-same-constants.ll new file mode 100644 index 000000000000..e6228abc3d28 --- /dev/null +++ b/llvm/test/Transforms/IROutliner/outlining-same-constants.ll @@ -0,0 +1,67 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test looks at the constants in the regions, and if it they are the +; same it outlines them as constants rather than elevating them to arguments. + +define void @outline_constants1() { +; CHECK-LABEL: @outline_constants1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @[[FUNCTION_0:.*]](i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +define void @outline_constants2() { +; CHECK-LABEL: @outline_constants2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @[[FUNCTION_1:.*]](i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +; CHECK: define internal void @[[FUNCTION_0]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]]) +; CHECK: entry_to_outline: +; CHECK-NEXT: store i32 2, i32* [[ARG0]], align 4 +; CHECK-NEXT: store i32 3, i32* [[ARG1]], align 4 +; CHECK-NEXT: store i32 4, i32* [[ARG2]], align 4 +; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 +; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 +; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 + +; CHECK: define internal void @[[FUNCTION_1]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]]) +; CHECK: entry_to_outline: +; CHECK-NEXT: store i32 2, i32* [[ARG0]], align 4 +; CHECK-NEXT: store i32 3, i32* [[ARG1]], align 4 +; CHECK-NEXT: store i32 4, i32* [[ARG2]], align 4 +; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 +; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 +; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 diff --git a/llvm/test/Transforms/IROutliner/outlining-same-globals.ll b/llvm/test/Transforms/IROutliner/outlining-same-globals.ll new file mode 100644 index 000000000000..9e28cc919010 --- /dev/null +++ b/llvm/test/Transforms/IROutliner/outlining-same-globals.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +@global1 = global i32 1, align 4 +@global2 = global i32 2, align 4 + +; This test looks at the globals in the regions, and if it they are the +; same it outlines the region without elevating the globals to arguments. + +define void @outline_globals1() { +; CHECK-LABEL: @outline_globals1( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @[[FUNCTION_0:.*]]() +; CHECK-NEXT: ret void +; +entry: + %0 = load i32, i32* @global1 + %1 = load i32, i32* @global2 + %2 = add i32 %0, %1 + ret void +} + +define void @outline_globals2() { +; CHECK-LABEL: @outline_globals2( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @[[FUNCTION_1:.*]]() +; CHECK-NEXT: ret void +; +entry: + %0 = load i32, i32* @global1 + %1 = load i32, i32* @global2 + %2 = add i32 %0, %1 + ret void +} + +; CHECK: define internal void @[[FUNCTION_0]]() +; CHECK: entry_to_outline: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @global1, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* @global2, align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[TMP2]] + + +; CHECK: define internal void @[[FUNCTION_1]]() +; CHECK: entry_to_outline: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @global1, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* @global2, align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[TMP2]] +