forked from OSchip/llvm-project
[PGO] Profile guided code size optimization.
Summary: Enable some of the existing size optimizations for cold code under PGO. A ~5% code size saving in big internal app under PGO. The way it gets BFI/PSI is discussed in the RFC thread http://lists.llvm.org/pipermail/llvm-dev/2019-March/130894.html Note it doesn't currently touch loop passes. Reviewers: davidxl, eraman Reviewed By: eraman Subscribers: mgorny, javed.absar, smeenai, mehdi_amini, eraman, zzheng, steven_wu, dexonsmith, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59514 llvm-svn: 358422
This commit is contained in:
parent
64041d7b90
commit
09e539fcae
|
@ -55,6 +55,7 @@ class DominatorTree;
|
|||
class Function;
|
||||
class GlobalVariable;
|
||||
class Instruction;
|
||||
class ProfileSummaryInfo;
|
||||
class TargetTransformInfo;
|
||||
|
||||
/// A private "module" namespace for types and utilities used by
|
||||
|
@ -124,7 +125,8 @@ public:
|
|||
|
||||
// Glue for old PM.
|
||||
bool runImpl(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
|
||||
BlockFrequencyInfo *BFI, BasicBlock &Entry);
|
||||
BlockFrequencyInfo *BFI, BasicBlock &Entry,
|
||||
ProfileSummaryInfo *PSI);
|
||||
|
||||
void cleanup() {
|
||||
ClonedCastMap.clear();
|
||||
|
@ -148,6 +150,7 @@ private:
|
|||
LLVMContext *Ctx;
|
||||
const DataLayout *DL;
|
||||
BasicBlock *Entry;
|
||||
ProfileSummaryInfo *PSI;
|
||||
|
||||
/// Keeps track of constant candidates found in the function.
|
||||
using ConstCandVecType = std::vector<consthoist::ConstantCandidate>;
|
||||
|
|
|
@ -28,6 +28,8 @@ class TargetLibraryInfo;
|
|||
class BasicBlock;
|
||||
class Function;
|
||||
class OptimizationRemarkEmitter;
|
||||
class BlockFrequencyInfo;
|
||||
class ProfileSummaryInfo;
|
||||
|
||||
/// This class implements simplifications for calls to fortified library
|
||||
/// functions (__st*cpy_chk, __memcpy_chk, __memmove_chk, __memset_chk), to,
|
||||
|
@ -74,6 +76,8 @@ private:
|
|||
const DataLayout &DL;
|
||||
const TargetLibraryInfo *TLI;
|
||||
OptimizationRemarkEmitter &ORE;
|
||||
BlockFrequencyInfo *BFI;
|
||||
ProfileSummaryInfo *PSI;
|
||||
bool UnsafeFPShrink;
|
||||
function_ref<void(Instruction *, Value *)> Replacer;
|
||||
function_ref<void(Instruction *)> Eraser;
|
||||
|
@ -101,6 +105,7 @@ public:
|
|||
LibCallSimplifier(
|
||||
const DataLayout &DL, const TargetLibraryInfo *TLI,
|
||||
OptimizationRemarkEmitter &ORE,
|
||||
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
|
||||
function_ref<void(Instruction *, Value *)> Replacer =
|
||||
&replaceAllUsesWithDefault,
|
||||
function_ref<void(Instruction *)> Eraser = &eraseFromParentDefault);
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
//===- llvm/Transforms/Utils/SizeOpts.h - size optimization -----*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains some shared code size optimization related code.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TRANSFORMS_UTILS_SIZEOPTS_H
|
||||
#define LLVM_TRANSFORMS_UTILS_SIZEOPTS_H
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class BasicBlock;
|
||||
class BlockFrequencyInfo;
|
||||
class Function;
|
||||
class ProfileSummaryInfo;
|
||||
|
||||
/// Returns true if function \p F is suggested to be size-optimized based on the
|
||||
/// profile.
|
||||
bool shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI,
|
||||
BlockFrequencyInfo *BFI);
|
||||
/// Returns true if basic block \p BB is suggested to be size-optimized based
|
||||
/// on the profile.
|
||||
bool shouldOptimizeForSize(BasicBlock *BB, ProfileSummaryInfo *PSI,
|
||||
BlockFrequencyInfo *BFI);
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_TRANSFORMS_UTILS_SIZEOPTS_H
|
|
@ -24,11 +24,13 @@ namespace llvm {
|
|||
|
||||
class AssumptionCache;
|
||||
class BasicBlock;
|
||||
class BlockFrequencyInfo;
|
||||
class DependenceInfo;
|
||||
class DominatorTree;
|
||||
class Loop;
|
||||
class LoopInfo;
|
||||
class MDNode;
|
||||
class ProfileSummaryInfo;
|
||||
class OptimizationRemarkEmitter;
|
||||
class ScalarEvolution;
|
||||
|
||||
|
@ -120,7 +122,8 @@ void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
|
|||
MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name);
|
||||
|
||||
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
|
||||
Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel,
|
||||
Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
|
||||
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
|
||||
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
|
||||
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
|
||||
Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling);
|
||||
|
|
|
@ -71,6 +71,7 @@ class Loop;
|
|||
class LoopAccessInfo;
|
||||
class LoopInfo;
|
||||
class OptimizationRemarkEmitter;
|
||||
class ProfileSummaryInfo;
|
||||
class ScalarEvolution;
|
||||
class TargetLibraryInfo;
|
||||
class TargetTransformInfo;
|
||||
|
@ -96,6 +97,7 @@ struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> {
|
|||
AssumptionCache *AC;
|
||||
std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
|
||||
OptimizationRemarkEmitter *ORE;
|
||||
ProfileSummaryInfo *PSI;
|
||||
|
||||
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
|
||||
|
||||
|
@ -105,7 +107,7 @@ struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> {
|
|||
BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
|
||||
DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_,
|
||||
std::function<const LoopAccessInfo &(Loop &)> &GetLAA_,
|
||||
OptimizationRemarkEmitter &ORE);
|
||||
OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_);
|
||||
|
||||
bool processLoop(Loop *L);
|
||||
};
|
||||
|
|
|
@ -575,8 +575,12 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
|
|||
Options.DoCounterPromotion = true;
|
||||
Options.UseBFIInPromotion = IsCS;
|
||||
MPM.addPass(InstrProfiling(Options, IsCS));
|
||||
} else if (!ProfileFile.empty())
|
||||
} else if (!ProfileFile.empty()) {
|
||||
MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
|
||||
// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
|
||||
// RequireAnalysisPass for PSI before subsequent non-module passes.
|
||||
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
|
||||
}
|
||||
}
|
||||
|
||||
static InlineParams
|
||||
|
@ -649,6 +653,9 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
|
|||
MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
|
||||
PGOOpt->ProfileRemappingFile,
|
||||
Phase == ThinLTOPhase::PreLink));
|
||||
// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
|
||||
// RequireAnalysisPass for PSI before subsequent non-module passes.
|
||||
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
|
||||
// Do not invoke ICP in the ThinLTOPrelink phase as it makes it hard
|
||||
// for the profile annotation to be accurate in the ThinLTO backend.
|
||||
if (Phase != ThinLTOPhase::PreLink)
|
||||
|
@ -1065,6 +1072,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
|
|||
MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
|
||||
PGOOpt->ProfileRemappingFile,
|
||||
false /* ThinLTOPhase::PreLink */));
|
||||
// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
|
||||
// RequireAnalysisPass for PSI before subsequent non-module passes.
|
||||
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
|
||||
}
|
||||
|
||||
// Remove unused virtual tables to improve the quality of code generated by
|
||||
|
|
|
@ -4178,7 +4178,7 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
|
|||
auto InstCombineErase = [this](Instruction *I) {
|
||||
eraseInstFromFunction(*I);
|
||||
};
|
||||
LibCallSimplifier Simplifier(DL, &TLI, ORE, InstCombineRAUW,
|
||||
LibCallSimplifier Simplifier(DL, &TLI, ORE, BFI, PSI, InstCombineRAUW,
|
||||
InstCombineErase);
|
||||
if (Value *With = Simplifier.optimizeCall(CI)) {
|
||||
++NumSimplified;
|
||||
|
|
|
@ -52,12 +52,14 @@ namespace llvm {
|
|||
|
||||
class APInt;
|
||||
class AssumptionCache;
|
||||
class BlockFrequencyInfo;
|
||||
class DataLayout;
|
||||
class DominatorTree;
|
||||
class GEPOperator;
|
||||
class GlobalVariable;
|
||||
class LoopInfo;
|
||||
class OptimizationRemarkEmitter;
|
||||
class ProfileSummaryInfo;
|
||||
class TargetLibraryInfo;
|
||||
class User;
|
||||
|
||||
|
@ -304,6 +306,8 @@ private:
|
|||
const DataLayout &DL;
|
||||
const SimplifyQuery SQ;
|
||||
OptimizationRemarkEmitter &ORE;
|
||||
BlockFrequencyInfo *BFI;
|
||||
ProfileSummaryInfo *PSI;
|
||||
|
||||
// Optional analyses. When non-null, these can both be used to do better
|
||||
// combining and will be updated to reflect any changes.
|
||||
|
@ -315,11 +319,11 @@ public:
|
|||
InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder,
|
||||
bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA,
|
||||
AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
|
||||
OptimizationRemarkEmitter &ORE, const DataLayout &DL,
|
||||
LoopInfo *LI)
|
||||
OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
|
||||
ProfileSummaryInfo *PSI, const DataLayout &DL, LoopInfo *LI)
|
||||
: Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize),
|
||||
ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT),
|
||||
DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), LI(LI) {}
|
||||
DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), BFI(BFI), PSI(PSI), LI(LI) {}
|
||||
|
||||
/// Run the combiner over the entire worklist until it is empty.
|
||||
///
|
||||
|
|
|
@ -46,14 +46,17 @@
|
|||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/AssumptionCache.h"
|
||||
#include "llvm/Analysis/BasicAliasAnalysis.h"
|
||||
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
||||
#include "llvm/Analysis/CFG.h"
|
||||
#include "llvm/Analysis/ConstantFolding.h"
|
||||
#include "llvm/Analysis/EHPersonalities.h"
|
||||
#include "llvm/Analysis/GlobalsModRef.h"
|
||||
#include "llvm/Analysis/InstructionSimplify.h"
|
||||
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/MemoryBuiltins.h"
|
||||
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/Analysis/TargetFolder.h"
|
||||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
|
@ -3478,7 +3481,8 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
|
|||
static bool combineInstructionsOverFunction(
|
||||
Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA,
|
||||
AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
|
||||
OptimizationRemarkEmitter &ORE, bool ExpensiveCombines = true,
|
||||
OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
|
||||
ProfileSummaryInfo *PSI, bool ExpensiveCombines = true,
|
||||
LoopInfo *LI = nullptr) {
|
||||
auto &DL = F.getParent()->getDataLayout();
|
||||
ExpensiveCombines |= EnableExpensiveCombines;
|
||||
|
@ -3509,7 +3513,7 @@ static bool combineInstructionsOverFunction(
|
|||
MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
|
||||
|
||||
InstCombiner IC(Worklist, Builder, F.hasMinSize(), ExpensiveCombines, AA,
|
||||
AC, TLI, DT, ORE, DL, LI);
|
||||
AC, TLI, DT, ORE, BFI, PSI, DL, LI);
|
||||
IC.MaxArraySizeForCombine = MaxArraySize;
|
||||
|
||||
if (!IC.run())
|
||||
|
@ -3529,8 +3533,15 @@ PreservedAnalyses InstCombinePass::run(Function &F,
|
|||
auto *LI = AM.getCachedResult<LoopAnalysis>(F);
|
||||
|
||||
auto *AA = &AM.getResult<AAManager>(F);
|
||||
const ModuleAnalysisManager &MAM =
|
||||
AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
|
||||
ProfileSummaryInfo *PSI =
|
||||
MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
|
||||
auto *BFI = (PSI && PSI->hasProfileSummary()) ?
|
||||
&AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
|
||||
|
||||
if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE,
|
||||
ExpensiveCombines, LI))
|
||||
BFI, PSI, ExpensiveCombines, LI))
|
||||
// No changes, all analyses are preserved.
|
||||
return PreservedAnalyses::all();
|
||||
|
||||
|
@ -3554,6 +3565,8 @@ void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
|
|||
AU.addPreserved<AAResultsWrapperPass>();
|
||||
AU.addPreserved<BasicAAWrapperPass>();
|
||||
AU.addPreserved<GlobalsAAWrapperPass>();
|
||||
AU.addRequired<ProfileSummaryInfoWrapperPass>();
|
||||
LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
bool InstructionCombiningPass::runOnFunction(Function &F) {
|
||||
|
@ -3570,9 +3583,15 @@ bool InstructionCombiningPass::runOnFunction(Function &F) {
|
|||
// Optional analyses.
|
||||
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
|
||||
auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
|
||||
ProfileSummaryInfo *PSI =
|
||||
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
|
||||
BlockFrequencyInfo *BFI =
|
||||
(PSI && PSI->hasProfileSummary()) ?
|
||||
&getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
|
||||
nullptr;
|
||||
|
||||
return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE,
|
||||
ExpensiveCombines, LI);
|
||||
BFI, PSI, ExpensiveCombines, LI);
|
||||
}
|
||||
|
||||
char InstructionCombiningPass::ID = 0;
|
||||
|
@ -3585,6 +3604,8 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
|||
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
|
||||
INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
|
||||
"Combine redundant instructions", false, false)
|
||||
|
||||
|
|
|
@ -41,6 +41,7 @@
|
|||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/Transforms/Utils/Local.h"
|
||||
#include "llvm/IR/BasicBlock.h"
|
||||
|
@ -60,6 +61,7 @@
|
|||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/Transforms/Utils/SizeOpts.h"
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
|
@ -111,6 +113,7 @@ public:
|
|||
if (ConstHoistWithBlockFrequency)
|
||||
AU.addRequired<BlockFrequencyInfoWrapperPass>();
|
||||
AU.addRequired<DominatorTreeWrapperPass>();
|
||||
AU.addRequired<ProfileSummaryInfoWrapperPass>();
|
||||
AU.addRequired<TargetTransformInfoWrapperPass>();
|
||||
}
|
||||
|
||||
|
@ -126,6 +129,7 @@ INITIALIZE_PASS_BEGIN(ConstantHoistingLegacyPass, "consthoist",
|
|||
"Constant Hoisting", false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
|
||||
INITIALIZE_PASS_END(ConstantHoistingLegacyPass, "consthoist",
|
||||
"Constant Hoisting", false, false)
|
||||
|
@ -148,7 +152,8 @@ bool ConstantHoistingLegacyPass::runOnFunction(Function &Fn) {
|
|||
ConstHoistWithBlockFrequency
|
||||
? &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI()
|
||||
: nullptr,
|
||||
Fn.getEntryBlock());
|
||||
Fn.getEntryBlock(),
|
||||
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI());
|
||||
|
||||
if (MadeChange) {
|
||||
LLVM_DEBUG(dbgs() << "********** Function after Constant Hoisting: "
|
||||
|
@ -548,7 +553,9 @@ ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S,
|
|||
ConstCandVecType::iterator &MaxCostItr) {
|
||||
unsigned NumUses = 0;
|
||||
|
||||
if(!Entry->getParent()->hasOptSize() || std::distance(S,E) > 100) {
|
||||
bool OptForSize = Entry->getParent()->hasOptSize() ||
|
||||
llvm::shouldOptimizeForSize(Entry->getParent(), PSI, BFI);
|
||||
if (!OptForSize || std::distance(S,E) > 100) {
|
||||
for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
|
||||
NumUses += ConstCand->Uses.size();
|
||||
if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost)
|
||||
|
@ -919,13 +926,14 @@ void ConstantHoistingPass::deleteDeadCastInst() const {
|
|||
/// Optimize expensive integer constants in the given function.
|
||||
bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
|
||||
DominatorTree &DT, BlockFrequencyInfo *BFI,
|
||||
BasicBlock &Entry) {
|
||||
BasicBlock &Entry, ProfileSummaryInfo *PSI) {
|
||||
this->TTI = &TTI;
|
||||
this->DT = &DT;
|
||||
this->BFI = BFI;
|
||||
this->DL = &Fn.getParent()->getDataLayout();
|
||||
this->Ctx = &Fn.getContext();
|
||||
this->Entry = &Entry;
|
||||
this->PSI = PSI;
|
||||
// Collect all constant candidates.
|
||||
collectConstantCandidates(Fn);
|
||||
|
||||
|
@ -962,7 +970,9 @@ PreservedAnalyses ConstantHoistingPass::run(Function &F,
|
|||
auto BFI = ConstHoistWithBlockFrequency
|
||||
? &AM.getResult<BlockFrequencyAnalysis>(F)
|
||||
: nullptr;
|
||||
if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock()))
|
||||
auto &MAM = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
|
||||
auto *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
|
||||
if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock(), PSI))
|
||||
return PreservedAnalyses::all();
|
||||
|
||||
PreservedAnalyses PA;
|
||||
|
|
|
@ -29,11 +29,14 @@
|
|||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/AssumptionCache.h"
|
||||
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
||||
#include "llvm/Analysis/GlobalsModRef.h"
|
||||
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
|
||||
#include "llvm/Analysis/LoopAccessAnalysis.h"
|
||||
#include "llvm/Analysis/LoopAnalysisManager.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/MemorySSA.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/Analysis/ScalarEvolution.h"
|
||||
#include "llvm/Analysis/ScalarEvolutionExpander.h"
|
||||
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
|
||||
|
@ -54,6 +57,7 @@
|
|||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/Transforms/Utils.h"
|
||||
#include "llvm/Transforms/Utils/LoopVersioning.h"
|
||||
#include "llvm/Transforms/Utils/SizeOpts.h"
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <forward_list>
|
||||
|
@ -159,8 +163,9 @@ namespace {
|
|||
class LoadEliminationForLoop {
|
||||
public:
|
||||
LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI,
|
||||
DominatorTree *DT)
|
||||
: L(L), LI(LI), LAI(LAI), DT(DT), PSE(LAI.getPSE()) {}
|
||||
DominatorTree *DT, BlockFrequencyInfo *BFI,
|
||||
ProfileSummaryInfo* PSI)
|
||||
: L(L), LI(LI), LAI(LAI), DT(DT), BFI(BFI), PSI(PSI), PSE(LAI.getPSE()) {}
|
||||
|
||||
/// Look through the loop-carried and loop-independent dependences in
|
||||
/// this loop and find store->load dependences.
|
||||
|
@ -529,7 +534,11 @@ public:
|
|||
}
|
||||
|
||||
if (!Checks.empty() || !LAI.getPSE().getUnionPredicate().isAlwaysTrue()) {
|
||||
if (L->getHeader()->getParent()->hasOptSize()) {
|
||||
auto *HeaderBB = L->getHeader();
|
||||
auto *F = HeaderBB->getParent();
|
||||
bool OptForSize = F->hasOptSize() ||
|
||||
llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI);
|
||||
if (OptForSize) {
|
||||
LLVM_DEBUG(
|
||||
dbgs() << "Versioning is needed but not allowed when optimizing "
|
||||
"for size.\n");
|
||||
|
@ -572,6 +581,8 @@ private:
|
|||
LoopInfo *LI;
|
||||
const LoopAccessInfo &LAI;
|
||||
DominatorTree *DT;
|
||||
BlockFrequencyInfo *BFI;
|
||||
ProfileSummaryInfo *PSI;
|
||||
PredicatedScalarEvolution PSE;
|
||||
};
|
||||
|
||||
|
@ -579,6 +590,7 @@ private:
|
|||
|
||||
static bool
|
||||
eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT,
|
||||
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
|
||||
function_ref<const LoopAccessInfo &(Loop &)> GetLAI) {
|
||||
// Build up a worklist of inner-loops to transform to avoid iterator
|
||||
// invalidation.
|
||||
|
@ -597,7 +609,7 @@ eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT,
|
|||
bool Changed = false;
|
||||
for (Loop *L : Worklist) {
|
||||
// The actual work is performed by LoadEliminationForLoop.
|
||||
LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT);
|
||||
LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT, BFI, PSI);
|
||||
Changed |= LEL.processLoop();
|
||||
}
|
||||
return Changed;
|
||||
|
@ -622,10 +634,14 @@ public:
|
|||
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
|
||||
auto &LAA = getAnalysis<LoopAccessLegacyAnalysis>();
|
||||
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
|
||||
auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
|
||||
auto *BFI = (PSI && PSI->hasProfileSummary()) ?
|
||||
&getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
|
||||
nullptr;
|
||||
|
||||
// Process each loop nest in the function.
|
||||
return eliminateLoadsAcrossLoops(
|
||||
F, LI, DT,
|
||||
F, LI, DT, BFI, PSI,
|
||||
[&LAA](Loop &L) -> const LoopAccessInfo & { return LAA.getInfo(&L); });
|
||||
}
|
||||
|
||||
|
@ -638,6 +654,8 @@ public:
|
|||
AU.addRequired<DominatorTreeWrapperPass>();
|
||||
AU.addPreserved<DominatorTreeWrapperPass>();
|
||||
AU.addPreserved<GlobalsAAWrapperPass>();
|
||||
AU.addRequired<ProfileSummaryInfoWrapperPass>();
|
||||
LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -653,6 +671,8 @@ INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
|
|||
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
|
||||
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
|
||||
INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
|
||||
|
||||
FunctionPass *llvm::createLoopLoadEliminationPass() {
|
||||
|
@ -668,13 +688,17 @@ PreservedAnalyses LoopLoadEliminationPass::run(Function &F,
|
|||
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
|
||||
auto &AA = AM.getResult<AAManager>(F);
|
||||
auto &AC = AM.getResult<AssumptionAnalysis>(F);
|
||||
auto &MAM = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
|
||||
auto *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
|
||||
auto *BFI = (PSI && PSI->hasProfileSummary()) ?
|
||||
&AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
|
||||
MemorySSA *MSSA = EnableMSSALoopDependency
|
||||
? &AM.getResult<MemorySSAAnalysis>(F).getMSSA()
|
||||
: nullptr;
|
||||
|
||||
auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
|
||||
bool Changed = eliminateLoadsAcrossLoops(
|
||||
F, LI, DT, [&](Loop &L) -> const LoopAccessInfo & {
|
||||
F, LI, DT, BFI, PSI, [&](Loop &L) -> const LoopAccessInfo & {
|
||||
LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA};
|
||||
return LAM.getResult<LoopAccessAnalysis>(L, AR);
|
||||
});
|
||||
|
|
|
@ -294,7 +294,8 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
|
|||
return LoopUnrollResult::Unmodified;
|
||||
|
||||
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
|
||||
L, SE, TTI, OptLevel, None, None, None, None, None, None);
|
||||
L, SE, TTI, nullptr, nullptr, OptLevel,
|
||||
None, None, None, None, None, None);
|
||||
if (AllowUnrollAndJam.getNumOccurrences() > 0)
|
||||
UP.UnrollAndJam = AllowUnrollAndJam;
|
||||
if (UnrollAndJamThreshold.getNumOccurrences() > 0)
|
||||
|
|
|
@ -23,7 +23,9 @@
|
|||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Analysis/AssumptionCache.h"
|
||||
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
||||
#include "llvm/Analysis/CodeMetrics.h"
|
||||
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
|
||||
#include "llvm/Analysis/LoopAnalysisManager.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/LoopPass.h"
|
||||
|
@ -55,6 +57,7 @@
|
|||
#include "llvm/Transforms/Utils.h"
|
||||
#include "llvm/Transforms/Utils/LoopSimplify.h"
|
||||
#include "llvm/Transforms/Utils/LoopUtils.h"
|
||||
#include "llvm/Transforms/Utils/SizeOpts.h"
|
||||
#include "llvm/Transforms/Utils/UnrollLoop.h"
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
|
@ -165,7 +168,8 @@ static const unsigned NoThreshold = std::numeric_limits<unsigned>::max();
|
|||
/// Gather the various unrolling parameters based on the defaults, compiler
|
||||
/// flags, TTI overrides and user specified parameters.
|
||||
TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
|
||||
Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel,
|
||||
Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
|
||||
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
|
||||
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
|
||||
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
|
||||
Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling) {
|
||||
|
@ -198,7 +202,9 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
|
|||
TTI.getUnrollingPreferences(L, SE, UP);
|
||||
|
||||
// Apply size attributes
|
||||
if (L->getHeader()->getParent()->hasOptSize()) {
|
||||
bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
|
||||
llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI);
|
||||
if (OptForSize) {
|
||||
UP.Threshold = UP.OptSizeThreshold;
|
||||
UP.PartialThreshold = UP.PartialOptSizeThreshold;
|
||||
}
|
||||
|
@ -963,7 +969,9 @@ bool llvm::computeUnrollCount(
|
|||
static LoopUnrollResult tryToUnrollLoop(
|
||||
Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
|
||||
const TargetTransformInfo &TTI, AssumptionCache &AC,
|
||||
OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel,
|
||||
OptimizationRemarkEmitter &ORE,
|
||||
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
|
||||
bool PreserveLCSSA, int OptLevel,
|
||||
bool OnlyWhenForced, bool ForgetAllSCEV, Optional<unsigned> ProvidedCount,
|
||||
Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial,
|
||||
Optional<bool> ProvidedRuntime, Optional<bool> ProvidedUpperBound,
|
||||
|
@ -989,7 +997,7 @@ static LoopUnrollResult tryToUnrollLoop(
|
|||
bool NotDuplicatable;
|
||||
bool Convergent;
|
||||
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
|
||||
L, SE, TTI, OptLevel, ProvidedThreshold, ProvidedCount,
|
||||
L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount,
|
||||
ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
|
||||
ProvidedAllowPeeling);
|
||||
// Exit early if unrolling is disabled.
|
||||
|
@ -1176,7 +1184,8 @@ public:
|
|||
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
|
||||
|
||||
LoopUnrollResult Result = tryToUnrollLoop(
|
||||
L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, OnlyWhenForced,
|
||||
L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr,
|
||||
PreserveLCSSA, OptLevel, OnlyWhenForced,
|
||||
ForgetAllSCEV, ProvidedCount, ProvidedThreshold, ProvidedAllowPartial,
|
||||
ProvidedRuntime, ProvidedUpperBound, ProvidedAllowPeeling);
|
||||
|
||||
|
@ -1257,6 +1266,7 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
|
|||
|
||||
bool Changed =
|
||||
tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
|
||||
/*BFI*/ nullptr, /*PSI*/ nullptr,
|
||||
/*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced,
|
||||
/*ForgetAllSCEV*/ false, /*Count*/ None,
|
||||
/*Threshold*/ None, /*AllowPartial*/ false,
|
||||
|
@ -1359,6 +1369,8 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
|
|||
AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
|
||||
ProfileSummaryInfo *PSI =
|
||||
MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
|
||||
auto *BFI = (PSI && PSI->hasProfileSummary()) ?
|
||||
&AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
|
||||
|
||||
bool Changed = false;
|
||||
|
||||
|
@ -1394,7 +1406,7 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
|
|||
// The API here is quite complex to call and we allow to select some
|
||||
// flavors of unrolling during construction time (by setting UnrollOpts).
|
||||
LoopUnrollResult Result = tryToUnrollLoop(
|
||||
&L, DT, &LI, SE, TTI, AC, ORE,
|
||||
&L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,
|
||||
/*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
|
||||
/*ForgetAllSCEV*/ false, /*Count*/ None,
|
||||
/*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
|
||||
|
|
|
@ -51,6 +51,7 @@ add_llvm_library(LLVMTransformUtils
|
|||
SimplifyCFG.cpp
|
||||
SimplifyIndVar.cpp
|
||||
SimplifyLibCalls.cpp
|
||||
SizeOpts.cpp
|
||||
SplitModule.cpp
|
||||
StripNonLineTableDebugInfo.cpp
|
||||
SymbolRewriter.cpp
|
||||
|
|
|
@ -16,8 +16,10 @@
|
|||
#include "llvm/ADT/SmallString.h"
|
||||
#include "llvm/ADT/StringMap.h"
|
||||
#include "llvm/ADT/Triple.h"
|
||||
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
||||
#include "llvm/Analysis/ConstantFolding.h"
|
||||
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||
#include "llvm/Transforms/Utils/Local.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
|
@ -34,6 +36,7 @@
|
|||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/KnownBits.h"
|
||||
#include "llvm/Transforms/Utils/BuildLibCalls.h"
|
||||
#include "llvm/Transforms/Utils/SizeOpts.h"
|
||||
|
||||
using namespace llvm;
|
||||
using namespace PatternMatch;
|
||||
|
@ -2375,7 +2378,9 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
|
|||
|
||||
// Don't rewrite fputs to fwrite when optimising for size because fwrite
|
||||
// requires more arguments and thus extra MOVs are required.
|
||||
if (CI->getFunction()->hasOptSize())
|
||||
bool OptForSize = CI->getFunction()->hasOptSize() ||
|
||||
llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
|
||||
if (OptForSize)
|
||||
return nullptr;
|
||||
|
||||
// Check if has any use
|
||||
|
@ -2750,9 +2755,10 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
|
|||
LibCallSimplifier::LibCallSimplifier(
|
||||
const DataLayout &DL, const TargetLibraryInfo *TLI,
|
||||
OptimizationRemarkEmitter &ORE,
|
||||
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
|
||||
function_ref<void(Instruction *, Value *)> Replacer,
|
||||
function_ref<void(Instruction *)> Eraser)
|
||||
: FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE),
|
||||
: FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE), BFI(BFI), PSI(PSI),
|
||||
UnsafeFPShrink(false), Replacer(Replacer), Eraser(Eraser) {}
|
||||
|
||||
void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
//===-- SizeOpts.cpp - code size optimization related code ----------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains some shared code size optimization related code.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Transforms/Utils/SizeOpts.h"
|
||||
using namespace llvm;
|
||||
|
||||
// Command-line switch for the profile guided size optimization. It is a hidden
// option spelled "pgso" that defaults to true. Both shouldOptimizeForSize
// overloads in this file AND their result with this flag, so turning it off
// makes them always return false.
static cl::opt<bool> ProfileGuidedSizeOpt(
|
||||
"pgso", cl::Hidden, cl::init(true),
|
||||
cl::desc("Enable the profile guided size optimization. "));
|
||||
|
||||
/// Function-level query: should \p F be optimized for size based on profile
/// data?
///
/// \param F   Function to query; must be non-null (asserted).
/// \param PSI Profile summary info; may be null.
/// \param BFI Block frequency info; may be null.
/// \returns false when \p PSI or \p BFI is null or when \p PSI reports no
/// profile summary. Otherwise returns true only if the "pgso" flag is enabled
/// and PSI->isFunctionColdInCallGraph(F, *BFI) holds.
bool llvm::shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI,
|
||||
BlockFrequencyInfo *BFI) {
|
||||
assert(F);
|
||||
// Without both analyses and an actual profile summary there is nothing to
|
||||
// base a coldness decision on, so never request size optimization.
|
||||
if (!PSI || !BFI || !PSI->hasProfileSummary())
|
||||
return false;
|
||||
return ProfileGuidedSizeOpt && PSI->isFunctionColdInCallGraph(F, *BFI);
|
||||
}
|
||||
|
||||
/// Basic-block-level query: should \p BB be optimized for size based on
/// profile data?
///
/// \param BB  Basic block to query; must be non-null (asserted).
/// \param PSI Profile summary info; may be null.
/// \param BFI Block frequency info; may be null. It is passed to
/// isColdBlock as a pointer, unlike the Function overload which dereferences
/// it.
/// \returns false when \p PSI or \p BFI is null or when \p PSI reports no
/// profile summary. Otherwise returns true only if the "pgso" flag is enabled
/// and PSI->isColdBlock(BB, BFI) holds.
bool llvm::shouldOptimizeForSize(BasicBlock *BB, ProfileSummaryInfo *PSI,
|
||||
BlockFrequencyInfo *BFI) {
|
||||
assert(BB);
|
||||
// Without both analyses and an actual profile summary there is nothing to
|
||||
// base a coldness decision on, so never request size optimization.
|
||||
if (!PSI || !BFI || !PSI->hasProfileSummary())
|
||||
return false;
|
||||
return ProfileGuidedSizeOpt && PSI->isColdBlock(BB, BFI);
|
||||
}
|
|
@ -88,6 +88,7 @@
|
|||
#include "llvm/Analysis/LoopIterator.h"
|
||||
#include "llvm/Analysis/MemorySSA.h"
|
||||
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/Analysis/ScalarEvolution.h"
|
||||
#include "llvm/Analysis/ScalarEvolutionExpander.h"
|
||||
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
|
||||
|
@ -134,6 +135,7 @@
|
|||
#include "llvm/Transforms/Utils/LoopSimplify.h"
|
||||
#include "llvm/Transforms/Utils/LoopUtils.h"
|
||||
#include "llvm/Transforms/Utils/LoopVersioning.h"
|
||||
#include "llvm/Transforms/Utils/SizeOpts.h"
|
||||
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
|
@ -1452,12 +1454,13 @@ struct LoopVectorize : public FunctionPass {
|
|||
auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
|
||||
auto *DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits();
|
||||
auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
|
||||
auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
|
||||
|
||||
std::function<const LoopAccessInfo &(Loop &)> GetLAA =
|
||||
[&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
|
||||
|
||||
return Impl.runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, *AC,
|
||||
GetLAA, *ORE);
|
||||
GetLAA, *ORE, PSI);
|
||||
}
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
|
@ -1483,6 +1486,7 @@ struct LoopVectorize : public FunctionPass {
|
|||
|
||||
AU.addPreserved<BasicAAWrapperPass>();
|
||||
AU.addPreserved<GlobalsAAWrapperPass>();
|
||||
AU.addRequired<ProfileSummaryInfoWrapperPass>();
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -6054,6 +6058,7 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
|
|||
INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
|
||||
INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
|
||||
INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
|
||||
|
||||
namespace llvm {
|
||||
|
@ -7147,7 +7152,8 @@ static bool processLoopInVPlanNativePath(
|
|||
Loop *L, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT,
|
||||
LoopVectorizationLegality *LVL, TargetTransformInfo *TTI,
|
||||
TargetLibraryInfo *TLI, DemandedBits *DB, AssumptionCache *AC,
|
||||
OptimizationRemarkEmitter *ORE, LoopVectorizeHints &Hints) {
|
||||
OptimizationRemarkEmitter *ORE, BlockFrequencyInfo *BFI,
|
||||
ProfileSummaryInfo *PSI, LoopVectorizeHints &Hints) {
|
||||
|
||||
assert(EnableVPlanNativePath && "VPlan-native path is disabled.");
|
||||
Function *F = L->getHeader()->getParent();
|
||||
|
@ -7162,10 +7168,12 @@ static bool processLoopInVPlanNativePath(
|
|||
// Get user vectorization factor.
|
||||
const unsigned UserVF = Hints.getWidth();
|
||||
|
||||
// Check the function attributes to find out if this function should be
|
||||
// optimized for size.
|
||||
// Check the function attributes and profiles to find out if this function
|
||||
// should be optimized for size.
|
||||
bool OptForSize =
|
||||
Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->hasOptSize();
|
||||
Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
|
||||
(F->hasOptSize() ||
|
||||
llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI));
|
||||
|
||||
// Plan how to best vectorize, return the best VF and its cost.
|
||||
const VectorizationFactor VF = LVP.planInVPlanNativePath(OptForSize, UserVF);
|
||||
|
@ -7245,10 +7253,12 @@ bool LoopVectorizePass::processLoop(Loop *L) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Check the function attributes to find out if this function should be
|
||||
// optimized for size.
|
||||
// Check the function attributes and profiles to find out if this function
|
||||
// should be optimized for size.
|
||||
bool OptForSize =
|
||||
Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->hasOptSize();
|
||||
Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
|
||||
(F->hasOptSize() ||
|
||||
llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI));
|
||||
|
||||
// Entrance to the VPlan-native vectorization path. Outer loops are processed
|
||||
// here. They may require CFG and instruction level transformations before
|
||||
|
@ -7257,7 +7267,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
|
|||
// pipeline.
|
||||
if (!L->empty())
|
||||
return processLoopInVPlanNativePath(L, PSE, LI, DT, &LVL, TTI, TLI, DB, AC,
|
||||
ORE, Hints);
|
||||
ORE, BFI, PSI, Hints);
|
||||
|
||||
assert(L->empty() && "Inner loop expected.");
|
||||
// Check the loop for a trip count threshold: vectorize loops with a tiny trip
|
||||
|
@ -7523,7 +7533,7 @@ bool LoopVectorizePass::runImpl(
|
|||
DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
|
||||
DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_,
|
||||
std::function<const LoopAccessInfo &(Loop &)> &GetLAA_,
|
||||
OptimizationRemarkEmitter &ORE_) {
|
||||
OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_) {
|
||||
SE = &SE_;
|
||||
LI = &LI_;
|
||||
TTI = &TTI_;
|
||||
|
@ -7535,6 +7545,7 @@ bool LoopVectorizePass::runImpl(
|
|||
GetLAA = &GetLAA_;
|
||||
DB = &DB_;
|
||||
ORE = &ORE_;
|
||||
PSI = PSI_;
|
||||
|
||||
// Don't attempt if
|
||||
// 1. the target claims to have no vector registers, and
|
||||
|
@ -7603,8 +7614,12 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
|
|||
LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA};
|
||||
return LAM.getResult<LoopAccessAnalysis>(L, AR);
|
||||
};
|
||||
const ModuleAnalysisManager &MAM =
|
||||
AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
|
||||
ProfileSummaryInfo *PSI =
|
||||
MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
|
||||
bool Changed =
|
||||
runImpl(F, SE, LI, TTI, DT, BFI, &TLI, DB, AA, AC, GetLAA, ORE);
|
||||
runImpl(F, SE, LI, TTI, DT, BFI, &TLI, DB, AA, AC, GetLAA, ORE, PSI);
|
||||
if (!Changed)
|
||||
return PreservedAnalyses::all();
|
||||
PreservedAnalyses PA;
|
||||
|
|
|
@ -106,6 +106,7 @@
|
|||
; CHECK-O-NEXT: Running pass: InstCombinePass
|
||||
; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
|
||||
; CHECK-O-NEXT: Running analysis: AAManager
|
||||
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
|
||||
; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass
|
||||
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
|
||||
; CHECK-O-NEXT: Finished llvm::Function pass manager run.
|
||||
|
@ -245,7 +246,6 @@
|
|||
; CHECK-O-NEXT: Running pass: SLPVectorizerPass
|
||||
; CHECK-O-NEXT: Running pass: InstCombinePass
|
||||
; CHECK-O-NEXT: Running pass: LoopUnrollPass
|
||||
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
|
||||
; CHECK-O-NEXT: Running pass: WarnMissedTransformationsPass
|
||||
; CHECK-O-NEXT: Running pass: InstCombinePass
|
||||
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis
|
||||
|
|
|
@ -69,6 +69,7 @@
|
|||
; CHECK-O2-NEXT: Starting llvm::Function pass manager run.
|
||||
; CHECK-O3-NEXT: Running pass: AggressiveInstCombinePass
|
||||
; CHECK-O2-NEXT: Running pass: InstCombinePass
|
||||
; CHECK-O2-NEXT: Running analysis: OuterAnalysisManagerProxy
|
||||
; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass
|
||||
; CHECK-O2-NEXT: Finished llvm::Function pass manager run.
|
||||
; CHECK-O2-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}InlinerPass>
|
||||
|
|
|
@ -88,6 +88,7 @@
|
|||
; CHECK-O-NEXT: Running pass: InstCombinePass
|
||||
; CHECK-PRELINK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
|
||||
; CHECK-O-NEXT: Running analysis: AAManager
|
||||
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
|
||||
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
|
||||
; CHECK-O-NEXT: Finished llvm::Function pass manager run.
|
||||
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA
|
||||
|
@ -219,7 +220,6 @@
|
|||
; CHECK-POSTLINK-O-NEXT: Running pass: SLPVectorizerPass
|
||||
; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
|
||||
; CHECK-POSTLINK-O-NEXT: Running pass: LoopUnrollPass
|
||||
; CHECK-POSTLINK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
|
||||
; CHECK-POSTLINK-O-NEXT: Running pass: WarnMissedTransformationsPass
|
||||
; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
|
||||
; CHECK-POSTLINK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis
|
||||
|
|
|
@ -214,6 +214,8 @@
|
|||
; CHECK-NEXT: Scalar Evolution Analysis
|
||||
; CHECK-NEXT: Function Alias Analysis Results
|
||||
; CHECK-NEXT: Loop Access Analysis
|
||||
; CHECK-NEXT: Lazy Branch Probability Analysis
|
||||
; CHECK-NEXT: Lazy Block Frequency Analysis
|
||||
; CHECK-NEXT: Loop Load Elimination
|
||||
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
|
||||
; CHECK-NEXT: Function Alias Analysis Results
|
||||
|
|
|
@ -219,6 +219,8 @@
|
|||
; CHECK-NEXT: Scalar Evolution Analysis
|
||||
; CHECK-NEXT: Function Alias Analysis Results
|
||||
; CHECK-NEXT: Loop Access Analysis
|
||||
; CHECK-NEXT: Lazy Branch Probability Analysis
|
||||
; CHECK-NEXT: Lazy Block Frequency Analysis
|
||||
; CHECK-NEXT: Loop Load Elimination
|
||||
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
|
||||
; CHECK-NEXT: Function Alias Analysis Results
|
||||
|
|
|
@ -201,6 +201,8 @@
|
|||
; CHECK-NEXT: Scalar Evolution Analysis
|
||||
; CHECK-NEXT: Function Alias Analysis Results
|
||||
; CHECK-NEXT: Loop Access Analysis
|
||||
; CHECK-NEXT: Lazy Branch Probability Analysis
|
||||
; CHECK-NEXT: Lazy Block Frequency Analysis
|
||||
; CHECK-NEXT: Loop Load Elimination
|
||||
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
|
||||
; CHECK-NEXT: Function Alias Analysis Results
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -S < %s | FileCheck %s
|
||||
; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -pgso -S < %s | FileCheck %s -check-prefix=PGSO
|
||||
; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -pgso=false -S < %s | FileCheck %s -check-prefix=NPGSO
|
||||
|
||||
; There are different candidates here for the base constant: 1073876992 and
|
||||
; 1073876996. But we don't want to see the latter because it results in
|
||||
|
@ -8,6 +10,7 @@ define void @foo() #0 {
|
|||
entry:
|
||||
; CHECK-LABEL: @foo
|
||||
; CHECK-NOT: [[CONST1:%const_mat[0-9]*]] = add i32 %const, -4
|
||||
; CHECK-LABEL: @foo_pgso
|
||||
%0 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
|
||||
%or = or i32 %0, 1
|
||||
store volatile i32 %or, i32* inttoptr (i32 1073876992 to i32*), align 4096
|
||||
|
@ -40,3 +43,59 @@ entry:
|
|||
}
|
||||
|
||||
attributes #0 = { minsize norecurse nounwind optsize readnone uwtable }
|
||||
|
||||
; Profile-driven variant of the constant hoisting test. This function uses
; attribute group #1 (no optsize or minsize) and carries !prof !14, which
; records a function_entry_count of 0. The PGSO and NPGSO check prefixes in the
; body expect the "add i32 %const, -4" rematerialization to be absent with
; -pgso and present with -pgso=false.
define void @foo_pgso() #1 !prof !14 {
|
||||
entry:
|
||||
; PGSO-LABEL: @foo_pgso
|
||||
; PGSO-NOT: [[CONST2:%const_mat[0-9]*]] = add i32 %const, -4
|
||||
; NPGSO-LABEL: @foo_pgso
|
||||
; NPGSO: [[CONST2:%const_mat[0-9]*]] = add i32 %const, -4
|
||||
%0 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
|
||||
%or = or i32 %0, 1
|
||||
store volatile i32 %or, i32* inttoptr (i32 1073876992 to i32*), align 4096
|
||||
%1 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4
|
||||
%and = and i32 %1, -117506048
|
||||
store volatile i32 %and, i32* inttoptr (i32 1073876996 to i32*), align 4
|
||||
%2 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
|
||||
%and1 = and i32 %2, -17367041
|
||||
store volatile i32 %and1, i32* inttoptr (i32 1073876996 to i32*), align 4096
|
||||
%3 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
|
||||
%and2 = and i32 %3, -262145
|
||||
store volatile i32 %and2, i32* inttoptr (i32 1073876992 to i32*), align 4096
|
||||
%4 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4
|
||||
%and3 = and i32 %4, -8323073
|
||||
store volatile i32 %and3, i32* inttoptr (i32 1073876996 to i32*), align 4
|
||||
store volatile i32 10420224, i32* inttoptr (i32 1073877000 to i32*), align 8
|
||||
%5 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4096
|
||||
%or4 = or i32 %5, 65536
|
||||
store volatile i32 %or4, i32* inttoptr (i32 1073876996 to i32*), align 4096
|
||||
%6 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
|
||||
%or6.i.i = or i32 %6, 16
|
||||
store volatile i32 %or6.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
|
||||
%7 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
|
||||
%and7.i.i = and i32 %7, -4
|
||||
store volatile i32 %and7.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
|
||||
%8 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
|
||||
%or8.i.i = or i32 %8, 2
|
||||
store volatile i32 %or8.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #1 = { norecurse nounwind readnone uwtable } ; no optsize or minsize
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
; because it requires more arguments and thus extra MOVs are required.
|
||||
;
|
||||
; RUN: opt < %s -instcombine -S | FileCheck %s
|
||||
; RUN: opt < %s -instcombine -pgso -S | FileCheck %s -check-prefix=PGSO
|
||||
; RUN: opt < %s -instcombine -pgso=false -S | FileCheck %s -check-prefix=NPGSO
|
||||
|
||||
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
|
||||
%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
|
||||
|
@ -26,3 +28,34 @@ declare i32 @fputs(i8* nocapture readonly, %struct._IO_FILE* nocapture) local_un
|
|||
|
||||
attributes #0 = { nounwind optsize }
|
||||
attributes #1 = { nounwind optsize }
|
||||
|
||||
; Profile-driven variant of the fputs test. This function has no optsize
; attribute but carries !prof !14, which records a function_entry_count of 0.
; The PGSO and NPGSO check prefixes in the body expect the fputs call to be
; kept with -pgso and to be rewritten to fwrite with -pgso=false.
define i32 @main_pgso() local_unnamed_addr !prof !14 {
|
||||
entry:
|
||||
; PGSO-LABEL: @main_pgso(
|
||||
; PGSO-NOT: call i64 @fwrite
|
||||
; PGSO: call i32 @fputs
|
||||
; NPGSO-LABEL: @main_pgso(
|
||||
; NPGSO: call i64 @fwrite
|
||||
; NPGSO-NOT: call i32 @fputs
|
||||
|
||||
%call = tail call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0)) #2
|
||||
%call1 = tail call i32 @fputs(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @.str.2, i32 0, i32 0), %struct._IO_FILE* %call) #2
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
; RUN: opt -basicaa -loop-load-elim -S < %s | FileCheck %s
|
||||
; RUN: opt -basicaa -loop-load-elim -pgso -S < %s | FileCheck %s -check-prefix=PGSO
|
||||
; RUN: opt -basicaa -loop-load-elim -pgso=false -S < %s | FileCheck %s -check-prefix=NPGSO
|
||||
|
||||
; When optimizing for size don't eliminate in this loop because the loop would
|
||||
; have to be versioned first because A and C may alias.
|
||||
|
@ -74,3 +76,54 @@ for.body: ; preds = %for.body, %entry
|
|||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
; PGSO-LABEL: @f_pgso(
|
||||
; NPGSO-LABEL: @f_pgso(
|
||||
; Profile-driven variant of the loop-load-elim size test. This function has no
; size attribute but carries !prof !14, which records a function_entry_count of
; 0. The PGSO and NPGSO check prefixes in the loop body expect
; "%c = mul i32 %a, 2" to stay unchanged with -pgso and to be gone with
; -pgso=false.
define void @f_pgso(i32* %A, i32* %B, i32* %C, i64 %N) !prof !14 {
|
||||
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
|
||||
%Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
|
||||
%Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
|
||||
%Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
|
||||
%Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
|
||||
|
||||
%b = load i32, i32* %Bidx, align 4
|
||||
%a_p1 = add i32 %b, 2
|
||||
store i32 %a_p1, i32* %Aidx_next, align 4
|
||||
|
||||
%a = load i32, i32* %Aidx, align 4
|
||||
; PGSO: %c = mul i32 %a, 2
|
||||
; NPGSO-NOT: %c = mul i32 %a, 2
|
||||
%c = mul i32 %a, 2
|
||||
store i32 %c, i32* %Cidx, align 4
|
||||
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, %N
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
; RUN: opt < %s -S -loop-unroll -unroll-count=4 | FileCheck -check-prefix=CHECK_COUNT4 %s
|
||||
; RUN: opt < %s -S -loop-unroll | FileCheck -check-prefix=CHECK_NOCOUNT %s
|
||||
; RUN: opt < %s -S -passes='require<profile-summary>,function(unroll)' -pgso | FileCheck -check-prefix=PGSO %s
|
||||
; RUN: opt < %s -S -passes='require<profile-summary>,function(unroll)' -pgso=false | FileCheck -check-prefix=NPGSO %s
|
||||
|
||||
|
||||
;///////////////////// TEST 1 //////////////////////////////
|
||||
|
@ -128,3 +130,47 @@ for.end: ; preds = %for.body
|
|||
; CHECK_NOCOUNT-LABEL: @Test4
|
||||
; CHECK_NOCOUNT: phi
|
||||
; CHECK_NOCOUNT: icmp
|
||||
|
||||
;///////////////////// TEST 5 //////////////////////////////
|
||||
|
||||
; This test shows that with PGO, this loop is cold and not unrolled.
|
||||
|
||||
; Single-block loop that stores the induction variable into @tab and exits when
; the counter reaches 24. The function carries !prof !14, which records a
; function_entry_count of 0. The PGSO and NPGSO check prefixes after the body
; expect the phi and icmp to remain with -pgso and to be absent with
; -pgso=false.
define i32 @Test5() !prof !14 {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
%arrayidx = getelementptr inbounds [24 x i32], [24 x i32]* @tab, i32 0, i32 %i.05
|
||||
store i32 %i.05, i32* %arrayidx, align 4
|
||||
%inc = add nuw nsw i32 %i.05, 1
|
||||
%exitcond = icmp eq i32 %inc, 24
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret i32 42
|
||||
}
|
||||
|
||||
; PGSO-LABEL: @Test5
|
||||
; PGSO: phi
|
||||
; PGSO: icmp
|
||||
; NPGSO-LABEL: @Test5
|
||||
; NPGSO-NOT: phi
|
||||
; NPGSO-NOT: icmp
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
; loop with the optimize for size or the minimize size attributes.
|
||||
; REQUIRES: asserts
|
||||
; RUN: opt < %s -loop-vectorize -S | FileCheck %s
|
||||
; RUN: opt < %s -loop-vectorize -pgso -S | FileCheck %s -check-prefix=PGSO
|
||||
; RUN: opt < %s -loop-vectorize -pgso=false -S | FileCheck %s -check-prefix=NPGSO
|
||||
|
||||
target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S128"
|
||||
|
||||
|
@ -36,6 +38,7 @@ define i32 @foo_minsize() #1 {
|
|||
; CHECK-LABEL: @foo_minsize(
|
||||
; CHECK-NOT: <2 x i8>
|
||||
; CHECK-NOT: <4 x i8>
|
||||
; CHECK-LABEL: @foo_pgso(
|
||||
|
||||
entry:
|
||||
br label %for.body
|
||||
|
@ -57,3 +60,43 @@ for.end: ; preds = %for.body
|
|||
|
||||
attributes #1 = { minsize }
|
||||
|
||||
; Profile-driven variant of the loop-vectorize size test. This function has no
; optsize or minsize attribute but carries !prof !14, which records a
; function_entry_count of 0. The PGSO and NPGSO check prefixes below expect no
; "<N x i8>" vector types with -pgso and at least one with -pgso=false.
define i32 @foo_pgso() !prof !14 {
|
||||
; PGSO-LABEL: @foo_pgso(
|
||||
; PGSO-NOT: <{{[0-9]+}} x i8>
|
||||
; NPGSO-LABEL: @foo_pgso(
|
||||
; NPGSO: <{{[0-9]+}} x i8>
|
||||
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
%arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
|
||||
%0 = load i8, i8* %arrayidx, align 1
|
||||
%cmp1 = icmp eq i8 %0, 0
|
||||
%. = select i1 %cmp1, i8 2, i8 1
|
||||
store i8 %., i8* %arrayidx, align 1
|
||||
%inc = add nsw i32 %i.08, 1
|
||||
%exitcond = icmp eq i32 %i.08, 202
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 1, !"ProfileSummary", !1}
|
||||
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
|
||||
!2 = !{!"ProfileFormat", !"InstrProf"}
|
||||
!3 = !{!"TotalCount", i64 10000}
|
||||
!4 = !{!"MaxCount", i64 10}
|
||||
!5 = !{!"MaxInternalCount", i64 1}
|
||||
!6 = !{!"MaxFunctionCount", i64 1000}
|
||||
!7 = !{!"NumCounts", i64 3}
|
||||
!8 = !{!"NumFunctions", i64 3}
|
||||
!9 = !{!"DetailedSummary", !10}
|
||||
!10 = !{!11, !12, !13}
|
||||
!11 = !{i32 10000, i64 100, i32 1}
|
||||
!12 = !{i32 999000, i64 100, i32 1}
|
||||
!13 = !{i32 999999, i64 1, i32 2}
|
||||
!14 = !{!"function_entry_count", i64 0}
|
||||
|
|
Loading…
Reference in New Issue