forked from OSchip/llvm-project
[InlineAdvisor] New inliner advisor to replay inlining from optimization remarks
Summary: This change adds a new inline advisor that takes the optimization remarks from a previous inlining as input and provides those decisions as advice, so that the current inlining can replay the inline decisions of a different compilation. The DWARF inline stack, with line offset and discriminator, is used as the anchor for call sites. The change can be useful for inliner tuning. A switch -sample-profile-inline-replay=<inline_remarks_file> is added to hook up the new inline advisor with SampleProfileLoader's inline decisions for replay. The new inline advisor can also be used by the regular CGSCC inliner later if needed. Reviewers: davidxl, mtrofin, wmi, hoy Subscribers: aprantl, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D83743
This commit is contained in:
parent
ec6ada6264
commit
029946b112
|
@ -226,6 +226,9 @@ void emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc,
|
|||
bool ForProfileContext = false,
|
||||
const char *PassName = nullptr);
|
||||
|
||||
/// Get the call site location as a string.
|
||||
StringRef getCallSiteLocation(DebugLoc DLoc);
|
||||
|
||||
/// Add location info to ORE message.
|
||||
void addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc);
|
||||
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
//===- ReplayInlineAdvisor.h - Replay Inline Advisor interface -*- C++ --*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
#ifndef LLVM_REPLAYINLINEADVISOR_H_
|
||||
#define LLVM_REPLAYINLINEADVISOR_H_
|
||||
|
||||
#include "llvm/ADT/StringSet.h"
|
||||
#include "llvm/Analysis/InlineAdvisor.h"
|
||||
#include "llvm/IR/LLVMContext.h"
|
||||
|
||||
namespace llvm {
|
||||
class BasicBlock;
|
||||
class CallBase;
|
||||
class Function;
|
||||
class Module;
|
||||
class OptimizationRemarkEmitter;
|
||||
|
||||
/// Replay inline advisor that uses optimization remarks from inlining of
|
||||
/// previous build to guide current inlining. This is useful for inliner tuning.
|
||||
class ReplayInlineAdvisor : public InlineAdvisor {
|
||||
public:
|
||||
ReplayInlineAdvisor(FunctionAnalysisManager &FAM, LLVMContext &Context,
|
||||
StringRef RemarksFile);
|
||||
std::unique_ptr<InlineAdvice> getAdvice(CallBase &CB) override;
|
||||
bool areReplayRemarksLoaded() const { return HasReplayRemarks; }
|
||||
|
||||
private:
|
||||
StringSet<> InlineSitesFromRemarks;
|
||||
bool HasReplayRemarks = false;
|
||||
};
|
||||
} // namespace llvm
|
||||
#endif // LLVM_REPLAYINLINEADVISOR_H_
|
|
@ -117,6 +117,7 @@ add_llvm_component_library(LLVMAnalysis
|
|||
RegionInfo.cpp
|
||||
RegionPass.cpp
|
||||
RegionPrinter.cpp
|
||||
ReplayInlineAdvisor.cpp
|
||||
ScalarEvolution.cpp
|
||||
ScalarEvolutionAliasAnalysis.cpp
|
||||
ScalarEvolutionDivision.cpp
|
||||
|
|
|
@ -365,6 +365,31 @@ llvm::shouldInline(CallBase &CB,
|
|||
return IC;
|
||||
}
|
||||
|
||||
/// Render the inline stack of \p DLoc as a call-site string.
///
/// Each frame is printed as `<function>:<line offset>[.<discriminator>]` and
/// frames are joined with " @ ", starting at \p DLoc and following its
/// inlined-at chain outward. The function name is the subprogram's linkage
/// name, or its plain name when the linkage name is empty. The discriminator
/// suffix is omitted when the base discriminator is zero. A null \p DLoc
/// produces an empty string.
///
/// NOTE(review): the text is assembled in a local std::ostringstream and the
/// returned StringRef is created from the temporary produced by str(), so the
/// reference does not outlive this call. Fixing that needs an owning return
/// type (std::string), which also requires updating the declaration and the
/// callers.
StringRef llvm::getCallSiteLocation(DebugLoc DLoc) {
  std::ostringstream OS;
  const char *Separator = "";

  DILocation *Frame = DLoc.get();
  while (Frame) {
    OS << Separator;
    Separator = " @ ";

    auto *SP = Frame->getScope()->getSubprogram();

    // Note that a negative line offset is actually possible, but we use
    // unsigned int to match the line offset representation in remarks so
    // it's directly consumable by the replay advisor.
    uint32_t LineOffset = Frame->getLine() - SP->getLine();
    uint32_t BaseDiscriminator = Frame->getBaseDiscriminator();

    StringRef FuncName = SP->getLinkageName();
    if (FuncName.empty())
      FuncName = SP->getName();

    OS << FuncName.str() << ":" << llvm::utostr(LineOffset);
    if (BaseDiscriminator)
      OS << "." << llvm::utostr(BaseDiscriminator);

    Frame = Frame->getInlinedAt();
  }

  return OS.str();
}
|
||||
|
||||
void llvm::addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc) {
|
||||
if (!DLoc.get())
|
||||
return;
|
||||
|
|
|
@ -0,0 +1,61 @@
|
|||
//===- ReplayInlineAdvisor.cpp - Replay InlineAdvisor ---------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements ReplayInlineAdvisor that replays inline decision based
|
||||
// on previous inline remarks from optimization remark log.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Analysis/InlineAdvisor.h"
|
||||
#include "llvm/Analysis/ReplayInlineAdvisor.h"
|
||||
#include "llvm/IR/DebugInfoMetadata.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/Support/LineIterator.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "inline-replay"
|
||||
|
||||
/// Build a replay advisor from the inline remarks stored in \p RemarksFile.
///
/// For every line of the file, the non-empty text that follows the first
/// " at callsite " marker is recorded as a call site to inline. If the file
/// cannot be opened, an error is emitted on \p Context and the advisor is left
/// with areReplayRemarksLoaded() returning false.
ReplayInlineAdvisor::ReplayInlineAdvisor(FunctionAnalysisManager &FAM,
                                         LLVMContext &Context,
                                         StringRef RemarksFile)
    : InlineAdvisor(FAM), HasReplayRemarks(false) {
  auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(RemarksFile);
  if (std::error_code EC = BufferOrErr.getError()) {
    Context.emitError("Could not open remarks file: " + EC.message());
    return;
  }

  // Example of an inline remark to parse:
  //   _Z3subii inlined into main [details] at callsite sum:1 @ main:3.1
  // The call-site string after `at callsite` is what gets replayed.
  for (line_iterator LineIt(*BufferOrErr.get(), /*SkipBlanks=*/true);
       !LineIt.is_at_eof(); ++LineIt) {
    StringRef CallSite = (*LineIt).split(" at callsite ").second;
    if (!CallSite.empty())
      InlineSitesFromRemarks.insert(CallSite);
  }

  HasReplayRemarks = true;
}
|
||||
|
||||
/// Advise inlining \p CB exactly when its call-site string is among the call
/// sites loaded from the remarks file. Must only be called after the remarks
/// were loaded successfully.
std::unique_ptr<InlineAdvice> ReplayInlineAdvisor::getAdvice(CallBase &CB) {
  assert(HasReplayRemarks);

  auto &ORE =
      FAM.getResult<OptimizationRemarkEmitterAnalysis>(*CB.getCaller());

  // With nothing to replay there is no need to build the call-site string.
  bool ShouldInline = false;
  if (!InlineSitesFromRemarks.empty()) {
    StringRef Site = getCallSiteLocation(CB.getDebugLoc());
    ShouldInline = InlineSitesFromRemarks.count(Site) != 0;
  }

  return std::make_unique<InlineAdvice>(this, CB, ORE, ShouldInline);
}
|
|
@ -43,6 +43,7 @@
|
|||
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
|
||||
#include "llvm/Analysis/PostDominators.h"
|
||||
#include "llvm/Analysis/ProfileSummaryInfo.h"
|
||||
#include "llvm/Analysis/ReplayInlineAdvisor.h"
|
||||
#include "llvm/Analysis/TargetLibraryInfo.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/IR/BasicBlock.h"
|
||||
|
@ -170,6 +171,13 @@ static cl::opt<int> SampleColdCallSiteThreshold(
|
|||
"sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
|
||||
cl::desc("Threshold for inlining cold callsites"));
|
||||
|
||||
// Hidden command-line option naming the optimization remarks file whose
// inline decisions the sample profile loader should replay. Defaults to the
// empty string.
static cl::opt<std::string> ProfileInlineReplayFile(
|
||||
"sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"),
|
||||
cl::desc(
|
||||
"Optimization remarks file containing inline remarks to be replayed "
|
||||
"by inlining from sample profile loader."),
|
||||
cl::Hidden);
|
||||
|
||||
namespace {
|
||||
|
||||
using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
|
||||
|
@ -319,7 +327,7 @@ public:
|
|||
RemappingFilename(std::string(RemapName)),
|
||||
IsThinLTOPreLink(IsThinLTOPreLink) {}
|
||||
|
||||
bool doInitialization(Module &M);
|
||||
bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
|
||||
bool runOnModule(Module &M, ModuleAnalysisManager *AM,
|
||||
ProfileSummaryInfo *_PSI, CallGraph *CG);
|
||||
|
||||
|
@ -473,6 +481,9 @@ protected:
|
|||
// overridden by -profile-sample-accurate or profile-sample-accurate
|
||||
// attribute.
|
||||
bool ProfAccForSymsInList;
|
||||
|
||||
// External inline advisor used to replay inline decision from remarks.
|
||||
std::unique_ptr<ReplayInlineAdvisor> ExternalInlineAdvisor;
|
||||
};
|
||||
|
||||
class SampleProfileLoaderLegacyPass : public ModulePass {
|
||||
|
@ -898,6 +909,16 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
|
|||
}
|
||||
|
||||
bool SampleProfileLoader::inlineCallInstruction(CallBase &CB) {
|
||||
if (ExternalInlineAdvisor) {
|
||||
auto Advice = ExternalInlineAdvisor->getAdvice(CB);
|
||||
if (!Advice->isInliningRecommended()) {
|
||||
Advice->recordUnattemptedInlining();
|
||||
return false;
|
||||
}
|
||||
// Dummy record, we don't use it for replay.
|
||||
Advice->recordInlining();
|
||||
}
|
||||
|
||||
Function *CalledFunction = CB.getCalledFunction();
|
||||
assert(CalledFunction);
|
||||
DebugLoc DLoc = CB.getDebugLoc();
|
||||
|
@ -1005,7 +1026,7 @@ bool SampleProfileLoader::inlineHotFunctions(
|
|||
}
|
||||
}
|
||||
}
|
||||
if (Hot) {
|
||||
if (Hot || ExternalInlineAdvisor) {
|
||||
CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
|
||||
emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true);
|
||||
} else {
|
||||
|
@ -1818,7 +1839,8 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
|
|||
return FunctionOrderList;
|
||||
}
|
||||
|
||||
bool SampleProfileLoader::doInitialization(Module &M) {
|
||||
bool SampleProfileLoader::doInitialization(Module &M,
|
||||
FunctionAnalysisManager *FAM) {
|
||||
auto &Ctx = M.getContext();
|
||||
|
||||
std::unique_ptr<SampleProfileReaderItaniumRemapper> RemapReader;
|
||||
|
@ -1843,6 +1865,13 @@ bool SampleProfileLoader::doInitialization(Module &M) {
|
|||
NamesInProfile.insert(NameTable->begin(), NameTable->end());
|
||||
}
|
||||
|
||||
if (FAM && !ProfileInlineReplayFile.empty()) {
|
||||
ExternalInlineAdvisor = std::make_unique<ReplayInlineAdvisor>(
|
||||
*FAM, Ctx, ProfileInlineReplayFile);
|
||||
if (!ExternalInlineAdvisor->areReplayRemarksLoaded())
|
||||
ExternalInlineAdvisor.reset();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1995,7 +2024,7 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
|
|||
: ProfileRemappingFileName,
|
||||
IsThinLTOPreLink, GetAssumptionCache, GetTTI, GetTLI);
|
||||
|
||||
if (!SampleLoader.doInitialization(M))
|
||||
if (!SampleLoader.doInitialization(M, &FAM))
|
||||
return PreservedAnalyses::all();
|
||||
|
||||
ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
remark: calls.cc:10:0: _Z3sumii inlined into main to match profiling context with (cost=45, threshold=337) at callsite main:3.1
|
||||
remark: calls.cc:4:0: _Z3subii inlined into main to match profiling context with (cost=-5, threshold=337) at callsite _Z3sumii:1 @ main:3.1
|
|
@ -0,0 +1,122 @@
|
|||
;; Note that this requires the new pass manager for now. Passing `-sample-profile-inline-replay` to the legacy pass manager is a no-op.
|
||||
|
||||
;; Check baseline inline decisions
|
||||
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-topdown.prof -sample-profile-merge-inlinee -sample-profile-top-down-load -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=DEFAULT %s
|
||||
|
||||
;; Check replay inline decisions
|
||||
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-topdown.prof -sample-profile-inline-replay=%S/Inputs/inline-replay.txt -sample-profile-merge-inlinee -sample-profile-top-down-load -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=REPLAY %s
|
||||
|
||||
@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
|
||||
|
||||
; sum(x, y): returns x + y and also calls _Z3subii(x, y), discarding its
; result. Every instruction with a debug location uses !8, which is line 4 of
; subprogram !6 (declared at line 3), i.e. line offset 1 within _Z3sumii.
define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !6 {
|
||||
entry:
|
||||
%x.addr = alloca i32, align 4
|
||||
%y.addr = alloca i32, align 4
|
||||
store i32 %x, i32* %x.addr, align 4
|
||||
store i32 %y, i32* %y.addr, align 4
|
||||
%tmp = load i32, i32* %x.addr, align 4, !dbg !8
|
||||
%tmp1 = load i32, i32* %y.addr, align 4, !dbg !8
|
||||
%add = add nsw i32 %tmp, %tmp1, !dbg !8
|
||||
%tmp2 = load i32, i32* %x.addr, align 4, !dbg !8
|
||||
%tmp3 = load i32, i32* %y.addr, align 4, !dbg !8
|
||||
%call = call i32 @_Z3subii(i32 %tmp2, i32 %tmp3), !dbg !8
|
||||
ret i32 %add, !dbg !8
|
||||
}
|
||||
|
||||
; sub(x, y): returns x - y. Leaf function; it makes no calls.
define i32 @_Z3subii(i32 %x, i32 %y) #0 !dbg !9 {
|
||||
entry:
|
||||
%x.addr = alloca i32, align 4
|
||||
%y.addr = alloca i32, align 4
|
||||
store i32 %x, i32* %x.addr, align 4
|
||||
store i32 %y, i32* %y.addr, align 4
|
||||
%tmp = load i32, i32* %x.addr, align 4, !dbg !10
|
||||
%tmp1 = load i32, i32* %y.addr, align 4, !dbg !10
|
||||
%add = sub nsw i32 %tmp, %tmp1, !dbg !10
|
||||
ret i32 %add, !dbg !11
|
||||
}
|
||||
|
||||
; main: loops while the pre-increment value of %i is below 400000000. On each
; iteration it stores _Z3sumii(i, s) into %s, except when i == 100, where it
; stores 30 instead. After the loop it prints %s through printf and returns 0.
define i32 @main() #0 !dbg !12 {
|
||||
entry:
|
||||
%retval = alloca i32, align 4
|
||||
%s = alloca i32, align 4
|
||||
%i = alloca i32, align 4
|
||||
store i32 0, i32* %retval
|
||||
store i32 0, i32* %i, align 4, !dbg !13
|
||||
br label %while.cond, !dbg !14
|
||||
|
||||
while.cond: ; preds = %if.end, %entry
|
||||
%tmp = load i32, i32* %i, align 4, !dbg !15
|
||||
%inc = add nsw i32 %tmp, 1, !dbg !15
|
||||
store i32 %inc, i32* %i, align 4, !dbg !15
|
||||
%cmp = icmp slt i32 %tmp, 400000000, !dbg !15
|
||||
br i1 %cmp, label %while.body, label %while.end, !dbg !15
|
||||
|
||||
while.body: ; preds = %while.cond
|
||||
%tmp1 = load i32, i32* %i, align 4, !dbg !17
|
||||
%cmp1 = icmp ne i32 %tmp1, 100, !dbg !17
|
||||
br i1 %cmp1, label %if.then, label %if.else, !dbg !17
|
||||
|
||||
if.then: ; preds = %while.body
|
||||
%tmp2 = load i32, i32* %i, align 4, !dbg !19
|
||||
%tmp3 = load i32, i32* %s, align 4, !dbg !19
|
||||
; Call site under test: !19 is line 10 inside main (declared at line 7), i.e.
; line offset 3. Presumably this is the `main:3.1` entry of the replay input;
; the discriminator mapping is not visible here - TODO confirm.
%call = call i32 @_Z3sumii(i32 %tmp2, i32 %tmp3), !dbg !19
|
||||
store i32 %call, i32* %s, align 4, !dbg !19
|
||||
br label %if.end, !dbg !19
|
||||
|
||||
if.else: ; preds = %while.body
|
||||
store i32 30, i32* %s, align 4, !dbg !21
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %if.else, %if.then
|
||||
br label %while.cond, !dbg !23
|
||||
|
||||
while.end: ; preds = %while.cond
|
||||
%tmp4 = load i32, i32* %s, align 4, !dbg !25
|
||||
%call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %tmp4), !dbg !25
|
||||
ret i32 0, !dbg !26
|
||||
}
|
||||
|
||||
declare i32 @printf(i8*, ...)
|
||||
|
||||
attributes #0 = { "use-sample-profile" }
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!3, !4}
|
||||
!llvm.ident = !{!5}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.5 ", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
|
||||
!1 = !DIFile(filename: "calls.cc", directory: ".")
|
||||
!2 = !{}
|
||||
!3 = !{i32 2, !"Dwarf Version", i32 4}
|
||||
!4 = !{i32 1, !"Debug Info Version", i32 3}
|
||||
!5 = !{!"clang version 3.5 "}
|
||||
!6 = distinct !DISubprogram(name: "sum", linkageName: "_Z3sumii", scope: !1, file: !1, line: 3, type: !7, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
|
||||
!7 = !DISubroutineType(types: !2)
|
||||
!8 = !DILocation(line: 4, scope: !6)
|
||||
!9 = distinct !DISubprogram(name: "sub", linkageName: "_Z3subii", scope: !1, file: !1, line: 20, type: !7, scopeLine: 20, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
|
||||
!10 = !DILocation(line: 20, scope: !9)
|
||||
!11 = !DILocation(line: 21, scope: !9)
|
||||
!12 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !7, scopeLine: 7, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
|
||||
!13 = !DILocation(line: 8, scope: !12)
|
||||
!14 = !DILocation(line: 9, scope: !12)
|
||||
!15 = !DILocation(line: 9, scope: !16)
|
||||
!16 = !DILexicalBlockFile(scope: !12, file: !1, discriminator: 2)
|
||||
!17 = !DILocation(line: 10, scope: !18)
|
||||
!18 = distinct !DILexicalBlock(scope: !12, file: !1, line: 10)
|
||||
!19 = !DILocation(line: 10, scope: !20)
|
||||
!20 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 2)
|
||||
!21 = !DILocation(line: 10, scope: !22)
|
||||
!22 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 4)
|
||||
!23 = !DILocation(line: 10, scope: !24)
|
||||
!24 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 6)
|
||||
!25 = !DILocation(line: 11, scope: !12)
|
||||
!26 = !DILocation(line: 12, scope: !12)
|
||||
|
||||
|
||||
; DEFAULT: _Z3sumii inlined into main
|
||||
; DEFAULT: _Z3subii inlined into _Z3sumii
|
||||
; DEFAULT-NOT: _Z3subii inlined into main
|
||||
|
||||
; REPLAY: _Z3sumii inlined into main
|
||||
; REPLAY: _Z3subii inlined into main
|
||||
; REPLAY-NOT: _Z3subii inlined into _Z3sumii
|
Loading…
Reference in New Issue