[InlineAdvisor] New inliner advisor to replay inlining from optimization remarks

Summary:
This change added a new inline advisor that takes optimization remarks for previous inlining as input, and provide the decision as advice so current inlining can replay inline decision of a different compilation. Dwarf inline stack with line and discriminator is used as anchor for call sites. The change can be useful for Inliner tuning.
A switch -sample-profile-inline-replay=<inline_remarks_file> is added to hook up the new inliner advisor with SampleProfileLoader's inline decision for replay. The new inline advisor can also be used by regular CGSCC inliner later if needed.

Reviewers: davidxl, mtrofin, wmi, hoy

Subscribers: aprantl, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D83743
This commit is contained in:
Wenlei He 2020-07-13 22:12:28 -07:00
parent ec6ada6264
commit 029946b112
8 changed files with 284 additions and 4 deletions

View File

@ -226,6 +226,9 @@ void emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc,
bool ForProfileContext = false,
const char *PassName = nullptr);
/// get call site location as string
StringRef getCallSiteLocation(DebugLoc DLoc);
/// Add location info to ORE message.
void addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc);

View File

@ -0,0 +1,37 @@
//===- ReplayInlineAdvisor.h - Replay Inline Advisor interface -*- C++ --*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
#ifndef LLVM_REPLAYINLINEADVISOR_H_
#define LLVM_REPLAYINLINEADVISOR_H_
#include "llvm/ADT/StringSet.h"
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/IR/LLVMContext.h"
namespace llvm {
class BasicBlock;
class CallBase;
class Function;
class Module;
class OptimizationRemarkEmitter;
/// Replay inline advisor that uses optimization remarks from inlining of
/// previous build to guide current inlining. This is useful for inliner tuning.
class ReplayInlineAdvisor : public InlineAdvisor {
public:
ReplayInlineAdvisor(FunctionAnalysisManager &FAM, LLVMContext &Context,
StringRef RemarksFile);
std::unique_ptr<InlineAdvice> getAdvice(CallBase &CB) override;
bool areReplayRemarksLoaded() const { return HasReplayRemarks; }
private:
StringSet<> InlineSitesFromRemarks;
bool HasReplayRemarks = false;
};
} // namespace llvm
#endif // LLVM_REPLAYINLINEADVISOR_H_

View File

@ -117,6 +117,7 @@ add_llvm_component_library(LLVMAnalysis
RegionInfo.cpp
RegionPass.cpp
RegionPrinter.cpp
ReplayInlineAdvisor.cpp
ScalarEvolution.cpp
ScalarEvolutionAliasAnalysis.cpp
ScalarEvolutionDivision.cpp

View File

@ -365,6 +365,31 @@ llvm::shouldInline(CallBase &CB,
return IC;
}
StringRef llvm::getCallSiteLocation(DebugLoc DLoc) {
std::ostringstream CallSiteLoc;
bool First = true;
for (DILocation *DIL = DLoc.get(); DIL; DIL = DIL->getInlinedAt()) {
if (!First)
CallSiteLoc << " @ ";
// Note that negative line offset is actually possible, but we use
// unsigned int to match line offset representation in remarks so
// it's directly consumable by relay advisor.
uint32_t Offset =
DIL->getLine() - DIL->getScope()->getSubprogram()->getLine();
uint32_t Discriminator = DIL->getBaseDiscriminator();
StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
if (Name.empty())
Name = DIL->getScope()->getSubprogram()->getName();
CallSiteLoc << Name.str() << ":" << llvm::utostr(Offset);
if (Discriminator) {
CallSiteLoc << "." << llvm::utostr(Discriminator);
}
First = false;
}
return CallSiteLoc.str();
}
void llvm::addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc) {
if (!DLoc.get())
return;

View File

@ -0,0 +1,61 @@
//===- ReplayInlineAdvisor.cpp - Replay InlineAdvisor ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements ReplayInlineAdvisor that replays inline decision based
// on previous inline remarks from optimization remark log.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/ReplayInlineAdvisor.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/LineIterator.h"
using namespace llvm;
#define DEBUG_TYPE "inline-replay"
ReplayInlineAdvisor::ReplayInlineAdvisor(FunctionAnalysisManager &FAM,
LLVMContext &Context,
StringRef RemarksFile)
: InlineAdvisor(FAM), HasReplayRemarks(false) {
auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(RemarksFile);
std::error_code EC = BufferOrErr.getError();
if (EC) {
Context.emitError("Could not open remarks file: " + EC.message());
return;
}
// Example for inline remarks to parse:
// _Z3subii inlined into main [details] at callsite sum:1 @ main:3.1
// We use the callsite string after `at callsite` to replay inlining.
line_iterator LineIt(*BufferOrErr.get(), /*SkipBlanks=*/true);
for (; !LineIt.is_at_eof(); ++LineIt) {
StringRef Line = *LineIt;
auto Pair = Line.split(" at callsite ");
if (Pair.second.empty())
continue;
InlineSitesFromRemarks.insert(Pair.second);
}
HasReplayRemarks = true;
}
std::unique_ptr<InlineAdvice> ReplayInlineAdvisor::getAdvice(CallBase &CB) {
assert(HasReplayRemarks);
Function &Caller = *CB.getCaller();
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);
if (InlineSitesFromRemarks.empty())
return std::make_unique<InlineAdvice>(this, CB, ORE, false);
StringRef CallSiteLoc = getCallSiteLocation(CB.getDebugLoc());
bool InlineRecommended = InlineSitesFromRemarks.count(CallSiteLoc) > 0;
return std::make_unique<InlineAdvice>(this, CB, ORE, InlineRecommended);
}

View File

@ -43,6 +43,7 @@
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ReplayInlineAdvisor.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
@ -170,6 +171,13 @@ static cl::opt<int> SampleColdCallSiteThreshold(
"sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
cl::desc("Threshold for inlining cold callsites"));
static cl::opt<std::string> ProfileInlineReplayFile(
"sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"),
cl::desc(
"Optimization remarks file containing inline remarks to be replayed "
"by inlining from sample profile loader."),
cl::Hidden);
namespace {
using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
@ -319,7 +327,7 @@ public:
RemappingFilename(std::string(RemapName)),
IsThinLTOPreLink(IsThinLTOPreLink) {}
bool doInitialization(Module &M);
bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
bool runOnModule(Module &M, ModuleAnalysisManager *AM,
ProfileSummaryInfo *_PSI, CallGraph *CG);
@ -473,6 +481,9 @@ protected:
// overriden by -profile-sample-accurate or profile-sample-accurate
// attribute.
bool ProfAccForSymsInList;
// External inline advisor used to replay inline decision from remarks.
std::unique_ptr<ReplayInlineAdvisor> ExternalInlineAdvisor;
};
class SampleProfileLoaderLegacyPass : public ModulePass {
@ -898,6 +909,16 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
}
bool SampleProfileLoader::inlineCallInstruction(CallBase &CB) {
if (ExternalInlineAdvisor) {
auto Advice = ExternalInlineAdvisor->getAdvice(CB);
if (!Advice->isInliningRecommended()) {
Advice->recordUnattemptedInlining();
return false;
}
// Dummy record, we don't use it for replay.
Advice->recordInlining();
}
Function *CalledFunction = CB.getCalledFunction();
assert(CalledFunction);
DebugLoc DLoc = CB.getDebugLoc();
@ -1005,7 +1026,7 @@ bool SampleProfileLoader::inlineHotFunctions(
}
}
}
if (Hot) {
if (Hot || ExternalInlineAdvisor) {
CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true);
} else {
@ -1818,7 +1839,8 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
return FunctionOrderList;
}
bool SampleProfileLoader::doInitialization(Module &M) {
bool SampleProfileLoader::doInitialization(Module &M,
FunctionAnalysisManager *FAM) {
auto &Ctx = M.getContext();
std::unique_ptr<SampleProfileReaderItaniumRemapper> RemapReader;
@ -1843,6 +1865,13 @@ bool SampleProfileLoader::doInitialization(Module &M) {
NamesInProfile.insert(NameTable->begin(), NameTable->end());
}
if (FAM && !ProfileInlineReplayFile.empty()) {
ExternalInlineAdvisor = std::make_unique<ReplayInlineAdvisor>(
*FAM, Ctx, ProfileInlineReplayFile);
if (!ExternalInlineAdvisor->areReplayRemarksLoaded())
ExternalInlineAdvisor.reset();
}
return true;
}
@ -1995,7 +2024,7 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
: ProfileRemappingFileName,
IsThinLTOPreLink, GetAssumptionCache, GetTTI, GetTLI);
if (!SampleLoader.doInitialization(M))
if (!SampleLoader.doInitialization(M, &FAM))
return PreservedAnalyses::all();
ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);

View File

@ -0,0 +1,2 @@
remark: calls.cc:10:0: _Z3sumii inlined into main to match profiling context with (cost=45, threshold=337) at callsite main:3.1
remark: calls.cc:4:0: _Z3subii inlined into main to match profiling context with (cost=-5, threshold=337) at callsite _Z3sumii:1 @ main:3.1

View File

@ -0,0 +1,122 @@
;; Note that this needs new pass manager for now. Passing `-sample-profile-inline-replay` to legacy pass manager is a no-op.
;; Check baseline inline decisions
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-topdown.prof -sample-profile-merge-inlinee -sample-profile-top-down-load -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=DEFAULT %s
;; Check replay inline decisions
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-topdown.prof -sample-profile-inline-replay=%S/Inputs/inline-replay.txt -sample-profile-merge-inlinee -sample-profile-top-down-load -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=REPLAY %s
@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !6 {
entry:
%x.addr = alloca i32, align 4
%y.addr = alloca i32, align 4
store i32 %x, i32* %x.addr, align 4
store i32 %y, i32* %y.addr, align 4
%tmp = load i32, i32* %x.addr, align 4, !dbg !8
%tmp1 = load i32, i32* %y.addr, align 4, !dbg !8
%add = add nsw i32 %tmp, %tmp1, !dbg !8
%tmp2 = load i32, i32* %x.addr, align 4, !dbg !8
%tmp3 = load i32, i32* %y.addr, align 4, !dbg !8
%call = call i32 @_Z3subii(i32 %tmp2, i32 %tmp3), !dbg !8
ret i32 %add, !dbg !8
}
define i32 @_Z3subii(i32 %x, i32 %y) #0 !dbg !9 {
entry:
%x.addr = alloca i32, align 4
%y.addr = alloca i32, align 4
store i32 %x, i32* %x.addr, align 4
store i32 %y, i32* %y.addr, align 4
%tmp = load i32, i32* %x.addr, align 4, !dbg !10
%tmp1 = load i32, i32* %y.addr, align 4, !dbg !10
%add = sub nsw i32 %tmp, %tmp1, !dbg !10
ret i32 %add, !dbg !11
}
define i32 @main() #0 !dbg !12 {
entry:
%retval = alloca i32, align 4
%s = alloca i32, align 4
%i = alloca i32, align 4
store i32 0, i32* %retval
store i32 0, i32* %i, align 4, !dbg !13
br label %while.cond, !dbg !14
while.cond: ; preds = %if.end, %entry
%tmp = load i32, i32* %i, align 4, !dbg !15
%inc = add nsw i32 %tmp, 1, !dbg !15
store i32 %inc, i32* %i, align 4, !dbg !15
%cmp = icmp slt i32 %tmp, 400000000, !dbg !15
br i1 %cmp, label %while.body, label %while.end, !dbg !15
while.body: ; preds = %while.cond
%tmp1 = load i32, i32* %i, align 4, !dbg !17
%cmp1 = icmp ne i32 %tmp1, 100, !dbg !17
br i1 %cmp1, label %if.then, label %if.else, !dbg !17
if.then: ; preds = %while.body
%tmp2 = load i32, i32* %i, align 4, !dbg !19
%tmp3 = load i32, i32* %s, align 4, !dbg !19
%call = call i32 @_Z3sumii(i32 %tmp2, i32 %tmp3), !dbg !19
store i32 %call, i32* %s, align 4, !dbg !19
br label %if.end, !dbg !19
if.else: ; preds = %while.body
store i32 30, i32* %s, align 4, !dbg !21
br label %if.end
if.end: ; preds = %if.else, %if.then
br label %while.cond, !dbg !23
while.end: ; preds = %while.cond
%tmp4 = load i32, i32* %s, align 4, !dbg !25
%call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %tmp4), !dbg !25
ret i32 0, !dbg !26
}
declare i32 @printf(i8*, ...)
attributes #0 = { "use-sample-profile" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}
!llvm.ident = !{!5}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.5 ", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "calls.cc", directory: ".")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 1, !"Debug Info Version", i32 3}
!5 = !{!"clang version 3.5 "}
!6 = distinct !DISubprogram(name: "sum", linkageName: "_Z3sumii", scope: !1, file: !1, line: 3, type: !7, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
!7 = !DISubroutineType(types: !2)
!8 = !DILocation(line: 4, scope: !6)
!9 = distinct !DISubprogram(name: "sub", linkageName: "_Z3subii", scope: !1, file: !1, line: 20, type: !7, scopeLine: 20, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
!10 = !DILocation(line: 20, scope: !9)
!11 = !DILocation(line: 21, scope: !9)
!12 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !7, scopeLine: 7, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
!13 = !DILocation(line: 8, scope: !12)
!14 = !DILocation(line: 9, scope: !12)
!15 = !DILocation(line: 9, scope: !16)
!16 = !DILexicalBlockFile(scope: !12, file: !1, discriminator: 2)
!17 = !DILocation(line: 10, scope: !18)
!18 = distinct !DILexicalBlock(scope: !12, file: !1, line: 10)
!19 = !DILocation(line: 10, scope: !20)
!20 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 2)
!21 = !DILocation(line: 10, scope: !22)
!22 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 4)
!23 = !DILocation(line: 10, scope: !24)
!24 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 6)
!25 = !DILocation(line: 11, scope: !12)
!26 = !DILocation(line: 12, scope: !12)
; DEFAULT: _Z3sumii inlined into main
; DEFAULT: _Z3subii inlined into _Z3sumii
; DEFAULT-NOT: _Z3subii inlined into main
; REPLAY: _Z3sumii inlined into main
; REPLAY: _Z3subii inlined into main
; REPLA-NOT: _Z3subii inlined into _Z3sumii