[ThinLTOCodeGenerator] Add support for index-based WPD

Differential revision: https://reviews.llvm.org/D68950

llvm-svn: 375219
This commit is contained in:
Eugene Leviant 2019-10-18 10:54:14 +00:00
parent b38f577c01
commit eb34c3e8a4
2 changed files with 104 additions and 21 deletions

View File

@ -53,6 +53,7 @@
#include "llvm/Transforms/IPO/FunctionImport.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
@ -225,7 +226,8 @@ crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index,
}
static void optimizeModule(Module &TheModule, TargetMachine &TM,
unsigned OptLevel, bool Freestanding) {
unsigned OptLevel, bool Freestanding,
ModuleSummaryIndex *Index) {
// Populate the PassManager
PassManagerBuilder PMB;
PMB.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple());
@ -239,6 +241,7 @@ static void optimizeModule(Module &TheModule, TargetMachine &TM,
// Already did this in verifyLoadedModule().
PMB.VerifyInput = false;
PMB.VerifyOutput = false;
PMB.ImportSummary = Index;
legacy::PassManager PM;
@ -433,7 +436,7 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc");
}
optimizeModule(TheModule, TM, OptLevel, Freestanding);
optimizeModule(TheModule, TM, OptLevel, Freestanding, &Index);
saveTempBitcode(TheModule, SaveTempsDir, count, ".4.opt.bc");
@ -577,29 +580,36 @@ std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() {
return CombinedIndex;
}
static void internalizeAndPromoteInIndex(
const StringMap<FunctionImporter::ExportSetTy> &ExportLists,
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
const DenseMap<GlobalValue::GUID, const GlobalValueSummary *>
&PrevailingCopy,
ModuleSummaryIndex &Index) {
auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) {
/// Callable predicate answering whether a symbol (by GUID) must be treated as
/// exported from a given module.
///
/// The object only stores references to the two containers it is built from,
/// so both containers must outlive it.
struct IsExported {
// Per-module export sets, keyed by module identifier.
const StringMap<FunctionImporter::ExportSetTy> &ExportLists;
// GUIDs that are reported as exported regardless of the module queried.
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols;
IsExported(const StringMap<FunctionImporter::ExportSetTy> &ExportLists,
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols)
: ExportLists(ExportLists), GUIDPreservedSymbols(GUIDPreservedSymbols) {}
/// Returns true when \p GUID is in the export set recorded for
/// \p ModuleIdentifier, or when \p GUID is in GUIDPreservedSymbols.
/// A module with no entry in ExportLists contributes no exports.
bool operator()(StringRef ModuleIdentifier, GlobalValue::GUID GUID) const {
const auto &ExportList = ExportLists.find(ModuleIdentifier);
return (ExportList != ExportLists.end() &&
ExportList->second.count(GUID)) ||
GUIDPreservedSymbols.count(GUID);
};
}
};
auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) {
/// Callable predicate answering whether a given summary is the prevailing
/// copy of the symbol identified by a GUID.
///
/// The object only stores a reference to the map it is built from, so the map
/// must outlive it.
struct IsPrevailing {
// Maps a GUID to the summary selected as its prevailing copy.
const DenseMap<GlobalValue::GUID, const GlobalValueSummary *> &PrevailingCopy;
IsPrevailing(const DenseMap<GlobalValue::GUID, const GlobalValueSummary *>
&PrevailingCopy)
: PrevailingCopy(PrevailingCopy) {}
/// Returns true when \p S is the summary recorded for \p GUID in
/// PrevailingCopy, or when \p GUID has no entry in the map at all.
bool operator()(GlobalValue::GUID GUID, const GlobalValueSummary *S) const {
const auto &Prevailing = PrevailingCopy.find(GUID);
// Not in map means that there was only one copy, which must be prevailing.
if (Prevailing == PrevailingCopy.end())
return true;
// Otherwise compare by pointer identity against the recorded summary.
return Prevailing->second == S;
};
thinLTOInternalizeAndPromoteInIndex(Index, isExported, isPrevailing);
}
};
static void computeDeadSymbolsInIndex(
ModuleSummaryIndex &Index,
@ -656,8 +666,9 @@ void ThinLTOCodeGenerator::promote(Module &TheModule, ModuleSummaryIndex &Index,
// Promote the exported values in the index, so that they are promoted
// in the module.
internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols,
PrevailingCopy, Index);
thinLTOInternalizeAndPromoteInIndex(
Index, IsExported(ExportLists, GUIDPreservedSymbols),
IsPrevailing(PrevailingCopy));
promoteModule(TheModule, Index);
}
@ -814,8 +825,9 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule,
// Promote the exported values in the index, so that they are promoted
// in the module.
internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols,
PrevailingCopy, Index);
thinLTOInternalizeAndPromoteInIndex(
Index, IsExported(ExportLists, GUIDPreservedSymbols),
IsPrevailing(PrevailingCopy));
promoteModule(TheModule, Index);
@ -834,7 +846,8 @@ void ThinLTOCodeGenerator::optimize(Module &TheModule) {
initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
// Optimize now
optimizeModule(TheModule, *TMBuilder.create(), OptLevel, Freestanding);
optimizeModule(TheModule, *TMBuilder.create(), OptLevel, Freestanding,
nullptr);
}
/// Write out the generated object file, either from CacheEntryPath or from
@ -955,6 +968,15 @@ void ThinLTOCodeGenerator::run() {
// Synthesize entry counts for functions in the combined index.
computeSyntheticCounts(*Index);
// Perform index-based WPD. This will return immediately if there are
// no index entries in the typeIdMetadata map (e.g. if we are instead
// performing IR-based WPD in hybrid regular/thin LTO mode).
std::map<ValueInfo, std::vector<VTableSlotSummary>> LocalWPDTargetsMap;
std::set<GlobalValue::GUID> ExportedGUIDs;
runWholeProgramDevirtOnIndex(*Index, ExportedGUIDs, LocalWPDTargetsMap);
for (auto GUID : ExportedGUIDs)
GUIDPreservedSymbols.insert(GUID);
// Collect the import/export lists for all modules from the call-graph in the
// combined index.
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
@ -979,8 +1001,12 @@ void ThinLTOCodeGenerator::run() {
// Use global summary-based analysis to identify symbols that can be
// internalized (because they aren't exported or preserved as per callback).
// Changes are made in the index, consumed in the ThinLTO backends.
internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols,
PrevailingCopy, *Index);
updateIndexWPDForExports(*Index,
IsExported(ExportLists, GUIDPreservedSymbols),
LocalWPDTargetsMap);
thinLTOInternalizeAndPromoteInIndex(
*Index, IsExported(ExportLists, GUIDPreservedSymbols),
IsPrevailing(PrevailingCopy));
// Make sure that every module has an entry in the ExportLists, ImportList,
// GVSummary and ResolvedODR maps to enable threaded access to these maps

View File

@ -0,0 +1,57 @@
; REQUIRES: x86-registered-target
; Test devirtualization that requires promotion of a local target: the
; promotion is required by one devirtualization, and a second
; devirtualization in the defining module must then be updated to match,
; as a post-pass update.
; Generate unsplit module with summary for ThinLTO index-based WPD.
; RUN: opt -thinlto-bc -o %t3.o %s
; RUN: opt -thinlto-bc -o %t4.o %p/Inputs/devirt_promote.ll
; RUN: llvm-lto -thinlto-action=run %t3.o %t4.o --thinlto-save-temps=%t5. \
; RUN: --pass-remarks=. \
; RUN: --exported-symbol=test \
; RUN: --exported-symbol=test2 \
; RUN: --exported-symbol=_ZTV1B 2>&1 | FileCheck %s --check-prefix=REMARK
; RUN: llvm-dis %t5.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1
; RUN: llvm-dis %t5.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR2
; We should devirtualize the call to _ZN1A1nEi twice: once in the importing
; module and once in the original (exporting) module.
; REMARK-COUNT-2: single-impl: devirtualized a call to _ZN1A1nEi.llvm.
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-grtev4-linux-gnu"
%struct.A = type { i32 (...)** }
; CHECK-IR1-LABEL: define i32 @test
; Makes an indirect call through the function pointer stored at index 1 of
; the vtable loaded from %obj, passing %obj and %a, and returns the result.
define i32 @test(%struct.A* %obj, i32 %a) {
entry:
; Load the vtable pointer from the address of the object.
%0 = bitcast %struct.A* %obj to i8***
%vtable = load i8**, i8*** %0
%1 = bitcast i8** %vtable to i8*
; Test the vtable pointer against type id "_ZTS1A" and assume the result.
%p = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A")
call void @llvm.assume(i1 %p)
; Address of vtable entry 1, reinterpreted as a pointer to a function
; taking (%struct.A*, i32) and returning i32.
%fptrptr = getelementptr i8*, i8** %vtable, i32 1
%2 = bitcast i8** %fptrptr to i32 (%struct.A*, i32)**
%fptr1 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %2, align 8
; Check that the call was devirtualized.
; CHECK-IR1: %call = tail call i32 bitcast (void ()* @_ZN1A1nEi
%call = tail call i32 %fptr1(%struct.A* nonnull %obj, i32 %a)
ret i32 %call
}
; CHECK-IR1-LABEL: ret i32
; CHECK-IR1-LABEL: }
; CHECK-IR2: define i32 @test2
; Check that the call was devirtualized.
; CHECK-IR2: %call4 = tail call i32 @_ZN1A1nEi
declare i1 @llvm.type.test(i8*, metadata)
declare void @llvm.assume(i1)
attributes #0 = { noinline optnone }