forked from OSchip/llvm-project
[ThinLTOCodeGenerator] Add support for index-based WPD
Differential revision: https://reviews.llvm.org/D68950
llvm-svn: 375219
This commit is contained in:
parent
b38f577c01
commit
eb34c3e8a4
|
@ -53,6 +53,7 @@
|
|||
#include "llvm/Transforms/IPO/FunctionImport.h"
|
||||
#include "llvm/Transforms/IPO/Internalize.h"
|
||||
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
|
||||
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
|
||||
#include "llvm/Transforms/ObjCARC.h"
|
||||
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
|
||||
|
||||
|
@ -225,7 +226,8 @@ crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index,
|
|||
}
|
||||
|
||||
static void optimizeModule(Module &TheModule, TargetMachine &TM,
|
||||
unsigned OptLevel, bool Freestanding) {
|
||||
unsigned OptLevel, bool Freestanding,
|
||||
ModuleSummaryIndex *Index) {
|
||||
// Populate the PassManager
|
||||
PassManagerBuilder PMB;
|
||||
PMB.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple());
|
||||
|
@ -239,6 +241,7 @@ static void optimizeModule(Module &TheModule, TargetMachine &TM,
|
|||
// Already did this in verifyLoadedModule().
|
||||
PMB.VerifyInput = false;
|
||||
PMB.VerifyOutput = false;
|
||||
PMB.ImportSummary = Index;
|
||||
|
||||
legacy::PassManager PM;
|
||||
|
||||
|
@ -433,7 +436,7 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
|
|||
saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc");
|
||||
}
|
||||
|
||||
optimizeModule(TheModule, TM, OptLevel, Freestanding);
|
||||
optimizeModule(TheModule, TM, OptLevel, Freestanding, &Index);
|
||||
|
||||
saveTempBitcode(TheModule, SaveTempsDir, count, ".4.opt.bc");
|
||||
|
||||
|
@ -577,29 +580,36 @@ std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() {
|
|||
return CombinedIndex;
|
||||
}
|
||||
|
||||
static void internalizeAndPromoteInIndex(
|
||||
const StringMap<FunctionImporter::ExportSetTy> &ExportLists,
|
||||
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
|
||||
const DenseMap<GlobalValue::GUID, const GlobalValueSummary *>
|
||||
&PrevailingCopy,
|
||||
ModuleSummaryIndex &Index) {
|
||||
auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) {
|
||||
struct IsExported {
|
||||
const StringMap<FunctionImporter::ExportSetTy> &ExportLists;
|
||||
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols;
|
||||
|
||||
IsExported(const StringMap<FunctionImporter::ExportSetTy> &ExportLists,
|
||||
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols)
|
||||
: ExportLists(ExportLists), GUIDPreservedSymbols(GUIDPreservedSymbols) {}
|
||||
|
||||
bool operator()(StringRef ModuleIdentifier, GlobalValue::GUID GUID) const {
|
||||
const auto &ExportList = ExportLists.find(ModuleIdentifier);
|
||||
return (ExportList != ExportLists.end() &&
|
||||
ExportList->second.count(GUID)) ||
|
||||
GUIDPreservedSymbols.count(GUID);
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) {
|
||||
struct IsPrevailing {
|
||||
const DenseMap<GlobalValue::GUID, const GlobalValueSummary *> &PrevailingCopy;
|
||||
IsPrevailing(const DenseMap<GlobalValue::GUID, const GlobalValueSummary *>
|
||||
&PrevailingCopy)
|
||||
: PrevailingCopy(PrevailingCopy) {}
|
||||
|
||||
bool operator()(GlobalValue::GUID GUID, const GlobalValueSummary *S) const {
|
||||
const auto &Prevailing = PrevailingCopy.find(GUID);
|
||||
// Not in map means that there was only one copy, which must be prevailing.
|
||||
if (Prevailing == PrevailingCopy.end())
|
||||
return true;
|
||||
return Prevailing->second == S;
|
||||
};
|
||||
|
||||
thinLTOInternalizeAndPromoteInIndex(Index, isExported, isPrevailing);
|
||||
}
|
||||
};
|
||||
|
||||
static void computeDeadSymbolsInIndex(
|
||||
ModuleSummaryIndex &Index,
|
||||
|
@ -656,8 +666,9 @@ void ThinLTOCodeGenerator::promote(Module &TheModule, ModuleSummaryIndex &Index,
|
|||
|
||||
// Promote the exported values in the index, so that they are promoted
|
||||
// in the module.
|
||||
internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols,
|
||||
PrevailingCopy, Index);
|
||||
thinLTOInternalizeAndPromoteInIndex(
|
||||
Index, IsExported(ExportLists, GUIDPreservedSymbols),
|
||||
IsPrevailing(PrevailingCopy));
|
||||
|
||||
promoteModule(TheModule, Index);
|
||||
}
|
||||
|
@ -814,8 +825,9 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule,
|
|||
|
||||
// Promote the exported values in the index, so that they are promoted
|
||||
// in the module.
|
||||
internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols,
|
||||
PrevailingCopy, Index);
|
||||
thinLTOInternalizeAndPromoteInIndex(
|
||||
Index, IsExported(ExportLists, GUIDPreservedSymbols),
|
||||
IsPrevailing(PrevailingCopy));
|
||||
|
||||
promoteModule(TheModule, Index);
|
||||
|
||||
|
@ -834,7 +846,8 @@ void ThinLTOCodeGenerator::optimize(Module &TheModule) {
|
|||
initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
|
||||
|
||||
// Optimize now
|
||||
optimizeModule(TheModule, *TMBuilder.create(), OptLevel, Freestanding);
|
||||
optimizeModule(TheModule, *TMBuilder.create(), OptLevel, Freestanding,
|
||||
nullptr);
|
||||
}
|
||||
|
||||
/// Write out the generated object file, either from CacheEntryPath or from
|
||||
|
@ -955,6 +968,15 @@ void ThinLTOCodeGenerator::run() {
|
|||
// Synthesize entry counts for functions in the combined index.
|
||||
computeSyntheticCounts(*Index);
|
||||
|
||||
// Perform index-based WPD. This will return immediately if there are
|
||||
// no index entries in the typeIdMetadata map (e.g. if we are instead
|
||||
// performing IR-based WPD in hybrid regular/thin LTO mode).
|
||||
std::map<ValueInfo, std::vector<VTableSlotSummary>> LocalWPDTargetsMap;
|
||||
std::set<GlobalValue::GUID> ExportedGUIDs;
|
||||
runWholeProgramDevirtOnIndex(*Index, ExportedGUIDs, LocalWPDTargetsMap);
|
||||
for (auto GUID : ExportedGUIDs)
|
||||
GUIDPreservedSymbols.insert(GUID);
|
||||
|
||||
// Collect the import/export lists for all modules from the call-graph in the
|
||||
// combined index.
|
||||
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
|
||||
|
@ -979,8 +1001,12 @@ void ThinLTOCodeGenerator::run() {
|
|||
// Use global summary-based analysis to identify symbols that can be
|
||||
// internalized (because they aren't exported or preserved as per callback).
|
||||
// Changes are made in the index, consumed in the ThinLTO backends.
|
||||
internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols,
|
||||
PrevailingCopy, *Index);
|
||||
updateIndexWPDForExports(*Index,
|
||||
IsExported(ExportLists, GUIDPreservedSymbols),
|
||||
LocalWPDTargetsMap);
|
||||
thinLTOInternalizeAndPromoteInIndex(
|
||||
*Index, IsExported(ExportLists, GUIDPreservedSymbols),
|
||||
IsPrevailing(PrevailingCopy));
|
||||
|
||||
// Make sure that every module has an entry in the ExportLists, ImportList,
|
||||
// GVSummary and ResolvedODR maps to enable threaded access to these maps
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
; REQUIRES: x86-registered-target
|
||||
|
||||
; Test devirtualization requiring promotion of local targets, where the
|
||||
; promotion is required by one devirtualization (in the importing module)
|
||||
; and a second devirtualization in the defining module must be updated in
|
||||
; a post-pass to use the promoted name.
|
||||
|
||||
; Generate unsplit module with summary for ThinLTO index-based WPD.
|
||||
; RUN: opt -thinlto-bc -o %t3.o %s
|
||||
; RUN: opt -thinlto-bc -o %t4.o %p/Inputs/devirt_promote.ll
|
||||
|
||||
; RUN: llvm-lto -thinlto-action=run %t3.o %t4.o --thinlto-save-temps=%t5. \
|
||||
; RUN: --pass-remarks=. \
|
||||
; RUN: --exported-symbol=test \
|
||||
; RUN: --exported-symbol=test2 \
|
||||
; RUN: --exported-symbol=_ZTV1B 2>&1 | FileCheck %s --check-prefix=REMARK
|
||||
; RUN: llvm-dis %t5.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1
|
||||
; RUN: llvm-dis %t5.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR2
|
||||
|
||||
; We should devirt call to _ZN1A1nEi once in importing module and once
|
||||
; in original (exporting) module.
|
||||
; REMARK-COUNT-2: single-impl: devirtualized a call to _ZN1A1nEi.llvm.
|
||||
|
||||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-grtev4-linux-gnu"
|
||||
|
||||
%struct.A = type { i32 (...)** }
|
||||
|
||||
; CHECK-IR1-LABEL: define i32 @test
|
||||
define i32 @test(%struct.A* %obj, i32 %a) {
|
||||
entry:
|
||||
%0 = bitcast %struct.A* %obj to i8***
|
||||
%vtable = load i8**, i8*** %0
|
||||
%1 = bitcast i8** %vtable to i8*
|
||||
%p = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A")
|
||||
call void @llvm.assume(i1 %p)
|
||||
%fptrptr = getelementptr i8*, i8** %vtable, i32 1
|
||||
%2 = bitcast i8** %fptrptr to i32 (%struct.A*, i32)**
|
||||
%fptr1 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %2, align 8
|
||||
|
||||
; Check that the call was devirtualized.
|
||||
; CHECK-IR1: %call = tail call i32 bitcast (void ()* @_ZN1A1nEi
|
||||
%call = tail call i32 %fptr1(%struct.A* nonnull %obj, i32 %a)
|
||||
|
||||
ret i32 %call
|
||||
}
|
||||
; CHECK-IR1-LABEL: ret i32
|
||||
; CHECK-IR1-LABEL: }
|
||||
|
||||
; CHECK-IR2: define i32 @test2
|
||||
; Check that the call was devirtualized.
|
||||
; CHECK-IR2: %call4 = tail call i32 @_ZN1A1nEi
|
||||
|
||||
declare i1 @llvm.type.test(i8*, metadata)
|
||||
declare void @llvm.assume(i1)
|
||||
|
||||
attributes #0 = { noinline optnone }
|
Loading…
Reference in New Issue