[LTO/WPD] Enable aggressive WPD under LTO option

Summary:
Third part in series to support Safe Whole Program Devirtualization
Enablement, see RFC here:
http://lists.llvm.org/pipermail/llvm-dev/2019-December/137543.html

This patch adds type test metadata under -fwhole-program-vtables,
even for classes without hidden visibility. It then changes WPD to skip
devirtualization for a virtual function call when any of the compatible
vtables has public vcall visibility.

Additionally, internal LLVM options as well as lld and gold-plugin
options are added which enable upgrading all public vcall visibility
to linkage unit (hidden) visibility during LTO. This enables the more
aggressive WPD to kick in based on LTO time knowledge of the visibility
guarantees.

Support was added to all flavors of LTO WPD (regular, hybrid and
index-only), and to both the new and old LTO APIs.

Unfortunately it was not simple to split the first and second parts of
this change (the unconditional emission of type tests and
the upgrading of the vcall visibility) as I needed a way to upgrade the
public visibility on legacy WPD llvm assembly tests that don't include
linkage unit vcall visibility specifiers, to avoid a lot of test churn.

I also added a mechanism to LowerTypeTests that allows dropping type
test assume sequences we now aggressively insert when we invoke
distributed ThinLTO backends with null indexes, which is used in testing
mode, and which doesn't invoke the normal ThinLTO backend pipeline.

Depends on D71907 and D71911.

Reviewers: pcc, evgeny777, steven_wu, espindola

Subscribers: emaste, Prazek, inglorion, arichardson, hiraditya, MaskRay, dexonsmith, dang, davidxl, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D71913
This commit is contained in:
Teresa Johnson 2019-12-26 11:40:18 -08:00
parent 9e66c4ec12
commit 59733525d3
71 changed files with 1046 additions and 93 deletions

View File

@ -51,6 +51,7 @@
#include "llvm/Transforms/Coroutines.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/LowerTypeTests.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
@ -553,6 +554,16 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
std::unique_ptr<TargetLibraryInfoImpl> TLII(
createTLII(TargetTriple, CodeGenOpts));
// If we reached here with a non-empty index file name, then the index file
// was empty and we are not performing ThinLTO backend compilation (used in
// testing in a distributed build environment). Drop any type test
// assume sequences inserted for whole program vtables so that codegen doesn't
// complain.
if (!CodeGenOpts.ThinLTOIndexFile.empty())
MPM.add(createLowerTypeTestsPass(/*ExportSummary=*/nullptr,
/*ImportSummary=*/nullptr,
/*DropTypeTests=*/true));
PassManagerBuilderWrapper PMBuilder(TargetTriple, CodeGenOpts, LangOpts);
// At O0 and O1 we only run the always inliner which is more efficient. At
@ -1114,6 +1125,15 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
bool IsLTO = CodeGenOpts.PrepareForLTO;
if (CodeGenOpts.OptimizationLevel == 0) {
// If we reached here with a non-empty index file name, then the index
// file was empty and we are not performing ThinLTO backend compilation
// (used in testing in a distributed build environment). Drop any type
// test assume sequences inserted for whole program vtables so that
// codegen doesn't complain.
if (!CodeGenOpts.ThinLTOIndexFile.empty())
MPM.addPass(LowerTypeTestsPass(/*ExportSummary=*/nullptr,
/*ImportSummary=*/nullptr,
/*DropTypeTests=*/true));
if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts))
MPM.addPass(GCOVProfilerPass(*Options));
if (Optional<InstrProfOptions> Options =
@ -1150,6 +1170,18 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
// configure the pipeline.
PassBuilder::OptimizationLevel Level = mapToLevel(CodeGenOpts);
// If we reached here with a non-empty index file name, then the index
// file was empty and we are not performing ThinLTO backend compilation
// (used in testing in a distributed build environment). Drop any type
// test assume sequences inserted for whole program vtables so that
// codegen doesn't complain.
if (!CodeGenOpts.ThinLTOIndexFile.empty())
PB.registerPipelineStartEPCallback([](ModulePassManager &MPM) {
MPM.addPass(LowerTypeTestsPass(/*ExportSummary=*/nullptr,
/*ImportSummary=*/nullptr,
/*DropTypeTests=*/true));
});
PB.registerPipelineStartEPCallback([](ModulePassManager &MPM) {
MPM.addPass(createModuleToFunctionPassAdaptor(
EntryExitInstrumenterPass(/*PostInlining=*/false)));

View File

@ -2641,7 +2641,9 @@ void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD,
if (SanOpts.has(SanitizerKind::CFIVCall))
EmitVTablePtrCheckForCall(RD, VTable, CodeGenFunction::CFITCK_VCall, Loc);
else if (CGM.getCodeGenOpts().WholeProgramVTables &&
CGM.HasHiddenLTOVisibility(RD)) {
// Don't insert type test assumes if we are forcing public std
// visibility.
!CGM.HasLTOVisibilityPublicStd(RD)) {
llvm::Metadata *MD =
CGM.CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0));
llvm::Value *TypeId =

View File

@ -1011,6 +1011,26 @@ void CodeGenModule::EmitDeferredVTables() {
DeferredVTables.clear();
}
bool CodeGenModule::HasLTOVisibilityPublicStd(const CXXRecordDecl *RD) {
if (!getCodeGenOpts().LTOVisibilityPublicStd)
return false;
const DeclContext *DC = RD;
while (1) {
auto *D = cast<Decl>(DC);
DC = DC->getParent();
if (isa<TranslationUnitDecl>(DC->getRedeclContext())) {
if (auto *ND = dyn_cast<NamespaceDecl>(D))
if (const IdentifierInfo *II = ND->getIdentifier())
if (II->isStr("std") || II->isStr("stdext"))
return true;
break;
}
}
return false;
}
bool CodeGenModule::HasHiddenLTOVisibility(const CXXRecordDecl *RD) {
LinkageInfo LV = RD->getLinkageAndVisibility();
if (!isExternallyVisible(LV.getLinkage()))
@ -1027,22 +1047,7 @@ bool CodeGenModule::HasHiddenLTOVisibility(const CXXRecordDecl *RD) {
return false;
}
if (getCodeGenOpts().LTOVisibilityPublicStd) {
const DeclContext *DC = RD;
while (1) {
auto *D = cast<Decl>(DC);
DC = DC->getParent();
if (isa<TranslationUnitDecl>(DC->getRedeclContext())) {
if (auto *ND = dyn_cast<NamespaceDecl>(D))
if (const IdentifierInfo *II = ND->getIdentifier())
if (II->isStr("std") || II->isStr("stdext"))
return false;
break;
}
}
}
return true;
return !HasLTOVisibilityPublicStd(RD);
}
llvm::GlobalObject::VCallVisibility

View File

@ -1292,6 +1292,11 @@ public:
/// optimization.
bool HasHiddenLTOVisibility(const CXXRecordDecl *RD);
/// Returns whether the given record has public std LTO visibility
/// and therefore may not participate in (single-module) CFI and whole-program
/// vtable optimization.
bool HasLTOVisibilityPublicStd(const CXXRecordDecl *RD);
/// Returns the vcall visibility of the given type. This is the scope in which
/// a virtual function call could be made which ends up being dispatched to a
/// member function of this class. This scope can be wider than the visibility

View File

@ -670,6 +670,10 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
CGM.HasHiddenLTOVisibility(RD);
bool ShouldEmitVFEInfo = CGM.getCodeGenOpts().VirtualFunctionElimination &&
CGM.HasHiddenLTOVisibility(RD);
bool ShouldEmitWPDInfo =
CGM.getCodeGenOpts().WholeProgramVTables &&
// Don't insert type tests if we are forcing public std visibility.
!CGM.HasLTOVisibilityPublicStd(RD);
llvm::Value *VirtualFn = nullptr;
{
@ -677,8 +681,9 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
llvm::Value *TypeId = nullptr;
llvm::Value *CheckResult = nullptr;
if (ShouldEmitCFICheck || ShouldEmitVFEInfo) {
// If doing CFI or VFE, we will need the metadata node to check against.
if (ShouldEmitCFICheck || ShouldEmitVFEInfo || ShouldEmitWPDInfo) {
// If doing CFI, VFE or WPD, we will need the metadata node to check
// against.
llvm::Metadata *MD =
CGM.CreateMetadataIdentifierForVirtualMemPtrType(QualType(MPT, 0));
TypeId = llvm::MetadataAsValue::get(CGF.getLLVMContext(), MD);
@ -702,7 +707,7 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
} else {
// When not doing VFE, emit a normal load, as it allows more
// optimisations than type.checked.load.
if (ShouldEmitCFICheck) {
if (ShouldEmitCFICheck || ShouldEmitWPDInfo) {
CheckResult = Builder.CreateCall(
CGM.getIntrinsic(llvm::Intrinsic::type_test),
{Builder.CreateBitCast(VFPAddr, CGF.Int8PtrTy), TypeId});
@ -713,7 +718,8 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
"memptr.virtualfn");
}
assert(VirtualFn && "Virtual fuction pointer not created!");
assert((!ShouldEmitCFICheck || !ShouldEmitVFEInfo || CheckResult) &&
assert((!ShouldEmitCFICheck || !ShouldEmitVFEInfo || !ShouldEmitWPDInfo ||
CheckResult) &&
"Check result required but not created!");
if (ShouldEmitCFICheck) {

View File

@ -8,6 +8,7 @@
; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39436.
; RUN: llvm-lto2 run -thinlto-distributed-indexes %t.o \
; RUN: -whole-program-visibility \
; RUN: -verify-machineinstrs=0 \
; RUN: -o %t2.index \
; RUN: -r=%t.o,test,px \

View File

@ -1,5 +1,8 @@
// RUN: %clang_cc1 -triple x86_64-unknown-linux -fsanitize=cfi-mfcall -fsanitize-trap=cfi-mfcall -fvisibility hidden -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple x86_64-unknown-linux -fsanitize=cfi-mfcall -fsanitize-trap=cfi-mfcall -fvisibility default -emit-llvm -o - %s | FileCheck --check-prefix=DEFAULT %s
// With -fwhole-program-vtables we should get the member function pointer type
// test, even without hidden visibility.
// RUN: %clang_cc1 -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm -o - %s | FileCheck %s --check-prefix=WPV
struct B1 {};
struct B2 {};
@ -9,6 +12,9 @@ struct S : B1, B3 {};
// DEFAULT-NOT: llvm.type.test
void f(S *s, void (S::*p)()) {
// WPV: [[OFFSET:%.*]] = sub i64 {{.*}}, 1
// WPV: [[VFPTR:%.*]] = getelementptr i8, i8* %{{.*}}, i64 [[OFFSET]]
// WPV: [[TT:%.*]] = call i1 @llvm.type.test(i8* [[VFPTR]], metadata !"_ZTSM1SFvvE.virtual")
// CHECK: [[OFFSET:%.*]] = sub i64 {{.*}}, 1
// CHECK: [[VFPTR:%.*]] = getelementptr i8, i8* %{{.*}}, i64 [[OFFSET]]
// CHECK: [[TT:%.*]] = call i1 @llvm.type.test(i8* [[VFPTR]], metadata !"_ZTSM1SFvvE.virtual")

View File

@ -70,20 +70,20 @@ void f(C1 *c1, C2 *c2, C3 *c3, C4 *c4, C5 *c5, C6 *c6, std::C7 *c7,
// ITANIUM: type.test{{.*}}!"_ZTS2C1"
// MS: type.test{{.*}}!"?AUC1@@"
c1->f();
// ITANIUM-NOT: type.test{{.*}}!"_ZTS2C2"
// ITANIUM: type.test{{.*}}!"_ZTS2C2"
// MS: type.test{{.*}}!"?AUC2@@"
c2->f();
// ITANIUM: type.test{{.*}}!"_ZTS2C3"
// MS-NOT: type.test{{.*}}!"?AUC3@@"
// MS: type.test{{.*}}!"?AUC3@@"
c3->f();
// ITANIUM: type.test{{.*}}!"_ZTS2C4"
// MS-NOT: type.test{{.*}}!"?AUC4@@"
// MS: type.test{{.*}}!"?AUC4@@"
c4->f();
// ITANIUM-NOT: type.test{{.*}}!"_ZTS2C5"
// MS-NOT: type.test{{.*}}!"?AUC5@@"
// ITANIUM: type.test{{.*}}!"_ZTS2C5"
// MS: type.test{{.*}}!"?AUC5@@"
c5->f();
// ITANIUM-NOT: type.test{{.*}}!"_ZTS2C6"
// MS-NOT: type.test{{.*}}!"?AUC6@@"
// ITANIUM: type.test{{.*}}!"_ZTS2C6"
// MS: type.test{{.*}}!"?AUC6@@"
c6->f();
// ITANIUM: type.test{{.*}}!"_ZTSSt2C7"
// MS-STD: type.test{{.*}}!"?AUC7@std@@"

View File

@ -0,0 +1,69 @@
// Test distributed ThinLTO backend handling of type tests
// REQUIRES: x86-registered-target
// Ensure that a distributed backend invocation of ThinLTO lowers the type test
// as expected.
// RUN: %clang_cc1 -flto=thin -flto-unit -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm-bc -o %t.o %s
// RUN: llvm-dis %t.o -o - | FileCheck --check-prefix=TT %s
// RUN: llvm-lto -thinlto -o %t2 %t.o
// RUN: %clang -target x86_64-unknown-linux -O2 -o %t3.o -x ir %t.o -c -fthinlto-index=%t2.thinlto.bc -save-temps=obj
// RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck --check-prefix=OPT %s
// llvm-nm %t3.o | FileCheck --check-prefix=NM %s
// The pre-link bitcode produced by clang should contain a type test assume
// sequence.
// TT: [[TTREG:%[0-9]+]] = call i1 @llvm.type.test({{.*}}, metadata !"_ZTS1A")
// TT: void @llvm.assume(i1 [[TTREG]])
// The ThinLTO backend optimized bitcode should not have any type test assume
// sequences.
// OPT-NOT: @llvm.type.test
// OPT-NOT: call void @llvm.assume
// We should have only one @llvm.assume call, the one that was expanded
// from the builtin in the IR below, not the one fed by the type test.
// OPT: %cmp = icmp ne %struct.A* %0, null
// OPT: void @llvm.assume(i1 %cmp)
// Check after the builtin assume again that we don't have a type test assume
// sequence.
// OPT-NOT: @llvm.type.test
// OPT-NOT: call void @llvm.assume
// NM: T _Z2afP1A
// Also check that type tests are lowered when the distributed ThinLTO backend
// clang invocation is passed an empty index file, in which case a non-ThinLTO
// compilation pipeline is invoked. If not lowered then LLVM CodeGen may assert.
// RUN: touch %t4.thinlto.bc
// O2 old PM
// RUN: %clang -target x86_64-unknown-linux -O2 -o %t4.o -x ir %t.o -c -fthinlto-index=%t4.thinlto.bc -save-temps=obj
// RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck --check-prefix=OPT %s
// llvm-nm %t4.o | FileCheck --check-prefix=NM %s
// O2 new PM
// RUN: %clang -target x86_64-unknown-linux -O2 -o %t4.o -x ir %t.o -c -fthinlto-index=%t4.thinlto.bc -fexperimental-new-pass-manager -save-temps=obj
// RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck --check-prefix=OPT %s
// llvm-nm %t4.o | FileCheck --check-prefix=NM %s
// O0 new PM
// RUN: %clang -target x86_64-unknown-linux -O0 -o %t4.o -x ir %t.o -c -fthinlto-index=%t4.thinlto.bc -fexperimental-new-pass-manager -save-temps=obj
// RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck --check-prefix=OPT %s
// llvm-nm %t4.o | FileCheck --check-prefix=NM %s
// Test input. A declares a virtual method, so it has a vtable; the checks
// above look for a type test against the type id "_ZTS1A" in the pre-link
// bitcode.
struct A {
A();
virtual void f();
};
// B derives virtually from A and only declares a constructor.
struct B : virtual A {
B();
};
// Out-of-line definitions for the members declared above.
A::A() {}
B::B() {}
void A::f() {
}
// Makes a virtual call through an A pointer. The explicit __builtin_assume
// accounts for the one llvm.assume call (fed by the null comparison) that the
// checks above still expect to find in the optimized bitcode.
void af(A *a) {
__builtin_assume(a != 0);
a->f();
}

View File

@ -6,6 +6,7 @@
// Tests for the whole-program-vtables feature:
// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fvisibility hidden -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=ITANIUM --check-prefix=TT-ITANIUM %s
// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=ITANIUM-DEFAULTVIS --check-prefix=TT-ITANIUM %s
// RUN: %clang_cc1 -flto -flto-unit -triple x86_64-pc-windows-msvc -fwhole-program-vtables -emit-llvm -o - %s | FileCheck --check-prefix=VTABLE-OPT --check-prefix=MS --check-prefix=TT-MS %s
// Tests for cfi + whole-program-vtables:
@ -129,6 +130,7 @@ void D::h() {
}
// ITANIUM: define hidden void @_Z2afP1A
// ITANIUM-DEFAULTVIS: define void @_Z2afP1A
// MS: define dso_local void @"?af@@YAXPEAUA@@@Z"
void af(A *a) {
// TT-ITANIUM: [[P:%[^ ]*]] = call i1 @llvm.type.test(i8* [[VT:%[^ ]*]], metadata !"_ZTS1A")
@ -239,6 +241,7 @@ struct D : C {
};
// ITANIUM: define hidden void @_ZN5test21fEPNS_1DE
// ITANIUM-DEFAULTVIS: define void @_ZN5test21fEPNS_1DE
// MS: define dso_local void @"?f@test2@@YAXPEAUD@1@@Z"
void f(D *d) {
// TT-ITANIUM: {{%[^ ]*}} = call i1 @llvm.type.test(i8* {{%[^ ]*}}, metadata !"_ZTSN5test21DE")

View File

@ -165,6 +165,7 @@ struct Configuration {
bool ltoCSProfileGenerate;
bool ltoDebugPassManager;
bool ltoNewPassManager;
bool ltoWholeProgramVisibility;
bool mergeArmExidx;
bool mipsN32Abi = false;
bool mmapOutputFile;

View File

@ -899,6 +899,8 @@ static void readConfigs(opt::InputArgList &args) {
config->ltoDebugPassManager = args.hasArg(OPT_lto_debug_pass_manager);
config->ltoNewPassManager = args.hasArg(OPT_lto_new_pass_manager);
config->ltoNewPmPasses = args.getLastArgValue(OPT_lto_newpm_passes);
config->ltoWholeProgramVisibility =
args.hasArg(OPT_lto_whole_program_visibility);
config->ltoo = args::getInteger(args, OPT_lto_O, 2);
config->ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq);
config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1);

View File

@ -111,6 +111,8 @@ static lto::Config createConfig() {
c.DebugPassManager = config->ltoDebugPassManager;
c.DwoDir = config->dwoDir;
c.HasWholeProgramVisibility = config->ltoWholeProgramVisibility;
c.CSIRProfile = config->ltoCSProfileFile;
c.RunCSIRInstr = config->ltoCSProfileGenerate;

View File

@ -479,6 +479,8 @@ def lto_cs_profile_file: J<"lto-cs-profile-file=">,
def lto_obj_path_eq: J<"lto-obj-path=">;
def lto_sample_profile: J<"lto-sample-profile=">,
HelpText<"Sample profile file path">;
def lto_whole_program_visibility: F<"lto-whole-program-visibility">,
HelpText<"Asserts that the LTO link has whole program visibility">;
def disable_verify: F<"disable-verify">;
defm mllvm: Eq<"mllvm", "Additional arguments to forward to LLVM's option processing">;
def opt_remarks_filename: Separate<["--"], "opt-remarks-filename">,

View File

@ -0,0 +1,127 @@
; Test that -lto-whole-program-visibility enables devirtualization.
; Index based WPD
; Generate unsplit module with summary for ThinLTO index-based WPD.
; RUN: opt -thinlto-bc -o %t2.o %s
; RUN: ld.lld %t2.o -o %t3 -save-temps -lto-whole-program-visibility \
; RUN: -mllvm -pass-remarks=. --export-dynamic 2>&1 | FileCheck %s --check-prefix=REMARK
; RUN: llvm-dis %t2.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
; Hybrid WPD
; Generate split module with summary for hybrid Thin/Regular LTO WPD.
; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t.o %s
; RUN: ld.lld %t.o -o %t3 -save-temps -lto-whole-program-visibility \
; RUN: -mllvm -pass-remarks=. --export-dynamic 2>&1 | FileCheck %s --check-prefix=REMARK
; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
; Regular LTO WPD
; RUN: opt -o %t4.o %s
; RUN: ld.lld %t4.o -o %t3 -save-temps -lto-whole-program-visibility \
; RUN: -mllvm -pass-remarks=. --export-dynamic 2>&1 | FileCheck %s --check-prefix=REMARK
; RUN: llvm-dis %t3.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
; REMARK-DAG: single-impl: devirtualized a call to _ZN1A1nEi
; REMARK-DAG: single-impl: devirtualized a call to _ZN1D1mEi
; Try everything again but without -lto-whole-program-visibility to confirm
; WPD fails
; Index based WPD
; RUN: ld.lld %t2.o -o %t3 -save-temps \
; RUN: -mllvm -pass-remarks=. --export-dynamic 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty
; RUN: llvm-dis %t2.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR
; Hybrid WPD
; RUN: ld.lld %t.o -o %t3 -save-temps \
; RUN: -mllvm -pass-remarks=. --export-dynamic 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty
; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR
; Regular LTO WPD
; RUN: ld.lld %t4.o -o %t3 -save-temps \
; RUN: -mllvm -pass-remarks=. --export-dynamic 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty
; RUN: llvm-dis %t3.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-grtev4-linux-gnu"
%struct.A = type { i32 (...)** }
%struct.B = type { %struct.A }
%struct.C = type { %struct.A }
%struct.D = type { i32 (...)** }
@_ZTV1B = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !1, !vcall_visibility !5
@_ZTV1C = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.C*, i32)* @_ZN1C1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !2, !vcall_visibility !5
@_ZTV1D = constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3, !vcall_visibility !5
; CHECK-IR-LABEL: define dso_local i32 @_start
define i32 @_start(%struct.A* %obj, %struct.D* %obj2, i32 %a) {
entry:
%0 = bitcast %struct.A* %obj to i8***
%vtable = load i8**, i8*** %0
%1 = bitcast i8** %vtable to i8*
%p = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A")
call void @llvm.assume(i1 %p)
%fptrptr = getelementptr i8*, i8** %vtable, i32 1
%2 = bitcast i8** %fptrptr to i32 (%struct.A*, i32)**
%fptr1 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %2, align 8
; Check that the call was devirtualized.
; CHECK-IR: %call = tail call i32 @_ZN1A1nEi
; CHECK-NODEVIRT-IR: %call = tail call i32 %fptr1
%call = tail call i32 %fptr1(%struct.A* nonnull %obj, i32 %a)
%3 = bitcast i8** %vtable to i32 (%struct.A*, i32)**
%fptr22 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %3, align 8
; We still have to call it as virtual.
; CHECK-IR: %call3 = tail call i32 %fptr22
; CHECK-NODEVIRT-IR: %call3 = tail call i32 %fptr22
%call3 = tail call i32 %fptr22(%struct.A* nonnull %obj, i32 %call)
%4 = bitcast %struct.D* %obj2 to i8***
%vtable2 = load i8**, i8*** %4
%5 = bitcast i8** %vtable2 to i8*
%p2 = call i1 @llvm.type.test(i8* %5, metadata !4)
call void @llvm.assume(i1 %p2)
%6 = bitcast i8** %vtable2 to i32 (%struct.D*, i32)**
%fptr33 = load i32 (%struct.D*, i32)*, i32 (%struct.D*, i32)** %6, align 8
; Check that the call was devirtualized.
; CHECK-IR: %call4 = tail call i32 @_ZN1D1mEi
; CHECK-NODEVIRT-IR: %call4 = tail call i32 %fptr33
%call4 = tail call i32 %fptr33(%struct.D* nonnull %obj2, i32 %call3)
ret i32 %call4
}
; CHECK-IR-LABEL: ret i32
; CHECK-IR-LABEL: }
declare i1 @llvm.type.test(i8*, metadata)
declare void @llvm.assume(i1)
; Virtual function bodies referenced from the vtables defined above. Each one
; simply returns 0 and carries attribute group #0.

; Referenced only from the @_ZTV1B initializer.
define i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) #0 {
ret i32 0;
}
; Referenced from both the @_ZTV1B and @_ZTV1C initializers; the remarks
; checked above expect calls to it to be devirtualized.
define i32 @_ZN1A1nEi(%struct.A* %this, i32 %a) #0 {
ret i32 0;
}
; Referenced only from the @_ZTV1C initializer.
define i32 @_ZN1C1fEi(%struct.C* %this, i32 %a) #0 {
ret i32 0;
}
; The only function in the @_ZTV1D initializer; the remarks checked above
; expect calls to it to be devirtualized.
define i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) #0 {
ret i32 0;
}
; Make sure we don't inline or otherwise optimize out the direct calls.
attributes #0 = { noinline optnone }
!0 = !{i64 16, !"_ZTS1A"}
!1 = !{i64 16, !"_ZTS1B"}
!2 = !{i64 16, !"_ZTS1C"}
!3 = !{i64 16, !4}
!4 = distinct !{}
!5 = !{i64 0}

View File

@ -61,6 +61,10 @@ struct Config {
/// Run PGO context sensitive IR instrumentation.
bool RunCSIRInstr = false;
/// Asserts whether we can assume whole program visibility during the LTO
/// link.
bool HasWholeProgramVisibility = false;
/// If this field is set, the set of passes run in the middle-end optimizer
/// will be the one specified by the string. Only works with the new pass
/// manager as the old one doesn't have this ability.

View File

@ -236,12 +236,15 @@ enum class PassSummaryAction {
/// The behavior depends on the summary arguments:
/// - If ExportSummary is non-null, this pass will export type identifiers to
/// the given summary.
/// - Otherwise, if ImportSummary is non-null, this pass will import type
/// identifiers from the given summary.
/// - Otherwise it does neither.
/// It is invalid for both ExportSummary and ImportSummary to be non-null.
/// - If ImportSummary is non-null, this pass will import type identifiers from
/// the given summary.
/// - Otherwise, if both are null and DropTypeTests is true, all type test
/// assume sequences will be removed from the IR.
/// It is invalid for both ExportSummary and ImportSummary to be non-null
/// unless DropTypeTests is true.
ModulePass *createLowerTypeTestsPass(ModuleSummaryIndex *ExportSummary,
const ModuleSummaryIndex *ImportSummary);
const ModuleSummaryIndex *ImportSummary,
bool DropTypeTests = false);
/// This pass export CFI checks for use by external modules.
ModulePass *createCrossDSOCFIPass();

View File

@ -201,9 +201,12 @@ class LowerTypeTestsPass : public PassInfoMixin<LowerTypeTestsPass> {
public:
ModuleSummaryIndex *ExportSummary;
const ModuleSummaryIndex *ImportSummary;
bool DropTypeTests;
LowerTypeTestsPass(ModuleSummaryIndex *ExportSummary,
const ModuleSummaryIndex *ImportSummary)
: ExportSummary(ExportSummary), ImportSummary(ImportSummary) {}
const ModuleSummaryIndex *ImportSummary,
bool DropTypeTests = false)
: ExportSummary(ExportSummary), ImportSummary(ImportSummary),
DropTypeTests(DropTypeTests) {}
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};

View File

@ -236,6 +236,11 @@ struct VTableSlotSummary {
uint64_t ByteOffset;
};
void updateVCallVisibilityInModule(Module &M,
bool WholeProgramVisibilityEnabledInLTO);
void updateVCallVisibilityInIndex(ModuleSummaryIndex &Index,
bool WholeProgramVisibilityEnabledInLTO);
/// Perform index-based whole program devirtualization on the \p Summary
/// index. Any devirtualized targets used by a type test in another module
/// are added to the \p ExportedGUIDs set. For any local devirtualized targets

View File

@ -982,6 +982,11 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
}
}
// If allowed, upgrade public vcall visibility metadata to linkage unit
// visibility before whole program devirtualization in the optimizer.
updateVCallVisibilityInModule(*RegularLTO.CombinedModule,
Conf.HasWholeProgramVisibility);
if (Conf.PreOptModuleHook &&
!Conf.PreOptModuleHook(0, *RegularLTO.CombinedModule))
return Error::success();
@ -1299,6 +1304,11 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
std::set<GlobalValue::GUID> ExportedGUIDs;
// If allowed, upgrade public vcall visibility to linkage unit visibility in
// the summaries before whole program devirtualization below.
updateVCallVisibilityInIndex(ThinLTO.CombinedIndex,
Conf.HasWholeProgramVisibility);
// Perform index-based WPD. This will return immediately if there are
// no index entries in the typeIdMetadata map (e.g. if we are instead
// performing IR-based WPD in hybrid regular/thin LTO mode).

View File

@ -57,6 +57,7 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <system_error>
@ -542,6 +543,13 @@ bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline,
}
StatsFile = std::move(StatsFileOrErr.get());
// Currently there is no support for enabling whole program visibility via a
// linker option in the old LTO API, but this call allows it to be specified
// via the internal option. Must be done before WPD invoked via the optimizer
// pipeline run below.
updateVCallVisibilityInModule(*MergedModule,
/* WholeProgramVisibilityEnabledInLTO */ false);
// We always run the verifier once on the merged module, the `DisableVerify`
// parameter only applies to subsequent verify.
verifyMergedModuleOnce();

View File

@ -969,6 +969,12 @@ void ThinLTOCodeGenerator::run() {
// Synthesize entry counts for functions in the combined index.
computeSyntheticCounts(*Index);
// Currently there is no support for enabling whole program visibility via a
// linker option in the old LTO API, but this call allows it to be specified
// via the internal option. Must be done before WPD below.
updateVCallVisibilityInIndex(*Index,
/* WholeProgramVisibilityEnabledInLTO */ false);
// Perform index-based WPD. This will return immediately if there are
// no index entries in the typeIdMetadata map (e.g. if we are instead
// performing IR-based WPD in hybrid regular/thin LTO mode).

View File

@ -382,6 +382,9 @@ class LowerTypeTestsModule {
ModuleSummaryIndex *ExportSummary;
const ModuleSummaryIndex *ImportSummary;
// Set when the client has invoked this to simply drop all type test assume
// sequences.
bool DropTypeTests;
Triple::ArchType Arch;
Triple::OSType OS;
@ -500,7 +503,8 @@ class LowerTypeTestsModule {
public:
LowerTypeTestsModule(Module &M, ModuleSummaryIndex *ExportSummary,
const ModuleSummaryIndex *ImportSummary);
const ModuleSummaryIndex *ImportSummary,
bool DropTypeTests);
bool lower();
@ -516,22 +520,24 @@ struct LowerTypeTests : public ModulePass {
ModuleSummaryIndex *ExportSummary;
const ModuleSummaryIndex *ImportSummary;
bool DropTypeTests;
LowerTypeTests() : ModulePass(ID), UseCommandLine(true) {
initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry());
}
LowerTypeTests(ModuleSummaryIndex *ExportSummary,
const ModuleSummaryIndex *ImportSummary)
const ModuleSummaryIndex *ImportSummary, bool DropTypeTests)
: ModulePass(ID), ExportSummary(ExportSummary),
ImportSummary(ImportSummary) {
ImportSummary(ImportSummary), DropTypeTests(DropTypeTests) {
initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry());
}
bool runOnModule(Module &M) override {
if (UseCommandLine)
return LowerTypeTestsModule::runForTesting(M);
return LowerTypeTestsModule(M, ExportSummary, ImportSummary).lower();
return LowerTypeTestsModule(M, ExportSummary, ImportSummary, DropTypeTests)
.lower();
}
};
@ -544,8 +550,9 @@ INITIALIZE_PASS(LowerTypeTests, "lowertypetests", "Lower type metadata", false,
ModulePass *
llvm::createLowerTypeTestsPass(ModuleSummaryIndex *ExportSummary,
const ModuleSummaryIndex *ImportSummary) {
return new LowerTypeTests(ExportSummary, ImportSummary);
const ModuleSummaryIndex *ImportSummary,
bool DropTypeTests) {
return new LowerTypeTests(ExportSummary, ImportSummary, DropTypeTests);
}
/// Build a bit set for TypeId using the object layouts in
@ -1655,8 +1662,9 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
/// Lower all type tests in this module.
LowerTypeTestsModule::LowerTypeTestsModule(
Module &M, ModuleSummaryIndex *ExportSummary,
const ModuleSummaryIndex *ImportSummary)
: M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary) {
const ModuleSummaryIndex *ImportSummary, bool DropTypeTests)
: M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary),
DropTypeTests(DropTypeTests) {
assert(!(ExportSummary && ImportSummary));
Triple TargetTriple(M.getTargetTriple());
Arch = TargetTriple.getArch();
@ -1683,7 +1691,8 @@ bool LowerTypeTestsModule::runForTesting(Module &M) {
bool Changed =
LowerTypeTestsModule(
M, ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr,
ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr)
ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr,
/*DropTypeTests*/ false)
.lower();
if (!ClWriteSummary.empty()) {
@ -1750,6 +1759,33 @@ void LowerTypeTestsModule::replaceDirectCalls(Value *Old, Value *New) {
}
bool LowerTypeTestsModule::lower() {
Function *TypeTestFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_test));
if (DropTypeTests && TypeTestFunc) {
for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end();
UI != UE;) {
auto *CI = cast<CallInst>((*UI++).getUser());
// Find and erase llvm.assume intrinsics for this llvm.type.test call.
for (auto CIU = CI->use_begin(), CIUE = CI->use_end(); CIU != CIUE;) {
if (auto *AssumeCI = dyn_cast<CallInst>((*CIU++).getUser())) {
Function *F = AssumeCI->getCalledFunction();
if (F && F->getIntrinsicID() == Intrinsic::assume)
AssumeCI->eraseFromParent();
}
}
CI->eraseFromParent();
}
// We have deleted the type intrinsics, so we no longer have enough
// information to reason about the liveness of virtual function pointers
// in GlobalDCE.
for (GlobalVariable &GV : M.globals())
GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
return true;
}
// If only some of the modules were split, we cannot correctly perform
// this transformation. We already checked for the presence of type tests
// with partially split modules during the thin link, and would have emitted
@ -1758,8 +1794,6 @@ bool LowerTypeTestsModule::lower() {
(ImportSummary && ImportSummary->partiallySplitLTOUnits()))
return false;
Function *TypeTestFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_test));
Function *ICallBranchFunnelFunc =
M.getFunction(Intrinsic::getName(Intrinsic::icall_branch_funnel));
if ((!TypeTestFunc || TypeTestFunc->use_empty()) &&
@ -2196,7 +2230,9 @@ bool LowerTypeTestsModule::lower() {
PreservedAnalyses LowerTypeTestsPass::run(Module &M,
ModuleAnalysisManager &AM) {
bool Changed = LowerTypeTestsModule(M, ExportSummary, ImportSummary).lower();
bool Changed =
LowerTypeTestsModule(M, ExportSummary, ImportSummary, DropTypeTests)
.lower();
if (!Changed)
return PreservedAnalyses::all();
return PreservedAnalyses::none();

View File

@ -134,6 +134,22 @@ static cl::opt<bool>
cl::init(false), cl::ZeroOrMore,
cl::desc("Print index-based devirtualization messages"));
/// Provide a way to force enable whole program visibility in tests.
/// This is needed to support legacy tests that don't contain
/// !vcall_visibility metadata (the mere presence of type tests
/// previously implied hidden visibility).
/// Hidden option that defaults to false.
cl::opt<bool>
WholeProgramVisibility("whole-program-visibility", cl::init(false),
cl::Hidden, cl::ZeroOrMore,
cl::desc("Enable whole program visibility"));
/// Provide a way to force disable whole program visibility, for debugging or
/// workarounds, when it has been enabled via the linker. Hidden option that
/// defaults to false; when set it overrides every enabling option.
cl::opt<bool> DisableWholeProgramVisibility(
"disable-whole-program-visibility", cl::init(false), cl::Hidden,
cl::ZeroOrMore,
cl::desc("Disable whole program visibility (overrides enabling options)"));
// Find the minimum offset that we may store a value of size Size bits at. If
// IsAfter is set, look for an offset before the object, otherwise look for an
// offset after the object.
@ -702,7 +718,49 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
return PreservedAnalyses::none();
}
// Decide whether whole program visibility is in effect. The force-disable
// option always wins; otherwise visibility is on when either the client
// (e.g. the linker) or the internal enabling option requested it.
static bool hasWholeProgramVisibility(bool WholeProgramVisibilityEnabledInLTO) {
  if (DisableWholeProgramVisibility)
    return false;
  return WholeProgramVisibilityEnabledInLTO || WholeProgramVisibility;
}
namespace llvm {
/// Upgrade vtable definitions in the IR of \p M from public vcall visibility
/// to linkage unit visibility (for regular or hybrid LTO). This is a no-op
/// unless whole program visibility has been asserted.
void updateVCallVisibilityInModule(Module &M,
                                   bool WholeProgramVisibilityEnabledInLTO) {
  const bool ShouldUpgrade =
      hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO);
  if (!ShouldUpgrade)
    return;

  for (GlobalVariable &Var : M.globals()) {
    // Variables with type metadata are the vtable definitions; anything else
    // is not of interest here.
    if (!Var.hasMetadata(LLVMContext::MD_type))
      continue;
    // Only public vtables are upgraded. We won't have an existing
    // vcall_visibility metadata on vtable definitions with public visibility.
    if (Var.getVCallVisibility() != GlobalObject::VCallVisibilityPublic)
      continue;
    Var.setVCallVisibilityMetadata(GlobalObject::VCallVisibilityLinkageUnit);
  }
}
/// Upgrade vtable definition summaries in \p Index from public vcall
/// visibility to linkage unit visibility (for ThinLTO). This is a no-op
/// unless whole program visibility has been asserted.
void updateVCallVisibilityInIndex(ModuleSummaryIndex &Index,
                                  bool WholeProgramVisibilityEnabledInLTO) {
  if (!hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
    return;

  for (auto &Entry : Index) {
    for (auto &Summary : Entry.second.SummaryList) {
      // Only global variable summaries that record vtable functions and
      // still have public visibility are upgraded.
      if (auto *VTable = dyn_cast<GlobalVarSummary>(Summary.get())) {
        const bool IsPublicVTable =
            !VTable->vTableFuncs().empty() &&
            VTable->getVCallVisibility() == GlobalObject::VCallVisibilityPublic;
        if (IsPublicVTable)
          VTable->setVCallVisibility(GlobalObject::VCallVisibilityLinkageUnit);
      }
    }
  }
}
void runWholeProgramDevirtOnIndex(
ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,
std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
@ -818,6 +876,12 @@ bool DevirtModule::tryFindVirtualCallTargets(
if (!TM.Bits->GV->isConstant())
return false;
// We cannot perform whole program devirtualization analysis on a vtable
// with public LTO visibility.
if (TM.Bits->GV->getVCallVisibility() ==
GlobalObject::VCallVisibilityPublic)
return false;
Constant *Ptr = getPointerAtOffset(TM.Bits->GV->getInitializer(),
TM.Offset + ByteOffset, M);
if (!Ptr)
@ -863,8 +927,13 @@ bool DevirtIndex::tryFindVirtualCallTargets(
return false;
LocalFound = true;
}
if (!GlobalValue::isAvailableExternallyLinkage(S->linkage()))
if (!GlobalValue::isAvailableExternallyLinkage(S->linkage())) {
VS = cast<GlobalVarSummary>(S->getBaseObject());
// We cannot perform whole program devirtualization analysis on a vtable
// with public LTO visibility.
if (VS->getVCallVisibility() == GlobalObject::VCallVisibilityPublic)
return false;
}
}
if (!VS->isLive())
continue;
@ -1808,6 +1877,12 @@ bool DevirtModule::run() {
removeRedundantTypeTests();
// We have lowered or deleted the type intrinsics, so we will no
// longer have enough information to reason about the liveness of virtual
// function pointers in GlobalDCE.
for (GlobalVariable &GV : M.globals())
GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
// The rest of the code is only necessary when exporting or during regular
// LTO, so we are done.
return true;
@ -1931,7 +2006,7 @@ bool DevirtModule::run() {
for (VTableBits &B : Bits)
rebuildGlobal(B);
// We have lowered or deleted the type checked load intrinsics, so we no
// We have lowered or deleted the type intrinsics, so we will no
// longer have enough information to reason about the liveness of virtual
// function pointers in GlobalDCE.
for (GlobalVariable &GV : M.globals())

View File

@ -9,17 +9,17 @@
; where both t and t-import are sensitive to typeid1's resolution
; so 4 distinct objects in total.
; RUN: rm -rf %t.cache
; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f1_actual,plx -r=%t.bc,f2,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx
; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc %t1.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f1_actual,plx -r=%t.bc,f2,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx -r=%t1.bc,vt1,plx
; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f1_actual,plx -r=%t.bc,f2,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx -whole-program-visibility
; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc %t1.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f1_actual,plx -r=%t.bc,f2,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx -r=%t1.bc,vt1,plx -whole-program-visibility
; RUN: ls %t.cache | count 4
; Three resolutions for typeid2: Indir, SingleImpl, UniqueRetVal
; where both t and t-import are sensitive to typeid2's resolution
; so 6 distinct objects in total.
; RUN: rm -rf %t.cache
; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f2,plx -r=%t.bc,f1_actual,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx
; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc %t2.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f2,plx -r=%t.bc,f1_actual,plx -r=%t2.bc,vt2,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx
; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc %t3.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f2,plx -r=%t.bc,f1_actual,plx -r=%t3.bc,vt2a,plx -r=%t3.bc,vt2b,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx
; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f2,plx -r=%t.bc,f1_actual,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx -whole-program-visibility
; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc %t2.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f2,plx -r=%t.bc,f1_actual,plx -r=%t2.bc,vt2,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx -whole-program-visibility
; RUN: llvm-lto2 run -o %t.o %t.bc %t-import.bc %t3.bc -cache-dir %t.cache -r=%t.bc,f1,plx -r=%t.bc,f2,plx -r=%t.bc,f1_actual,plx -r=%t3.bc,vt2a,plx -r=%t3.bc,vt2b,plx -r=%t-import.bc,importf1,plx -r=%t-import.bc,f1,lx -r=%t-import.bc,importf2,plx -r=%t-import.bc,f2,lx -whole-program-visibility
; RUN: ls %t.cache | count 6
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"

View File

@ -6,6 +6,7 @@
; Legacy PM
; RUN: llvm-lto2 run %t.o -save-temps -pass-remarks=. \
; RUN: -whole-program-visibility \
; RUN: -o %t3 \
; RUN: -r=%t.o,test,px \
; RUN: -r=%t.o,_ZN1A1nEi,p \
@ -23,6 +24,7 @@
; New PM
; RUN: llvm-lto2 run %t.o -save-temps -use-new-pm -pass-remarks=. \
; RUN: -whole-program-visibility \
; RUN: -o %t3 \
; RUN: -r=%t.o,test,px \
; RUN: -r=%t.o,_ZN1A1nEi,p \
@ -46,6 +48,7 @@
; to ensure it is being caught in the thin link.
; RUN: opt -thinlto-bc -o %t2.o %S/Inputs/empty.ll
; RUN: not llvm-lto2 run %t.o %t2.o -thinlto-distributed-indexes \
; RUN: -whole-program-visibility \
; RUN: -o %t3 \
; RUN: -r=%t.o,test,px \
; RUN: -r=%t.o,_ZN1A1nEi,p \

View File

@ -46,6 +46,7 @@
; Legacy PM
; RUN: llvm-lto2 run %t.o -save-temps -pass-remarks=. \
; RUN: -whole-program-visibility \
; RUN: -o %t3 \
; RUN: -r=%t.o,_Z3bazP1A,px \
; RUN: -r=%t.o,_ZN1A3fooEv, \
@ -64,6 +65,7 @@
; New PM
; RUN: llvm-lto2 run %t.o -save-temps -use-new-pm -pass-remarks=. \
; RUN: -whole-program-visibility \
; RUN: -o %t3 \
; RUN: -r=%t.o,_Z3bazP1A,px \
; RUN: -r=%t.o,_ZN1A3fooEv, \

View File

@ -35,6 +35,7 @@
; Legacy PM, Index based WPD
; RUN: llvm-lto2 run %t2.o -save-temps -pass-remarks=. \
; RUN: -whole-program-visibility \
; RUN: -o %t3 \
; RUN: -r=%t2.o,test,px \
; RUN: -r=%t2.o,_ZN1A1nEi,p \
@ -48,6 +49,7 @@
; New PM, Index based WPD
; RUN: llvm-lto2 run %t2.o -save-temps -use-new-pm -pass-remarks=. \
; RUN: -whole-program-visibility \
; RUN: -o %t3 \
; RUN: -r=%t2.o,test,px \
; RUN: -r=%t2.o,_ZN1A1nEi,p \
@ -62,6 +64,7 @@
; Legacy PM
; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39436.
; RUN: llvm-lto2 run %t.o -save-temps -pass-remarks=. \
; RUN: -whole-program-visibility \
; RUN: -verify-machineinstrs=0 \
; RUN: -o %t3 \
; RUN: -r=%t.o,test,px \
@ -84,6 +87,7 @@
; New PM
; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39436.
; RUN: llvm-lto2 run %t.o -save-temps -use-new-pm -pass-remarks=. \
; RUN: -whole-program-visibility \
; RUN: -verify-machineinstrs=0 \
; RUN: -o %t3 \
; RUN: -r=%t.o,test,px \

View File

@ -36,6 +36,7 @@
; Legacy PM, Index based WPD
; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -pass-remarks=. \
; RUN: -whole-program-visibility \
; RUN: -wholeprogramdevirt-print-index-based \
; RUN: -o %t5 \
; RUN: -r=%t3.o,test,px \
@ -59,6 +60,7 @@
; New PM, Index based WPD
; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -use-new-pm -pass-remarks=. \
; RUN: -whole-program-visibility \
; RUN: -wholeprogramdevirt-print-index-based \
; RUN: -o %t5 \
; RUN: -r=%t3.o,test,px \
@ -92,6 +94,7 @@
; Index based WPD, distributed backends
; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -use-new-pm \
; RUN: -whole-program-visibility \
; RUN: -thinlto-distributed-indexes -wholeprogramdevirt-print-index-based \
; RUN: -o %t5 \
; RUN: -r=%t3.o,test,px \
@ -115,6 +118,7 @@
; Legacy PM
; RUN: llvm-lto2 run %t1.o %t2.o -save-temps -pass-remarks=. \
; RUN: -whole-program-visibility \
; RUN: -o %t5 \
; RUN: -r=%t1.o,test,px \
; RUN: -r=%t1.o,_ZTV1B, \
@ -150,6 +154,7 @@
; New PM
; RUN: llvm-lto2 run %t1.o %t2.o -save-temps -use-new-pm -pass-remarks=. \
; RUN: -whole-program-visibility \
; RUN: -o %t5 \
; RUN: -r=%t1.o,test,px \
; RUN: -r=%t1.o,_ZTV1B, \

View File

@ -7,6 +7,7 @@
; RUN: opt -thinlto-bc -o %t4.o %p/Inputs/devirt_alias.ll
; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -pass-remarks=. \
; RUN: -whole-program-visibility \
; RUN: -wholeprogramdevirt-print-index-based \
; RUN: -o %t5 \
; RUN: -r=%t3.o,test,px \

View File

@ -18,6 +18,7 @@
; EXTERNAL: gv: (name: "_ZTV1D", {{.*}} vTableFuncs: ((virtFunc:
; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -pass-remarks=. \
; RUN: -whole-program-visibility \
; RUN: -wholeprogramdevirt-print-index-based \
; RUN: -o %t5 \
; RUN: -r=%t3.o,test,px \

View File

@ -8,6 +8,7 @@
; RUN: opt -thinlto-bc -o %t4.o %p/Inputs/devirt_external_comdat_same_guid.ll
; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -use-new-pm -pass-remarks=. \
; RUN: -whole-program-visibility \
; RUN: -wholeprogramdevirt-print-index-based \
; RUN: -o %t5 \
; RUN: -r=%t3.o,use_B,px \

View File

@ -10,6 +10,7 @@
; RUN: opt -thinlto-bc -o %t4.o %p/Inputs/devirt_promote.ll
; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -use-new-pm -pass-remarks=. \
; RUN: -whole-program-visibility \
; RUN: -wholeprogramdevirt-print-index-based \
; RUN: -o %t5 \
; RUN: -r=%t3.o,test,px \

View File

@ -10,6 +10,7 @@
; RUN: opt -thinlto-bc -o %t4.o %p/Inputs/devirt_promote.ll
; RUN: llvm-lto -thinlto-action=run %t3.o %t4.o --thinlto-save-temps=%t5. \
; RUN: -whole-program-visibility \
; RUN: --pass-remarks=. \
; RUN: --exported-symbol=test \
; RUN: --exported-symbol=test2 \

View File

@ -5,6 +5,7 @@
; RUN: opt -thinlto-bc -thinlto-split-lto-unit %p/Inputs/devirt_single_hybrid_foo.ll -o %t-foo.bc
; RUN: opt -thinlto-bc -thinlto-split-lto-unit %p/Inputs/devirt_single_hybrid_bar.ll -o %t-bar.bc
; RUN: llvm-lto2 run -save-temps %t-main.bc %t-foo.bc %t-bar.bc -pass-remarks=. -o %t \
; RUN: -whole-program-visibility \
; RUN: -r=%t-foo.bc,_Z3fooP1A,pl \
; RUN: -r=%t-main.bc,main,plx \
; RUN: -r=%t-main.bc,_Z3barv,l \

View File

@ -0,0 +1,143 @@
; REQUIRES: x86-registered-target
; Test devirtualization through the thin link and backend, when vtables
; have vcall_visibility metadata with linkage unit (hidden) visibility.
; Index based WPD
; Generate unsplit module with summary for ThinLTO index-based WPD.
; RUN: opt -thinlto-bc -o %t2.o %s
; RUN: llvm-lto2 run %t2.o -save-temps -use-new-pm -pass-remarks=. \
; RUN: -o %t3 \
; RUN: -r=%t2.o,test,px \
; RUN: -r=%t2.o,_ZN1A1nEi,p \
; RUN: -r=%t2.o,_ZN1B1fEi,p \
; RUN: -r=%t2.o,_ZN1C1fEi,p \
; RUN: -r=%t2.o,_ZN1D1mEi,p \
; RUN: -r=%t2.o,_ZTV1B,px \
; RUN: -r=%t2.o,_ZTV1C,px \
; RUN: -r=%t2.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK
; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
; Hybrid WPD
; Generate split module with summary for hybrid Thin/Regular LTO WPD.
; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t.o %s
; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39436.
; RUN: llvm-lto2 run %t.o -save-temps -use-new-pm -pass-remarks=. \
; RUN: -verify-machineinstrs=0 \
; RUN: -o %t3 \
; RUN: -r=%t.o,test,px \
; RUN: -r=%t.o,_ZN1A1nEi,p \
; RUN: -r=%t.o,_ZN1B1fEi,p \
; RUN: -r=%t.o,_ZN1C1fEi,p \
; RUN: -r=%t.o,_ZN1D1mEi,p \
; RUN: -r=%t.o,_ZTV1B, \
; RUN: -r=%t.o,_ZTV1C, \
; RUN: -r=%t.o,_ZTV1D, \
; RUN: -r=%t.o,_ZN1A1nEi, \
; RUN: -r=%t.o,_ZN1B1fEi, \
; RUN: -r=%t.o,_ZN1C1fEi, \
; RUN: -r=%t.o,_ZN1D1mEi, \
; RUN: -r=%t.o,_ZTV1B,px \
; RUN: -r=%t.o,_ZTV1C,px \
; RUN: -r=%t.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK --dump-input=fail
; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
; Regular LTO WPD
; RUN: opt -o %t4.o %s
; RUN: llvm-lto2 run %t4.o -save-temps -use-new-pm -pass-remarks=. \
; RUN: -whole-program-visibility \
; RUN: -o %t5 \
; RUN: -r=%t4.o,test,px \
; RUN: -r=%t4.o,_ZN1A1nEi,p \
; RUN: -r=%t4.o,_ZN1B1fEi,p \
; RUN: -r=%t4.o,_ZN1C1fEi,p \
; RUN: -r=%t4.o,_ZN1D1mEi,p \
; RUN: -r=%t4.o,_ZTV1B,px \
; RUN: -r=%t4.o,_ZTV1C,px \
; RUN: -r=%t4.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK
; RUN: llvm-dis %t5.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
; REMARK-DAG: single-impl: devirtualized a call to _ZN1A1nEi
; REMARK-DAG: single-impl: devirtualized a call to _ZN1D1mEi
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-grtev4-linux-gnu"
%struct.A = type { i32 (...)** }
%struct.B = type { %struct.A }
%struct.C = type { %struct.A }
%struct.D = type { i32 (...)** }
@_ZTV1B = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !1, !vcall_visibility !5
@_ZTV1C = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.C*, i32)* @_ZN1C1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !2, !vcall_visibility !5
@_ZTV1D = constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3, !vcall_visibility !5
; CHECK-IR-LABEL: define i32 @test
define i32 @test(%struct.A* %obj, %struct.D* %obj2, i32 %a) {
entry:
%0 = bitcast %struct.A* %obj to i8***
%vtable = load i8**, i8*** %0
%1 = bitcast i8** %vtable to i8*
%p = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A")
call void @llvm.assume(i1 %p)
%fptrptr = getelementptr i8*, i8** %vtable, i32 1
%2 = bitcast i8** %fptrptr to i32 (%struct.A*, i32)**
%fptr1 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %2, align 8
; Check that the call was devirtualized.
; CHECK-IR: %call = tail call i32 @_ZN1A1nEi
%call = tail call i32 %fptr1(%struct.A* nonnull %obj, i32 %a)
%3 = bitcast i8** %vtable to i32 (%struct.A*, i32)**
%fptr22 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %3, align 8
; We still have to call it as virtual.
; CHECK-IR: %call3 = tail call i32 %fptr22
%call3 = tail call i32 %fptr22(%struct.A* nonnull %obj, i32 %call)
%4 = bitcast %struct.D* %obj2 to i8***
%vtable2 = load i8**, i8*** %4
%5 = bitcast i8** %vtable2 to i8*
%p2 = call i1 @llvm.type.test(i8* %5, metadata !4)
call void @llvm.assume(i1 %p2)
%6 = bitcast i8** %vtable2 to i32 (%struct.D*, i32)**
%fptr33 = load i32 (%struct.D*, i32)*, i32 (%struct.D*, i32)** %6, align 8
; Check that the call was devirtualized.
; CHECK-IR: %call4 = tail call i32 @_ZN1D1mEi
%call4 = tail call i32 %fptr33(%struct.D* nonnull %obj2, i32 %call3)
ret i32 %call4
}
; CHECK-IR-LABEL: ret i32
; CHECK-IR-LABEL: }
declare i1 @llvm.type.test(i8*, metadata)
declare void @llvm.assume(i1)
define i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) #0 {
ret i32 0;
}
define i32 @_ZN1A1nEi(%struct.A* %this, i32 %a) #0 {
ret i32 0;
}
define i32 @_ZN1C1fEi(%struct.C* %this, i32 %a) #0 {
ret i32 0;
}
define i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) #0 {
ret i32 0;
}
; Make sure we don't inline or otherwise optimize out the direct calls.
attributes #0 = { noinline optnone }
!0 = !{i64 16, !"_ZTS1A"}
!1 = !{i64 16, !"_ZTS1B"}
!2 = !{i64 16, !"_ZTS1C"}
!3 = !{i64 16, !4}
!4 = distinct !{}
!5 = !{i64 1}

View File

@ -0,0 +1,215 @@
; REQUIRES: x86-registered-target
; Test devirtualization through the thin link and backend, when vtables
; have vcall_visibility metadata with public visibility.
; Index based WPD
; Generate unsplit module with summary for ThinLTO index-based WPD.
; RUN: opt -thinlto-bc -o %t2.o %s
; RUN: llvm-lto2 run %t2.o -save-temps -use-new-pm -pass-remarks=. \
; RUN: -whole-program-visibility \
; RUN: -o %t3 \
; RUN: -r=%t2.o,test,px \
; RUN: -r=%t2.o,_ZN1A1nEi,p \
; RUN: -r=%t2.o,_ZN1B1fEi,p \
; RUN: -r=%t2.o,_ZN1C1fEi,p \
; RUN: -r=%t2.o,_ZN1D1mEi,p \
; RUN: -r=%t2.o,_ZTV1B,px \
; RUN: -r=%t2.o,_ZTV1C,px \
; RUN: -r=%t2.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK
; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
; Hybrid WPD
; Generate split module with summary for hybrid Thin/Regular LTO WPD.
; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t.o %s
; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39436.
; RUN: llvm-lto2 run %t.o -save-temps -use-new-pm -pass-remarks=. \
; RUN: -whole-program-visibility \
; RUN: -verify-machineinstrs=0 \
; RUN: -o %t3 \
; RUN: -r=%t.o,test,px \
; RUN: -r=%t.o,_ZN1A1nEi,p \
; RUN: -r=%t.o,_ZN1B1fEi,p \
; RUN: -r=%t.o,_ZN1C1fEi,p \
; RUN: -r=%t.o,_ZN1D1mEi,p \
; RUN: -r=%t.o,_ZTV1B, \
; RUN: -r=%t.o,_ZTV1C, \
; RUN: -r=%t.o,_ZTV1D, \
; RUN: -r=%t.o,_ZN1A1nEi, \
; RUN: -r=%t.o,_ZN1B1fEi, \
; RUN: -r=%t.o,_ZN1C1fEi, \
; RUN: -r=%t.o,_ZN1D1mEi, \
; RUN: -r=%t.o,_ZTV1B,px \
; RUN: -r=%t.o,_ZTV1C,px \
; RUN: -r=%t.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK --dump-input=fail
; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
; Regular LTO WPD
; RUN: opt -o %t4.o %s
; RUN: llvm-lto2 run %t4.o -save-temps -use-new-pm -pass-remarks=. \
; RUN: -whole-program-visibility \
; RUN: -o %t5 \
; RUN: -r=%t4.o,test,px \
; RUN: -r=%t4.o,_ZN1A1nEi,p \
; RUN: -r=%t4.o,_ZN1B1fEi,p \
; RUN: -r=%t4.o,_ZN1C1fEi,p \
; RUN: -r=%t4.o,_ZN1D1mEi,p \
; RUN: -r=%t4.o,_ZTV1B,px \
; RUN: -r=%t4.o,_ZTV1C,px \
; RUN: -r=%t4.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK
; RUN: llvm-dis %t5.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
; REMARK-DAG: single-impl: devirtualized a call to _ZN1A1nEi
; REMARK-DAG: single-impl: devirtualized a call to _ZN1D1mEi
; Try everything again but without -whole-program-visibility to confirm
; WPD fails
; Index based WPD
; RUN: llvm-lto2 run %t2.o -save-temps -use-new-pm -pass-remarks=. \
; RUN: -o %t3 \
; RUN: -r=%t2.o,test,px \
; RUN: -r=%t2.o,_ZN1A1nEi,p \
; RUN: -r=%t2.o,_ZN1B1fEi,p \
; RUN: -r=%t2.o,_ZN1C1fEi,p \
; RUN: -r=%t2.o,_ZN1D1mEi,p \
; RUN: -r=%t2.o,_ZTV1B,px \
; RUN: -r=%t2.o,_ZTV1C,px \
; RUN: -r=%t2.o,_ZTV1D,px 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty
; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR
; Hybrid WPD
; RUN: llvm-lto2 run %t.o -save-temps -use-new-pm -pass-remarks=. \
; RUN: -verify-machineinstrs=0 \
; RUN: -o %t3 \
; RUN: -r=%t.o,test,px \
; RUN: -r=%t.o,_ZN1A1nEi,p \
; RUN: -r=%t.o,_ZN1B1fEi,p \
; RUN: -r=%t.o,_ZN1C1fEi,p \
; RUN: -r=%t.o,_ZN1D1mEi,p \
; RUN: -r=%t.o,_ZTV1B, \
; RUN: -r=%t.o,_ZTV1C, \
; RUN: -r=%t.o,_ZTV1D, \
; RUN: -r=%t.o,_ZN1A1nEi, \
; RUN: -r=%t.o,_ZN1B1fEi, \
; RUN: -r=%t.o,_ZN1C1fEi, \
; RUN: -r=%t.o,_ZN1D1mEi, \
; RUN: -r=%t.o,_ZTV1B,px \
; RUN: -r=%t.o,_ZTV1C,px \
; RUN: -r=%t.o,_ZTV1D,px 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty
; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR
; Regular LTO WPD
; RUN: llvm-lto2 run %t4.o -save-temps -use-new-pm -pass-remarks=. \
; RUN: -o %t5 \
; RUN: -r=%t4.o,test,px \
; RUN: -r=%t4.o,_ZN1A1nEi,p \
; RUN: -r=%t4.o,_ZN1B1fEi,p \
; RUN: -r=%t4.o,_ZN1C1fEi,p \
; RUN: -r=%t4.o,_ZN1D1mEi,p \
; RUN: -r=%t4.o,_ZTV1B,px \
; RUN: -r=%t4.o,_ZTV1C,px \
; RUN: -r=%t4.o,_ZTV1D,px 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty
; RUN: llvm-dis %t5.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR
; Try index-based WPD again with both -whole-program-visibility and
; -disable-whole-program-visibility to confirm the latter overrides
; the former and that WPD fails.
; RUN: llvm-lto2 run %t2.o -save-temps -use-new-pm -pass-remarks=. \
; RUN: -whole-program-visibility \
; RUN: -disable-whole-program-visibility \
; RUN: -o %t3 \
; RUN: -r=%t2.o,test,px \
; RUN: -r=%t2.o,_ZN1A1nEi,p \
; RUN: -r=%t2.o,_ZN1B1fEi,p \
; RUN: -r=%t2.o,_ZN1C1fEi,p \
; RUN: -r=%t2.o,_ZN1D1mEi,p \
; RUN: -r=%t2.o,_ZTV1B,px \
; RUN: -r=%t2.o,_ZTV1C,px \
; RUN: -r=%t2.o,_ZTV1D,px 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty
; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-grtev4-linux-gnu"
%struct.A = type { i32 (...)** }
%struct.B = type { %struct.A }
%struct.C = type { %struct.A }
%struct.D = type { i32 (...)** }
@_ZTV1B = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !1, !vcall_visibility !5
@_ZTV1C = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.C*, i32)* @_ZN1C1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !2, !vcall_visibility !5
@_ZTV1D = constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3, !vcall_visibility !5
; CHECK-IR-LABEL: define i32 @test
define i32 @test(%struct.A* %obj, %struct.D* %obj2, i32 %a) {
entry:
%0 = bitcast %struct.A* %obj to i8***
%vtable = load i8**, i8*** %0
%1 = bitcast i8** %vtable to i8*
%p = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A")
call void @llvm.assume(i1 %p)
%fptrptr = getelementptr i8*, i8** %vtable, i32 1
%2 = bitcast i8** %fptrptr to i32 (%struct.A*, i32)**
%fptr1 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %2, align 8
; Check that the call was devirtualized.
; CHECK-IR: %call = tail call i32 @_ZN1A1nEi
; CHECK-NODEVIRT-IR: %call = tail call i32 %fptr1
%call = tail call i32 %fptr1(%struct.A* nonnull %obj, i32 %a)
%3 = bitcast i8** %vtable to i32 (%struct.A*, i32)**
%fptr22 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %3, align 8
; We still have to call it as virtual.
; CHECK-IR: %call3 = tail call i32 %fptr22
; CHECK-NODEVIRT-IR: %call3 = tail call i32 %fptr22
%call3 = tail call i32 %fptr22(%struct.A* nonnull %obj, i32 %call)
%4 = bitcast %struct.D* %obj2 to i8***
%vtable2 = load i8**, i8*** %4
%5 = bitcast i8** %vtable2 to i8*
%p2 = call i1 @llvm.type.test(i8* %5, metadata !4)
call void @llvm.assume(i1 %p2)
%6 = bitcast i8** %vtable2 to i32 (%struct.D*, i32)**
%fptr33 = load i32 (%struct.D*, i32)*, i32 (%struct.D*, i32)** %6, align 8
; Check that the call was devirtualized.
; CHECK-IR: %call4 = tail call i32 @_ZN1D1mEi
; CHECK-NODEVIRT-IR: %call4 = tail call i32 %fptr33
%call4 = tail call i32 %fptr33(%struct.D* nonnull %obj2, i32 %call3)
ret i32 %call4
}
; CHECK-IR-LABEL: ret i32
; CHECK-IR-LABEL: }
declare i1 @llvm.type.test(i8*, metadata)
declare void @llvm.assume(i1)
define i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) #0 {
ret i32 0;
}
define i32 @_ZN1A1nEi(%struct.A* %this, i32 %a) #0 {
ret i32 0;
}
define i32 @_ZN1C1fEi(%struct.C* %this, i32 %a) #0 {
ret i32 0;
}
define i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) #0 {
ret i32 0;
}
; Make sure we don't inline or otherwise optimize out the direct calls.
attributes #0 = { noinline optnone }
!0 = !{i64 16, !"_ZTS1A"}
!1 = !{i64 16, !"_ZTS1B"}
!2 = !{i64 16, !"_ZTS1C"}
!3 = !{i64 16, !4}
!4 = distinct !{}
!5 = !{i64 0}

View File

@ -1,4 +1,4 @@
; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s
target datalayout = "e-p:64:64"
target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,8 +1,8 @@
; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -wholeprogramdevirt-branch-funnel-threshold=1 -S -o - %s | not grep @llvm.icall.branch.funnel | count 0
; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -wholeprogramdevirt-branch-funnel-threshold=1 -S -o - %s | not grep @llvm.icall.branch.funnel | count 0
; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -wholeprogramdevirt-branch-funnel-threshold=10 -S -o - %s | grep @llvm.icall.branch.funnel | count 4
; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -wholeprogramdevirt-branch-funnel-threshold=10 -S -o - %s | grep @llvm.icall.branch.funnel | count 4
; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -wholeprogramdevirt-branch-funnel-threshold=100 -S -o - %s | grep @llvm.icall.branch.funnel | count 5
; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -wholeprogramdevirt-branch-funnel-threshold=100 -S -o - %s | grep @llvm.icall.branch.funnel | count 5
target datalayout = "e-p:64:64"
target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,9 +1,9 @@
; RUN: opt -S -wholeprogramdevirt %s | FileCheck --check-prefixes=CHECK,RETP %s
; RUN: sed -e 's,+retpoline,-retpoline,g' %s | opt -S -wholeprogramdevirt | FileCheck --check-prefixes=CHECK,NORETP %s
; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck --check-prefixes=CHECK,RETP %s
; RUN: sed -e 's,+retpoline,-retpoline,g' %s | opt -S -wholeprogramdevirt -whole-program-visibility | FileCheck --check-prefixes=CHECK,NORETP %s
; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck --check-prefixes=CHECK,RETP %s
; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck --check-prefixes=CHECK,RETP %s
; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -O3 -S -o - %s | FileCheck --check-prefixes=CHECK %s
; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -O3 -S -o - %s | FileCheck --check-prefixes=CHECK %s
; RUN: FileCheck --check-prefix=SUMMARY %s < %t

View File

@ -1,5 +1,5 @@
; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
; RUN: opt -S -passes=wholeprogramdevirt %s | FileCheck %s
; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s
; RUN: opt -S -passes=wholeprogramdevirt -whole-program-visibility %s | FileCheck %s
target datalayout = "e-p:64:64"
target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,4 +1,4 @@
; RUN: opt -S -wholeprogramdevirt -pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s
; RUN: opt -S -wholeprogramdevirt -whole-program-visibility -pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s
target datalayout = "e-p:64:64"
target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,4 +1,4 @@
; RUN: opt -S -wholeprogramdevirt -pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s
; RUN: opt -S -wholeprogramdevirt -whole-program-visibility -pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s
target datalayout = "e-p:64:64"
target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,4 +1,4 @@
; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s
; Test that we correctly expand the llvm.type.checked.load intrinsic in cases
; where we cannot devirtualize.

View File

@ -1,4 +1,4 @@
; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-write-summary=%t -o /dev/null %s
; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-write-summary=%t -o /dev/null %s
; RUN: FileCheck %s < %t
; CHECK: ---

View File

@ -1,4 +1,4 @@
; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck %s
; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck %s
; RUN: FileCheck --check-prefix=SUMMARY %s < %t
; SUMMARY: TypeIdMap:

View File

@ -1,4 +1,4 @@
; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck %s
; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck %s
; RUN: FileCheck --check-prefix=SUMMARY %s < %t
; SUMMARY-NOT: TypeTests:

View File

@ -1,4 +1,4 @@
; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck %s
; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck %s
; RUN: FileCheck --check-prefix=SUMMARY %s < %t
; SUMMARY-NOT: TypeTests:

View File

@ -1,4 +1,4 @@
; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -o /dev/null %s
; RUN: opt -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -o /dev/null %s
; RUN: FileCheck %s < %t
; CHECK: TypeTests: [ 15427464259790519041, 17525413373118030901 ]

View File

@ -1,7 +1,7 @@
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck --check-prefixes=CHECK,X86 %s
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck --check-prefixes=CHECK,X86 %s
; RUN: FileCheck --check-prefixes=SUMMARY,SUMMARY-X86 %s < %t
; RUN: opt -mtriple=armv7-unknown-linux-gnu -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck --check-prefixes=CHECK,ARM %s
; RUN: opt -mtriple=armv7-unknown-linux-gnu -wholeprogramdevirt -whole-program-visibility -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck --check-prefixes=CHECK,ARM %s
; RUN: FileCheck --check-prefixes=SUMMARY,SUMMARY-ARM %s < %t
target datalayout = "e-p:64:64"

View File

@ -1,4 +1,4 @@
; RUN: opt -S -wholeprogramdevirt -pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s
; RUN: opt -S -wholeprogramdevirt -whole-program-visibility -pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s
; CHECK-NOT: devirtualized call

View File

@ -1,4 +1,4 @@
; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s
target datalayout = "e-p:64:64"
target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,4 +1,4 @@
; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s
target datalayout = "e-p:64:64"
target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,4 +1,4 @@
; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s
target datalayout = "e-p:64:64"
target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,4 +1,4 @@
; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s
target datalayout = "e-p:64:64"
target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,4 +1,4 @@
; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s
target datalayout = "e-p:64:64"
target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,4 +1,4 @@
; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s
target datalayout = "e-p:64:64"
target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,5 +1,5 @@
; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
; RUN: opt -S -passes=wholeprogramdevirt %s | FileCheck %s
; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s
; RUN: opt -S -passes=wholeprogramdevirt -whole-program-visibility %s | FileCheck %s
target datalayout = "e-p:64:64"
target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,4 +1,4 @@
; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s
target datalayout = "e-p:64:64"
target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,4 +1,4 @@
; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s
target datalayout = "e-p:64:64"
target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,4 +1,4 @@
; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s
target datalayout = "e-p:64:64"
target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,4 +1,4 @@
; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s
target datalayout = "e-p:64:64"
target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,4 +1,4 @@
; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s
; Test that we correctly handle function type mismatches in argument counts
; and bitwidths. We handle an argument count mismatch by refusing

View File

@ -1,4 +1,4 @@
; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s
target datalayout = "e-p:64:64"
target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,4 +1,4 @@
; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s
target datalayout = "e-p:64:64"
target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,4 +1,4 @@
; RUN: opt -S -wholeprogramdevirt -pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s
; RUN: opt -S -wholeprogramdevirt -whole-program-visibility -pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s
target datalayout = "e-p:64:64"
target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,4 +1,4 @@
; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s | FileCheck %s
target datalayout = "e-p:64:64"
target triple = "x86_64-unknown-linux-gnu"

View File

@ -1,5 +1,5 @@
; Check that we don't crash when processing declaration with type metadata
; RUN: opt -S -wholeprogramdevirt %s
; RUN: opt -S -wholeprogramdevirt -whole-program-visibility %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-none-linux-gnu"

View File

@ -0,0 +1,148 @@
; Test that plugin option whole-program-visibility enables devirtualization.
; Index based WPD
; Generate unsplit module with summary for ThinLTO index-based WPD.
; RUN: opt -thinlto-bc -o %t2.o %s
; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \
; RUN: --plugin-opt=whole-program-visibility \
; RUN: --plugin-opt=save-temps \
; RUN: --plugin-opt=-pass-remarks=. \
; RUN: %t2.o -o %t3 \
; RUN: --export-dynamic 2>&1 | FileCheck %s --check-prefix=REMARK
; RUN: llvm-dis %t2.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
; Hybrid WPD
; Generate split module with summary for hybrid Thin/Regular LTO WPD.
; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t.o %s
; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \
; RUN: --plugin-opt=whole-program-visibility \
; RUN: --plugin-opt=save-temps \
; RUN: --plugin-opt=-pass-remarks=. \
; RUN: %t.o -o %t3 \
; RUN: --export-dynamic 2>&1 | FileCheck %s --check-prefix=REMARK
; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
; Regular LTO WPD
; RUN: opt -o %t4.o %s
; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \
; RUN: --plugin-opt=whole-program-visibility \
; RUN: --plugin-opt=save-temps \
; RUN: --plugin-opt=-pass-remarks=. \
; RUN: %t4.o -o %t3 \
; RUN: --export-dynamic 2>&1 | FileCheck %s --check-prefix=REMARK
; RUN: llvm-dis %t3.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
; REMARK-DAG: single-impl: devirtualized a call to _ZN1A1nEi
; REMARK-DAG: single-impl: devirtualized a call to _ZN1D1mEi
; Try everything again but without -whole-program-visibility to confirm that
; WPD does not devirtualize any of the calls.
; Index based WPD
; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \
; RUN: --plugin-opt=save-temps \
; RUN: --plugin-opt=-pass-remarks=. \
; RUN: %t2.o -o %t3 \
; RUN: --export-dynamic 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty
; RUN: llvm-dis %t2.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR
; Hybrid WPD
; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \
; RUN: --plugin-opt=save-temps \
; RUN: --plugin-opt=-pass-remarks=. \
; RUN: %t.o -o %t3 \
; RUN: --export-dynamic 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty
; RUN: llvm-dis %t.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR
; Regular LTO WPD
; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold%shlibext \
; RUN: --plugin-opt=save-temps \
; RUN: --plugin-opt=-pass-remarks=. \
; RUN: %t4.o -o %t3 \
; RUN: --export-dynamic 2>&1 | FileCheck %s --implicit-check-not single-impl --allow-empty
; RUN: llvm-dis %t3.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-NODEVIRT-IR
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-grtev4-linux-gnu"
; Object layouts. A and D each hold only a vtable pointer; B and C each wrap
; an A (presumably modelling classes derived from A -- confirm against the
; C++ source this IR was generated from).
%struct.A = type { i32 (...)** }
%struct.B = type { %struct.A }
%struct.C = type { %struct.A }
%struct.D = type { i32 (...)** }
; Vtables. Every vtable carries !type metadata with byte offset 16 (element 2
; of the array, assuming 8-byte pointers) and the same !vcall_visibility !5
; attachment.
; _ZTV1B and _ZTV1C are both tagged with !0 ("_ZTS1A"). Element 2 differs
; between them (@_ZN1B1fEi vs. @_ZN1C1fEi) while element 3 is @_ZN1A1nEi in
; both.
@_ZTV1B = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !1, !vcall_visibility !5
@_ZTV1C = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.C*, i32)* @_ZN1C1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !2, !vcall_visibility !5
; _ZTV1D is the only vtable tagged with !3, whose type identifier is the
; distinct node !4; its element 2 is @_ZN1D1mEi.
@_ZTV1D = constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3, !vcall_visibility !5
; CHECK-IR-LABEL: define dso_local i32 @_start
; Test driver: makes three indirect calls through vtable pointers loaded from
; %obj and %obj2, chaining each call's result into the next call's i32
; argument, and returns the result of the last call. The FileCheck directives
; interleaved below match on the exact value names used here, so the
; instruction sequence must not be renamed or reordered.
define i32 @_start(%struct.A* %obj, %struct.D* %obj2, i32 %a) {
entry:
; Load %obj's vtable pointer and assume (type test + assume) that it is
; compatible with type id "_ZTS1A", which is attached to both _ZTV1B and
; _ZTV1C.
%0 = bitcast %struct.A* %obj to i8***
%vtable = load i8**, i8*** %0
%1 = bitcast i8** %vtable to i8*
%p = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A")
call void @llvm.assume(i1 %p)
; Slot 1 past the address point: holds @_ZN1A1nEi in both _ZTV1B and _ZTV1C,
; so there is a single possible target for this call.
%fptrptr = getelementptr i8*, i8** %vtable, i32 1
%2 = bitcast i8** %fptrptr to i32 (%struct.A*, i32)**
%fptr1 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %2, align 8
; Check that the call was devirtualized.
; CHECK-IR: %call = tail call i32 @_ZN1A1nEi
; CHECK-NODEVIRT-IR: %call = tail call i32 %fptr1
%call = tail call i32 %fptr1(%struct.A* nonnull %obj, i32 %a)
; Slot 0 (the address point itself): holds @_ZN1B1fEi in _ZTV1B but
; @_ZN1C1fEi in _ZTV1C, so there are two possible targets and the call is
; expected to stay indirect in both test configurations.
%3 = bitcast i8** %vtable to i32 (%struct.A*, i32)**
%fptr22 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %3, align 8
; We still have to call it as virtual.
; CHECK-IR: %call3 = tail call i32 %fptr22
; CHECK-NODEVIRT-IR: %call3 = tail call i32 %fptr22
%call3 = tail call i32 %fptr22(%struct.A* nonnull %obj, i32 %call)
; Same pattern for %obj2, but the type test uses the distinct metadata node
; !4, which is attached only to _ZTV1D.
%4 = bitcast %struct.D* %obj2 to i8***
%vtable2 = load i8**, i8*** %4
%5 = bitcast i8** %vtable2 to i8*
%p2 = call i1 @llvm.type.test(i8* %5, metadata !4)
call void @llvm.assume(i1 %p2)
; Slot 0 of the only compatible vtable (_ZTV1D) holds @_ZN1D1mEi.
%6 = bitcast i8** %vtable2 to i32 (%struct.D*, i32)**
%fptr33 = load i32 (%struct.D*, i32)*, i32 (%struct.D*, i32)** %6, align 8
; Check that the call was devirtualized.
; CHECK-IR: %call4 = tail call i32 @_ZN1D1mEi
; CHECK-NODEVIRT-IR: %call4 = tail call i32 %fptr33
%call4 = tail call i32 %fptr33(%struct.D* nonnull %obj2, i32 %call3)
ret i32 %call4
}
; CHECK-IR-LABEL: ret i32
; CHECK-IR-LABEL: }
declare i1 @llvm.type.test(i8*, metadata)
declare void @llvm.assume(i1)
; Stub referenced from element 2 of _ZTV1B; ignores its arguments and always
; returns 0. The ';' after the ret instruction starts an (empty) IR comment.
define i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) #0 {
ret i32 0;
}
; Stub referenced from element 3 of both _ZTV1B and _ZTV1C, i.e. the one
; implementation shared by every vtable tagged "_ZTS1A". Ignores its arguments
; and always returns 0.
define i32 @_ZN1A1nEi(%struct.A* %this, i32 %a) #0 {
ret i32 0;
}
; Stub referenced from element 2 of _ZTV1C; ignores its arguments and always
; returns 0.
define i32 @_ZN1C1fEi(%struct.C* %this, i32 %a) #0 {
ret i32 0;
}
; Stub referenced from element 2 of _ZTV1D, the only vtable tagged with the
; distinct type id !4. Ignores its arguments and always returns 0.
define i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) #0 {
ret i32 0;
}
; Make sure we don't inline or otherwise optimize out the direct calls.
attributes #0 = { noinline optnone }
; !type operands: (byte offset into the vtable global, type identifier).
; !0 is attached to both _ZTV1B and _ZTV1C; !1 and !2 are attached to one
; vtable each.
!0 = !{i64 16, !"_ZTS1A"}
!1 = !{i64 16, !"_ZTS1B"}
!2 = !{i64 16, !"_ZTS1C"}
; _ZTV1D uses the distinct, unnamed node !4 as its type identifier instead of
; a string; the second llvm.type.test in @_start passes that same node.
!3 = !{i64 16, !4}
!4 = distinct !{}
; Operand of every !vcall_visibility attachment in this file. Presumably 0
; encodes public vcall visibility -- confirm against the LLVM type metadata
; documentation.
!5 = !{i64 0}

View File

@ -204,6 +204,8 @@ namespace options {
static std::string dwo_dir;
/// Statistics output filename.
static std::string stats_file;
// Asserts that LTO link has whole program visibility
static bool whole_program_visibility = false;
// Optimization remarks filename, accepted passes and hotness options
static std::string RemarksFilename;
@ -283,6 +285,8 @@ namespace options {
new_pass_manager = true;
} else if (opt == "debug-pass-manager") {
debug_pass_manager = true;
} else if (opt == "whole-program-visibility") {
whole_program_visibility = true;
} else if (opt.startswith("dwo_dir=")) {
dwo_dir = opt.substr(strlen("dwo_dir="));
} else if (opt.startswith("opt-remarks-filename=")) {
@ -926,6 +930,8 @@ static std::unique_ptr<LTO> createLTO(IndexWriteCallback OnIndexWrite,
// Debug new pass manager if requested
Conf.DebugPassManager = options::debug_pass_manager;
Conf.HasWholeProgramVisibility = options::whole_program_visibility;
Conf.StatsFile = options::stats_file;
return std::make_unique<LTO>(std::move(Conf), Backend,
options::ParallelCodeGenParallelismLevel);

View File

@ -54,6 +54,7 @@
#include "llvm/Transforms/Coroutines.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Debugify.h"
#include <algorithm>
@ -625,6 +626,13 @@ int main(int argc, char **argv) {
return 1;
}
// Enable testing of whole program devirtualization on this module by invoking
// the facility for updating public visibility to linkage unit visibility when
// specified by an internal option. This is normally done during LTO which is
// not performed via opt.
updateVCallVisibilityInModule(*M,
/* WholeProgramVisibilityEnabledInLTO */ false);
// Figure out what stream we are supposed to write to...
std::unique_ptr<ToolOutputFile> Out;
std::unique_ptr<ToolOutputFile> ThinLinkOut;