[ThinLTO] Disallow importing for functions with indir branch to block address

We don't allowing inlining for functions with blockaddress with uses other than strictly callbr. This is because if the blockaddress escapes the function via a global variable, inlining may lead to an invalid cross-function reference.

We check against such cases during inlining, however the check can fail for ThinLTO post-link because CFG simplification can incorrectly removes blocks based on wrong block reachability.

When we import a function with blockaddress taken in a global variable but without importing that variable, we won't go through value mapping to reflect the real address-taken-ness of the cloned blocks. For the imported clone, this leads to blocks reachable from indirect branch through global variable being incorrectly treated as unreachable and removed by SimplifyCFG.

Since inlining for such cases shouldn't be allowed in the first place, I'm marking them as ineligible for importing during pre-link to save the problem of missing address-taken-ness of imported clone as well as bad DCE and inlining.

Differential Revision: https://reviews.llvm.org/D106930
This commit is contained in:
Wenlei He 2021-07-27 18:00:55 -07:00
parent c6ad3f2157
commit 1a8087adaf
3 changed files with 36 additions and 8 deletions

View File

@ -264,7 +264,20 @@ static void computeFunctionSummary(
std::vector<const Instruction *> NonVolatileStores;
bool HasInlineAsmMaybeReferencingInternal = false;
for (const BasicBlock &BB : F)
bool HasIndirBranchToBlockAddress = false;
for (const BasicBlock &BB : F) {
// We don't allow inlining of function with indirect branch to blockaddress.
// If the blockaddress escapes the function, e.g., via a global variable,
// inlining may lead to an invalid cross-function reference. So we shouldn't
// import such function either.
if (BB.hasAddressTaken()) {
for (User *U : BlockAddress::get(const_cast<BasicBlock *>(&BB))->users())
if (!isa<CallBrInst>(*U)) {
HasIndirBranchToBlockAddress = true;
break;
}
}
for (const Instruction &I : BB) {
if (isa<DbgInfoIntrinsic>(I))
continue;
@ -386,6 +399,7 @@ static void computeFunctionSummary(
.updateHotness(getHotness(Candidate.Count, PSI));
}
}
}
Index.addBlockCount(F.size());
std::vector<ValueInfo> Refs;
@ -452,8 +466,9 @@ static void computeFunctionSummary(
: CalleeInfo::HotnessType::Critical);
bool NonRenamableLocal = isNonRenamableLocal(F);
bool NotEligibleForImport =
NonRenamableLocal || HasInlineAsmMaybeReferencingInternal;
bool NotEligibleForImport = NonRenamableLocal ||
HasInlineAsmMaybeReferencingInternal ||
HasIndirBranchToBlockAddress;
GlobalValueSummary::GVFlags Flags(
F.getLinkage(), F.getVisibility(), NotEligibleForImport,
/* Live = */ false, F.isDSOLocal(),

View File

@ -1,10 +1,15 @@
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@label_addr = internal constant [1 x i8*] [i8* blockaddress(@foo, %lb)], align 8
@label_addr = internal constant [1 x i8*] [i8* blockaddress(@bar, %lb)], align 8
; Function Attrs: noinline norecurse nounwind optnone uwtable
define dso_local [1 x i8*]* @foo() {
ret [1 x i8*]* @label_addr
}
; Function Attrs: noinline norecurse nounwind optnone uwtable
define dso_local [1 x i8*]* @bar() {
br label %lb
lb:

View File

@ -1,18 +1,26 @@
; RUN: opt -module-summary %s -o %t1.bc
; RUN: opt -module-summary %p/Inputs/globals-import-blockaddr.ll -o %t2.bc
; RUN: llvm-lto2 run -save-temps %t1.bc -r=%t1.bc,foo,l -r=%t1.bc,main,pl %t2.bc -r=%t2.bc,foo,pl -o %t3
; RUN: llvm-lto2 run -save-temps %t1.bc -r=%t1.bc,foo,l -r=%t1.bc,bar,l -r=%t1.bc,main,pl %t2.bc -r=%t2.bc,foo,pl -r=%t2.bc,bar,pl -o %t3
; RUN: llvm-dis %t3.1.3.import.bc -o - | FileCheck %s
; Verify that we haven't imported GV containing blockaddress
; CHECK: @label_addr.llvm.0 = external hidden constant
; Verify that bar is not imported since it has address-taken block that is target of indirect branch
; CHECK: declare [1 x i8*]* @bar()
; Verify that foo is imported
; CHECK: available_externally [1 x i8*]* @foo
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
declare dso_local [1 x i8*]* @foo();
declare dso_local [1 x i8*]* @bar();
define dso_local i32 @main() {
%p = call [1 x i8*]* @foo()
%v = ptrtoint [1 x i8*]* %p to i32
ret i32 %v
%p1 = call [1 x i8*]* @foo()
%p2 = call [1 x i8*]* @bar()
%v1 = ptrtoint [1 x i8*]* %p1 to i32
%v2 = ptrtoint [1 x i8*]* %p2 to i32
%v3 = add i32 %v1, %v2
ret i32 %v3
}