[INLINER] allow inlining of blockaddresses if sole uses are callbrs

Summary:
It was assumed that Ref LazyCallGraph::Edges were being inserted by
inlining, but that does not appear to be the case. Instead, there was no
check for a blockaddress Constant in an instruction that references the
function containing that instruction. Ex:

```
define void @f() {
  %1 = alloca i8*, align 8
  br label %2

2:
  store i8* blockaddress(@f, %2), i8** %1, align 8
  ret void
}
```

When iterating over blockaddresses, do not add the function they refer to
back to the worklist if the blockaddress refers to the function that
contains it (as opposed to a different function).

Because blockaddress has slightly different semantics than GNU C's
address-of-labels extension, there are 3 cases that can occur with
blockaddress, where only 1 can happen in GNU C due to C's scoping rules
(the latter two are sketched below):
* blockaddress is within the function it refers to (possible in GNU C).
* blockaddress is within a different function than the one it refers to
(not possible in GNU C).
* blockaddress is used in the initializer of a global (not possible in
GNU C).
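
For illustration, here are minimal IR sketches of the second and third
cases; the function, label, and global names are hypothetical and not
taken from this patch:

```
; Case 2: @g takes the address of a block inside @f.
define void @f() {
entry:
  br label %target

target:
  ret void
}

define void @g(i8** %ptr) {
  store i8* blockaddress(@f, %target), i8** %ptr, align 8
  ret void
}

; Case 3: a blockaddress used in the initializer of a global.
@ba = internal global i8* blockaddress(@f, %target), align 8
```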

The second case is tested in:

```
$ ./llvm/build/unittests/Analysis/AnalysisTests \
  --gtest_filter=LazyCallGraphTest.HandleBlockAddress
```

This patch adjusts the iteration over blockaddresses in
LazyCallGraph::visitReferences to not revisit the blockaddress's
function in the first case.

The Linux kernel contains code that is not semantically valid at -O0,
specifically code passed to asm goto; it requires that functions
containing asm goto be inlinable. This patch conservatively does not
attempt to handle the more general case of inlining blockaddresses that
have non-callbr users (pr/39560).
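
For context, the sketch below shows roughly the IR shape that asm goto
lowers to: a callbr whose blockaddress arguments are used only by the
callbr itself, which is the pattern this patch allows the inliner to
handle. The asm string and names are illustrative only:

```
define i32 @has_asm_goto(i32 %x) {
entry:
  ; Hypothetical asm goto lowering; the sole user of the blockaddress is
  ; this callbr.
  callbr void asm sideeffect "testl $0, $0; jne ${1:l};", "r,X,~{dirflag},~{fpsr},~{flags}"(i32 %x, i8* blockaddress(@has_asm_goto, %err))
          to label %ok [label %err]

ok:
  ret i32 0

err:
  ret i32 1
}
```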

https://bugs.llvm.org/show_bug.cgi?id=39560
https://bugs.llvm.org/show_bug.cgi?id=40722
https://github.com/ClangBuiltLinux/linux/issues/6
https://reviews.llvm.org/rL212077

Reviewers: jyknight, eli.friedman, chandlerc

Reviewed By: chandlerc

Subscribers: george.burgess.iv, nathanchance, mgorny, craig.topper, mengxu.gatech, void, mehdi_amini, E5ten, chandlerc, efriedma, eraman, hiraditya, haicheng, pirama, llvm-commits, srhines

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D58260

llvm-svn: 361173
Nick Desaulniers 2019-05-20 16:48:09 +00:00
parent 4d05a974b7
commit 639b29b1b5
5 changed files with 199 additions and 15 deletions


@@ -38,6 +38,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -1082,12 +1083,26 @@ public:
         continue;
       }
 
-      // The blockaddress constant expression is a weird special case, we can't
-      // generically walk its operands the way we do for all other constants.
       if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
+        // The blockaddress constant expression is a weird special case, we
+        // can't generically walk its operands the way we do for all other
+        // constants.
-        if (Visited.insert(BA->getFunction()).second)
-          Worklist.push_back(BA->getFunction());
+        // If we've already visited the function referred to by the block
+        // address, we don't need to revisit it.
+        if (Visited.count(BA->getFunction()))
+          continue;
+        // If all of the blockaddress' users are instructions within the
+        // referred to function, we don't need to insert a cycle.
+        if (llvm::all_of(BA->users(), [&](User *U) {
+              if (Instruction *I = dyn_cast<Instruction>(U))
+                return I->getFunction() == BA->getFunction();
+              return false;
+            }))
+          continue;
+        // Otherwise we should go visit the referred to function.
+        Visited.insert(BA->getFunction());
+        Worklist.push_back(BA->getFunction());
         continue;
       }


@@ -1830,14 +1830,18 @@ InlineResult CallAnalyzer::analyzeCall(CallBase &Call) {
     if (BB->empty())
       continue;
 
-    // Disallow inlining a blockaddress. A blockaddress only has defined
-    // behavior for an indirect branch in the same function, and we do not
-    // currently support inlining indirect branches. But, the inliner may not
-    // see an indirect branch that ends up being dead code at a particular call
-    // site. If the blockaddress escapes the function, e.g., via a global
-    // variable, inlining may lead to an invalid cross-function reference.
+    // Disallow inlining a blockaddress with uses other than strictly callbr.
+    // A blockaddress only has defined behavior for an indirect branch in the
+    // same function, and we do not currently support inlining indirect
+    // branches. But, the inliner may not see an indirect branch that ends up
+    // being dead code at a particular call site. If the blockaddress escapes
+    // the function, e.g., via a global variable, inlining may lead to an
+    // invalid cross-function reference.
+    // FIXME: pr/39560: continue relaxing this overt restriction.
     if (BB->hasAddressTaken())
-      return "blockaddress";
+      for (User *U : BlockAddress::get(&*BB)->users())
+        if (!isa<CallBrInst>(*U))
+          return "blockaddress used outside of callbr";
 
     // Analyze the cost of this block. If we blow through the threshold, this
     // returns false, and we can bail on out.
@@ -2081,13 +2085,16 @@ InlineCost llvm::getInlineCost(
 InlineResult llvm::isInlineViable(Function &F) {
   bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
   for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
-    // Disallow inlining of functions which contain indirect branches or
-    // blockaddresses.
+    // Disallow inlining of functions which contain indirect branches.
     if (isa<IndirectBrInst>(BI->getTerminator()))
       return "contains indirect branches";
 
+    // Disallow inlining of blockaddresses which are used by non-callbr
+    // instructions.
     if (BI->hasAddressTaken())
-      return "uses block address";
+      for (User *U : BlockAddress::get(&*BI)->users())
+        if (!isa<CallBrInst>(*U))
+          return "blockaddress used outside of callbr";
 
     for (auto &II : *BI) {
       CallBase *Call = dyn_cast<CallBase>(&II);


@@ -49,3 +49,82 @@ bb:
}
@run.bb = global [1 x i8*] zeroinitializer
; Check that a function referenced by a global blockaddress won't be inlined,
; even if it contains a callbr. We might be able to relax this in the future
; as long as the global blockaddress is updated correctly.
@ba = internal global i8* blockaddress(@foo, %7), align 8
define internal i32 @foo(i32) {
%2 = alloca i32, align 4
%3 = alloca i32, align 4
store i32 %0, i32* %3, align 4
%4 = load i32, i32* %3, align 4
callbr void asm sideeffect "testl $0, $0; jne ${1:l};", "r,X,X,~{dirflag},~{fpsr},~{flags}"(i32 %4, i8* blockaddress(@foo, %7), i8* blockaddress(@foo, %6)) #1
to label %5 [label %7, label %6]
; <label>:5: ; preds = %1
store i32 0, i32* %2, align 4
br label %8
; <label>:6: ; preds = %1
store i32 1, i32* %2, align 4
br label %8
; <label>:7: ; preds = %1
store i32 2, i32* %2, align 4
br label %8
; <label>:8: ; preds = %7, %6, %5
%9 = load i32, i32* %2, align 4
ret i32 %9
}
define dso_local i32 @bar() {
%1 = call i32 @foo(i32 0)
ret i32 %1
}
; CHECK: define dso_local i32 @bar() {
; CHECK: %1 = call i32 @foo(i32 0)
; CHECK: ret i32 %1
; CHECK: }
; Triple check that even with a global aggregate whose member is a blockaddress,
; we still don't inline referred to functions.
%struct.foo = type { i8* }
@my_foo = dso_local global %struct.foo { i8* blockaddress(@baz, %7) }
define internal i32 @baz(i32) {
%2 = alloca i32, align 4
%3 = alloca i32, align 4
store i32 %0, i32* %3, align 4
%4 = load i32, i32* %3, align 4
callbr void asm sideeffect "testl $0, $0; jne ${1:l};", "r,X,X,~{dirflag},~{fpsr},~{flags}"(i32 %4, i8* blockaddress(@baz, %7), i8* blockaddress(@baz, %6)) #1
to label %5 [label %7, label %6]
; <label>:5: ; preds = %1
store i32 0, i32* %2, align 4
br label %8
; <label>:6: ; preds = %1
store i32 1, i32* %2, align 4
br label %8
; <label>:7: ; preds = %1
store i32 2, i32* %2, align 4
br label %8
; <label>:8: ; preds = %7, %6, %5
%9 = load i32, i32* %2, align 4
ret i32 %9
}
define dso_local i32 @quux() {
%1 = call i32 @baz(i32 0)
ret i32 %1
}
; CHECK: define dso_local i32 @quux() {
; CHECK: %1 = call i32 @baz(i32 0)
; CHECK: ret i32 %1
; CHECK: }


@@ -0,0 +1,54 @@
; RUN: opt -inline -S < %s | FileCheck %s
; RUN: opt -passes='cgscc(inline)' -S < %s | FileCheck %s
define dso_local i32 @main() #0 {
%1 = alloca i32, align 4
store i32 0, i32* %1, align 4
%2 = call i32 @t32(i32 0)
ret i32 %2
}
define internal i32 @t32(i32) #0 {
%2 = alloca i32, align 4
%3 = alloca i32, align 4
store i32 %0, i32* %3, align 4
%4 = load i32, i32* %3, align 4
callbr void asm sideeffect "testl $0, $0; jne ${1:l};", "r,X,X,~{dirflag},~{fpsr},~{flags}"(i32 %4, i8* blockaddress(@t32, %7), i8* blockaddress(@t32, %6)) #1
to label %5 [label %7, label %6]
; <label>:5: ; preds = %1
store i32 0, i32* %2, align 4
br label %8
; <label>:6: ; preds = %1
store i32 1, i32* %2, align 4
br label %8
; <label>:7: ; preds = %1
store i32 2, i32* %2, align 4
br label %8
; <label>:8: ; preds = %7, %6, %5
%9 = load i32, i32* %2, align 4
ret i32 %9
}
; Check that @t32 no longer exists after inlining, as it has now been inlined
; into @main.
; CHECK-NOT: @t32
; CHECK: define dso_local i32 @main
; CHECK: callbr void asm sideeffect "testl $0, $0; jne ${1:l};", "r,X,X,~{dirflag},~{fpsr},~{flags}"(i32 %6, i8* blockaddress(@main, %9), i8* blockaddress(@main, %8))
; CHECK: to label %7 [label %9, label %8]
; CHECK: 7:
; CHECK-NEXT: store i32 0, i32* %1, align 4
; CHECK-NEXT: br label %t32.exit
; CHECK: 8:
; CHECK-NEXT: store i32 1, i32* %1, align 4
; CHECK-NEXT: br label %t32.exit
; CHECK: 9:
; CHECK-NEXT: store i32 2, i32* %1, align 4
; CHECK-NEXT: br label %t32.exit
; CHECK: t32.exit:
; CHECK-NEXT: %10 = load i32, i32* %1, align 4
; CHECK: ret i32 %10


@@ -1977,6 +1977,35 @@ TEST(LazyCallGraphTest, HandleBlockAddress) {
   EXPECT_TRUE(GRC.isParentOf(FRC));
 }
 
+// Test that a blockaddress that refers to itself creates no new RefSCC
+// connections. https://bugs.llvm.org/show_bug.cgi?id=40722
+TEST(LazyCallGraphTest, HandleBlockAddress2) {
+  LLVMContext Context;
+  std::unique_ptr<Module> M =
+      parseAssembly(Context, "define void @f() {\n"
+                             "  ret void\n"
+                             "}\n"
+                             "define void @g(i8** %ptr) {\n"
+                             "bb:\n"
+                             "  store i8* blockaddress(@g, %bb), i8** %ptr\n"
+                             "  ret void\n"
+                             "}\n");
+  LazyCallGraph CG = buildCG(*M);
+  CG.buildRefSCCs();
+  auto I = CG.postorder_ref_scc_begin();
+  LazyCallGraph::RefSCC &GRC = *I++;
+  LazyCallGraph::RefSCC &FRC = *I++;
+  EXPECT_EQ(CG.postorder_ref_scc_end(), I);
+  LazyCallGraph::Node &F = *CG.lookup(lookupFunction(*M, "f"));
+  LazyCallGraph::Node &G = *CG.lookup(lookupFunction(*M, "g"));
+  EXPECT_EQ(&FRC, CG.lookupRefSCC(F));
+  EXPECT_EQ(&GRC, CG.lookupRefSCC(G));
+  EXPECT_FALSE(GRC.isParentOf(FRC));
+  EXPECT_FALSE(FRC.isParentOf(GRC));
+}
+
 TEST(LazyCallGraphTest, ReplaceNodeFunction) {
   LLVMContext Context;
   // A graph with several different kinds of edges pointing at a particular