llvm-project/llvm/test/Transforms/Inline/always-inline.ll

319 lines
7.8 KiB
LLVM
Raw Normal View History

[PM] Port the always inliner to the new pass manager in a much more minimal and boring form than the old pass manager's version. This pass does the very minimal amount of work necessary to inline functions declared as always-inline. It doesn't support a wide array of things that the legacy pass manager did support, but is alse ... about 20 lines of code. So it has that going for it. Notably things this doesn't support: - Array alloca merging - To support the above, bottom-up inlining with careful history tracking and call graph updates - DCE of the functions that become dead after this inlining. - Inlining through call instructions with the always_inline attribute. Instead, it focuses on inlining functions with that attribute. The first I've omitted because I'm hoping to just turn it off for the primary pass manager. If that doesn't pan out, I can add it here but it will be reasonably expensive to do so. The second should really be handled by running global-dce after the inliner. I don't want to re-implement the non-trivial logic necessary to do comdat-correct DCE of functions. This means the -O0 pipeline will have to be at least 'always-inline,global-dce', but that seems reasonable to me. If others are seriously worried about this I'd like to hear about it and understand why. Again, this is all solveable by factoring that logic into a utility and calling it here, but I'd like to wait to do that until there is a clear reason why the existing pass-based factoring won't work. The final point is a serious one. I can fairly easily add support for this, but it seems both costly and a confusing construct for the use case of the always inliner running at -O0. This attribute can of course still impact the normal inliner easily (although I find that a questionable re-use of the same attribute). I've started a discussion to sort out what semantics we want here and based on that can figure out if it makes sense ta have this complexity at O0 or not. 
One other advantage of this design is that it should be quite a bit faster due to checking for whether the function is a viable candidate for inlining exactly once per function instead of doing it for each call site. Anyways, hopefully a reasonable starting point for this pass. Differential Revision: https://reviews.llvm.org/D23299 llvm-svn: 278896
2016-08-17 10:56:20 +08:00
; RUN: opt < %s -inline-threshold=0 -always-inline -S | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CALL
;
; Ensure the threshold has no impact on these decisions.
[PM] Port the always inliner to the new pass manager in a much more minimal and boring form than the old pass manager's version. This pass does the very minimal amount of work necessary to inline functions declared as always-inline. It doesn't support a wide array of things that the legacy pass manager did support, but is alse ... about 20 lines of code. So it has that going for it. Notably things this doesn't support: - Array alloca merging - To support the above, bottom-up inlining with careful history tracking and call graph updates - DCE of the functions that become dead after this inlining. - Inlining through call instructions with the always_inline attribute. Instead, it focuses on inlining functions with that attribute. The first I've omitted because I'm hoping to just turn it off for the primary pass manager. If that doesn't pan out, I can add it here but it will be reasonably expensive to do so. The second should really be handled by running global-dce after the inliner. I don't want to re-implement the non-trivial logic necessary to do comdat-correct DCE of functions. This means the -O0 pipeline will have to be at least 'always-inline,global-dce', but that seems reasonable to me. If others are seriously worried about this I'd like to hear about it and understand why. Again, this is all solveable by factoring that logic into a utility and calling it here, but I'd like to wait to do that until there is a clear reason why the existing pass-based factoring won't work. The final point is a serious one. I can fairly easily add support for this, but it seems both costly and a confusing construct for the use case of the always inliner running at -O0. This attribute can of course still impact the normal inliner easily (although I find that a questionable re-use of the same attribute). I've started a discussion to sort out what semantics we want here and based on that can figure out if it makes sense ta have this complexity at O0 or not. 
One other advantage of this design is that it should be quite a bit faster due to checking for whether the function is a viable candidate for inlining exactly once per function instead of doing it for each call site. Anyways, hopefully a reasonable starting point for this pass. Differential Revision: https://reviews.llvm.org/D23299 llvm-svn: 278896
2016-08-17 10:56:20 +08:00
; RUN: opt < %s -inline-threshold=20000000 -always-inline -S | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CALL
; RUN: opt < %s -inline-threshold=-20000000 -always-inline -S | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CALL
;
; The new pass manager doesn't re-use any threshold based infrastructure for
; the always inliner, but test that we get the correct result. The new PM
; always inliner also doesn't support inlining call-site alwaysinline
; annotations. It isn't clear that this is a reasonable use case for
; 'alwaysinline'.
; RUN: opt < %s -passes=always-inline -S | FileCheck %s --check-prefix=CHECK
; Internal alwaysinline callee used by @outer1; it only returns the constant 1.
; The negative directive below requires that the text "@inner1(" does not show
; up in the output ahead of the next positive match.
define internal i32 @inner1() alwaysinline {
; CHECK-NOT: @inner1(
ret i32 1
}
; Caller of the alwaysinline @inner1. The directives require that no call
; instruction is left between the function label and its ret.
define i32 @outer1() {
; CHECK-LABEL: @outer1(
; CHECK-NOT: call
; CHECK: ret
%r = call i32 @inner1()
ret i32 %r
}
; The always inliner can't DCE arbitrary internal functions. PR2945
; @pr2945 is internal, is not marked alwaysinline, and has no caller visible in
; this test; the label directive requires it to still be present in the output.
define internal i32 @pr2945() nounwind {
; CHECK-LABEL: @pr2945(
ret i32 0
}
; Internal alwaysinline callee whose only work is an alloca with a run-time
; element count (%N). The negative directive expects its definition to be gone.
define internal void @inner2(i32 %N) alwaysinline {
; CHECK-NOT: @inner2(
%P = alloca i32, i32 %N
ret void
}
define void @outer2(i32 %N) {
; The always inliner (unlike the normal one) should be willing to inline
; a function with a dynamic alloca into one without a dynamic alloca.
; rdar://6655932
;
; The call to @inner2 must be gone by the time the ret is matched. (The
; negative directive used to be listed twice; one copy is sufficient.)
; CHECK-LABEL: @outer2(
; CHECK-NOT: call void @inner2
; CHECK: ret void
call void @inner2( i32 %N )
ret void
}
; External declarations carrying the returns_twice attribute. @a is called from
; @inner3 and @b from @inner4.
declare i32 @a() returns_twice
declare i32 @b() returns_twice
; Cannot alwaysinline when that would introduce a returns_twice call.
; @inner3 is alwaysinline and calls the returns_twice @a, but is not itself
; marked returns_twice. The label directive expects its definition to stay.
define internal i32 @inner3() alwaysinline {
; CHECK-LABEL: @inner3(
entry:
%call = call i32 @a() returns_twice
%add = add nsw i32 1, %call
ret i32 %add
}
; Caller of @inner3 through a plain (not returns_twice) call site. The
; directives require that no direct call to @a appears before the ret, i.e.
; that the body of @inner3 was not copied into this function.
define i32 @outer3() {
entry:
; CHECK-LABEL: @outer3(
; CHECK-NOT: call i32 @a
; CHECK: ret
%call = call i32 @inner3()
%add = add nsw i32 1, %call
ret i32 %add
}
; Same shape as @inner3, but this callee is itself marked returns_twice (in
; addition to alwaysinline) and calls @b. The negative directive expects the
; definition to be removed from the output.
define internal i32 @inner4() alwaysinline returns_twice {
; CHECK-NOT: @inner4(
entry:
%call = call i32 @b() returns_twice
%add = add nsw i32 1, %call
ret i32 %add
}
; Calls @inner4 through a call site that carries returns_twice. The directives
; expect a direct call to @b inside this function, i.e. @inner4 was inlined.
define i32 @outer4() {
entry:
; CHECK-LABEL: @outer4(
; CHECK: call i32 @b()
; CHECK: ret
%call = call i32 @inner4() returns_twice
%add = add nsw i32 1, %call
ret i32 %add
}
; We can't inline this even though it has alwaysinline!
; The body is an indirectbr on the %addr argument; @outer5 passes in
; blockaddress constants that refer to the %one and %two blocks of this
; function. The label directive expects the definition to stay.
define internal i32 @inner5(i8* %addr) alwaysinline {
; CHECK-LABEL: @inner5(
entry:
indirectbr i8* %addr, [ label %one, label %two ]
one:
ret i32 42
two:
ret i32 44
}
; Picks one of @inner5's two block addresses depending on whether %x is less
; than 42 and passes it to @inner5. The directives expect the call to @inner5
; to still be present before the ret.
define i32 @outer5(i32 %x) {
; CHECK-LABEL: @outer5(
; CHECK: call i32 @inner5
; CHECK: ret
%cmp = icmp slt i32 %x, 42
%addr = select i1 %cmp, i8* blockaddress(@inner5, %one), i8* blockaddress(@inner5, %two)
%call = call i32 @inner5(i8* %addr)
ret i32 %call
}
; A function that calls itself recursively is not inlined here even though it
; is marked alwaysinline: the directives expect both the definition of @inner6
; and the call to it in @outer6 to survive.
define internal void @inner6(i32 %x) alwaysinline {
; CHECK-LABEL: @inner6(
entry:
; Stop once %x is negative; otherwise recurse with %x - 1.
%icmp = icmp slt i32 %x, 0
br i1 %icmp, label %return, label %bb
bb:
%sub = sub nsw i32 %x, 1
call void @inner6(i32 %sub)
ret void
return:
ret void
}
; Calls the recursive alwaysinline @inner6 with the constant 42. The directives
; expect exactly this call (same argument) to remain before the ret.
define void @outer6() {
; CHECK-LABEL: @outer6(
; CHECK: call void @inner6(i32 42)
; CHECK: ret
entry:
call void @inner6(i32 42)
ret void
}
; This is not an alwaysinline function and is actually external.
; It is called from @outer7 through a call site that carries the alwaysinline
; attribute. The label directive requires the definition to stay in the output.
define i32 @inner7() {
; CHECK-LABEL: @inner7(
ret i32 1
}
; Calls the non-alwaysinline @inner7 through a call site that carries the
; alwaysinline attribute. The directives below use the 'CHECK-CALL' prefix,
; which is only enabled by the legacy -always-inline invocations at the top of
; this file; the -passes=always-inline invocation does not enable it, so the
; new pass manager run makes no claim about this function.
define i32 @outer7() {
; CHECK-CALL-LABEL: @outer7(
; CHECK-CALL-NOT: call
; CHECK-CALL: ret
%r = call i32 @inner7() alwaysinline
ret i32 %r
}
[PM] Port the always inliner to the new pass manager in a much more minimal and boring form than the old pass manager's version. This pass does the very minimal amount of work necessary to inline functions declared as always-inline. It doesn't support a wide array of things that the legacy pass manager did support, but is alse ... about 20 lines of code. So it has that going for it. Notably things this doesn't support: - Array alloca merging - To support the above, bottom-up inlining with careful history tracking and call graph updates - DCE of the functions that become dead after this inlining. - Inlining through call instructions with the always_inline attribute. Instead, it focuses on inlining functions with that attribute. The first I've omitted because I'm hoping to just turn it off for the primary pass manager. If that doesn't pan out, I can add it here but it will be reasonably expensive to do so. The second should really be handled by running global-dce after the inliner. I don't want to re-implement the non-trivial logic necessary to do comdat-correct DCE of functions. This means the -O0 pipeline will have to be at least 'always-inline,global-dce', but that seems reasonable to me. If others are seriously worried about this I'd like to hear about it and understand why. Again, this is all solveable by factoring that logic into a utility and calling it here, but I'd like to wait to do that until there is a clear reason why the existing pass-based factoring won't work. The final point is a serious one. I can fairly easily add support for this, but it seems both costly and a confusing construct for the use case of the always inliner running at -O0. This attribute can of course still impact the normal inliner easily (although I find that a questionable re-use of the same attribute). I've started a discussion to sort out what semantics we want here and based on that can figure out if it makes sense ta have this complexity at O0 or not. 
One other advantage of this design is that it should be quite a bit faster due to checking for whether the function is a viable candidate for inlining exactly once per function instead of doing it for each call site. Anyways, hopefully a reasonable starting point for this pass. Differential Revision: https://reviews.llvm.org/D23299 llvm-svn: 278896
2016-08-17 10:56:20 +08:00
; Internal alwaysinline callee that returns its pointer argument unchanged.
; The parameter is declared nocapture with align 128, whereas the caller
; @outer8 passes a pointer with no alignment attribute. The negative directive
; expects the definition to be removed from the output.
define internal float* @inner8(float* nocapture align 128 %a) alwaysinline {
; CHECK-NOT: @inner8(
ret float* %a
}
; Routes %a through @inner8 and loads a float from the returned pointer with
; align 4. The directives require that no call to @inner8 is left before the
; ret.
define float @outer8(float* nocapture %a) {
; CHECK-LABEL: @outer8(
; CHECK-NOT: call float* @inner8
; CHECK: ret
%inner_a = call float* @inner8(float* %a)
%f = load float, float* %inner_a, align 4
ret float %f
}
; The 'inner9*' and 'outer9' functions are designed to check that we remove
; a function that is inlined by the always inliner even when it is used by
; a complex constant expression prior to being inlined.
; The 'inner9a' function gets used in a complex constant expression that,
; despite being constant folded, means it isn't dead. As a consequence it
; shouldn't be deleted. If it is, then the constant expression needs to become
; more complex to accurately test this scenario.
; The label directive below therefore expects @inner9a to stay in the output.
define internal void @inner9a(i1 %b) alwaysinline {
; CHECK-LABEL: @inner9a(
entry:
ret void
}
; Counterpart of @inner9a whose address is only used inside the argument of the
; call made from @outer9. The negative directive expects the definition to be
; removed from the output.
define internal void @inner9b(i1 %b) alwaysinline {
; CHECK-NOT: @inner9b(
entry:
ret void
}
; External function whose address is compared against @inner9a / @inner9b in
; the constant expressions below; it is never called in this test.
declare void @dummy9(i1 %b)
; Driver for the inner9 scenario: one volatile store that keeps @inner9a's
; address alive, followed by calls to both alwaysinline callees.
define void @outer9() {
; CHECK-LABEL: @outer9(
entry:
; First we use @inner9a in a complex constant expression that may get folded
; but won't get removed, and then we call it which will get inlined. Despite
; this the function can't be deleted because of the constant expression
; usage.
%sink = alloca i1
store volatile i1 icmp eq (i64 ptrtoint (void (i1)* @inner9a to i64), i64 ptrtoint(void (i1)* @dummy9 to i64)), i1* %sink
; CHECK: store volatile
call void @inner9a(i1 false)
; CHECK-NOT: call void @inner9a
; Next we call @inner9b passing in a constant expression. This constant
; expression will in fact be removed by inlining, so we should also be able
; to delete the function.
call void @inner9b(i1 icmp eq (i64 ptrtoint (void (i1)* @inner9b to i64), i64 ptrtoint(void (i1)* @dummy9 to i64)))
; CHECK-NOT: @inner9b
ret void
; CHECK: ret void
}
; The 'inner10' and 'outer10' functions test a frustrating consequence of the
; current 'alwaysinline' semantic model. Because such functions are allowed to
; be external functions, it may be necessary to both inline all of their uses
; and leave them in the final output. These tests can be removed if and when
; we restrict alwaysinline further.
; @inner10 has default (external) linkage, so the label directive expects its
; definition to stay.
define void @inner10() alwaysinline {
; CHECK-LABEL: @inner10(
entry:
ret void
}
; Caller of the external alwaysinline @inner10. The directives require that the
; call is gone before the ret, even though the callee's definition is kept.
define void @outer10() {
; CHECK-LABEL: @outer10(
entry:
call void @inner10()
; CHECK-NOT: call void @inner10
ret void
; CHECK: ret void
}
; The 'inner11' and 'outer11' functions test another dimension of non-internal
; functions with alwaysinline. These functions use external linkages that we can
; actually remove safely and so we should.
; @inner11a covers the linkonce case; the negative directive expects its
; definition to be removed.
define linkonce void @inner11a() alwaysinline {
; CHECK-NOT: @inner11a(
entry:
ret void
}
; available_externally variant of the alwaysinline callee used by @outer11. The
; negative directive expects its definition to be removed from the output.
define available_externally void @inner11b() alwaysinline {
; CHECK-NOT: @inner11b(
entry:
ret void
}
; Calls both the linkonce and the available_externally alwaysinline callees.
; The directives require that neither call remains before the ret.
define void @outer11() {
; CHECK-LABEL: @outer11(
entry:
call void @inner11a()
call void @inner11b()
; CHECK-NOT: call void @inner11a
; CHECK-NOT: call void @inner11b
ret void
; CHECK: ret void
}
; The 'inner12' and 'outer12' functions test that we don't remove functions
; which are part of a comdat group even if they otherwise seem dead.
; Both @inner12 and its caller @outer12 are members of $comdat12.
$comdat12 = comdat any
; linkonce alwaysinline member of $comdat12; the label directive expects the
; definition to stay in the output.
define linkonce void @inner12() alwaysinline comdat($comdat12) {
; CHECK-LABEL: @inner12(
ret void
}
; Externally visible member of $comdat12 that calls @inner12. The directives
; require the call to be gone before the ret.
define void @outer12() comdat($comdat12) {
; CHECK-LABEL: @outer12(
entry:
call void @inner12()
; CHECK-NOT: call void @inner12
ret void
; CHECK: ret void
}
; The 'inner13*' and 'outer13' functions test that we do remove functions
; which are part of a comdat group where all of the members are removed during
; always inlining.
; Only @inner13a and @inner13b are placed in $comdat13; their caller @outer13
; is not a member.
$comdat13 = comdat any
; First linkonce alwaysinline member of $comdat13; the negative directive
; expects its definition to be removed.
define linkonce void @inner13a() alwaysinline comdat($comdat13) {
; CHECK-NOT: @inner13a(
ret void
}
; Second linkonce alwaysinline member of $comdat13; the negative directive
; expects its definition to be removed as well.
define linkonce void @inner13b() alwaysinline comdat($comdat13) {
; CHECK-NOT: @inner13b(
ret void
}
; Calls both members of $comdat13 without belonging to the comdat itself. The
; directives require that neither call remains before the ret.
define void @outer13() {
; CHECK-LABEL: @outer13(
entry:
call void @inner13a()
call void @inner13b()
; CHECK-NOT: call void @inner13a
; CHECK-NOT: call void @inner13b
ret void
; CHECK: ret void
}
; External-linkage function marked readnone nounwind and NOT alwaysinline. The
; directive requires its definition to be printed in the output.
define void @inner14() readnone nounwind {
; CHECK: define void @inner14
ret void
}
; Calls the readnone nounwind @inner14 and ignores the (void) result. The
; directive requires the call to still be present after the pass has run.
; NOTE(review): presumably this guards against the always inliner deleting such
; a call as trivially dead -- confirm against the change that added this test.
define void @outer14() {
; CHECK: call void @inner14
call void @inner14()
ret void
}