forked from OSchip/llvm-project
[InlineFunction] Handle return attributes on call within inlined body
Consider a callee function that has a call (C) within it which feeds into the return. When we inline that callee into a callsite that has return attributes, we can backward propagate those attributes to the call (C) within that inlined callee body. This is safe to do so only if we can guarantee transfer of execution to successor in the window of instructions between return value (i.e. the call C) and the return instruction. See added test cases. Reviewed-By: reames, jdoerfert Differential Revision: https://reviews.llvm.org/D76140
This commit is contained in:
parent
128c0d037d
commit
28518d9ae3
|
@ -3665,31 +3665,31 @@ void test_integer(void) {
|
|||
// CHECK-ASM: vsumqg
|
||||
|
||||
idx = vec_test_mask(vsc, vuc);
|
||||
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
|
||||
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
|
||||
// CHECK-ASM: vtm
|
||||
idx = vec_test_mask(vuc, vuc);
|
||||
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
|
||||
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
|
||||
// CHECK-ASM: vtm
|
||||
idx = vec_test_mask(vss, vus);
|
||||
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
|
||||
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
|
||||
// CHECK-ASM: vtm
|
||||
idx = vec_test_mask(vus, vus);
|
||||
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
|
||||
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
|
||||
// CHECK-ASM: vtm
|
||||
idx = vec_test_mask(vsi, vui);
|
||||
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
|
||||
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
|
||||
// CHECK-ASM: vtm
|
||||
idx = vec_test_mask(vui, vui);
|
||||
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
|
||||
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
|
||||
// CHECK-ASM: vtm
|
||||
idx = vec_test_mask(vsl, vul);
|
||||
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
|
||||
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
|
||||
// CHECK-ASM: vtm
|
||||
idx = vec_test_mask(vul, vul);
|
||||
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
|
||||
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
|
||||
// CHECK-ASM: vtm
|
||||
idx = vec_test_mask(vd, vul);
|
||||
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
|
||||
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
|
||||
// CHECK-ASM: vtm
|
||||
}
|
||||
|
||||
|
|
|
@ -654,10 +654,10 @@ void test_integer(void) {
|
|||
// CHECK-ASM: vsrlb
|
||||
|
||||
idx = vec_test_mask(vf, vui);
|
||||
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
|
||||
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
|
||||
// CHECK-ASM: vtm
|
||||
idx = vec_test_mask(vd, vul);
|
||||
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
|
||||
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
|
||||
// CHECK-ASM: vtm
|
||||
|
||||
vuc = vec_msum_u128(vul, vul, vuc, 0);
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
short test_loadbe_i16(const short *P) {
|
||||
// CHECK-LABEL: @test_loadbe_i16
|
||||
// CHECK: [[LOAD:%.*]] = load i16, i16* %{{.*}}, align 1
|
||||
// CHECK: call i16 @llvm.bswap.i16(i16 [[LOAD]])
|
||||
// CHECK: call signext i16 @llvm.bswap.i16(i16 [[LOAD]])
|
||||
return _loadbe_i16(P);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
// RUN: %clang_cc1 -ffreestanding -triple i686--linux -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
|
||||
// RUN: %clang_cc1 -ffreestanding -triple x86_64--linux -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-64BIT-LONG
|
||||
// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
|
||||
// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
|
||||
// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
|
||||
// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
|
||||
// RUN: %clang_cc1 -ffreestanding -triple i686--linux -emit-llvm -mllvm -update-return-attrs=false %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
|
||||
// RUN: %clang_cc1 -ffreestanding -triple x86_64--linux -emit-llvm -mllvm -update-return-attrs=false %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-64BIT-LONG
|
||||
// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -emit-llvm -mllvm -update-return-attrs=false -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
|
||||
// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -mllvm -update-return-attrs=false -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
|
||||
// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -emit-llvm -mllvm -update-return-attrs=false -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
|
||||
// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -mllvm -update-return-attrs=false -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
|
|
|
@ -80,11 +80,21 @@ EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true),
|
|||
cl::Hidden,
|
||||
cl::desc("Convert noalias attributes to metadata during inlining."));
|
||||
|
||||
static cl::opt<bool> UpdateReturnAttributes(
|
||||
"update-return-attrs", cl::init(true), cl::Hidden,
|
||||
cl::desc("Update return attributes on calls within inlined body"));
|
||||
|
||||
static cl::opt<bool>
|
||||
PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
|
||||
cl::init(true), cl::Hidden,
|
||||
cl::desc("Convert align attributes to assumptions during inlining."));
|
||||
|
||||
static cl::opt<unsigned> InlinerAttributeWindow(
|
||||
"inliner-attribute-window", cl::Hidden,
|
||||
cl::desc("the maximum number of instructions analyzed for may throw during "
|
||||
"attribute inference in inlined body"),
|
||||
cl::init(4));
|
||||
|
||||
llvm::InlineResult llvm::InlineFunction(CallBase *CB, InlineFunctionInfo &IFI,
|
||||
AAResults *CalleeAAR,
|
||||
bool InsertLifetime) {
|
||||
|
@ -1136,6 +1146,81 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
|
|||
}
|
||||
}
|
||||
|
||||
static bool MayContainThrowingOrExitingCall(Instruction *Begin,
|
||||
Instruction *End) {
|
||||
|
||||
assert(Begin->getParent() == End->getParent() &&
|
||||
"Expected to be in same basic block!");
|
||||
unsigned NumInstChecked = 0;
|
||||
// Check that all instructions in the range [Begin, End) are guaranteed to
|
||||
// transfer execution to successor.
|
||||
for (auto &I : make_range(Begin->getIterator(), End->getIterator()))
|
||||
if (NumInstChecked++ > InlinerAttributeWindow ||
|
||||
!isGuaranteedToTransferExecutionToSuccessor(&I))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
static void AddReturnAttributes(CallSite CS, ValueToValueMapTy &VMap) {
|
||||
if (!UpdateReturnAttributes)
|
||||
return;
|
||||
AttrBuilder AB(CS.getAttributes(), AttributeList::ReturnIndex);
|
||||
if (AB.empty())
|
||||
return;
|
||||
|
||||
auto *CalledFunction = CS.getCalledFunction();
|
||||
auto &Context = CalledFunction->getContext();
|
||||
|
||||
for (auto &BB : *CalledFunction) {
|
||||
auto *RI = dyn_cast<ReturnInst>(BB.getTerminator());
|
||||
if (!RI || !isa<CallBase>(RI->getOperand(0)))
|
||||
continue;
|
||||
// Sanity check that the cloned return instruction exists and is a return
|
||||
// instruction itself.
|
||||
auto *NewRI = dyn_cast_or_null<ReturnInst>(VMap.lookup(RI));
|
||||
if (!NewRI)
|
||||
continue;
|
||||
auto *RetVal = cast<CallBase>(RI->getOperand(0));
|
||||
// Sanity check that the cloned RetVal exists and is a call.
|
||||
// Simplification during inlining could have transformed the cloned
|
||||
// instruction.
|
||||
auto *NewRetVal = dyn_cast_or_null<CallBase>(VMap.lookup(RetVal));
|
||||
if (!NewRetVal)
|
||||
continue;
|
||||
// Backward propagation of attributes to the returned value may be incorrect
|
||||
// if it is control flow dependent.
|
||||
// Consider:
|
||||
// @callee {
|
||||
// %rv = call @foo()
|
||||
// %rv2 = call @bar()
|
||||
// if (%rv2 != null)
|
||||
// return %rv2
|
||||
// if (%rv == null)
|
||||
// exit()
|
||||
// return %rv
|
||||
// }
|
||||
// caller() {
|
||||
// %val = call nonnull @callee()
|
||||
// }
|
||||
// Here we cannot add the nonnull attribute on either foo or bar. So, we
|
||||
// limit the check to both NewRetVal and NewRI are in the same basic block
|
||||
// and there are no throwing/exiting instructions between these
|
||||
// instructions.
|
||||
if (NewRI->getParent() != NewRetVal->getParent() ||
|
||||
MayContainThrowingOrExitingCall(NewRetVal, NewRI))
|
||||
continue;
|
||||
// Add to the existing attributes of NewRetVal.
|
||||
// NB! When we have the same attribute already existing on NewRetVal, but
|
||||
// with a differing value, the AttributeList's merge API honours the already
|
||||
// existing attribute value (i.e. attributes such as dereferenceable,
|
||||
// dereferenceable_or_null etc). See AttrBuilder::merge for more details.
|
||||
AttributeList AL = NewRetVal->getAttributes();
|
||||
AttributeList NewAL =
|
||||
AL.addAttributes(Context, AttributeList::ReturnIndex, AB);
|
||||
NewRetVal->setAttributes(NewAL);
|
||||
}
|
||||
}
|
||||
|
||||
/// If the inlined function has non-byval align arguments, then
|
||||
/// add @llvm.assume-based alignment assumptions to preserve this information.
|
||||
static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
|
||||
|
@ -1801,6 +1886,10 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
|
|||
// Add noalias metadata if necessary.
|
||||
AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR);
|
||||
|
||||
// Clone return attributes on the callsite into the calls within the inlined
|
||||
// function which feed into its return value.
|
||||
AddReturnAttributes(CS, VMap);
|
||||
|
||||
// Propagate llvm.mem.parallel_loop_access if necessary.
|
||||
PropagateParallelLoopAccessMetadata(CS, VMap);
|
||||
|
||||
|
|
|
@ -0,0 +1,159 @@
|
|||
; RUN: opt < %s -inline-threshold=0 -always-inline -S | FileCheck %s
|
||||
; RUN: opt < %s -passes=always-inline -S | FileCheck %s
|
||||
|
||||
declare i8* @foo(i8*) argmemonly nounwind
|
||||
|
||||
define i8* @callee(i8 *%p) alwaysinline {
|
||||
; CHECK: @callee(
|
||||
; CHECK: call i8* @foo(i8* noalias %p)
|
||||
%r = call i8* @foo(i8* noalias %p)
|
||||
ret i8* %r
|
||||
}
|
||||
|
||||
define i8* @caller(i8* %ptr, i64 %x) {
|
||||
; CHECK-LABEL: @caller
|
||||
; CHECK: call nonnull i8* @foo(i8* noalias
|
||||
%gep = getelementptr inbounds i8, i8* %ptr, i64 %x
|
||||
%p = call nonnull i8* @callee(i8* %gep)
|
||||
ret i8* %p
|
||||
}
|
||||
|
||||
declare void @llvm.experimental.guard(i1,...)
|
||||
; Cannot add nonnull attribute to foo
|
||||
; because the guard is a throwing call
|
||||
define internal i8* @callee_with_throwable(i8* %p) alwaysinline {
|
||||
; CHECK-NOT: callee_with_throwable
|
||||
%r = call i8* @foo(i8* %p)
|
||||
%cond = icmp ne i8* %r, null
|
||||
call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
|
||||
ret i8* %r
|
||||
}
|
||||
|
||||
declare i8* @bar(i8*) readonly nounwind
|
||||
; Here also we cannot add nonnull attribute to the call bar.
|
||||
define internal i8* @callee_with_explicit_control_flow(i8* %p) alwaysinline {
|
||||
; CHECK-NOT: callee_with_explicit_control_flow
|
||||
%r = call i8* @bar(i8* %p)
|
||||
%cond = icmp ne i8* %r, null
|
||||
br i1 %cond, label %ret, label %orig
|
||||
|
||||
ret:
|
||||
ret i8* %r
|
||||
|
||||
orig:
|
||||
ret i8* %p
|
||||
}
|
||||
|
||||
define i8* @caller2(i8* %ptr, i64 %x, i1 %cond) {
|
||||
; CHECK-LABEL: @caller2
|
||||
; CHECK: call i8* @foo
|
||||
; CHECK: call i8* @bar
|
||||
%gep = getelementptr inbounds i8, i8* %ptr, i64 %x
|
||||
%p = call nonnull i8* @callee_with_throwable(i8* %gep)
|
||||
%q = call nonnull i8* @callee_with_explicit_control_flow(i8* %gep)
|
||||
br i1 %cond, label %pret, label %qret
|
||||
|
||||
pret:
|
||||
ret i8* %p
|
||||
|
||||
qret:
|
||||
ret i8* %q
|
||||
}
|
||||
|
||||
define internal i8* @callee3(i8 *%p) alwaysinline {
|
||||
; CHECK-NOT: callee3
|
||||
%r = call noalias i8* @foo(i8* %p)
|
||||
ret i8* %r
|
||||
}
|
||||
|
||||
; add the deref attribute to the existing attributes on foo.
|
||||
define i8* @caller3(i8* %ptr, i64 %x) {
|
||||
; CHECK-LABEL: caller3
|
||||
; CHECK: call noalias dereferenceable_or_null(12) i8* @foo
|
||||
%gep = getelementptr inbounds i8, i8* %ptr, i64 %x
|
||||
%p = call dereferenceable_or_null(12) i8* @callee3(i8* %gep)
|
||||
ret i8* %p
|
||||
}
|
||||
|
||||
declare i8* @inf_loop_call(i8*) nounwind
|
||||
; We cannot propagate attributes to foo because we do not know whether inf_loop_call
|
||||
; will return execution.
|
||||
define internal i8* @callee_with_sideeffect_callsite(i8* %p) alwaysinline {
|
||||
; CHECK-NOT: callee_with_sideeffect_callsite
|
||||
%r = call i8* @foo(i8* %p)
|
||||
%v = call i8* @inf_loop_call(i8* %p)
|
||||
ret i8* %r
|
||||
}
|
||||
|
||||
; do not add deref attribute to foo
|
||||
define i8* @test4(i8* %ptr, i64 %x) {
|
||||
; CHECK-LABEL: test4
|
||||
; CHECK: call i8* @foo
|
||||
%gep = getelementptr inbounds i8, i8* %ptr, i64 %x
|
||||
%p = call dereferenceable_or_null(12) i8* @callee_with_sideeffect_callsite(i8* %gep)
|
||||
ret i8* %p
|
||||
}
|
||||
|
||||
declare i8* @baz(i8*) nounwind readonly
|
||||
define internal i8* @callee5(i8* %p) alwaysinline {
|
||||
; CHECK-NOT: callee5
|
||||
%r = call i8* @foo(i8* %p)
|
||||
%v = call i8* @baz(i8* %p)
|
||||
ret i8* %r
|
||||
}
|
||||
|
||||
; add the deref attribute to foo.
|
||||
define i8* @test5(i8* %ptr, i64 %x) {
|
||||
; CHECK-LABEL: test5
|
||||
; CHECK: call dereferenceable_or_null(12) i8* @foo
|
||||
%gep = getelementptr inbounds i8, i8* %ptr, i64 %x
|
||||
%s = call dereferenceable_or_null(12) i8* @callee5(i8* %gep)
|
||||
ret i8* %s
|
||||
}
|
||||
|
||||
; deref attributes have different values on the callee and the call feeding into
|
||||
; the return.
|
||||
; AttrBuilder chooses the already existing value and does not overwrite it.
|
||||
define internal i8* @callee6(i8* %p) alwaysinline {
|
||||
; CHECK-NOT: callee6
|
||||
%r = call dereferenceable_or_null(16) i8* @foo(i8* %p)
|
||||
%v = call i8* @baz(i8* %p)
|
||||
ret i8* %r
|
||||
}
|
||||
|
||||
|
||||
define i8* @test6(i8* %ptr, i64 %x) {
|
||||
; CHECK-LABEL: test6
|
||||
; CHECK: call dereferenceable_or_null(16) i8* @foo
|
||||
%gep = getelementptr inbounds i8, i8* %ptr, i64 %x
|
||||
%s = call dereferenceable_or_null(12) i8* @callee6(i8* %gep)
|
||||
ret i8* %s
|
||||
}
|
||||
|
||||
; We add the attributes from the callee to both the calls below.
|
||||
define internal i8* @callee7(i8 *%ptr, i1 %cond) alwaysinline {
|
||||
; CHECK-NOT: @callee7(
|
||||
br i1 %cond, label %pass, label %fail
|
||||
|
||||
pass:
|
||||
%r = call i8* @foo(i8* noalias %ptr)
|
||||
ret i8* %r
|
||||
|
||||
fail:
|
||||
%s = call i8* @baz(i8* %ptr)
|
||||
ret i8* %s
|
||||
}
|
||||
|
||||
define void @test7(i8* %ptr, i64 %x, i1 %cond) {
|
||||
; CHECK-LABEL: @test7
|
||||
; CHECK: call nonnull i8* @foo(i8* noalias
|
||||
; CHECK: call nonnull i8* @baz
|
||||
; CHECK: phi i8*
|
||||
; CHECK: call void @snort
|
||||
|
||||
%gep = getelementptr inbounds i8, i8* %ptr, i64 %x
|
||||
%t = call nonnull i8* @callee7(i8* %gep, i1 %cond)
|
||||
call void @snort(i8* %t)
|
||||
ret void
|
||||
}
|
||||
declare void @snort(i8*)
|
Loading…
Reference in New Issue