[InlineFunction] Handle return attributes on call within inlined body

Consider a callee function that has a call (C) within it which feeds
into the return.  When we inline that callee into a callsite that has
return attributes, we can backward propagate those attributes to the
call (C) within that inlined callee body.

This is safe only if we can guarantee that every instruction in the window
between the return value (i.e. the call C) and the return instruction
transfers execution to its successor.

See added test cases.

Reviewed-By: reames, jdoerfert

Differential Revision: https://reviews.llvm.org/D76140
This commit is contained in:
Anna Thomas 2020-03-22 13:59:10 -04:00
parent 128c0d037d
commit 28518d9ae3
6 changed files with 266 additions and 18 deletions

View File

@ -3665,31 +3665,31 @@ void test_integer(void) {
// CHECK-ASM: vsumqg
idx = vec_test_mask(vsc, vuc);
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vtm
idx = vec_test_mask(vuc, vuc);
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vtm
idx = vec_test_mask(vss, vus);
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vtm
idx = vec_test_mask(vus, vus);
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vtm
idx = vec_test_mask(vsi, vui);
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vtm
idx = vec_test_mask(vui, vui);
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vtm
idx = vec_test_mask(vsl, vul);
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vtm
idx = vec_test_mask(vul, vul);
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vtm
idx = vec_test_mask(vd, vul);
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vtm
}

View File

@ -654,10 +654,10 @@ void test_integer(void) {
// CHECK-ASM: vsrlb
idx = vec_test_mask(vf, vui);
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vtm
idx = vec_test_mask(vd, vul);
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK: call signext i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vtm
vuc = vec_msum_u128(vul, vul, vuc, 0);

View File

@ -7,7 +7,7 @@
// Checks the IR emitted for a call to _loadbe_i16: an align-1 i16 load whose
// result is fed to llvm.bswap.i16.
// NOTE(review): two alternative expectations for the bswap call are listed
// below (with and without signext on the returned value) - confirm which one
// is meant to be kept.
short test_loadbe_i16(const short *P) {
// CHECK-LABEL: @test_loadbe_i16
// CHECK: [[LOAD:%.*]] = load i16, i16* %{{.*}}, align 1
// CHECK: call i16 @llvm.bswap.i16(i16 [[LOAD]])
// CHECK: call signext i16 @llvm.bswap.i16(i16 [[LOAD]])
return _loadbe_i16(P);
}

View File

@ -1,9 +1,9 @@
// RUN: %clang_cc1 -ffreestanding -triple i686--linux -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
// RUN: %clang_cc1 -ffreestanding -triple x86_64--linux -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-64BIT-LONG
// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
// RUN: %clang_cc1 -ffreestanding -triple i686--linux -emit-llvm -mllvm -update-return-attrs=false %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
// RUN: %clang_cc1 -ffreestanding -triple x86_64--linux -emit-llvm -mllvm -update-return-attrs=false %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-64BIT-LONG
// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -emit-llvm -mllvm -update-return-attrs=false -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
// RUN: %clang_cc1 -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -mllvm -update-return-attrs=false -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=i686-windows-msvc -target-feature +sse2 -emit-llvm -mllvm -update-return-attrs=false -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
// RUN: %clang_cc1 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse2 -emit-llvm -mllvm -update-return-attrs=false -o - -Wall -Werror | FileCheck %s --check-prefixes CHECK,CHECK-32BIT-LONG
#include <x86intrin.h>

View File

@ -80,11 +80,21 @@ EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true),
cl::Hidden,
cl::desc("Convert noalias attributes to metadata during inlining."));
// Hidden option, on by default. AddReturnAttributes returns immediately when
// it is false, so no return attributes are copied onto calls inside the
// inlined body.
static cl::opt<bool> UpdateReturnAttributes(
"update-return-attrs", cl::init(true), cl::Hidden,
cl::desc("Update return attributes on calls within inlined body"));
// Hidden option, on by default. Per its description it controls whether align
// attributes are converted to assumptions during inlining.
static cl::opt<bool>
PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
cl::init(true), cl::Hidden,
cl::desc("Convert align attributes to assumptions during inlining."));
// Hidden option, default 4. MayContainThrowingOrExitingCall compares its
// running instruction count against this value and conservatively answers
// "may throw/exit" once the count exceeds it.
static cl::opt<unsigned> InlinerAttributeWindow(
"inliner-attribute-window", cl::Hidden,
cl::desc("the maximum number of instructions analyzed for may throw during "
"attribute inference in inlined body"),
cl::init(4));
llvm::InlineResult llvm::InlineFunction(CallBase *CB, InlineFunctionInfo &IFI,
AAResults *CalleeAAR,
bool InsertLifetime) {
@ -1136,6 +1146,81 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
}
}
/// Conservative scan of the instructions in [Begin, End), which must live in
/// the same basic block.
///
/// \returns true if a scanned instruction is not guaranteed to transfer
/// execution to its successor, or if the scan budget runs out first (the
/// range holds more than InlinerAttributeWindow + 1 instructions); false when
/// every instruction in the range was scanned and passed.
static bool MayContainThrowingOrExitingCall(Instruction *Begin,
                                            Instruction *End) {
  assert(Begin->getParent() == End->getParent() &&
         "Expected to be in same basic block!");

  unsigned Scanned = 0;
  for (auto It = Begin->getIterator(), Stop = End->getIterator(); It != Stop;
       ++It) {
    // The budget test looks at the count *before* the current instruction is
    // added, so instructions 0..InlinerAttributeWindow are examined.
    const bool BudgetExhausted = Scanned > InlinerAttributeWindow;
    ++Scanned;
    if (BudgetExhausted)
      return true;
    if (!isGuaranteedToTransferExecutionToSuccessor(&*It))
      return true;
  }
  return false;
}
static void AddReturnAttributes(CallSite CS, ValueToValueMapTy &VMap) {
if (!UpdateReturnAttributes)
return;
AttrBuilder AB(CS.getAttributes(), AttributeList::ReturnIndex);
if (AB.empty())
return;
auto *CalledFunction = CS.getCalledFunction();
auto &Context = CalledFunction->getContext();
for (auto &BB : *CalledFunction) {
auto *RI = dyn_cast<ReturnInst>(BB.getTerminator());
if (!RI || !isa<CallBase>(RI->getOperand(0)))
continue;
// Sanity check that the cloned return instruction exists and is a return
// instruction itself.
auto *NewRI = dyn_cast_or_null<ReturnInst>(VMap.lookup(RI));
if (!NewRI)
continue;
auto *RetVal = cast<CallBase>(RI->getOperand(0));
// Sanity check that the cloned RetVal exists and is a call.
// Simplification during inlining could have transformed the cloned
// instruction.
auto *NewRetVal = dyn_cast_or_null<CallBase>(VMap.lookup(RetVal));
if (!NewRetVal)
continue;
// Backward propagation of attributes to the returned value may be incorrect
// if it is control flow dependent.
// Consider:
// @callee {
// %rv = call @foo()
// %rv2 = call @bar()
// if (%rv2 != null)
// return %rv2
// if (%rv == null)
// exit()
// return %rv
// }
// caller() {
// %val = call nonnull @callee()
// }
// Here we cannot add the nonnull attribute on either foo or bar. So, we
// limit the check to both NewRetVal and NewRI are in the same basic block
// and there are no throwing/exiting instructions between these
// instructions.
if (NewRI->getParent() != NewRetVal->getParent() ||
MayContainThrowingOrExitingCall(NewRetVal, NewRI))
continue;
// Add to the existing attributes of NewRetVal.
// NB! When we have the same attribute already existing on NewRetVal, but
// with a differing value, the AttributeList's merge API honours the already
// existing attribute value (i.e. attributes such as dereferenceable,
// dereferenceable_or_null etc). See AttrBuilder::merge for more details.
AttributeList AL = NewRetVal->getAttributes();
AttributeList NewAL =
AL.addAttributes(Context, AttributeList::ReturnIndex, AB);
NewRetVal->setAttributes(NewAL);
}
}
/// If the inlined function has non-byval align arguments, then
/// add @llvm.assume-based alignment assumptions to preserve this information.
static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
@ -1801,6 +1886,10 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Add noalias metadata if necessary.
AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR);
// Clone return attributes on the callsite into the calls within the inlined
// function which feed into its return value.
AddReturnAttributes(CS, VMap);
// Propagate llvm.mem.parallel_loop_access if necessary.
PropagateParallelLoopAccessMetadata(CS, VMap);

View File

@ -0,0 +1,159 @@
; RUN: opt < %s -inline-threshold=0 -always-inline -S | FileCheck %s
; RUN: opt < %s -passes=always-inline -S | FileCheck %s
declare i8* @foo(i8*) argmemonly nounwind
; Always-inline wrapper that returns the result of its call to @foo directly.
; The check lines expect this definition to remain in the output with its call
; to @foo unchanged.
define i8* @callee(i8 *%p) alwaysinline {
; CHECK: @callee(
; CHECK: call i8* @foo(i8* noalias %p)
%r = call i8* @foo(i8* noalias %p)
ret i8* %r
}
; Calls @callee through a callsite whose return value is marked nonnull. The
; check lines expect the inlined call to @foo to carry nonnull afterwards.
define i8* @caller(i8* %ptr, i64 %x) {
; CHECK-LABEL: @caller
; CHECK: call nonnull i8* @foo(i8* noalias
%gep = getelementptr inbounds i8, i8* %ptr, i64 %x
%p = call nonnull i8* @callee(i8* %gep)
ret i8* %p
}
declare void @llvm.experimental.guard(i1,...)
; Cannot add nonnull attribute to foo
; because the guard is a throwing call
define internal i8* @callee_with_throwable(i8* %p) alwaysinline {
; CHECK-NOT: callee_with_throwable
%r = call i8* @foo(i8* %p)
%cond = icmp ne i8* %r, null
; The guard sits between the call that produces the return value and the ret.
call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
ret i8* %r
}
declare i8* @bar(i8*) readonly nounwind
; Here also we cannot add nonnull attribute to the call bar.
define internal i8* @callee_with_explicit_control_flow(i8* %p) alwaysinline {
; CHECK-NOT: callee_with_explicit_control_flow
%r = call i8* @bar(i8* %p)
%cond = icmp ne i8* %r, null
; Only the %ret block returns the result of @bar; %orig returns the argument
; %p instead, and the branch is decided by the null check on %r.
br i1 %cond, label %ret, label %orig
ret:
ret i8* %r
orig:
ret i8* %p
}
; Calls both callees above through nonnull callsites. The check lines expect
; the inlined calls to @foo and @bar to appear without a nonnull attribute.
define i8* @caller2(i8* %ptr, i64 %x, i1 %cond) {
; CHECK-LABEL: @caller2
; CHECK: call i8* @foo
; CHECK: call i8* @bar
%gep = getelementptr inbounds i8, i8* %ptr, i64 %x
%p = call nonnull i8* @callee_with_throwable(i8* %gep)
%q = call nonnull i8* @callee_with_explicit_control_flow(i8* %gep)
br i1 %cond, label %pret, label %qret
pret:
ret i8* %p
qret:
ret i8* %q
}
; Always-inline wrapper whose inner call to @foo already carries noalias on
; its return value.
define internal i8* @callee3(i8 *%p) alwaysinline {
; CHECK-NOT: callee3
%r = call noalias i8* @foo(i8* %p)
ret i8* %r
}
; add the deref attribute to the existing attributes on foo.
; The callsite of @callee3 carries dereferenceable_or_null(12); the check line
; expects the inlined call to @foo to have both noalias and
; dereferenceable_or_null(12).
define i8* @caller3(i8* %ptr, i64 %x) {
; CHECK-LABEL: caller3
; CHECK: call noalias dereferenceable_or_null(12) i8* @foo
%gep = getelementptr inbounds i8, i8* %ptr, i64 %x
%p = call dereferenceable_or_null(12) i8* @callee3(i8* %gep)
ret i8* %p
}
declare i8* @inf_loop_call(i8*) nounwind
; We cannot propagate attributes to foo because we do not know whether inf_loop_call
; will return execution.
define internal i8* @callee_with_sideeffect_callsite(i8* %p) alwaysinline {
; CHECK-NOT: callee_with_sideeffect_callsite
%r = call i8* @foo(i8* %p)
; @inf_loop_call is declared with nounwind only, and this call sits between
; the call producing the return value and the ret.
%v = call i8* @inf_loop_call(i8* %p)
ret i8* %r
}
; do not add deref attribute to foo
; Calls @callee_with_sideeffect_callsite through a dereferenceable_or_null(12)
; callsite; the check line expects the inlined call to @foo without it.
define i8* @test4(i8* %ptr, i64 %x) {
; CHECK-LABEL: test4
; CHECK: call i8* @foo
%gep = getelementptr inbounds i8, i8* %ptr, i64 %x
%p = call dereferenceable_or_null(12) i8* @callee_with_sideeffect_callsite(i8* %gep)
ret i8* %p
}
declare i8* @baz(i8*) nounwind readonly
; Always-inline wrapper that returns the result of @foo, with a call to @baz
; (declared nounwind readonly) between that call and the ret.
define internal i8* @callee5(i8* %p) alwaysinline {
; CHECK-NOT: callee5
%r = call i8* @foo(i8* %p)
%v = call i8* @baz(i8* %p)
ret i8* %r
}
; add the deref attribute to foo.
; Calls @callee5 through a dereferenceable_or_null(12) callsite; the check
; line expects that attribute on the inlined call to @foo.
define i8* @test5(i8* %ptr, i64 %x) {
; CHECK-LABEL: test5
; CHECK: call dereferenceable_or_null(12) i8* @foo
%gep = getelementptr inbounds i8, i8* %ptr, i64 %x
%s = call dereferenceable_or_null(12) i8* @callee5(i8* %gep)
ret i8* %s
}
; The dereferenceable_or_null attribute has different values on the callsite of
; callee6 (12) and on the call feeding into the return (16).
; The merge keeps the value that already exists on the inner call and does not
; overwrite it.
define internal i8* @callee6(i8* %p) alwaysinline {
; CHECK-NOT: callee6
; The inner call already carries dereferenceable_or_null(16) on its return.
%r = call dereferenceable_or_null(16) i8* @foo(i8* %p)
%v = call i8* @baz(i8* %p)
ret i8* %r
}
; Calls @callee6 through a dereferenceable_or_null(12) callsite; the check
; line expects the inlined call to @foo to keep dereferenceable_or_null(16).
define i8* @test6(i8* %ptr, i64 %x) {
; CHECK-LABEL: test6
; CHECK: call dereferenceable_or_null(16) i8* @foo
%gep = getelementptr inbounds i8, i8* %ptr, i64 %x
%s = call dereferenceable_or_null(12) i8* @callee6(i8* %gep)
ret i8* %s
}
; The return attributes on the callsite of callee7 are added to both of the
; calls below.
define internal i8* @callee7(i8 *%ptr, i1 %cond) alwaysinline {
; CHECK-NOT: @callee7(
; Two return paths; each returns the result of a call located in the same
; block as its ret: @foo in %pass and @baz in %fail.
br i1 %cond, label %pass, label %fail
pass:
%r = call i8* @foo(i8* noalias %ptr)
ret i8* %r
fail:
%s = call i8* @baz(i8* %ptr)
ret i8* %s
}
; Calls @callee7 through a nonnull callsite and passes the result to @snort.
; The check lines expect nonnull on both inlined calls (@foo and @baz),
; followed by a phi and the call to @snort.
define void @test7(i8* %ptr, i64 %x, i1 %cond) {
; CHECK-LABEL: @test7
; CHECK: call nonnull i8* @foo(i8* noalias
; CHECK: call nonnull i8* @baz
; CHECK: phi i8*
; CHECK: call void @snort
%gep = getelementptr inbounds i8, i8* %ptr, i64 %x
%t = call nonnull i8* @callee7(i8* %gep, i1 %cond)
call void @snort(i8* %t)
ret void
}
declare void @snort(i8*)