remove inalloca parameters in globalopt and simplify argpromotion

Summary:
Inalloca parameters require special handling in some optimizations.
This change causes globalopt to strip the inalloca attribute from
function parameters when it is safe to do so, removes the special
handling for inallocas from argpromotion, and replaces it with a
simple check that causes argpromotion to skip functions that receive
inallocas (for when the pass is invoked on code that didn't run
through globalopt first). This also avoids a case where argpromotion
would incorrectly try to pass an inalloca in a register.

Fixes PR41658.

Reviewers: rnk, efriedma

Reviewed By: rnk

Subscribers: llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D61286

llvm-svn: 359743
This commit is contained in:
Bob Haarman 2019-05-02 00:37:36 +00:00
parent 1feaee52ff
commit a78ab77b6b
5 changed files with 76 additions and 34 deletions

View File

@ -566,8 +566,8 @@ static void markIndicesSafe(const IndicesVector &ToMark,
/// This method limits promotion of aggregates to only promote up to three /// This method limits promotion of aggregates to only promote up to three
/// elements of the aggregate in order to avoid exploding the number of /// elements of the aggregate in order to avoid exploding the number of
/// arguments passed in. /// arguments passed in.
static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca, static bool isSafeToPromoteArgument(Argument *Arg, bool isByVal, AAResults &AAR,
AAResults &AAR, unsigned MaxElements) { unsigned MaxElements) {
using GEPIndicesSet = std::set<IndicesVector>; using GEPIndicesSet = std::set<IndicesVector>;
// Quick exit for unused arguments // Quick exit for unused arguments
@ -589,9 +589,6 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
// //
// This set will contain all sets of indices that are loaded in the entry // This set will contain all sets of indices that are loaded in the entry
// block, and thus are safe to unconditionally load in the caller. // block, and thus are safe to unconditionally load in the caller.
//
// This optimization is also safe for InAlloca parameters, because it verifies
// that the address isn't captured.
GEPIndicesSet SafeToUnconditionallyLoad; GEPIndicesSet SafeToUnconditionallyLoad;
// This set contains all the sets of indices that we are planning to promote. // This set contains all the sets of indices that we are planning to promote.
@ -599,7 +596,7 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
GEPIndicesSet ToPromote; GEPIndicesSet ToPromote;
// If the pointer is always valid, any load with first index 0 is valid. // If the pointer is always valid, any load with first index 0 is valid.
if (isByValOrInAlloca || allCallersPassInValidPointerForArgument(Arg)) if (isByVal || allCallersPassInValidPointerForArgument(Arg))
SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
// First, iterate the entry block and mark loads of (geps of) arguments as // First, iterate the entry block and mark loads of (geps of) arguments as
@ -656,8 +653,7 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
// TODO: This runs the above loop over and over again for dead GEPs // TODO: This runs the above loop over and over again for dead GEPs
// Couldn't we just do increment the UI iterator earlier and erase the // Couldn't we just do increment the UI iterator earlier and erase the
// use? // use?
return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR, return isSafeToPromoteArgument(Arg, isByVal, AAR, MaxElements);
MaxElements);
} }
// Ensure that all of the indices are constants. // Ensure that all of the indices are constants.
@ -856,6 +852,11 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
if (F->isVarArg()) if (F->isVarArg())
return nullptr; return nullptr;
// Don't transform functions that receive inallocas, as the transformation may
// not be safe depending on calling convention.
if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
return nullptr;
// First check: see if there are any pointer arguments! If not, quick exit. // First check: see if there are any pointer arguments! If not, quick exit.
SmallVector<Argument *, 16> PointerArgs; SmallVector<Argument *, 16> PointerArgs;
for (Argument &I : F->args()) for (Argument &I : F->args())
@ -914,8 +915,7 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
// If this is a byval argument, and if the aggregate type is small, just // If this is a byval argument, and if the aggregate type is small, just
// pass the elements, which is always safe, if the passed value is densely // pass the elements, which is always safe, if the passed value is densely
// packed or if we can prove the padding bytes are never accessed. This does // packed or if we can prove the padding bytes are never accessed.
// not apply to inalloca.
bool isSafeToPromote = bool isSafeToPromote =
PtrArg->hasByValAttr() && PtrArg->hasByValAttr() &&
(isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg)); (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
@ -966,7 +966,7 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
} }
// Otherwise, see if we can promote the pointer to its value. // Otherwise, see if we can promote the pointer to its value.
if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR, if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValAttr(), AAR,
MaxElements)) MaxElements))
ArgsToPromote.insert(PtrArg); ArgsToPromote.insert(PtrArg);
} }

View File

@ -2090,21 +2090,21 @@ static void ChangeCalleesToFastCall(Function *F) {
} }
} }
static AttributeList StripNest(LLVMContext &C, AttributeList Attrs) { static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
// There can be at most one attribute set with a nest attribute. Attribute::AttrKind A) {
unsigned NestIndex; unsigned AttrIndex;
if (Attrs.hasAttrSomewhere(Attribute::Nest, &NestIndex)) if (Attrs.hasAttrSomewhere(A, &AttrIndex))
return Attrs.removeAttribute(C, NestIndex, Attribute::Nest); return Attrs.removeAttribute(C, AttrIndex, A);
return Attrs; return Attrs;
} }
static void RemoveNestAttribute(Function *F) { static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
F->setAttributes(StripNest(F->getContext(), F->getAttributes())); F->setAttributes(StripAttr(F->getContext(), F->getAttributes(), A));
for (User *U : F->users()) { for (User *U : F->users()) {
if (isa<BlockAddress>(U)) if (isa<BlockAddress>(U))
continue; continue;
CallSite CS(cast<Instruction>(U)); CallSite CS(cast<Instruction>(U));
CS.setAttributes(StripNest(F->getContext(), CS.getAttributes())); CS.setAttributes(StripAttr(F->getContext(), CS.getAttributes(), A));
} }
} }
@ -2119,13 +2119,6 @@ static bool hasChangeableCC(Function *F) {
if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall) if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
return false; return false;
// Don't break the invariant that the inalloca parameter is the only parameter
// passed in memory.
// FIXME: GlobalOpt should remove inalloca when possible and hoist the dynamic
// alloca it uses to the entry block if possible.
if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
return false;
// FIXME: Change CC for the whole chain of musttail calls when possible. // FIXME: Change CC for the whole chain of musttail calls when possible.
// //
// Can't change CC of the function that either has musttail calls, or is a // Can't change CC of the function that either has musttail calls, or is a
@ -2287,6 +2280,17 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
if (!F->hasLocalLinkage()) if (!F->hasLocalLinkage())
continue; continue;
// If we have an inalloca parameter that we can safely remove the
// inalloca attribute from, do so. This unlocks optimizations that
// wouldn't be safe in the presence of inalloca.
// FIXME: We should also hoist alloca affected by this to the entry
// block if possible.
if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
!F->hasAddressTaken()) {
RemoveAttribute(F, Attribute::InAlloca);
Changed = true;
}
if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) { if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) {
NumInternalFunc++; NumInternalFunc++;
TargetTransformInfo &TTI = GetTTI(*F); TargetTransformInfo &TTI = GetTTI(*F);
@ -2319,7 +2323,7 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
!F->hasAddressTaken()) { !F->hasAddressTaken()) {
// The function is not used by a trampoline intrinsic, so it is safe // The function is not used by a trampoline intrinsic, so it is safe
// to remove the 'nest' attribute. // to remove the 'nest' attribute.
RemoveNestAttribute(F); RemoveAttribute(F, Attribute::Nest);
++NumNestRemoved; ++NumNestRemoved;
Changed = true; Changed = true;
} }

View File

@ -0,0 +1,38 @@
; In PR41658, argpromotion put an inalloca in a position that per the
; calling convention is passed in a register. This test verifies that
; we don't do that anymore. It also verifies that the combination of
; globalopt and argpromotion is able to optimize the call safely.
;
; RUN: opt -S -argpromotion %s | FileCheck --check-prefix=THIS %s
; RUN: opt -S -globalopt -argpromotion %s | FileCheck --check-prefix=OPT %s
; THIS: define internal x86_thiscallcc void @internalfun(%struct.a* %this, <{ %struct.a
; OPT: define internal fastcc void @internalfun(<{ %struct.a }>*)
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
target triple = "i386-pc-windows-msvc19.11.0"
%struct.a = type { i8 }
define internal x86_thiscallcc void @internalfun(%struct.a* %this, <{ %struct.a }>* inalloca) {
entry:
%a = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %0, i32 0, i32 0
%argmem = alloca inalloca <{ %struct.a }>, align 4
%1 = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %argmem, i32 0, i32 0
%call = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* %1, %struct.a* dereferenceable(1) %a)
call void @ext(<{ %struct.a }>* inalloca %argmem)
ret void
}
; This is here to ensure @internalfun is live.
define void @exportedfun(%struct.a* %a) {
%inalloca.save = tail call i8* @llvm.stacksave()
%argmem = alloca inalloca <{ %struct.a }>, align 4
call x86_thiscallcc void @internalfun(%struct.a* %a, <{ %struct.a }>* inalloca %argmem)
call void @llvm.stackrestore(i8* %inalloca.save)
ret void
}
declare x86_thiscallcc %struct.a* @copy_ctor(%struct.a* returned, %struct.a* dereferenceable(1))
declare void @ext(<{ %struct.a }>* inalloca)
declare i8* @llvm.stacksave()
declare void @llvm.stackrestore(i8*)

View File

@ -1,5 +1,5 @@
; RUN: opt %s -argpromotion -sroa -S | FileCheck %s ; RUN: opt %s -globalopt -argpromotion -sroa -S | FileCheck %s
; RUN: opt %s -passes='argpromotion,function(sroa)' -S | FileCheck %s ; RUN: opt %s -passes='module(globalopt),cgscc(argpromotion),function(sroa)' -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
@ -15,7 +15,7 @@ entry:
%r = add i32 %a, %b %r = add i32 %a, %b
ret i32 %r ret i32 %r
} }
; CHECK-LABEL: define internal i32 @f ; CHECK-LABEL: define internal fastcc i32 @f
; CHECK-NOT: load ; CHECK-NOT: load
; CHECK: ret ; CHECK: ret
@ -35,7 +35,7 @@ entry:
; Argpromote can't promote %a because of the icmp use. ; Argpromote can't promote %a because of the icmp use.
define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) nounwind { define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) nounwind {
; CHECK: define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) ; CHECK: define internal fastcc i1 @g(%struct.ss* %a, %struct.ss* %b)
entry: entry:
%c = icmp eq %struct.ss* %a, %b %c = icmp eq %struct.ss* %a, %b
ret i1 %c ret i1 %c
@ -45,6 +45,6 @@ define i32 @test() {
entry: entry:
%S = alloca inalloca %struct.ss %S = alloca inalloca %struct.ss
%c = call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S) %c = call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S)
; CHECK: call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S) ; CHECK: call fastcc i1 @g(%struct.ss* %S, %struct.ss* %S)
ret i32 0 ret i32 0
} }

View File

@ -27,7 +27,7 @@ define internal i32 @j(i32* %m) {
} }
define internal i32 @inalloca(i32* inalloca %p) { define internal i32 @inalloca(i32* inalloca %p) {
; CHECK-LABEL: define internal i32 @inalloca(i32* inalloca %p) ; CHECK-LABEL: define internal fastcc i32 @inalloca(i32* %p)
%rv = load i32, i32* %p %rv = load i32, i32* %p
ret i32 %rv ret i32 %rv
} }
@ -52,4 +52,4 @@ define void @call_things() {
; CHECK: call fastcc i32 @g ; CHECK: call fastcc i32 @g
; CHECK: call coldcc i32 @h ; CHECK: call coldcc i32 @h
; CHECK: call i32 @j ; CHECK: call i32 @j
; CHECK: call i32 @inalloca(i32* inalloca %args) ; CHECK: call fastcc i32 @inalloca(i32* %args)