forked from OSchip/llvm-project
Update optimization passes to handle inalloca arguments
Summary: I searched Transforms/ and Analysis/ for 'ByVal' and updated those call sites to check for inalloca if appropriate. I added tests for any change that would allow an optimization to fire on inalloca. Reviewers: nlewycky Differential Revision: http://llvm-reviews.chandlerc.com/D2449 llvm-svn: 200281
This commit is contained in:
parent
020acd88ec
commit
26af2cae05
|
@ -458,7 +458,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) {
|
|||
|
||||
SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) {
|
||||
// no interprocedural analysis is done at the moment
|
||||
if (!A.hasByValAttr()) {
|
||||
if (!A.hasByValOrInAllocaAttr()) {
|
||||
++ObjectVisitorArgument;
|
||||
return unknown();
|
||||
}
|
||||
|
|
|
@ -311,8 +311,9 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
|
|||
if (Argument *A = dyn_cast<Argument>(V)) {
|
||||
unsigned Align = 0;
|
||||
|
||||
if (A->hasByValAttr()) {
|
||||
// Get alignment information off byval arguments if specified in the IR.
|
||||
if (A->hasByValOrInAllocaAttr()) {
|
||||
// Get alignment information off byval/inalloca arguments if specified in
|
||||
// the IR.
|
||||
Align = A->getParamAlignment();
|
||||
} else if (TD && A->hasStructRetAttr()) {
|
||||
// An sret parameter has at least the ABI alignment of the return type.
|
||||
|
@ -2070,9 +2071,9 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
|
|||
// Alloca never returns null, malloc might.
|
||||
if (isa<AllocaInst>(V)) return true;
|
||||
|
||||
// A byval argument is never null.
|
||||
// A byval or inalloca argument is never null.
|
||||
if (const Argument *A = dyn_cast<Argument>(V))
|
||||
return A->hasByValAttr();
|
||||
return A->hasByValOrInAllocaAttr();
|
||||
|
||||
// Global values are not null unless extern weak.
|
||||
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
|
||||
|
|
|
@ -155,7 +155,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
|
|||
Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
|
||||
|
||||
// If this is a byval argument, and if the aggregate type is small, just
|
||||
// pass the elements, which is always safe.
|
||||
// pass the elements, which is always safe. This does not apply to
|
||||
// inalloca.
|
||||
if (PtrArg->hasByValAttr()) {
|
||||
if (StructType *STy = dyn_cast<StructType>(AgTy)) {
|
||||
if (maxElements > 0 && STy->getNumElements() > maxElements) {
|
||||
|
@ -201,7 +202,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
|
|||
}
|
||||
|
||||
// Otherwise, see if we can promote the pointer to its value.
|
||||
if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValAttr()))
|
||||
if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr()))
|
||||
ArgsToPromote.insert(PtrArg);
|
||||
}
|
||||
|
||||
|
@ -301,7 +302,8 @@ static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark,
|
|||
/// This method limits promotion of aggregates to only promote up to three
|
||||
/// elements of the aggregate in order to avoid exploding the number of
|
||||
/// arguments passed in.
|
||||
bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
|
||||
bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
|
||||
bool isByValOrInAlloca) const {
|
||||
typedef std::set<IndicesVector> GEPIndicesSet;
|
||||
|
||||
// Quick exit for unused arguments
|
||||
|
@ -323,6 +325,9 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
|
|||
//
|
||||
// This set will contain all sets of indices that are loaded in the entry
|
||||
// block, and thus are safe to unconditionally load in the caller.
|
||||
//
|
||||
// This optimization is also safe for InAlloca parameters, because it verifies
|
||||
// that the address isn't captured.
|
||||
GEPIndicesSet SafeToUnconditionallyLoad;
|
||||
|
||||
// This set contains all the sets of indices that we are planning to promote.
|
||||
|
@ -330,7 +335,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
|
|||
GEPIndicesSet ToPromote;
|
||||
|
||||
// If the pointer is always valid, any load with first index 0 is valid.
|
||||
if (isByVal || AllCallersPassInValidPointerForArgument(Arg))
|
||||
if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg))
|
||||
SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
|
||||
|
||||
// First, iterate the entry block and mark loads of (geps of) arguments as
|
||||
|
@ -389,7 +394,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
|
|||
// TODO: This runs the above loop over and over again for dead GEPs
|
||||
// Couldn't we just increment the UI iterator earlier and erase the
|
||||
// use?
|
||||
return isSafeToPromoteArgument(Arg, isByVal);
|
||||
return isSafeToPromoteArgument(Arg, isByValOrInAlloca);
|
||||
}
|
||||
|
||||
// Ensure that all of the indices are constants.
|
||||
|
|
|
@ -378,7 +378,7 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
|
|||
I != E; ++I) {
|
||||
Argument *Arg = I;
|
||||
|
||||
if (Arg->use_empty() && !Arg->hasByValAttr())
|
||||
if (Arg->use_empty() && !Arg->hasByValOrInAllocaAttr())
|
||||
UnusedArgs.push_back(Arg->getArgNo());
|
||||
}
|
||||
|
||||
|
|
|
@ -414,6 +414,10 @@ determinePointerReadAttrs(Argument *A,
|
|||
SmallSet<Use*, 32> Visited;
|
||||
int Count = 0;
|
||||
|
||||
// inalloca arguments are always clobbered by the call.
|
||||
if (A->hasInAllocaAttr())
|
||||
return Attribute::None;
|
||||
|
||||
bool IsRead = false;
|
||||
// We don't need to track IsWritten. If A is written to, return immediately.
|
||||
|
||||
|
|
|
@ -135,7 +135,7 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) {
|
|||
for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) {
|
||||
// Do we have a constant argument?
|
||||
if (ArgumentConstants[i].second || AI->use_empty() ||
|
||||
(AI->hasByValAttr() && !F.onlyReadsMemory()))
|
||||
AI->hasInAllocaAttr() || (AI->hasByValAttr() && !F.onlyReadsMemory()))
|
||||
continue;
|
||||
|
||||
Value *V = ArgumentConstants[i].first;
|
||||
|
|
|
@ -767,10 +767,10 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
|
|||
if (!CI->isLosslessCast())
|
||||
return false;
|
||||
|
||||
// The size of ByVal arguments is derived from the type, so we
|
||||
// The size of ByVal or InAlloca arguments is derived from the type, so we
|
||||
// can't change to a type with a different size. If the size were
|
||||
// passed explicitly we could avoid this check.
|
||||
if (!CS.isByValArgument(ix))
|
||||
if (!CS.isByValOrInAllocaArgument(ix))
|
||||
return true;
|
||||
|
||||
Type* SrcTy =
|
||||
|
@ -1049,6 +1049,9 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
|
|||
typeIncompatible(ParamTy, i + 1), i + 1))
|
||||
return false; // Attribute not compatible with transformed value.
|
||||
|
||||
if (CS.isInAllocaArgument(i))
|
||||
return false; // Cannot transform to and from inalloca.
|
||||
|
||||
// If the parameter is passed as a byval argument, then we have to have a
|
||||
// sized type and the sized type has to have the same size as the old type.
|
||||
if (ParamTy != ActTy &&
|
||||
|
|
|
@ -81,10 +81,14 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
|
|||
if (CS.isCallee(UI))
|
||||
continue;
|
||||
|
||||
// Inalloca arguments are clobbered by the call.
|
||||
unsigned ArgNo = CS.getArgumentNo(UI);
|
||||
if (CS.isInAllocaArgument(ArgNo))
|
||||
return false;
|
||||
|
||||
// If this is a readonly/readnone call site, then we know it is just a
|
||||
// load (but one that potentially returns the value itself), so we can
|
||||
// ignore it if we know that the value isn't captured.
|
||||
unsigned ArgNo = CS.getArgumentNo(UI);
|
||||
if (CS.onlyReadsMemory() &&
|
||||
(CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
|
||||
continue;
|
||||
|
|
|
@ -308,6 +308,7 @@ static inline bool IsPotentialRetainableObjPtr(const Value *Op) {
|
|||
// Special arguments can not be a valid retainable object pointer.
|
||||
if (const Argument *Arg = dyn_cast<Argument>(Op))
|
||||
if (Arg->hasByValAttr() ||
|
||||
Arg->hasInAllocaAttr() ||
|
||||
Arg->hasNestAttr() ||
|
||||
Arg->hasStructRetAttr())
|
||||
return false;
|
||||
|
|
|
@ -374,8 +374,8 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
|
|||
return OverwriteUnknown;
|
||||
|
||||
// Check to see if the later store is to the entire object (either a global,
|
||||
// an alloca, or a byval argument). If so, then it clearly overwrites any
|
||||
// other store to the same object.
|
||||
// an alloca, or a byval/inalloca argument). If so, then it clearly
|
||||
// overwrites any other store to the same object.
|
||||
const DataLayout *TD = AA.getDataLayout();
|
||||
|
||||
const Value *UO1 = GetUnderlyingObject(P1, TD),
|
||||
|
@ -742,11 +742,11 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
|
|||
DeadStackObjects.insert(I);
|
||||
}
|
||||
|
||||
// Treat byval arguments the same, stores to them are dead at the end of the
|
||||
// function.
|
||||
// Treat byval or inalloca arguments the same, stores to them are dead at the
|
||||
// end of the function.
|
||||
for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
|
||||
AE = BB.getParent()->arg_end(); AI != AE; ++AI)
|
||||
if (AI->hasByValAttr())
|
||||
if (AI->hasByValOrInAllocaAttr())
|
||||
DeadStackObjects.insert(AI);
|
||||
|
||||
// Scan the basic block backwards
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
; RUN: opt %s -argpromotion -scalarrepl -S | FileCheck %s
|
||||
|
||||
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
|
||||
|
||||
%struct.ss = type { i32, i32 }
|
||||
|
||||
; Argpromote + scalarrepl should change this to passing the two integers by value.
|
||||
define internal i32 @f(%struct.ss* inalloca %s) {
|
||||
entry:
|
||||
%f0 = getelementptr %struct.ss* %s, i32 0, i32 0
|
||||
%f1 = getelementptr %struct.ss* %s, i32 0, i32 1
|
||||
%a = load i32* %f0, align 4
|
||||
%b = load i32* %f1, align 4
|
||||
%r = add i32 %a, %b
|
||||
ret i32 %r
|
||||
}
|
||||
; CHECK-LABEL: define internal i32 @f
|
||||
; CHECK-NOT: load
|
||||
; CHECK: ret
|
||||
|
||||
define i32 @main() {
|
||||
entry:
|
||||
%S = alloca %struct.ss
|
||||
%f0 = getelementptr %struct.ss* %S, i32 0, i32 0
|
||||
%f1 = getelementptr %struct.ss* %S, i32 0, i32 1
|
||||
store i32 1, i32* %f0, align 4
|
||||
store i32 2, i32* %f1, align 4
|
||||
%r = call i32 @f(%struct.ss* inalloca %S)
|
||||
ret i32 %r
|
||||
}
|
||||
; CHECK-LABEL: define i32 @main
|
||||
; CHECK-NOT: load
|
||||
; CHECK: ret
|
||||
|
||||
; Argpromote can't promote %a because of the icmp use.
|
||||
define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) nounwind {
|
||||
; CHECK: define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b)
|
||||
entry:
|
||||
%c = icmp eq %struct.ss* %a, %b
|
||||
ret i1 %c
|
||||
}
|
||||
|
||||
define i32 @test() {
|
||||
entry:
|
||||
%S = alloca %struct.ss
|
||||
%c = call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S)
|
||||
; CHECK: call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S)
|
||||
ret i32 0
|
||||
}
|
|
@ -105,6 +105,15 @@ define void @test9(%struct.x* byval %a) nounwind {
|
|||
; CHECK-NEXT: ret void
|
||||
}
|
||||
|
||||
; Test for inalloca handling.
|
||||
define void @test9_2(%struct.x* inalloca %a) nounwind {
|
||||
%tmp2 = getelementptr %struct.x* %a, i32 0, i32 0
|
||||
store i32 1, i32* %tmp2, align 4
|
||||
ret void
|
||||
; CHECK-LABEL: @test9_2(
|
||||
; CHECK-NEXT: ret void
|
||||
}
|
||||
|
||||
; va_arg has a fuzzy dependence, so the store shouldn't be zapped.
|
||||
define double @test10(i8* %X) {
|
||||
%X_addr = alloca i8*
|
||||
|
|
|
@ -45,3 +45,9 @@ define void @test6_2(i8** %p, i8* %q) {
|
|||
call void @test6_1()
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define void @test7_1(i32* inalloca nocapture %a)
|
||||
; inalloca parameters are always considered written
|
||||
define void @test7_1(i32* inalloca %a) {
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
; RUN: opt < %s -instcombine -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:32:32"
|
||||
target triple = "i686-pc-linux-gnu"
|
||||
|
||||
declare void @takes_i32(i32)
|
||||
declare void @takes_i32_inalloca(i32* inalloca)
|
||||
|
||||
define void @f() {
|
||||
; CHECK-LABEL: define void @f()
|
||||
%args = alloca i32
|
||||
call void bitcast (void (i32)* @takes_i32 to void (i32*)*)(i32* inalloca %args)
|
||||
; CHECK: call void bitcast
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @g() {
|
||||
; CHECK-LABEL: define void @g()
|
||||
call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
|
||||
; CHECK: call void bitcast
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue