Patch summary (free-form text ahead of the first "diff --git" header):

  CodeMetrics::CountCodeReductionForAlloca(Value *V) estimates the reduction
  credited when a pointer argument turns out to be an alloca. This patch
  changes it as follows:

  * The recursive walk over GEP/bitcast users becomes an explicit worklist
    loop. The loop-local "Value *V" shadows the function parameter.
  * Loads and stores only earn InlineConstants::InstrCost when isSimple()
    holds; otherwise the function returns 0.
  * A GEP with a non-constant index now makes the function return 0. The
    old code skipped such a GEP and kept counting the other users.
  * A GEP earns InstrCost only when all its indices are zero; every
    constant-index GEP is still followed.
  * A bitcast now earns InstrCost itself and is still followed.
  * A select is accepted only if each of its users is a simple load. It
    earns nothing and is not followed.
  * memset, memcpy, memmove, lifetime_start and lifetime_end earn InstrCost;
    any other intrinsic makes the function return 0.
  * The new test alloca-bonus.ll expects inner1 to be inlined and inner2,
    which has a variable-index GEP, to stay a call at -inline-threshold=8.

  NOTE(review): the store branch does not check whether the tracked pointer
  is the address operand or the value being stored. A store that writes the
  pointer itself to memory is credited like any other store. Confirm that
  this is intended.

  NOTE(review): the copy under review had every template argument list
  missing and all line breaks collapsed. The arguments to cast<>, dyn_cast<>
  and isa<> were restored from the declared variable types. The SmallVector
  inline capacity (4), the indentation, and the intra-line spacing of
  context lines are assumptions; verify them against upstream before
  applying.

diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 226b4732a495..fb5861c7a192 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -221,23 +221,67 @@ unsigned CodeMetrics::CountCodeReductionForConstant(Value *V) {
 unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) {
   if (!V->getType()->isPointerTy()) return 0; // Not a pointer
   unsigned Reduction = 0;
-  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
-    Instruction *I = cast<Instruction>(*UI);
-    if (isa<LoadInst>(I) || isa<StoreInst>(I))
-      Reduction += InlineConstants::InstrCost;
-    else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
-      // If the GEP has variable indices, we won't be able to do much with it.
-      if (GEP->hasAllConstantIndices())
-        Reduction += CountCodeReductionForAlloca(GEP);
-    } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
-      // Track pointer through bitcasts.
-      Reduction += CountCodeReductionForAlloca(BCI);
-    } else {
-      // If there is some other strange instruction, we're not going to be able
-      // to do much if we inline this.
-      return 0;
+
+  SmallVector<Value *, 4> Worklist;
+  Worklist.push_back(V);
+  do {
+    Value *V = Worklist.pop_back_val();
+    for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+         UI != E; ++UI){
+      Instruction *I = cast<Instruction>(*UI);
+      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+        if (!LI->isSimple())
+          return 0;
+        Reduction += InlineConstants::InstrCost;
+      } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+        if (!SI->isSimple())
+          return 0;
+        Reduction += InlineConstants::InstrCost;
+      } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+        // If the GEP has variable indices, we won't be able to do much with it.
+        if (!GEP->hasAllConstantIndices())
+          return 0;
+        // A non-zero GEP will likely become a mask operation after SROA.
+        if (GEP->hasAllZeroIndices())
+          Reduction += InlineConstants::InstrCost;
+        Worklist.push_back(GEP);
+      } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
+        // Track pointer through bitcasts.
+        Worklist.push_back(BCI);
+        Reduction += InlineConstants::InstrCost;
+      } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+        // SROA can handle a select of alloca iff all uses of the alloca are
+        // loads, and dereferenceable. We assume it's dereferenceable since
+        // we're told the input is an alloca.
+        for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
+             UI != UE; ++UI) {
+          LoadInst *LI = dyn_cast<LoadInst>(*UI);
+          if (LI == 0 || !LI->isSimple()) return 0;
+        }
+        // We don't know whether we'll be deleting the rest of the chain of
+        // instructions from the SelectInst on, because we don't know whether
+        // the other side of the select is also an alloca or not.
+        continue;
+      } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+        switch (II->getIntrinsicID()) {
+        default:
+          return 0;
+        case Intrinsic::memset:
+        case Intrinsic::memcpy:
+        case Intrinsic::memmove:
+        case Intrinsic::lifetime_start:
+        case Intrinsic::lifetime_end:
+          // SROA can usually chew through these intrinsics.
+          Reduction += InlineConstants::InstrCost;
+          break;
+        }
+      } else {
+        // If there is some other strange instruction, we're not going to be
+        // able to do much if we inline this.
+        return 0;
+      }
     }
-  }
+  } while (!Worklist.empty());
 
   return Reduction;
 }
diff --git a/llvm/test/Transforms/Inline/alloca-bonus.ll b/llvm/test/Transforms/Inline/alloca-bonus.ll
new file mode 100644
index 000000000000..2587ae136340
--- /dev/null
+++ b/llvm/test/Transforms/Inline/alloca-bonus.ll
@@ -0,0 +1,44 @@
+; RUN: opt -inline < %s -S -o - -inline-threshold=8 | FileCheck %s
+
+declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr)
+
+@glbl = external global i32
+
+define void @outer1() {
+; CHECK: @outer1
+; CHECK-NOT: call void @inner1
+  %ptr = alloca i32
+  call void @inner1(i32* %ptr)
+  ret void
+}
+
+define void @inner1(i32 *%ptr) {
+  %A = load i32* %ptr
+  store i32 0, i32* %ptr
+  %C = getelementptr i32* %ptr, i32 0
+  %D = getelementptr i32* %ptr, i32 1
+  %E = bitcast i32* %ptr to i8*
+  %F = select i1 false, i32* %ptr, i32* @glbl
+  call void @llvm.lifetime.start(i64 0, i8* %E)
+  ret void
+}
+
+define void @outer2() {
+; CHECK: @outer2
+; CHECK: call void @inner2
+  %ptr = alloca i32
+  call void @inner2(i32* %ptr)
+  ret void
+}
+
+; %D poisons this call, scalar-repl can't handle that instruction.
+define void @inner2(i32 *%ptr) {
+  %A = load i32* %ptr
+  store i32 0, i32* %ptr
+  %C = getelementptr i32* %ptr, i32 0
+  %D = getelementptr i32* %ptr, i32 %A
+  %E = bitcast i32* %ptr to i8*
+  %F = select i1 false, i32* %ptr, i32* @glbl
+  call void @llvm.lifetime.start(i64 0, i8* %E)
+  ret void
+}