Fix CountCodeReductionForAlloca to more accurately represent what SROA can and can't handle. Also, don't produce non-zero results for things which won't be transformed by SROA at all just because we saw the loads/stores before we saw the use of the address.

llvm-svn: 148536
commit e8415fea4b
parent c908b43d9f
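The second half of the fix is easiest to see in IR. In the sketch below (a hypothetical illustration following the same pattern as the @inner2 test added at the end of this commit; the function and value names are made up), the old code credited the load and store with InlineConstants::InstrCost each as it saw them and then merely skipped the variable-index GEP, so the alloca still produced a bonus even though SROA will never transform it. With this change, the walk returns 0 as soon as it hits the use SROA can't handle:

; Hypothetical example, not part of this commit:
define i32 @no_bonus(i32 %n) {
  %ptr = alloca i32
  store i32 7, i32* %ptr                  ; previously counted toward the bonus
  %v = load i32* %ptr                     ; previously counted toward the bonus
  %bad = getelementptr i32* %ptr, i32 %n  ; variable index: SROA gives up, so
  ret i32 %v                              ; the bonus is now 0 for this alloca
}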
@@ -221,23 +221,67 @@ unsigned CodeMetrics::CountCodeReductionForConstant(Value *V) {
 unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) {
   if (!V->getType()->isPointerTy()) return 0;  // Not a pointer
   unsigned Reduction = 0;
-  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
-    Instruction *I = cast<Instruction>(*UI);
-    if (isa<LoadInst>(I) || isa<StoreInst>(I))
-      Reduction += InlineConstants::InstrCost;
-    else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
-      // If the GEP has variable indices, we won't be able to do much with it.
-      if (GEP->hasAllConstantIndices())
-        Reduction += CountCodeReductionForAlloca(GEP);
-    } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
-      // Track pointer through bitcasts.
-      Reduction += CountCodeReductionForAlloca(BCI);
-    } else {
-      // If there is some other strange instruction, we're not going to be able
-      // to do much if we inline this.
-      return 0;
-    }
-  }
+
+  SmallVector<Value *, 4> Worklist;
+  Worklist.push_back(V);
+  do {
+    Value *V = Worklist.pop_back_val();
+    for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+         UI != E; ++UI){
+      Instruction *I = cast<Instruction>(*UI);
+      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+        if (!LI->isSimple())
+          return 0;
+        Reduction += InlineConstants::InstrCost;
+      } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+        if (!SI->isSimple())
+          return 0;
+        Reduction += InlineConstants::InstrCost;
+      } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+        // If the GEP has variable indices, we won't be able to do much with it.
+        if (!GEP->hasAllConstantIndices())
+          return 0;
+        // A non-zero GEP will likely become a mask operation after SROA.
+        if (GEP->hasAllZeroIndices())
+          Reduction += InlineConstants::InstrCost;
+        Worklist.push_back(GEP);
+      } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
+        // Track pointer through bitcasts.
+        Worklist.push_back(BCI);
+        Reduction += InlineConstants::InstrCost;
+      } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+        // SROA can handle a select of alloca iff all uses of the alloca are
+        // loads, and dereferenceable. We assume it's dereferenceable since
+        // we're told the input is an alloca.
+        for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
+             UI != UE; ++UI) {
+          LoadInst *LI = dyn_cast<LoadInst>(*UI);
+          if (LI == 0 || !LI->isSimple()) return 0;
+        }
+        // We don't know whether we'll be deleting the rest of the chain of
+        // instructions from the SelectInst on, because we don't know whether
+        // the other side of the select is also an alloca or not.
+        continue;
+      } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+        switch (II->getIntrinsicID()) {
+        default:
+          return 0;
+        case Intrinsic::memset:
+        case Intrinsic::memcpy:
+        case Intrinsic::memmove:
+        case Intrinsic::lifetime_start:
+        case Intrinsic::lifetime_end:
+          // SROA can usually chew through these intrinsics.
+          Reduction += InlineConstants::InstrCost;
+          break;
+        }
+      } else {
+        // If there is some other strange instruction, we're not going to be
+        // able to do much if we inline this.
+        return 0;
+      }
+    }
+  } while (!Worklist.empty());
 
   return Reduction;
 }
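The new SelectInst case encodes the rule stated in its comment: SROA can look through a select of an alloca only when every use of the select is a simple load. A hypothetical example of IR that passes that check (the commit's own tests only use a select whose result is dead; @glbl stands in for any external global):

@glbl = external global i32

define i32 @select_of_alloca(i1 %c) {
  %a = alloca i32
  store i32 1, i32* %a
  %p = select i1 %c, i32* %a, i32* @glbl  ; every use of %p is a simple load,
  %v = load i32* %p                       ; so the walk just continues past it
  ret i32 %v
}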
@@ -0,0 +1,44 @@
+; RUN: opt -inline < %s -S -o - -inline-threshold=8 | FileCheck %s
+
+declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr)
+
+@glbl = external global i32
+
+define void @outer1() {
+; CHECK: @outer1
+; CHECK-NOT: call void @inner1
+  %ptr = alloca i32
+  call void @inner1(i32* %ptr)
+  ret void
+}
+
+define void @inner1(i32 *%ptr) {
+  %A = load i32* %ptr
+  store i32 0, i32* %ptr
+  %C = getelementptr i32* %ptr, i32 0
+  %D = getelementptr i32* %ptr, i32 1
+  %E = bitcast i32* %ptr to i8*
+  %F = select i1 false, i32* %ptr, i32* @glbl
+  call void @llvm.lifetime.start(i64 0, i8* %E)
+  ret void
+}
+
+define void @outer2() {
+; CHECK: @outer2
+; CHECK: call void @inner2
+  %ptr = alloca i32
+  call void @inner2(i32* %ptr)
+  ret void
+}
+
+; %D poisons this call, scalar-repl can't handle that instruction.
+define void @inner2(i32 *%ptr) {
+  %A = load i32* %ptr
+  store i32 0, i32* %ptr
+  %C = getelementptr i32* %ptr, i32 0
+  %D = getelementptr i32* %ptr, i32 %A
+  %E = bitcast i32* %ptr to i8*
+  %F = select i1 false, i32* %ptr, i32* @glbl
+  call void @llvm.lifetime.start(i64 0, i8* %E)
+  ret void
+}
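For context on why @outer1 clears the low threshold: every use of %ptr in @inner1 is one the new walk credits, because SROA (plus dead-code elimination) is expected to remove them all once the call is inlined. A sketch of the plausible fully optimized result, which is an assumption about later passes rather than something this test checks:

; Assumed end state after inlining, SROA, and DCE (not verified by the test):
define void @outer1() {
  ret void
}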