forked from OSchip/llvm-project
indvars: insert truncate at loop boundary to avoid redundant IVs.
When widening an IV to remove s/zext, we generally try to eliminate the original narrow IV. However, LCSSA phi nodes outside the loop were still using the original IV. Clean this up more aggressively to avoid redundancy in generated code. llvm-svn: 198338
This commit is contained in:
parent
4dcee6960f
commit
020dd898fc
|
@ -897,15 +897,24 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
|
||||||
return AddRec;
|
return AddRec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// This IV user cannot be widened. Replace this use of the original narrow IV
|
||||||
|
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
|
||||||
|
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT) {
|
||||||
|
IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
|
||||||
|
Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
|
||||||
|
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
|
||||||
|
}
|
||||||
|
|
||||||
/// WidenIVUse - Determine whether an individual user of the narrow IV can be
|
/// WidenIVUse - Determine whether an individual user of the narrow IV can be
|
||||||
/// widened. If so, return the wide clone of the user.
|
/// widened. If so, return the wide clone of the user.
|
||||||
Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
|
Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
|
||||||
|
|
||||||
// Stop traversing the def-use chain at inner-loop phis or post-loop phis.
|
// Stop traversing the def-use chain at inner-loop phis or post-loop phis.
|
||||||
if (isa<PHINode>(DU.NarrowUse) &&
|
if (isa<PHINode>(DU.NarrowUse) &&
|
||||||
LI->getLoopFor(DU.NarrowUse->getParent()) != L)
|
LI->getLoopFor(DU.NarrowUse->getParent()) != L) {
|
||||||
|
truncateIVUse(DU, DT);
|
||||||
return 0;
|
return 0;
|
||||||
|
}
|
||||||
// Our raison d'etre! Eliminate sign and zero extension.
|
// Our raison d'etre! Eliminate sign and zero extension.
|
||||||
if (IsSigned ? isa<SExtInst>(DU.NarrowUse) : isa<ZExtInst>(DU.NarrowUse)) {
|
if (IsSigned ? isa<SExtInst>(DU.NarrowUse) : isa<ZExtInst>(DU.NarrowUse)) {
|
||||||
Value *NewDef = DU.WideDef;
|
Value *NewDef = DU.WideDef;
|
||||||
|
@ -953,9 +962,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
|
||||||
// This user does not evaluate to a recurrence after widening, so don't
|
// This user does not evaluate to a recurrence after widening, so don't
|
||||||
// follow it. Instead insert a Trunc to kill off the original use,
|
// follow it. Instead insert a Trunc to kill off the original use,
|
||||||
// eventually isolating the original narrow IV so it can be removed.
|
// eventually isolating the original narrow IV so it can be removed.
|
||||||
IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
|
truncateIVUse(DU, DT);
|
||||||
Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
|
|
||||||
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
// Assume block terminators cannot evaluate to a recurrence. We can't to
|
// Assume block terminators cannot evaluate to a recurrence. We can't to
|
||||||
|
|
|
@ -0,0 +1,39 @@
|
||||||
|
; RUN: opt < %s -indvars -S | FileCheck %s
|
||||||
|
|
||||||
|
target triple = "x86_64-apple-darwin"
|
||||||
|
|
||||||
|
; CHECK-LABEL: @sloop
|
||||||
|
; CHECK-LABEL: B18:
|
||||||
|
; Only one phi now.
|
||||||
|
; CHECK: phi
|
||||||
|
; CHECK-NOT: phi
|
||||||
|
; We now get 2 trunc, one for the gep and one for the lcssa phi.
|
||||||
|
; CHECK: trunc i64 %indvars.iv to i32
|
||||||
|
; CHECK: trunc i64 %indvars.iv to i32
|
||||||
|
; CHECK-LABEL: B24:
|
||||||
|
define void @sloop(i32* %a) {
|
||||||
|
Prologue:
|
||||||
|
br i1 undef, label %B18, label %B6
|
||||||
|
|
||||||
|
B18: ; preds = %B24, %Prologue
|
||||||
|
%.02 = phi i32 [ 0, %Prologue ], [ %tmp33, %B24 ]
|
||||||
|
%tmp23 = zext i32 %.02 to i64
|
||||||
|
%tmp33 = add i32 %.02, 1
|
||||||
|
%o = getelementptr i32* %a, i32 %.02
|
||||||
|
%v = load i32* %o
|
||||||
|
%t = icmp eq i32 %v, 0
|
||||||
|
br i1 %t, label %exit24, label %B24
|
||||||
|
|
||||||
|
B24: ; preds = %B18
|
||||||
|
%t2 = icmp eq i32 %tmp33, 20
|
||||||
|
br i1 %t2, label %B6, label %B18
|
||||||
|
|
||||||
|
B6: ; preds = %Prologue
|
||||||
|
ret void
|
||||||
|
|
||||||
|
exit24: ; preds = %B18
|
||||||
|
call void @dummy(i32 %.02)
|
||||||
|
unreachable
|
||||||
|
}
|
||||||
|
|
||||||
|
declare void @dummy(i32)
|
Loading…
Reference in New Issue