forked from OSchip/llvm-project
[IndVarSimplify] Widen loop compare instructions.
This improves other optimizations such as LSR. A sext may be added to the compare's other operand, but this can often be hoisted outside of the loop. llvm-svn: 217953
This commit is contained in:
parent
ebdf90ca00
commit
bb99f40530
|
@ -762,6 +762,8 @@ protected:
|
||||||
|
|
||||||
Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
|
Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
|
||||||
|
|
||||||
|
bool WidenLoopCompare(NarrowIVDefUse DU);
|
||||||
|
|
||||||
void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
|
void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
|
||||||
};
|
};
|
||||||
} // anonymous namespace
|
} // anonymous namespace
|
||||||
|
@ -926,6 +928,32 @@ static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT) {
|
||||||
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
|
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// If the narrow use is a compare instruction, then widen the compare
|
||||||
|
/// (and possibly the other operand).  The extend operation is hoisted into the
|
||||||
|
/// loop preheader as far as possible.
///
/// \param DU the narrow def/use pair being examined; DU.NarrowUse is the
///           candidate compare and DU.WideDef is the widened replacement for
///           DU.NarrowDef.
/// \returns false (leaving the IR untouched) when DU.NarrowUse is not an
///          ICmpInst; true after the compare has been rewritten to use
///          DU.WideDef.
|
||||||
|
bool WidenIV::WidenLoopCompare(NarrowIVDefUse DU) {
|
||||||
|
ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
|
||||||
|
if (!Cmp)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Select the compare operand that is not the narrow IV definition: operand 1
// when operand 0 is DU.NarrowDef, otherwise operand 0.
Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
|
||||||
|
unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
|
||||||
|
unsigned IVWidth = SE->getTypeSizeInBits(WideType);
|
||||||
|
// The other operand is only ever extended below, never truncated.
assert (CastWidth <= IVWidth && "Unexpected width while widening compare.");
|
||||||
|
|
||||||
|
// Widen the compare instruction.
|
||||||
|
// NOTE(review): Builder is not referenced again in this function -- confirm
// whether it is still needed.
IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
|
||||||
|
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
|
||||||
|
|
||||||
|
// Widen the other operand of the compare, if necessary.
|
||||||
|
if (CastWidth < IVWidth) {
|
||||||
|
// The extension kind follows the signedness of the compare predicate.
// NOTE(review): confirm this agrees with how DU.WideDef was extended from
// DU.NarrowDef; nothing in this function checks that the two match.
bool IsSigned = CmpInst::isSigned(Cmp->getPredicate());
|
||||||
|
Value *ExtOp = getExtend(Op, WideType, IsSigned, Cmp);
|
||||||
|
DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/// WidenIVUse - Determine whether an individual user of the narrow IV can be
|
/// WidenIVUse - Determine whether an individual user of the narrow IV can be
|
||||||
/// widened. If so, return the wide clone of the user.
|
/// widened. If so, return the wide clone of the user.
|
||||||
Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
|
Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
|
||||||
|
@ -993,10 +1021,15 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
|
||||||
|
|
||||||
// Does this user itself evaluate to a recurrence after widening?
|
// Does this user itself evaluate to a recurrence after widening?
|
||||||
const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse);
|
const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse);
|
||||||
if (!WideAddRec) {
|
if (!WideAddRec)
|
||||||
WideAddRec = GetExtendedOperandRecurrence(DU);
|
WideAddRec = GetExtendedOperandRecurrence(DU);
|
||||||
}
|
|
||||||
if (!WideAddRec) {
|
if (!WideAddRec) {
|
||||||
|
// If use is a loop condition, try to promote the condition instead of
|
||||||
|
// truncating the IV first.
|
||||||
|
if (WidenLoopCompare(DU))
|
||||||
|
return nullptr;
|
||||||
|
|
||||||
// This user does not evaluate to a recurrence after widening, so don't
|
// This user does not evaluate to a recurrence after widening, so don't
|
||||||
// follow it. Instead insert a Trunc to kill off the original use,
|
// follow it. Instead insert a Trunc to kill off the original use,
|
||||||
// eventually isolating the original narrow IV so it can be removed.
|
// eventually isolating the original narrow IV so it can be removed.
|
||||||
|
|
|
@ -229,10 +229,11 @@ entry:
|
||||||
; loop and the OR instruction is replaced by an ADD keeping the result
|
; loop and the OR instruction is replaced by an ADD keeping the result
|
||||||
; equivalent.
|
; equivalent.
|
||||||
;
|
;
|
||||||
|
; CHECK: sext
|
||||||
; CHECK: loop:
|
; CHECK: loop:
|
||||||
; CHECK: phi i64
|
; CHECK: phi i64
|
||||||
; CHECK-NOT: sext
|
; CHECK-NOT: sext
|
||||||
; CHECK: icmp slt i32
|
; CHECK: icmp slt i64
|
||||||
; CHECK: exit:
|
; CHECK: exit:
|
||||||
; CHECK: add i64
|
; CHECK: add i64
|
||||||
loop:
|
loop:
|
||||||
|
|
|
@ -380,11 +380,11 @@ for.body48: ; preds = %for.inc221, %for.bo
|
||||||
|
|
||||||
for.body65.lr.ph: ; preds = %for.body48
|
for.body65.lr.ph: ; preds = %for.body48
|
||||||
%0 = load i32* undef, align 4
|
%0 = load i32* undef, align 4
|
||||||
|
%1 = sext i32 %0 to i64
|
||||||
br label %for.body65.us
|
br label %for.body65.us
|
||||||
|
|
||||||
for.body65.us: ; preds = %for.inc219.us, %for.body65.lr.ph
|
for.body65.us: ; preds = %for.inc219.us, %for.body65.lr.ph
|
||||||
%k.09.us = phi i32 [ %inc.us, %for.inc219.us ], [ 1, %for.body65.lr.ph ]
|
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc219.us ], [ 1, %for.body65.lr.ph ]
|
||||||
%idxprom66.us = sext i32 %k.09.us to i64
|
|
||||||
br i1 undef, label %for.inc219.us, label %if.end72.us
|
br i1 undef, label %for.inc219.us, label %if.end72.us
|
||||||
|
|
||||||
if.end72.us: ; preds = %for.body65.us
|
if.end72.us: ; preds = %for.body65.us
|
||||||
|
@ -406,8 +406,8 @@ for.cond152.us: ; preds = %for.cond152.us, %fo
|
||||||
br i1 undef, label %for.cond139.loopexit.us, label %for.cond152.us
|
br i1 undef, label %for.cond139.loopexit.us, label %for.cond152.us
|
||||||
|
|
||||||
for.inc219.us: ; preds = %for.cond139.loopexit.us, %if.end110.us, %if.end93.us, %for.body65.us
|
for.inc219.us: ; preds = %for.cond139.loopexit.us, %if.end110.us, %if.end93.us, %for.body65.us
|
||||||
%inc.us = add nsw i32 %k.09.us, 1
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||||
%cmp64.us = icmp sgt i32 %inc.us, %0
|
%cmp64.us = icmp sgt i64 %indvars.iv.next, %1
|
||||||
br i1 %cmp64.us, label %for.inc221, label %for.body65.us
|
br i1 %cmp64.us, label %for.inc221, label %for.body65.us
|
||||||
|
|
||||||
for.cond139.loopexit.us: ; preds = %for.cond152.us
|
for.cond139.loopexit.us: ; preds = %for.cond152.us
|
||||||
|
|
|
@ -0,0 +1,166 @@
|
||||||
|
; RUN: opt < %s -indvars -S | FileCheck %s
|
||||||
|
target triple = "aarch64--linux-gnu"
|
||||||
|
|
||||||
|
; Check the loop exit i32 compare instruction and operand are widened to i64
|
||||||
|
; instead of truncating IV before its use in the i32 compare instruction.
|
||||||
|
|
||||||
|
@idx = common global i32 0, align 4
|
||||||
|
@e = common global i32 0, align 4
|
||||||
|
@ptr = common global i32* null, align 8
|
||||||
|
|
||||||
|
; CHECK-LABEL: @test1
|
||||||
|
; CHECK: for.body.lr.ph:
|
||||||
|
; CHECK: sext i32
|
||||||
|
; CHECK: for.cond:
|
||||||
|
; CHECK: icmp slt i64
|
||||||
|
; CHECK: for.body:
|
||||||
|
; CHECK: phi i64
|
||||||
|
|
||||||
|
define i32 @test1() {
|
||||||
|
entry:
|
||||||
|
store i32 -1, i32* @idx, align 4
|
||||||
|
%0 = load i32* @e, align 4
|
||||||
|
%cmp4 = icmp slt i32 %0, 0
|
||||||
|
br i1 %cmp4, label %for.end.loopexit, label %for.body.lr.ph
|
||||||
|
|
||||||
|
for.body.lr.ph:
|
||||||
|
%1 = load i32** @ptr, align 8
|
||||||
|
%2 = load i32* @e, align 4
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.cond:
|
||||||
|
%inc = add nsw i32 %i.05, 1
|
||||||
|
%cmp = icmp slt i32 %i.05, %2
|
||||||
|
br i1 %cmp, label %for.body, label %for.cond.for.end.loopexit_crit_edge
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
%i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.cond ]
|
||||||
|
%idxprom = sext i32 %i.05 to i64
|
||||||
|
%arrayidx = getelementptr inbounds i32* %1, i64 %idxprom
|
||||||
|
%3 = load i32* %arrayidx, align 4
|
||||||
|
%tobool = icmp eq i32 %3, 0
|
||||||
|
br i1 %tobool, label %if.then, label %for.cond
|
||||||
|
|
||||||
|
if.then:
|
||||||
|
%i.05.lcssa = phi i32 [ %i.05, %for.body ]
|
||||||
|
store i32 %i.05.lcssa, i32* @idx, align 4
|
||||||
|
br label %for.end
|
||||||
|
|
||||||
|
for.cond.for.end.loopexit_crit_edge:
|
||||||
|
br label %for.end.loopexit
|
||||||
|
|
||||||
|
for.end.loopexit:
|
||||||
|
br label %for.end
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
%4 = load i32* @idx, align 4
|
||||||
|
ret i32 %4
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: @test2
|
||||||
|
; CHECK: for.body4.us
|
||||||
|
; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||||
|
; CHECK: %cmp2.us = icmp slt i64
|
||||||
|
; CHECK-NOT: %2 = trunc i64 %indvars.iv.next to i32
|
||||||
|
; CHECK-NOT: %cmp2.us = icmp slt i32
|
||||||
|
|
||||||
|
define void @test2([8 x i8]* %a, i8* %b, i8 %limit) {
|
||||||
|
entry:
|
||||||
|
%conv = zext i8 %limit to i32
|
||||||
|
%cmp23 = icmp eq i8 %limit, 0
|
||||||
|
br i1 %cmp23, label %for.cond1.preheader, label %for.cond1.preheader.us
|
||||||
|
|
||||||
|
for.cond1.preheader.us:
|
||||||
|
%storemerge5.us = phi i32 [ 0, %entry ], [ %inc14.us, %for.inc13.us ]
|
||||||
|
br i1 true, label %for.body4.lr.ph.us, label %for.inc13.us
|
||||||
|
|
||||||
|
for.inc13.us:
|
||||||
|
%inc14.us = add nsw i32 %storemerge5.us, 1
|
||||||
|
%cmp.us = icmp slt i32 %inc14.us, 4
|
||||||
|
br i1 %cmp.us, label %for.cond1.preheader.us, label %for.end
|
||||||
|
|
||||||
|
for.body4.us:
|
||||||
|
%storemerge14.us = phi i32 [ 0, %for.body4.lr.ph.us ], [ %inc.us, %for.body4.us ]
|
||||||
|
%idxprom.us = sext i32 %storemerge14.us to i64
|
||||||
|
%arrayidx6.us = getelementptr inbounds [8 x i8]* %a, i64 %idxprom5.us, i64 %idxprom.us
|
||||||
|
%0 = load i8* %arrayidx6.us, align 1
|
||||||
|
%idxprom7.us = zext i8 %0 to i64
|
||||||
|
%arrayidx8.us = getelementptr inbounds i8* %b, i64 %idxprom7.us
|
||||||
|
%1 = load i8* %arrayidx8.us, align 1
|
||||||
|
store i8 %1, i8* %arrayidx6.us, align 1
|
||||||
|
%inc.us = add nsw i32 %storemerge14.us, 1
|
||||||
|
%cmp2.us = icmp slt i32 %inc.us, %conv
|
||||||
|
br i1 %cmp2.us, label %for.body4.us, label %for.inc13.us
|
||||||
|
|
||||||
|
for.body4.lr.ph.us:
|
||||||
|
%idxprom5.us = sext i32 %storemerge5.us to i64
|
||||||
|
br label %for.body4.us
|
||||||
|
|
||||||
|
for.cond1.preheader:
|
||||||
|
%storemerge5 = phi i32 [ 0, %entry ], [ %inc14, %for.inc13 ]
|
||||||
|
br i1 false, label %for.inc13, label %for.inc13
|
||||||
|
|
||||||
|
for.inc13:
|
||||||
|
%inc14 = add nsw i32 %storemerge5, 1
|
||||||
|
%cmp = icmp slt i32 %inc14, 4
|
||||||
|
br i1 %cmp, label %for.cond1.preheader, label %for.end
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: @test3
|
||||||
|
; CHECK: sext i32 %b
|
||||||
|
; CHECK: for.cond:
|
||||||
|
; CHECK: phi i64
|
||||||
|
; CHECK: icmp slt i64
|
||||||
|
|
||||||
|
define i32 @test3(i32* %a, i32 %b) {
|
||||||
|
entry:
|
||||||
|
br label %for.cond
|
||||||
|
|
||||||
|
for.cond:
|
||||||
|
%sum.0 = phi i32 [ 0, %entry ], [ %add, %for.body ]
|
||||||
|
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||||
|
%cmp = icmp slt i32 %i.0, %b
|
||||||
|
br i1 %cmp, label %for.body, label %for.end
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
%idxprom = sext i32 %i.0 to i64
|
||||||
|
%arrayidx = getelementptr inbounds i32* %a, i64 %idxprom
|
||||||
|
%0 = load i32* %arrayidx, align 4
|
||||||
|
%add = add nsw i32 %sum.0, %0
|
||||||
|
%inc = add nsw i32 %i.0, 1
|
||||||
|
br label %for.cond
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret i32 %sum.0
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: @test4
|
||||||
|
; CHECK: zext i32 %b
|
||||||
|
; CHECK: for.cond:
|
||||||
|
; CHECK: phi i64
|
||||||
|
; CHECK: icmp ule i64
|
||||||
|
|
||||||
|
define i32 @test4(i32* %a, i32 %b) {
|
||||||
|
entry:
|
||||||
|
br label %for.cond
|
||||||
|
|
||||||
|
for.cond:
|
||||||
|
%sum.0 = phi i32 [ 0, %entry ], [ %add, %for.body ]
|
||||||
|
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||||
|
%cmp = icmp ule i32 %i.0, %b
|
||||||
|
br i1 %cmp, label %for.body, label %for.end
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
%idxprom = sext i32 %i.0 to i64
|
||||||
|
%arrayidx = getelementptr inbounds i32* %a, i64 %idxprom
|
||||||
|
%0 = load i32* %arrayidx, align 4
|
||||||
|
%add = add nsw i32 %sum.0, %0
|
||||||
|
%inc = add nsw i32 %i.0, 1
|
||||||
|
br label %for.cond
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret i32 %sum.0
|
||||||
|
}
|
Loading…
Reference in New Issue