[ARM] Fix perf regression in compare optimization.

Fix a performance regression caused by r322737.

While trying to make it easier to replace compares with existing adds and
subtracts, I accidentally prevented that replacement in some cases.  This
change restores it and also fixes another potential bug in r322737.

Differential Revision: https://reviews.llvm.org/D42263

llvm-svn: 322972
This commit is contained in:
Joel Galenson 2018-01-19 17:46:27 +00:00
parent bfb02aec5a
commit dbc724f764
2 changed files with 34 additions and 3 deletions

View File

@ -2736,7 +2736,7 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
}
I = CmpInstr;
E = MI;
} else {
} else if (E != B) {
// Allow the loop below to search E (which was initially MI). Since MI and
// SubAdd have different tests, even if that instruction could not be MI, it
// could still potentially be SubAdd.
@ -2763,8 +2763,7 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
return false;
if (I == B)
// The 'and' is below the comparison instruction.
return false;
break;
}
// Return false if no candidates exist.

View File

@ -197,6 +197,38 @@ cont1:
declare void @external_fn(...) local_unnamed_addr #0
; are_equal: loops over %a1 and %a2 using a counter that starts at %n and is
; decremented by one each iteration, comparing the elements at the decremented
; index. Returns 1 if the value reaching while.end is less than 1, else 0.
;
; The assertions below require a `subs ..., #1` immediately followed by `bne`
; in the generated code; presumably the loop's compare-against-zero has been
; folded into the flag-setting subtract rather than emitted as a separate
; compare.
define i32 @are_equal(i32* nocapture readonly %a1, i32* nocapture readonly %a2, i32 %n) local_unnamed_addr #0 {
; CHECK-LABEL: are_equal
; CHECK: subs r{{[0-9]+}}, r{{[0-9]+}}, #1
; CHECK-NEXT: bne
entry:
; Skip the loop entirely when %n is zero.
%tobool7 = icmp eq i32 %n, 0
br i1 %tobool7, label %while.end, label %land.rhs.preheader
land.rhs.preheader:
br label %land.rhs
while.cond:
; Reached only after a matching pair; exit once the decremented counter hits
; zero, otherwise run another iteration.
%tobool = icmp eq i32 %dec9, 0
br i1 %tobool, label %while.end, label %land.rhs
land.rhs:
; %dec9.in is the counter before this iteration's decrement: %n on the first
; pass, the previous %dec9 afterwards.
%dec9.in = phi i32 [ %dec9, %while.cond ], [ %n, %land.rhs.preheader ]
; The decrement whose result is later tested against zero in while.cond.
%dec9 = add nsw i32 %dec9.in, -1
; Load the elements at index %dec9 from both arrays.
%arrayidx = getelementptr inbounds i32, i32* %a1, i32 %dec9
%0 = load i32, i32* %arrayidx, align 4
%arrayidx1 = getelementptr inbounds i32, i32* %a2, i32 %dec9
%1 = load i32, i32* %arrayidx1, align 4
; Equal elements continue to the loop test; a mismatch exits immediately.
%cmp = icmp eq i32 %0, %1
br i1 %cmp, label %while.cond, label %while.end
while.end:
; 0 when arriving from entry (%n == 0) or from while.cond (counter reached
; zero); the pre-decrement counter when arriving from a mismatch in land.rhs.
%n.addr.0.lcssa = phi i32 [ 0, %entry ], [ 0, %while.cond ], [ %dec9.in, %land.rhs ]
; Signed test "value < 1", widened to the i32 return value.
%cmp2 = icmp slt i32 %n.addr.0.lcssa, 1
%conv = zext i1 %cmp2 to i32
ret i32 %conv
}
declare void @llvm.trap() #2
declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) #1
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1