[LSR] Allow already invariant operand for ICmpZero matching [try 2]

Changes since initial commit:

* Wrapping a pointer in an SCEV unknown hides the base, and SCEV is only able to compute a subtraction when the bases are known to be equal. This results in a SCEVCouldNotCompute flowing forward and triggering asserts. Test case added in d767b392.
* isLoopInvariant returns true for instructions outside the loop, but not necessarily *above* the loop. Since this code is allowed to visit uses of an IV outside of a loop, we have to make sure the operands of the compare are both invariant and dominating the header. Test case added in 2aed3cdb.

Original commit message follows...

The ICmpZero matching is checking to see if the expression is loop invariant per SCEV and expandable. This allows expressions inside the loop which can be made loop invariant to be seamlessly expanded, but is overly conservative for expressions which already *are* loop invariant.

As a simple justification for why this is correct, consider a loop invariant urem as RHS vs an alternate function with that same urem wrapped inside a helper call. Why would it be legal to match the latter, but not the former?

Differential Revision: https://reviews.llvm.org/D129793
This commit is contained in:
Philip Reames 2022-07-15 13:23:10 -07:00 committed by Philip Reames
parent 4cd1c96d37
commit 6ab686eb86
3 changed files with 480 additions and 467 deletions

View File

@ -3341,6 +3341,21 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
Kind = LSRUse::ICmpZero;
S = SE.getMinusSCEV(N, S);
} else if (L->isLoopInvariant(NV) &&
(!isa<Instruction>(NV) ||
DT.dominates(cast<Instruction>(NV), L->getHeader())) &&
!NV->getType()->isPointerTy()) {
// If we can't generally expand the expression (e.g. it contains
// a divide), but it is already at a loop invariant point before the
// loop, wrap it in an unknown (to prevent the expander from trying
// to re-expand in a potentially unsafe way.) The restriction to
// integer types is required because the unknown hides the base, and
// SCEV can't compute the difference of two unknown pointers.
N = SE.getUnknown(NV);
N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
Kind = LSRUse::ICmpZero;
S = SE.getMinusSCEV(N, S);
assert(!isa<SCEVCouldNotCompute>(S));
}
// -1 and the negations of all interesting strides (except the negation

File diff suppressed because it is too large Load Diff

View File

@ -61,18 +61,16 @@ exit:
ret void
}
; FIXME: We could handle this case even though we don't know %M. The
; faulting instruction is already outside the loop!
define void @icmp_zero_urem_invariant(i64 %N, i64 %M, ptr %p) {
; CHECK-LABEL: @icmp_zero_urem_invariant(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[N:%.*]], [[M:%.*]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ [[UREM]], [[ENTRY:%.*]] ]
; CHECK-NEXT: store i64 0, ptr [[P:%.*]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 2
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], [[UREM]]
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -2
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
; CHECK: exit:
; CHECK-NEXT: ret void
@ -226,10 +224,10 @@ define void @icmp_zero_urem_vscale_mul8(i64 %N, ptr %p) {
; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[N:%.*]], [[MUL]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ [[UREM]], [[ENTRY:%.*]] ]
; CHECK-NEXT: store i64 0, ptr [[P:%.*]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 2
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], [[UREM]]
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -2
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
; CHECK: exit:
; CHECK-NEXT: ret void
@ -260,10 +258,10 @@ define void @icmp_zero_urem_vscale_mul64(i64 %N, ptr %p) {
; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[N:%.*]], [[MUL]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ [[UREM]], [[ENTRY:%.*]] ]
; CHECK-NEXT: store i64 0, ptr [[P:%.*]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 2
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], [[UREM]]
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -2
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
; CHECK: exit:
; CHECK-NEXT: ret void
@ -293,10 +291,10 @@ define void @icmp_zero_urem_vscale_shl3(i64 %N, ptr %p) {
; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[N:%.*]], [[SHL]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ [[UREM]], [[ENTRY:%.*]] ]
; CHECK-NEXT: store i64 0, ptr [[P:%.*]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 2
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], [[UREM]]
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -2
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
; CHECK: exit:
; CHECK-NEXT: ret void
@ -326,10 +324,10 @@ define void @icmp_zero_urem_vscale_shl6(i64 %N, ptr %p) {
; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[N:%.*]], [[SHL]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ [[UREM]], [[ENTRY:%.*]] ]
; CHECK-NEXT: store i64 0, ptr [[P:%.*]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 2
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], [[UREM]]
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -2
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
; CHECK: exit:
; CHECK-NEXT: ret void