[LoopInterchange] Fix legality for triangular loops

This is a bug fix in the legality check.

When we encounter triangular loops of the following forms:
    for (int i = 0; i < m; i++)
      for (int j = 0; j < i; j++), or

    for (int i = 0; i < m; i++)
      for (int j = 0; j*i < n; j++),

we should not perform interchange since the number of executions of the loop body
will be different before and after interchange, resulting in incorrect results.

Reviewed By: bmahjour

Differential Revision: https://reviews.llvm.org/D101305
This commit is contained in:
Congzhe Cao 2021-05-11 10:59:50 -04:00 committed by CongzheUalberta
parent c58912eca7
commit 29342291d2
2 changed files with 211 additions and 0 deletions

View File

@ -656,6 +656,65 @@ bool LoopInterchangeLegality::isLoopStructureUnderstood(
return false;
}
}
// TODO: Handle triangular loops of the forms below. For now they are
// rejected by the inner loop exit condition check that follows.
// e.g. for(int i=0;i<N;i++)
//        for(int j=0;j<i;j++)
// or,
//      for(int i=0;i<N;i++)
//        for(int j=0;j*i<N;j++)
BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
BranchInst *InnerLoopLatchBI =
dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
if (!InnerLoopLatchBI->isConditional())
return false;
if (CmpInst *InnerLoopCmp =
dyn_cast<CmpInst>(InnerLoopLatchBI->getCondition())) {
Value *Op0 = InnerLoopCmp->getOperand(0);
Value *Op1 = InnerLoopCmp->getOperand(1);
// LHS and RHS of the inner loop exit condition, e.g.,
// in "for(int j=0;j<i;j++)", LHS is j and RHS is i.
Value *Left = nullptr;
Value *Right = nullptr;
// Decide whether a value is built solely from the inner loop induction
// variable and constants. A value qualifies when it is InnerInduction
// itself, a Constant, a cast whose source operand qualifies, or a binary
// operator whose two operands both qualify. Everything else is rejected.
// A bare Constant qualifies as well, so a caller that needs the induction
// variable to actually be involved has to rule constants out on its own.
std::function<bool(Value *)> IsPathToIndVar;
IsPathToIndVar = [&InnerInduction,
                  &IsPathToIndVar](Value *Candidate) -> bool {
  // Leaves of the expression tree: the induction variable or a constant.
  if (Candidate == InnerInduction || isa<Constant>(Candidate))
    return true;
  // Only instructions can be looked through any further.
  auto *Inst = dyn_cast<Instruction>(Candidate);
  if (!Inst)
    return false;
  // A cast is as good as the value it converts.
  if (isa<CastInst>(Inst))
    return IsPathToIndVar(Inst->getOperand(0));
  // Any instruction other than a cast or a binary operator ends the walk.
  if (!isa<BinaryOperator>(Inst))
    return false;
  // A binary operator qualifies only when both of its inputs do.
  return IsPathToIndVar(Inst->getOperand(0)) &&
         IsPathToIndVar(Inst->getOperand(1));
};
if (IsPathToIndVar(Op0) && !isa<Constant>(Op0)) {
Left = Op0;
Right = Op1;
} else if (IsPathToIndVar(Op1) && !isa<Constant>(Op1)) {
Left = Op1;
Right = Op0;
}
if (Left == nullptr)
return false;
const SCEV *S = SE->getSCEV(Right);
if (!SE->isLoopInvariant(S, OuterLoop))
return false;
}
return true;
}

View File

@ -0,0 +1,152 @@
; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info \
; RUN: -S -debug 2>&1 | FileCheck %s
; 100 x 100 matrix of i64 that every test function below indexes as A[j][i].
@A = common global [100 x [100 x i64]] zeroinitializer
; Loop bound (initial value 100) that @interchange_04 loads before its loop nest.
@N = dso_local local_unnamed_addr global i64 100, align 8
;; for(int i=0;i<100;i++)
;; for(int j=0;j<i;j++)
;; A[j][i] = A[j][i]+k;
;; Inner loop induction variable exit condition depends on the
;; outer loop induction variable, i.e., triangular loops.
; CHECK: Loop structure not understood by pass
; CHECK: Not interchanging loops. Cannot prove legality.
define void @interchange_01(i64 %k) {
; Two-deep loop nest: the outer loop (header for1.header, latch for1.inc10)
; carries %i; the inner loop is the single block for2 and carries %j.
; Each visited element A[j][i] is incremented by the argument %k.
entry:
br label %for1.header
for1.header:
; Outer induction variable, starting at 0.
%i = phi i64 [ 0, %entry ], [ %i.next, %for1.inc10 ]
br label %for2
for2:
; Inner induction variable, restarted at 0 on every outer iteration.
%j = phi i64 [ %j.next, %for2 ], [ 0, %for1.header ]
%arrayidx5 = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @A, i64 0, i64 %j, i64 %i
%lv = load i64, i64* %arrayidx5
%add = add nsw i64 %lv, %k
store i64 %add, i64* %arrayidx5
%j.next = add nuw nsw i64 %j, 1
; Inner exit test: the inner induction %j is on the left and the outer
; induction %i on the right, so the bound changes with each outer iteration.
%exitcond = icmp eq i64 %j, %i
br i1 %exitcond, label %for1.inc10, label %for2
for1.inc10:
%i.next = add nuw nsw i64 %i, 1
; Outer exit test against the constant 99.
%exitcond26 = icmp eq i64 %i, 99
br i1 %exitcond26, label %for.end12, label %for1.header
for.end12:
ret void
}
;; for(int i=0;i<100;i++)
;; for(int j=0;j+i<100;j++)
;; A[j][i] = A[j][i]+k;
;; Inner loop induction variable exit condition depends on the
;; outer loop induction variable, i.e., triangular loops.
; CHECK: Loop structure not understood by pass
; CHECK: Not interchanging loops. Cannot prove legality.
define void @interchange_02(i64 %k) {
; Same loop nest shape as @interchange_01 (outer %i, inner %j, A[j][i] += %k),
; but the inner exit test is on the sum of both induction variables.
entry:
br label %for1.header
for1.header:
; Outer induction variable, starting at 0.
%i = phi i64 [ 0, %entry ], [ %i.next, %for1.inc10 ]
br label %for2
for2:
; Inner induction variable, restarted at 0 on every outer iteration.
%j = phi i64 [ %j.next, %for2 ], [ 0, %for1.header ]
%arrayidx5 = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @A, i64 0, i64 %j, i64 %i
%lv = load i64, i64* %arrayidx5
%add = add nsw i64 %lv, %k
store i64 %add, i64* %arrayidx5
; j + i: mixes the inner and the outer induction variable in one value.
%0 = add nuw nsw i64 %j, %i
%j.next = add nuw nsw i64 %j, 1
; Inner exit test: the mixed value j + i is compared against the constant 100,
; so neither side of the compare depends on the inner induction alone.
%exitcond = icmp eq i64 %0, 100
br i1 %exitcond, label %for1.inc10, label %for2
for1.inc10:
%i.next = add nuw nsw i64 %i, 1
; Outer exit test against the constant 99.
%exitcond26 = icmp eq i64 %i, 99
br i1 %exitcond26, label %for.end12, label %for1.header
for.end12:
ret void
}
;; for(int i=0;i<100;i++)
;; for(int j=0;i>j;j++)
;; A[j][i] = A[j][i]+k;
;; Inner loop induction variable exit condition depends on the
;; outer loop induction variable, i.e., triangular loops.
; CHECK: Loop structure not understood by pass
; CHECK: Not interchanging loops. Cannot prove legality.
define void @interchange_03(i64 %k) {
; Same loop nest shape as @interchange_01 (outer %i, inner %j, A[j][i] += %k),
; but the inner compare has its operands swapped and uses the inverse
; predicate with swapped branch targets.
entry:
br label %for1.header
for1.header:
; Outer induction variable, starting at 0.
%i = phi i64 [ 0, %entry ], [ %i.next, %for1.inc10 ]
br label %for2
for2:
; Inner induction variable, restarted at 0 on every outer iteration.
%j = phi i64 [ %j.next, %for2 ], [ 0, %for1.header ]
%arrayidx5 = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @A, i64 0, i64 %j, i64 %i
%lv = load i64, i64* %arrayidx5
%add = add nsw i64 %lv, %k
store i64 %add, i64* %arrayidx5
%j.next = add nuw nsw i64 %j, 1
; Inner continue test: the outer induction %i is the first operand and the
; inner induction %j the second; the loop repeats while they differ.
%exitcond = icmp ne i64 %i, %j
br i1 %exitcond, label %for2, label %for1.inc10
for1.inc10:
%i.next = add nuw nsw i64 %i, 1
; Outer exit test against the constant 99.
%exitcond26 = icmp eq i64 %i, 99
br i1 %exitcond26, label %for.end12, label %for1.header
for.end12:
ret void
}
;; for(int i=0;i<100;i++)
;; for(int j=0;N>j;j++)
;; A[j][i] = A[j][i]+k;
;; Inner loop induction variable exit condition depends on
;; an outer loop invariant, can do interchange.
; CHECK: Loops interchanged
define void @interchange_04(i64 %k) {
; Same loop nest shape as @interchange_03 (outer %i, inner %j, A[j][i] += %k),
; but the inner bound is a value loaded once before the nest instead of the
; outer induction variable. The test expects this nest to be interchanged.
entry:
; Bound read from @N in the entry block, i.e. outside both loops.
%0 = load i64, i64* @N, align 4
br label %for1.header
for1.header:
; Outer induction variable, starting at 0.
%i = phi i64 [ 0, %entry ], [ %i.next, %for1.inc10 ]
br label %for2
for2:
; Inner induction variable, restarted at 0 on every outer iteration.
%j = phi i64 [ %j.next, %for2 ], [ 0, %for1.header ]
%arrayidx5 = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @A, i64 0, i64 %j, i64 %i
%lv = load i64, i64* %arrayidx5
%add = add nsw i64 %lv, %k
store i64 %add, i64* %arrayidx5
%j.next = add nuw nsw i64 %j, 1
; Inner continue test: the loaded bound %0 is the first operand and the inner
; induction %j the second; the loop repeats while they differ.
%exitcond = icmp ne i64 %0, %j
br i1 %exitcond, label %for2, label %for1.inc10
for1.inc10:
%i.next = add nuw nsw i64 %i, 1
; Outer exit test against the constant 99.
%exitcond26 = icmp eq i64 %i, 99
br i1 %exitcond26, label %for.end12, label %for1.header
for.end12:
ret void
}