forked from OSchip/llvm-project
[LoopInterchange][PR56275] Fix legality with negative dependence vectors
This is the 2nd patch of the two-patch series (D130188, D130189) that fix PR56275 (https://github.com/llvm/llvm-project/issues/56275) which is a missed opportunity for loop interchange. As follow-up on the dependence analysis (DA) patch D130188, this patch normalizes DA results in loop interchange, such that negative dependence vectors queried by loop interchange are reversed to be non-negative. Now all tests in PR56275 can get interchanged. Those tests are added in lit test as `pr56275.ll`. Reviewed By: kawashima-fj, bmahjour, Meinersbur, #loopoptwg Differential Revision: https://reviews.llvm.org/D130189
This commit is contained in:
parent
76be554931
commit
8dc4b2edfa
|
@ -86,7 +86,8 @@ static void printDepMatrix(CharMatrix &DepMatrix) {
|
|||
#endif
|
||||
|
||||
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
|
||||
Loop *L, DependenceInfo *DI) {
|
||||
Loop *L, DependenceInfo *DI,
|
||||
ScalarEvolution *SE) {
|
||||
using ValueVector = SmallVector<Value *, 16>;
|
||||
|
||||
ValueVector MemInstr;
|
||||
|
@ -125,6 +126,10 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
|
|||
// Track Output, Flow, and Anti dependencies.
|
||||
if (auto D = DI->depends(Src, Dst, true)) {
|
||||
assert(D->isOrdered() && "Expected an output, flow or anti dep.");
|
||||
// If the direction vector is negative, normalize it to
|
||||
// make it non-negative.
|
||||
if (D->normalize(SE))
|
||||
LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n");
|
||||
LLVM_DEBUG(StringRef DepType =
|
||||
D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
|
||||
dbgs() << "Found " << DepType
|
||||
|
@ -133,19 +138,7 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
|
|||
unsigned Levels = D->getLevels();
|
||||
char Direction;
|
||||
for (unsigned II = 1; II <= Levels; ++II) {
|
||||
const SCEV *Distance = D->getDistance(II);
|
||||
const SCEVConstant *SCEVConst =
|
||||
dyn_cast_or_null<SCEVConstant>(Distance);
|
||||
if (SCEVConst) {
|
||||
const ConstantInt *CI = SCEVConst->getValue();
|
||||
if (CI->isNegative())
|
||||
Direction = '<';
|
||||
else if (CI->isZero())
|
||||
Direction = '=';
|
||||
else
|
||||
Direction = '>';
|
||||
Dep.push_back(Direction);
|
||||
} else if (D->isScalar(II)) {
|
||||
if (D->isScalar(II)) {
|
||||
Direction = 'S';
|
||||
Dep.push_back(Direction);
|
||||
} else {
|
||||
|
@ -486,7 +479,7 @@ struct LoopInterchange {
|
|||
CharMatrix DependencyMatrix;
|
||||
Loop *OuterMostLoop = *(LoopList.begin());
|
||||
if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
|
||||
OuterMostLoop, DI)) {
|
||||
OuterMostLoop, DI, SE)) {
|
||||
LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,175 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -aa-pipeline=basic-aa -passes=loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
;; Test to make sure DA outputs the correction direction
|
||||
;; vector [< =] hence the loopnest is interchanged.
|
||||
;;
|
||||
;; void test1(unsigned a[restrict N1][N2],
|
||||
;; unsigned b[restrict N1][N2],
|
||||
;; unsigned c[restrict N1][N2]) {
|
||||
;; for (unsigned long i2 = 1; i2 < N2-1; i2++) {
|
||||
;; for (unsigned long i1 = 1; i1 < N1-1; i1++) {
|
||||
;; a[i1][i2+1] = b[i1][i2];
|
||||
;; c[i1][i2] = a[i1][i2];
|
||||
;; }
|
||||
;; }
|
||||
;; }
|
||||
|
||||
define void @test1(ptr noalias noundef %a, ptr noalias noundef %b, ptr noalias noundef %c) {
|
||||
; CHECK-LABEL: @test1(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br label [[LOOP2_HEADER_PREHEADER:%.*]]
|
||||
; CHECK: loop1.header.preheader:
|
||||
; CHECK-NEXT: br label [[LOOP1_HEADER:%.*]]
|
||||
; CHECK: loop1.header:
|
||||
; CHECK-NEXT: [[I2:%.*]] = phi i64 [ [[I2_INC:%.*]], [[LOOP1_LATCH:%.*]] ], [ 1, [[LOOP1_HEADER_PREHEADER:%.*]] ]
|
||||
; CHECK-NEXT: [[I2_ST:%.*]] = add i64 [[I2]], 1
|
||||
; CHECK-NEXT: [[I2_LD:%.*]] = add i64 [[I2]], 0
|
||||
; CHECK-NEXT: br label [[LOOP2_HEADER_SPLIT1:%.*]]
|
||||
; CHECK: loop2.header.preheader:
|
||||
; CHECK-NEXT: br label [[LOOP2_HEADER:%.*]]
|
||||
; CHECK: loop2.header:
|
||||
; CHECK-NEXT: [[I1:%.*]] = phi i64 [ [[TMP0:%.*]], [[LOOP2_HEADER_SPLIT:%.*]] ], [ 1, [[LOOP2_HEADER_PREHEADER]] ]
|
||||
; CHECK-NEXT: br label [[LOOP1_HEADER_PREHEADER]]
|
||||
; CHECK: loop2.header.split1:
|
||||
; CHECK-NEXT: [[I1_ST:%.*]] = add i64 [[I1]], 0
|
||||
; CHECK-NEXT: [[I1_LD:%.*]] = add i64 [[I1]], 0
|
||||
; CHECK-NEXT: [[A_ST:%.*]] = getelementptr inbounds [64 x i32], ptr [[A:%.*]], i64 [[I1_ST]], i64 [[I2_ST]]
|
||||
; CHECK-NEXT: [[A_LD:%.*]] = getelementptr inbounds [64 x i32], ptr [[A]], i64 [[I1_LD]], i64 [[I2_LD]]
|
||||
; CHECK-NEXT: [[B_LD:%.*]] = getelementptr inbounds [64 x i32], ptr [[B:%.*]], i64 [[I1]], i64 [[I2]]
|
||||
; CHECK-NEXT: [[C_ST:%.*]] = getelementptr inbounds [64 x i32], ptr [[C:%.*]], i64 [[I1]], i64 [[I2]]
|
||||
; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B_LD]], align 4
|
||||
; CHECK-NEXT: store i32 [[B_VAL]], ptr [[A_ST]], align 4
|
||||
; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A_LD]], align 4
|
||||
; CHECK-NEXT: store i32 [[A_VAL]], ptr [[C_ST]], align 4
|
||||
; CHECK-NEXT: [[I1_INC:%.*]] = add nuw nsw i64 [[I1]], 1
|
||||
; CHECK-NEXT: [[LOOP2_EXITCOND_NOT:%.*]] = icmp eq i64 [[I1_INC]], 63
|
||||
; CHECK-NEXT: br label [[LOOP1_LATCH]]
|
||||
; CHECK: loop2.header.split:
|
||||
; CHECK-NEXT: [[TMP0]] = add nuw nsw i64 [[I1]], 1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 63
|
||||
; CHECK-NEXT: br i1 [[TMP1]], label [[EXIT:%.*]], label [[LOOP2_HEADER]]
|
||||
; CHECK: loop1.latch:
|
||||
; CHECK-NEXT: [[I2_INC]] = add nuw nsw i64 [[I2]], 1
|
||||
; CHECK-NEXT: [[LOOP1_EXITCOND_NOT:%.*]] = icmp eq i64 [[I2_INC]], 63
|
||||
; CHECK-NEXT: br i1 [[LOOP1_EXITCOND_NOT]], label [[LOOP2_HEADER_SPLIT]], label [[LOOP1_HEADER]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
br label %loop1.header
|
||||
|
||||
loop1.header:
|
||||
%i2 = phi i64 [ 1, %entry ], [ %i2.inc, %loop1.latch ]
|
||||
%i2.st = add i64 %i2, 1
|
||||
%i2.ld = add i64 %i2, 0
|
||||
br label %loop2.header
|
||||
|
||||
loop2.header:
|
||||
%i1 = phi i64 [ 1, %loop1.header ], [ %i1.inc, %loop2.header ]
|
||||
%i1.st = add i64 %i1, 0
|
||||
%i1.ld = add i64 %i1, 0
|
||||
%a.st = getelementptr inbounds [64 x i32], ptr %a, i64 %i1.st, i64 %i2.st
|
||||
%a.ld = getelementptr inbounds [64 x i32], ptr %a, i64 %i1.ld, i64 %i2.ld
|
||||
%b.ld = getelementptr inbounds [64 x i32], ptr %b, i64 %i1, i64 %i2
|
||||
%c.st = getelementptr inbounds [64 x i32], ptr %c, i64 %i1, i64 %i2
|
||||
%b.val = load i32, ptr %b.ld, align 4
|
||||
store i32 %b.val, ptr %a.st, align 4 ; (X) store to a[i1][i2+1]
|
||||
%a.val = load i32, ptr %a.ld, align 4 ; (Y) load from a[i1][i2]
|
||||
store i32 %a.val, ptr %c.st, align 4
|
||||
%i1.inc = add nuw nsw i64 %i1, 1
|
||||
%loop2.exitcond.not = icmp eq i64 %i1.inc, 63
|
||||
br i1 %loop2.exitcond.not, label %loop1.latch, label %loop2.header
|
||||
|
||||
loop1.latch:
|
||||
%i2.inc = add nuw nsw i64 %i2, 1
|
||||
%loop1.exitcond.not = icmp eq i64 %i2.inc, 63
|
||||
br i1 %loop1.exitcond.not, label %exit, label %loop1.header
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
;; Semantically equivalent to test1() with only the difference
|
||||
;; of the order of a load and a store at (X) and (Y).
|
||||
;;
|
||||
;; Test to make sure DA outputs the correction direction
|
||||
;; vector [< =] hence the loopnest is interchanged.
|
||||
|
||||
define void @test2(ptr noalias noundef %a, ptr noalias noundef %b, ptr noalias noundef %c) {
|
||||
; CHECK-LABEL: @test2(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br label [[LOOP2_HEADER_PREHEADER:%.*]]
|
||||
; CHECK: loop1.header.preheader:
|
||||
; CHECK-NEXT: br label [[LOOP1_HEADER:%.*]]
|
||||
; CHECK: loop1.header:
|
||||
; CHECK-NEXT: [[I2:%.*]] = phi i64 [ [[I2_INC:%.*]], [[LOOP1_LATCH:%.*]] ], [ 1, [[LOOP1_HEADER_PREHEADER:%.*]] ]
|
||||
; CHECK-NEXT: [[I2_ST:%.*]] = add i64 [[I2]], 1
|
||||
; CHECK-NEXT: [[I2_LD:%.*]] = add i64 [[I2]], 0
|
||||
; CHECK-NEXT: br label [[LOOP2_HEADER_SPLIT1:%.*]]
|
||||
; CHECK: loop2.header.preheader:
|
||||
; CHECK-NEXT: br label [[LOOP2_HEADER:%.*]]
|
||||
; CHECK: loop2.header:
|
||||
; CHECK-NEXT: [[I1:%.*]] = phi i64 [ [[TMP0:%.*]], [[LOOP2_HEADER_SPLIT:%.*]] ], [ 1, [[LOOP2_HEADER_PREHEADER]] ]
|
||||
; CHECK-NEXT: br label [[LOOP1_HEADER_PREHEADER]]
|
||||
; CHECK: loop2.header.split1:
|
||||
; CHECK-NEXT: [[I1_ST:%.*]] = add i64 [[I1]], 0
|
||||
; CHECK-NEXT: [[I1_LD:%.*]] = add i64 [[I1]], 0
|
||||
; CHECK-NEXT: [[A_ST:%.*]] = getelementptr inbounds [64 x i32], ptr [[A:%.*]], i64 [[I1_ST]], i64 [[I2_ST]]
|
||||
; CHECK-NEXT: [[A_LD:%.*]] = getelementptr inbounds [64 x i32], ptr [[A]], i64 [[I1_LD]], i64 [[I2_LD]]
|
||||
; CHECK-NEXT: [[B_LD:%.*]] = getelementptr inbounds [64 x i32], ptr [[B:%.*]], i64 [[I1]], i64 [[I2]]
|
||||
; CHECK-NEXT: [[C_ST:%.*]] = getelementptr inbounds [64 x i32], ptr [[C:%.*]], i64 [[I1]], i64 [[I2]]
|
||||
; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B_LD]], align 4
|
||||
; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A_LD]], align 4
|
||||
; CHECK-NEXT: store i32 [[B_VAL]], ptr [[A_ST]], align 4
|
||||
; CHECK-NEXT: store i32 [[A_VAL]], ptr [[C_ST]], align 4
|
||||
; CHECK-NEXT: [[I1_INC:%.*]] = add nuw nsw i64 [[I1]], 1
|
||||
; CHECK-NEXT: [[LOOP2_EXITCOND_NOT:%.*]] = icmp eq i64 [[I1_INC]], 63
|
||||
; CHECK-NEXT: br label [[LOOP1_LATCH]]
|
||||
; CHECK: loop2.header.split:
|
||||
; CHECK-NEXT: [[TMP0]] = add nuw nsw i64 [[I1]], 1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 63
|
||||
; CHECK-NEXT: br i1 [[TMP1]], label [[EXIT:%.*]], label [[LOOP2_HEADER]]
|
||||
; CHECK: loop1.latch:
|
||||
; CHECK-NEXT: [[I2_INC]] = add nuw nsw i64 [[I2]], 1
|
||||
; CHECK-NEXT: [[LOOP1_EXITCOND_NOT:%.*]] = icmp eq i64 [[I2_INC]], 63
|
||||
; CHECK-NEXT: br i1 [[LOOP1_EXITCOND_NOT]], label [[LOOP2_HEADER_SPLIT]], label [[LOOP1_HEADER]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
br label %loop1.header
|
||||
|
||||
loop1.header:
|
||||
%i2 = phi i64 [ 1, %entry ], [ %i2.inc, %loop1.latch ]
|
||||
%i2.st = add i64 %i2, 1
|
||||
%i2.ld = add i64 %i2, 0
|
||||
br label %loop2.header
|
||||
|
||||
loop2.header:
|
||||
%i1 = phi i64 [ 1, %loop1.header ], [ %i1.inc, %loop2.header ]
|
||||
%i1.st = add i64 %i1, 0
|
||||
%i1.ld = add i64 %i1, 0
|
||||
%a.st = getelementptr inbounds [64 x i32], ptr %a, i64 %i1.st, i64 %i2.st
|
||||
%a.ld = getelementptr inbounds [64 x i32], ptr %a, i64 %i1.ld, i64 %i2.ld
|
||||
%b.ld = getelementptr inbounds [64 x i32], ptr %b, i64 %i1, i64 %i2
|
||||
%c.st = getelementptr inbounds [64 x i32], ptr %c, i64 %i1, i64 %i2
|
||||
%b.val = load i32, ptr %b.ld, align 4
|
||||
%a.val = load i32, ptr %a.ld, align 4 ; (Y) load from a[i1][i2]
|
||||
store i32 %b.val, ptr %a.st, align 4 ; (X) store to a[i1][i2+1]
|
||||
store i32 %a.val, ptr %c.st, align 4
|
||||
%i1.inc = add nuw nsw i64 %i1, 1
|
||||
%loop2.exitcond.not = icmp eq i64 %i1.inc, 63
|
||||
br i1 %loop2.exitcond.not, label %loop1.latch, label %loop2.header
|
||||
|
||||
loop1.latch:
|
||||
%i2.inc = add nuw nsw i64 %i2, 1
|
||||
%loop1.exitcond.not = icmp eq i64 %i2.inc, 63
|
||||
br i1 %loop1.exitcond.not, label %exit, label %loop1.header
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue