forked from OSchip/llvm-project
[DA] Fix direction vectors for weakZeroSrcSIV
Both the weakZeroSrcSIV and weakZeroDstSIV tests currently give the same direction vectors. Fix weakZeroSrcSIVtest by flipping the directions it gives.

Differential Revision: https://reviews.llvm.org/D46678
llvm-svn: 333658
This commit is contained in:
parent
6995821e90
commit
2911b3a07a
|
@ -1621,9 +1621,9 @@ bool isRemainderZero(const SCEVConstant *Dividend,
|
|||
//
|
||||
// If i is not an integer, there's no dependence.
|
||||
// If i < 0 or > UB, there's no dependence.
|
||||
// If i = 0, the direction is <= and peeling the
|
||||
// If i = 0, the direction is >= and peeling the
|
||||
// 1st iteration will break the dependence.
|
||||
// If i = UB, the direction is >= and peeling the
|
||||
// If i = UB, the direction is <= and peeling the
|
||||
// last iteration will break the dependence.
|
||||
// Otherwise, the direction is *.
|
||||
//
|
||||
|
@ -1657,7 +1657,7 @@ bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
|
|||
LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n");
|
||||
if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) {
|
||||
if (Level < CommonLevels) {
|
||||
Result.DV[Level].Direction &= Dependence::DVEntry::LE;
|
||||
Result.DV[Level].Direction &= Dependence::DVEntry::GE;
|
||||
Result.DV[Level].PeelFirst = true;
|
||||
++WeakZeroSIVsuccesses;
|
||||
}
|
||||
|
@ -1685,7 +1685,7 @@ bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
|
|||
if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
|
||||
// dependences caused by last iteration
|
||||
if (Level < CommonLevels) {
|
||||
Result.DV[Level].Direction &= Dependence::DVEntry::GE;
|
||||
Result.DV[Level].Direction &= Dependence::DVEntry::LE;
|
||||
Result.DV[Level].PeelLast = true;
|
||||
++WeakZeroSIVsuccesses;
|
||||
}
|
||||
|
|
|
@ -614,3 +614,49 @@ for.body: ; preds = %entry, %for.body
|
|||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
;; for(int i = 0; i < N; i+=1) {
|
||||
;; A[M*N*i] = 1;
|
||||
;; for(int j = 0; j < M; j+=1)
|
||||
;; A[M*N + M*i + j] = 2;
|
||||
|
||||
define void @couple_weakzerosiv(i32* noalias nocapture %A, i64 %N, i64 %M) {
|
||||
entry:
|
||||
%cmp29 = icmp sgt i64 %N, 0
|
||||
br i1 %cmp29, label %for.body.lr.ph, label %for.cond.cleanup
|
||||
|
||||
; CHECK-LABEL: couple_weakzerosiv
|
||||
; CHECK: da analyze - none!
|
||||
; CHECK: da analyze - output [p>]!
|
||||
; CHECK: da analyze - none!
|
||||
|
||||
for.body.lr.ph: ; preds = %entry
|
||||
%mul = mul nsw i64 %M, %N
|
||||
br label %for.body.us
|
||||
|
||||
for.body.us: ; preds = %for.body.lr.ph, %for.cond.cleanup4.loopexit.us
|
||||
%i.030.us = phi i64 [ %add12.us, %for.cond.cleanup4.loopexit.us ], [ 0, %for.body.lr.ph ]
|
||||
%mul1.us = mul nsw i64 %i.030.us, %mul
|
||||
%arrayidx.us = getelementptr inbounds i32, i32* %A, i64 %mul1.us
|
||||
store i32 1, i32* %arrayidx.us, align 4
|
||||
%mul6.us = mul nsw i64 %i.030.us, %M
|
||||
%add.us = add i64 %mul6.us, %mul
|
||||
br label %for.body5.us
|
||||
|
||||
for.body5.us: ; preds = %for.body5.us, %for.body.us
|
||||
%j.028.us = phi i64 [ 0, %for.body.us ], [ %add10.us, %for.body5.us ]
|
||||
%add8.us = add i64 %add.us, %j.028.us
|
||||
%arrayidx9.us = getelementptr inbounds i32, i32* %A, i64 %add8.us
|
||||
store i32 2, i32* %arrayidx9.us, align 4
|
||||
%add10.us = add nuw nsw i64 %j.028.us, 1
|
||||
%exitcond.us = icmp eq i64 %add10.us, %M
|
||||
br i1 %exitcond.us, label %for.cond.cleanup4.loopexit.us, label %for.body5.us
|
||||
|
||||
for.cond.cleanup4.loopexit.us: ; preds = %for.body5.us
|
||||
%add12.us = add nuw nsw i64 %i.030.us, 1
|
||||
%exitcond31.us = icmp eq i64 %add12.us, %N
|
||||
br i1 %exitcond31.us, label %for.cond.cleanup, label %for.body.us
|
||||
|
||||
for.cond.cleanup: ; preds = %for.cond.cleanup4.loopexit.us, %entry
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -5,6 +5,35 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
|
|||
target triple = "x86_64-apple-macosx10.6.0"
|
||||
|
||||
|
||||
;; for (int i = 0; i < N; i++) {
|
||||
;; A[i] = 1;
|
||||
;; A[0] = 2;
|
||||
|
||||
define void @dstzero(i32* nocapture %A, i32 %N) {
|
||||
entry:
|
||||
%cmp6 = icmp sgt i32 %N, 0
|
||||
br i1 %cmp6, label %for.body, label %for.cond.cleanup
|
||||
|
||||
; CHECK: da analyze - none!
|
||||
; CHECK: da analyze - output [p<=|<]!
|
||||
; CHECK: da analyze - consistent output [S]!
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%i.07 = phi i32 [ %add, %for.body ], [ 0, %entry ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.07
|
||||
store i32 0, i32* %arrayidx, align 4
|
||||
store i32 1, i32* %A, align 4
|
||||
%add = add nuw nsw i32 %i.07, 1
|
||||
%exitcond = icmp eq i32 %add, %N
|
||||
br i1 %exitcond, label %for.cond.cleanup, label %for.body
|
||||
|
||||
for.cond.cleanup: ; preds = %for.body, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
;; for (long unsigned i = 0; i < 30; i++) {
|
||||
;; A[2*i + 10] = i;
|
||||
;; *B++ = A[10];
|
||||
|
|
|
@ -5,6 +5,33 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
|
|||
target triple = "x86_64-apple-macosx10.6.0"
|
||||
|
||||
|
||||
;; for (int i = 0; i < N; i++) {
|
||||
;; A[0] = 1;
|
||||
;; A[i] = 2;
|
||||
|
||||
define void @dstzero(i32* nocapture %A, i32 %N) {
|
||||
entry:
|
||||
%cmp6 = icmp sgt i32 %N, 0
|
||||
br i1 %cmp6, label %for.body, label %for.cond.cleanup
|
||||
|
||||
; CHECK: da analyze - consistent output [S]!
|
||||
; CHECK: da analyze - output [p=>|<]!
|
||||
; CHECK: da analyze - none!
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%i.07 = phi i32 [ %add, %for.body ], [ 0, %entry ]
|
||||
store i32 0, i32* %A, align 4
|
||||
%arrayidx1 = getelementptr inbounds i32, i32* %A, i32 %i.07
|
||||
store i32 1, i32* %arrayidx1, align 4
|
||||
%add = add nuw nsw i32 %i.07, 1
|
||||
%exitcond = icmp eq i32 %add, %N
|
||||
br i1 %exitcond, label %for.cond.cleanup, label %for.body
|
||||
|
||||
for.cond.cleanup: ; preds = %for.body, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;; for (long unsigned i = 0; i < 30; i++) {
|
||||
;; A[10] = i;
|
||||
;; *B++ = A[2*i + 10];
|
||||
|
@ -14,7 +41,7 @@ entry:
|
|||
br label %for.body
|
||||
|
||||
; CHECK: da analyze - consistent output [S]!
|
||||
; CHECK: da analyze - flow [p<=|<]!
|
||||
; CHECK: da analyze - flow [p=>|<]!
|
||||
; CHECK: da analyze - confused!
|
||||
; CHECK: da analyze - none!
|
||||
; CHECK: da analyze - confused!
|
||||
|
@ -51,7 +78,7 @@ entry:
|
|||
br i1 %cmp1, label %for.end, label %for.body.preheader
|
||||
|
||||
; CHECK: da analyze - consistent output [S]!
|
||||
; CHECK: da analyze - flow [p<=|<]!
|
||||
; CHECK: da analyze - flow [p=>|<]!
|
||||
; CHECK: da analyze - confused!
|
||||
; CHECK: da analyze - none!
|
||||
; CHECK: da analyze - confused!
|
||||
|
@ -128,7 +155,7 @@ entry:
|
|||
br label %for.body
|
||||
|
||||
; CHECK: da analyze - consistent output [S]!
|
||||
; CHECK: da analyze - flow [=>p|<]!
|
||||
; CHECK: da analyze - flow [<=p|<]!
|
||||
; CHECK: da analyze - confused!
|
||||
; CHECK: da analyze - none!
|
||||
; CHECK: da analyze - confused!
|
||||
|
|
Loading…
Reference in New Issue