Allow unsigned divisions

After zero-extend operations and unsigned comparisons we now allow
  unsigned divisions. The handling is basically the same as for signed
  division, except the interpretation of the operands. As the divisor
  has to be constant in both cases we can simply interpret it as an
  unsigned value without additional complexity in the representation.
  For the dividend we could choose from the different representation
  schemes introduced for zero-extend operations but for now we will
  simply use an assumption.

llvm-svn: 268032
This commit is contained in:
Johannes Doerfert 2016-04-29 11:53:35 +00:00
parent ba9725ff41
commit 172dd8b923
8 changed files with 298 additions and 41 deletions

View File

@ -392,10 +392,6 @@ __isl_give PWACtx SCEVAffinator::visitMulExpr(const SCEVMulExpr *Expr) {
return Prod;
}
// Stub visitor for unsigned divisions: the body consists solely of an
// llvm_unreachable carrying a "not yet supported" message.
// NOTE(review): the surrounding hunk header shrinks from 10 to 6 lines, so
// this looks like the definition the commit removes — confirm.
__isl_give PWACtx SCEVAffinator::visitUDivExpr(const SCEVUDivExpr *Expr) {
llvm_unreachable("SCEVUDivExpr not yet supported");
}
__isl_give PWACtx SCEVAffinator::visitAddRecExpr(const SCEVAddRecExpr *Expr) {
assert(Expr->isAffine() && "Only affine AddRecurrences allowed");
@ -452,6 +448,44 @@ __isl_give PWACtx SCEVAffinator::visitUMaxExpr(const SCEVUMaxExpr *Expr) {
llvm_unreachable("SCEVUMaxExpr not yet supported");
}
/// Translate an unsigned division into a piecewise affine function.
///
/// The divisor has to be a SCEVConstant (asserted below). Dividend and
/// divisor are translated with visit(), combined through isl_pw_aff_div and
/// the quotient is rounded with isl_pw_aff_floor.
///
/// @param Expr The unsigned division expression to translate.
///
/// @returns The PWACtx of the dividend after it has been combined with the
///          divisor and floored.
__isl_give PWACtx SCEVAffinator::visitUDivExpr(const SCEVUDivExpr *Expr) {
// The handling of unsigned division is basically the same as for signed
// division, except the interpretation of the operands. As the divisor
// has to be constant in both cases we can simply interpret it as an
// unsigned value without additional complexity in the representation.
// For the dividend we could choose from the different representation
// schemes introduced for zero-extend operations but for now we will
// simply use an assumption.
auto *Dividend = Expr->getLHS();
auto *Divisor = Expr->getRHS();
assert(isa<SCEVConstant>(Divisor) &&
"UDiv is no parameter but has a non-constant RHS.");
// Translate the dividend first, then the divisor.
auto DividendPWAC = visit(Dividend);
auto DivisorPWAC = visit(Divisor);
if (SE.isKnownNegative(Divisor)) {
// Interpret negative divisors unsigned. This is a special case of the
// piece-wise defined value described for zero-extends as we already know
// the actual value of the constant divisor.
// NOTE(review): getWidthExpValOnDomain presumably yields 2^Width on the
// given domain, so the addition below turns the negative constant into its
// unsigned value — confirm against the helper.
unsigned Width = TD.getTypeSizeInBits(Expr->getType());
// The domain is taken from a copy so DivisorPWAC.first can still be used in
// the addition.
auto *DivisorDom = isl_pw_aff_domain(isl_pw_aff_copy(DivisorPWAC.first));
auto *WidthExpPWA = getWidthExpValOnDomain(Width, DivisorDom);
DivisorPWAC.first = isl_pw_aff_add(DivisorPWAC.first, WidthExpPWA);
}
// TODO: One can represent the dividend as piece-wise function to be more
// precise but therefore a heuristic is needed.
// Assume a non-negative dividend.
takeNonNegativeAssumption(DividendPWAC);
// Divide, then floor the quotient; the result is kept in DividendPWAC.
combine(DividendPWAC, DivisorPWAC, isl_pw_aff_div);
DividendPWAC.first = isl_pw_aff_floor(DividendPWAC.first);
return DividendPWAC;
}
__isl_give PWACtx SCEVAffinator::visitSDivInstruction(Instruction *SDiv) {
assert(SDiv->getOpcode() == Instruction::SDiv && "Assumed SDiv instruction!");
auto *SE = S->getSE();

View File

@ -219,20 +219,6 @@ public:
return Return;
}
class ValidatorResult visitUDivExpr(const SCEVUDivExpr *Expr) {
ValidatorResult LHS = visit(Expr->getLHS());
ValidatorResult RHS = visit(Expr->getRHS());
// We currently do not represent an unsigned division as an affine
// expression. If the division is constant during Scop execution we treat it
// as a parameter, otherwise we bail out.
if (LHS.isConstant() && RHS.isConstant())
return ValidatorResult(SCEVType::PARAM, Expr);
DEBUG(dbgs() << "INVALID: unsigned division of non-constant expressions");
return ValidatorResult(SCEVType::INVALID);
}
class ValidatorResult visitAddRecExpr(const SCEVAddRecExpr *Expr) {
if (!Expr->isAffine()) {
DEBUG(dbgs() << "INVALID: AddRec is not affine");
@ -336,18 +322,43 @@ public:
return visitGenericInst(I, S);
}
ValidatorResult visitSDivInstruction(Instruction *SDiv, const SCEV *S) {
/// Validate a signed or unsigned division.
///
/// @param Dividend The SCEV of the dividend.
/// @param Divisor  The SCEV of the divisor.
/// @param DivExpr  The SCEV that stands for the whole division; it is used as
///                 the parameter when the division is treated as one.
/// @param SDiv     The signed division instruction, or nullptr for an
///                 unsigned division.
///
/// @returns The validation result of the dividend if the divisor is a
///          non-zero constant, otherwise the result of treating the whole
///          division as a parameter (or INVALID if that is not possible).
ValidatorResult visitDivision(const SCEV *Dividend, const SCEV *Divisor,
                              const SCEV *DivExpr,
                              Instruction *SDiv = nullptr) {
  // First check if we might be able to model the division, thus if the
  // divisor is a constant other than zero. If so, check the dividend,
  // otherwise check if the whole division can be seen as a parameter. A
  // constant zero divisor must not be modeled as a division, so it takes the
  // parameter path below as well.
  if (isa<SCEVConstant>(Divisor) && !Divisor->isZero())
    return visit(Dividend);

  // For signed divisions use the SDiv instruction to check for a parameter
  // division, for unsigned divisions check the operands.
  if (SDiv)
    return visitGenericInst(SDiv, DivExpr);

  ValidatorResult LHS = visit(Dividend);
  ValidatorResult RHS = visit(Divisor);
  if (LHS.isConstant() && RHS.isConstant())
    return ValidatorResult(SCEVType::PARAM, DivExpr);

  DEBUG(dbgs() << "INVALID: unsigned division of non-constant expressions");
  return ValidatorResult(SCEVType::INVALID);
}
/// Validate an unsigned division by forwarding its operands and the
/// expression itself to visitDivision.
ValidatorResult visitUDivExpr(const SCEVUDivExpr *Expr) {
  // No instruction is passed, so visitDivision sees its default nullptr.
  return visitDivision(Expr->getLHS(), Expr->getRHS(), Expr);
}
// Validate a signed division instruction by handing the SCEVs of its two
// operands, the division expression and the instruction itself to
// visitDivision.
// NOTE(review): this span comes from a diff rendering without +/- markers.
// The ConstantInt-based lines (which still refer to `S`) look like the
// removed pre-change body, while the two SE.getSCEV lines and the
// visitDivision call look like their replacement — confirm against the
// actual revision.
ValidatorResult visitSDivInstruction(Instruction *SDiv, const SCEV *Expr) {
assert(SDiv->getOpcode() == Instruction::SDiv &&
"Assumed SDiv instruction!");
auto *Divisor = SDiv->getOperand(1);
auto *CI = dyn_cast<ConstantInt>(Divisor);
if (!CI)
return visitGenericInst(SDiv, S);
auto *Dividend = SDiv->getOperand(0);
auto *DividendSCEV = SE.getSCEV(Dividend);
return visit(DividendSCEV);
// Operand 0 is the dividend, operand 1 the divisor.
auto *Dividend = SE.getSCEV(SDiv->getOperand(0));
auto *Divisor = SE.getSCEV(SDiv->getOperand(1));
return visitDivision(Dividend, Divisor, Expr, SDiv);
}
ValidatorResult visitSRemInstruction(Instruction *SRem, const SCEV *S) {

View File

@ -1,21 +1,14 @@
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s --check-prefix=SCOP
; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s
;
; This caused the code generation to emit a broken module as there are two
; dependences that need to be considered, thus code has to be emitted in a
; certain order:
; 1) To preload A[N * M] the expression N * M [p0] is needed (both for the
; condition under which A[N * M] is executed as well as to compute the
; index).
; 2) To generate (A[N * M] / 2) [p1] the preloaded value is needed.
; SCOP: Assumed Context:
; SCOP-NEXT: [p_0, tmp4] -> { : }
; SCOP-NEXT: Invalid Context:
; SCOP-NEXT: [p_0, tmp4] -> { : p_0 > 0 and tmp4 < 0 }
; SCOP-NEXT: p0: (%N * %M)
; SCOP-NEXT: p1: %tmp4
;
; SCOP: p0: (%N * %M)
; SCOP: p1: (%tmp4 /u 2)
;
; CHECK: polly.preload.merge:
; CHECK: %polly.preload.tmp4.merge = phi i32 [ %polly.access.A.load, %polly.preload.exec ], [ 0, %polly.preload.cond ]
; CHECK: %3 = lshr i32 %polly.preload.tmp4.merge, 1
; CHECK: %4 = sext i32 %0 to i64
; CHECK: polly.preload.merge:
;
; void f(int *restrict A, int *restrict B, int N, int M) {
;

View File

@ -0,0 +1,42 @@
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
;
; void f(int *A, unsigned N) {
; for (unsigned i = 0; i < N / 2; i++)
; A[i]++;
; }
;
; CHECK: Assumed Context:
; CHECK-NEXT: [N] -> { : }
; CHECK-NEXT: Invalid Context:
; CHECK-NEXT: [N] -> { : N < 0 }
;
; CHECK: Domain :=
; CHECK-NEXT: [N] -> { Stmt_for_body[i0] : i0 >= 0 and 2i0 <= -2 + N };
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Function under test: a loop that increments A[i] while the truncated
; induction variable differs from N / 2.
define void @f(i32* %A, i32 %N) {
entry:
; The unsigned N / 2 of the C source appears as a logical shift right by one.
%tmp = lshr i32 %N, 1
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
; The 64-bit induction variable is truncated to i32 before it is compared
; with the loop bound.
%lftr.wideiv = trunc i64 %indvars.iv to i32
%exitcond = icmp ne i32 %lftr.wideiv, %tmp
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
; A[i]++ as load, add and store on the same address.
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%tmp1 = load i32, i32* %arrayidx, align 4
%inc = add nsw i32 %tmp1, 1
store i32 %inc, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,43 @@
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
;
; void f(int *A, unsigned N) {
; for (unsigned i = 0; i < N / 2 + 3; i++)
; A[i]++;
; }
;
; CHECK: Assumed Context:
; CHECK-NEXT: [N] -> { : }
; CHECK-NEXT: Invalid Context:
; CHECK-NEXT: [N] -> { : N < 0 }
;
; CHECK: Domain :=
; CHECK-NEXT: [N] -> { Stmt_for_body[i0] : i0 >= 0 and 2i0 <= 4 + N };
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Function under test: a loop that increments A[i] while the truncated
; induction variable is unsigned-less-than N / 2 + 3.
define void @f(i32* %A, i32 %N) {
entry:
; The unsigned N / 2 of the C source appears as a logical shift right by one;
; the constant 3 is added afterwards.
%tmp = lshr i32 %N, 1
%tmp1 = add i32 %tmp, 3
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
; The 64-bit induction variable is truncated to i32 before the unsigned
; comparison with the loop bound.
%lftr.wideiv = trunc i64 %indvars.iv to i32
%exitcond = icmp ult i32 %lftr.wideiv, %tmp1
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
; A[i]++ as load, add and store on the same address.
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%tmp2 = load i32, i32* %arrayidx, align 4
%inc = add nsw i32 %tmp2, 1
store i32 %inc, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,42 @@
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
;
; void f(int *A, unsigned char N) {
; for (unsigned i = 0; i < N / -128; i++)
; A[i]++;
; }
;
; CHECK: Assumed Context:
; CHECK-NEXT: [N] -> { : }
; CHECK-NEXT: Invalid Context:
; CHECK-NEXT: [N] -> { : N < 0 }
;
; CHECK: Domain :=
; CHECK-NEXT: [N] -> { Stmt_for_body[i0] : i0 >= 0 and 128i0 <= -128 + N };
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Function under test: a loop that increments A[i] while the truncated
; induction variable differs from the 8-bit unsigned quotient N / -128.
define void @f(i32* %A, i8 %N) {
entry:
; The divisor is written as -128; read as an unsigned i8 value this is 128,
; which is the coefficient that shows up in the expected iteration domain.
%tmp = udiv i8 %N, -128
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
; The 64-bit induction variable is truncated to i8 before it is compared
; with the loop bound.
%lftr.wideiv = trunc i64 %indvars.iv to i8
%exitcond = icmp ne i8 %lftr.wideiv, %tmp
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
; A[i]++ as load, add and store on the same address.
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%tmp2 = load i32, i32* %arrayidx, align 4
%inc = add nsw i32 %tmp2, 1
store i32 %inc, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,43 @@
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
;
; void f(int *A, unsigned char N) {
; for (unsigned i = 0; i < (N / -128) + 3; i++)
; A[i]++;
; }
;
; CHECK: Assumed Context:
; CHECK-NEXT: [N] -> { : }
; CHECK-NEXT: Invalid Context:
; CHECK-NEXT: [N] -> { : N < 0 }
;
; CHECK: Domain :=
; CHECK-NEXT: [N] -> { Stmt_for_body[i0] : i0 >= 0 and 128i0 <= 256 + N };
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Function under test: a loop that increments A[i] while the truncated
; induction variable differs from the 8-bit value (N / -128) + 3.
define void @f(i32* %A, i8 %N) {
entry:
; The divisor is written as -128; read as an unsigned i8 value this is 128,
; which is the coefficient that shows up in the expected iteration domain.
; The constant 3 is added to the quotient afterwards.
%tmp1 = udiv i8 %N, -128
%tmp = add i8 %tmp1, 3
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
; The 64-bit induction variable is truncated to i8 before it is compared
; with the loop bound.
%lftr.wideiv = trunc i64 %indvars.iv to i8
%exitcond = icmp ne i8 %lftr.wideiv, %tmp
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
; A[i]++ as load, add and store on the same address.
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
%tmp2 = load i32, i32* %arrayidx, align 4
%inc = add nsw i32 %tmp2, 1
store i32 %inc, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,49 @@
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
;
; void f(int *A, unsigned N) {
; for (unsigned i = 0; i < N; i++)
; A[i / 3] = A[5 * N / 3];
; }
;
; CHECK: Invariant Accesses: {
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_for_body[i0] -> MemRef_A[o0] : -2 + 5N <= 3o0 <= 5N };
; CHECK-NEXT: Execution Context: [N] -> { : 0 < N <= 1844674407370955161 }
; CHECK-NEXT: }
;
; CHECK: Assumed Context:
; CHECK-NEXT: [N] -> { : }
; CHECK-NEXT: Invalid Context:
; CHECK-NEXT: [N] -> { : N >= 1844674407370955162 }
; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [N] -> { Stmt_for_body[i0] -> MemRef_A[o0] : -2 + i0 <= 3o0 <= i0 };
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Function under test: a loop running while the induction variable differs
; from N that copies A[5 * N / 3] into A[i / 3], with both divisions unsigned.
define void @f(i32* %A, i64 %N) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
%exitcond = icmp ne i64 %indvars.iv, %N
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
; Read index 5 * N / 3: it uses only %N, not the induction variable, and the
; expected output lists this load under the invariant accesses.
%mul = mul nsw i64 %N, 5
%div2 = udiv i64 %mul, 3
%arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %div2
%load = load i32, i32* %arrayidx2, align 4
; Write index i / 3: an unsigned division of the induction variable.
%div = udiv i64 %indvars.iv, 3
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %div
store i32 %load, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}