Allow unsigned divisions

After zero-extend operations and unsigned comparisons we now allow unsigned divisions. The handling is basically the same as for signed division, except the interpretation of the operands. As the divisor has to be constant in both cases we can simply interpret it as an unsigned value without additional complexity in the representation. For the dividend we could choose from the different representation schemes introduced for zero-extend operations but for now we will simply use an assumption. llvm-svn: 268032
2016-04-29 11:53:35 +00:00 · 2016-04-29 11:53:35 +00:00 · 172dd8b923
parent ba9725ff41
commit 172dd8b923
8 changed files with 298 additions and 41 deletions
--- a/polly/lib/Support/SCEVAffinator.cpp
+++ b/polly/lib/Support/SCEVAffinator.cpp
@ -392,10 +392,6 @@ __isl_give PWACtx SCEVAffinator::visitMulExpr(const SCEVMulExpr *Expr) {
  return Prod;
 }

-__isl_give PWACtx SCEVAffinator::visitUDivExpr(const SCEVUDivExpr *Expr) {
-  llvm_unreachable("SCEVUDivExpr not yet supported");
-}
-
 __isl_give PWACtx SCEVAffinator::visitAddRecExpr(const SCEVAddRecExpr *Expr) {
  assert(Expr->isAffine() && "Only affine AddRecurrences allowed");

@ -452,6 +448,44 @@ __isl_give PWACtx SCEVAffinator::visitUMaxExpr(const SCEVUMaxExpr *Expr) {
  llvm_unreachable("SCEVUMaxExpr not yet supported");
 }

+__isl_give PWACtx SCEVAffinator::visitUDivExpr(const SCEVUDivExpr *Expr) {
+  // The handling of unsigned division is basically the same as for signed
+  // division, except the interpretation of the operands. As the divisor
+  // has to be constant in both cases we can simply interpret it as an
+  // unsigned value without additional complexity in the representation.
+  // For the dividend we could choose from the different representation
+  // schemes introduced for zero-extend operations but for now we will
+  // simply use an assumption.
+  auto *Dividend = Expr->getLHS();
+  auto *Divisor = Expr->getRHS();
+  assert(isa<SCEVConstant>(Divisor) &&
+         "UDiv is no parameter but has a non-constant RHS.");
+
+  auto DividendPWAC = visit(Dividend);
+  auto DivisorPWAC = visit(Divisor);
+
+  if (SE.isKnownNegative(Divisor)) {
+    // Interpret negative divisors unsigned. This is a special case of the
+    // piece-wise defined value described for zero-extends as we already know
+    // the actual value of the constant divisor.
+    unsigned Width = TD.getTypeSizeInBits(Expr->getType());
+    auto *DivisorDom = isl_pw_aff_domain(isl_pw_aff_copy(DivisorPWAC.first));
+    auto *WidthExpPWA = getWidthExpValOnDomain(Width, DivisorDom);
+    DivisorPWAC.first = isl_pw_aff_add(DivisorPWAC.first, WidthExpPWA);
+  }
+
+  // TODO: One can represent the dividend as piece-wise function to be more
+  //       precise but therefor a heuristic is needed.
+
+  // Assume a non-negative dividend.
+  takeNonNegativeAssumption(DividendPWAC);
+
+  combine(DividendPWAC, DivisorPWAC, isl_pw_aff_div);
+  DividendPWAC.first = isl_pw_aff_floor(DividendPWAC.first);
+
+  return DividendPWAC;
+}
+
 __isl_give PWACtx SCEVAffinator::visitSDivInstruction(Instruction *SDiv) {
  assert(SDiv->getOpcode() == Instruction::SDiv && "Assumed SDiv instruction!");
  auto *SE = S->getSE();
--- a/polly/lib/Support/SCEVValidator.cpp
+++ b/polly/lib/Support/SCEVValidator.cpp
@ -219,20 +219,6 @@ public:
    return Return;
  }

-  class ValidatorResult visitUDivExpr(const SCEVUDivExpr *Expr) {
-    ValidatorResult LHS = visit(Expr->getLHS());
-    ValidatorResult RHS = visit(Expr->getRHS());
-
-    // We currently do not represent an unsigned division as an affine
-    // expression. If the division is constant during Scop execution we treat it
-    // as a parameter, otherwise we bail out.
-    if (LHS.isConstant() && RHS.isConstant())
-      return ValidatorResult(SCEVType::PARAM, Expr);
-
-    DEBUG(dbgs() << "INVALID: unsigned division of non-constant expressions");
-    return ValidatorResult(SCEVType::INVALID);
-  }
-
  class ValidatorResult visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    if (!Expr->isAffine()) {
      DEBUG(dbgs() << "INVALID: AddRec is not affine");
@ -336,18 +322,43 @@ public:
    return visitGenericInst(I, S);
  }

-  ValidatorResult visitSDivInstruction(Instruction *SDiv, const SCEV *S) {
+  ValidatorResult visitDivision(const SCEV *Dividend, const SCEV *Divisor,
+                                const SCEV *DivExpr,
+                                Instruction *SDiv = nullptr) {
+
+    // First check if we might be able to model the division, thus if the
+    // divisor is constant. If so, check the dividend, otherwise check if
+    // the whole division can be seen as a parameter.
+    if (isa<SCEVConstant>(Divisor))
+      return visit(Dividend);
+
+    // For signed divisions use the SDiv instruction to check for a parameter
+    // division, for unsigned divisions check the operands.
+    if (SDiv)
+      return visitGenericInst(SDiv, DivExpr);
+
+    ValidatorResult LHS = visit(Dividend);
+    ValidatorResult RHS = visit(Divisor);
+    if (LHS.isConstant() && RHS.isConstant())
+      return ValidatorResult(SCEVType::PARAM, DivExpr);
+
+    DEBUG(dbgs() << "INVALID: unsigned division of non-constant expressions");
+    return ValidatorResult(SCEVType::INVALID);
+  }
+
+  ValidatorResult visitUDivExpr(const SCEVUDivExpr *Expr) {
+    auto *Dividend = Expr->getLHS();
+    auto *Divisor = Expr->getRHS();
+    return visitDivision(Dividend, Divisor, Expr);
+  }
+
+  ValidatorResult visitSDivInstruction(Instruction *SDiv, const SCEV *Expr) {
    assert(SDiv->getOpcode() == Instruction::SDiv &&
           "Assumed SDiv instruction!");

-    auto *Divisor = SDiv->getOperand(1);
-    auto *CI = dyn_cast<ConstantInt>(Divisor);
-    if (!CI)
-      return visitGenericInst(SDiv, S);
-
-    auto *Dividend = SDiv->getOperand(0);
-    auto *DividendSCEV = SE.getSCEV(Dividend);
-    return visit(DividendSCEV);
+    auto *Dividend = SE.getSCEV(SDiv->getOperand(0));
+    auto *Divisor = SE.getSCEV(SDiv->getOperand(1));
+    return visitDivision(Dividend, Divisor, Expr, SDiv);
  }

  ValidatorResult visitSRemInstruction(Instruction *SRem, const SCEV *S) {
--- a/polly/test/Isl/CodeGen/invariant_load_parameters_cyclic_dependence.ll
+++ b/polly/test/Isl/CodeGen/invariant_load_parameters_cyclic_dependence.ll
@ -1,21 +1,14 @@
 ; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s --check-prefix=SCOP
 ; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s
 ;
-; This caused the code generation to emit a broken module as there are two
-; dependences that need to be considered, thus code has to be emitted in a
-; certain order:
-;   1) To preload A[N * M] the expression N * M [p0] is needed (both for the
-;      condition under which A[N * M] is executed as well as to compute the
-;      index).
-;   2) To generate (A[N * M] / 2) [p1] the preloaded value is needed.
+; SCOP:         Assumed Context:
+; SCOP-NEXT:    [p_0, tmp4] -> {  :  }
+; SCOP-NEXT:    Invalid Context:
+; SCOP-NEXT:    [p_0, tmp4] -> {  : p_0 > 0 and tmp4 < 0 }
+; SCOP-NEXT:    p0: (%N * %M)
+; SCOP-NEXT:    p1: %tmp4
 ;
-; SCOP: p0: (%N * %M)
-; SCOP: p1: (%tmp4 /u 2)
-;
-; CHECK: polly.preload.merge:
-; CHECK:   %polly.preload.tmp4.merge = phi i32 [ %polly.access.A.load, %polly.preload.exec ], [ 0, %polly.preload.cond ]
-; CHECK:   %3 = lshr i32 %polly.preload.tmp4.merge, 1
-; CHECK:   %4 = sext i32 %0 to i64
+; CHECK:      polly.preload.merge:
 ;
 ;    void f(int *restrict A, int *restrict B, int N, int M) {
 ;
--- a/polly/test/ScopInfo/unsigned-division-1.ll
+++ b/polly/test/ScopInfo/unsigned-division-1.ll
@ -0,0 +1,42 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+;    void f(int *A, unsigned N) {
+;      for (unsigned i = 0; i < N / 2; i++)
+;        A[i]++;
+;    }
+;
+; CHECK:          Assumed Context:
+; CHECK-NEXT:     [N] -> {  :  }
+; CHECK-NEXT:     Invalid Context:
+; CHECK-NEXT:     [N] -> {  : N < 0 }
+;
+; CHECK:          Domain :=
+; CHECK-NEXT:     [N] -> { Stmt_for_body[i0] : i0 >= 0 and 2i0 <= -2 + N };
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %A, i32 %N) {
+entry:
+  %tmp = lshr i32 %N, 1
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %lftr.wideiv = trunc i64 %indvars.iv to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, %tmp
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %tmp1 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %tmp1, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
--- a/polly/test/ScopInfo/unsigned-division-2.ll
+++ b/polly/test/ScopInfo/unsigned-division-2.ll
@ -0,0 +1,43 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+;    void f(int *A, unsigned N) {
+;      for (unsigned i = 0; i < N / 2 + 3; i++)
+;        A[i]++;
+;    }
+;
+; CHECK:          Assumed Context:
+; CHECK-NEXT:     [N] -> {  :  }
+; CHECK-NEXT:     Invalid Context:
+; CHECK-NEXT:     [N] -> {  : N < 0 }
+;
+; CHECK:          Domain :=
+; CHECK-NEXT:     [N] -> { Stmt_for_body[i0] : i0 >= 0 and 2i0 <= 4 + N };
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %A, i32 %N) {
+entry:
+  %tmp = lshr i32 %N, 1
+  %tmp1 = add i32 %tmp, 3
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %lftr.wideiv = trunc i64 %indvars.iv to i32
+  %exitcond = icmp ult i32 %lftr.wideiv, %tmp1
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %tmp2 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %tmp2, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
--- a/polly/test/ScopInfo/unsigned-division-3.ll
+++ b/polly/test/ScopInfo/unsigned-division-3.ll
@ -0,0 +1,42 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+;    void f(int *A, unsigned char N) {
+;      for (unsigned i = 0; i < N / -128; i++)
+;        A[i]++;
+;    }
+;
+; CHECK:         Assumed Context:
+; CHECK-NEXT:    [N] -> {  :  }
+; CHECK-NEXT:    Invalid Context:
+; CHECK-NEXT:    [N] -> {  : N < 0 }
+;
+; CHECK:       Domain :=
+; CHECK-NEXT:    [N] -> { Stmt_for_body[i0] : i0 >= 0 and 128i0 <= -128 + N };
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %A, i8 %N) {
+entry:
+  %tmp = udiv i8 %N, -128
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %lftr.wideiv = trunc i64 %indvars.iv to i8
+  %exitcond = icmp ne i8 %lftr.wideiv, %tmp
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %tmp2 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %tmp2, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
--- a/polly/test/ScopInfo/unsigned-division-4.ll
+++ b/polly/test/ScopInfo/unsigned-division-4.ll
@ -0,0 +1,43 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+;    void f(int *A, unsigned char N) {
+;      for (unsigned i = 0; i < (N / -128) + 3; i++)
+;        A[i]++;
+;    }
+;
+; CHECK:         Assumed Context:
+; CHECK-NEXT:    [N] -> {  :  }
+; CHECK-NEXT:    Invalid Context:
+; CHECK-NEXT:    [N] -> {  : N < 0 }
+;
+; CHECK:       Domain :=
+; CHECK-NEXT:    [N] -> { Stmt_for_body[i0] : i0 >= 0 and 128i0 <= 256 + N };
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %A, i8 %N) {
+entry:
+  %tmp1 = udiv i8 %N, -128
+  %tmp = add i8 %tmp1, 3
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %lftr.wideiv = trunc i64 %indvars.iv to i8
+  %exitcond = icmp ne i8 %lftr.wideiv, %tmp
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %tmp2 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %tmp2, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
--- a/polly/test/ScopInfo/unsigned-division-5.ll
+++ b/polly/test/ScopInfo/unsigned-division-5.ll
@ -0,0 +1,49 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+;    void f(int *A, unsigned N) {
+;      for (unsigned i = 0; i < N; i++)
+;        A[i / 3] = A[5 * N / 3];
+;    }
+;
+; CHECK:         Invariant Accesses: {
+; CHECK-NEXT:            ReadAccess :=	[Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:                [N] -> { Stmt_for_body[i0] -> MemRef_A[o0] : -2 + 5N <= 3o0 <= 5N };
+; CHECK-NEXT:            Execution Context: [N] -> {  : 0 < N <= 1844674407370955161 }
+; CHECK-NEXT:    }
+;
+; CHECK:         Assumed Context:
+; CHECK-NEXT:    [N] -> {  :  }
+; CHECK-NEXT:    Invalid Context:
+; CHECK-NEXT:    [N] -> {  : N >= 1844674407370955162 }
+
+; CHECK:         MustWriteAccess :=	[Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:        [N] -> { Stmt_for_body[i0] -> MemRef_A[o0] : -2 + i0 <= 3o0 <= i0 };
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %A, i64 %N) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %exitcond = icmp ne i64 %indvars.iv, %N
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %mul = mul nsw i64 %N, 5
+  %div2 = udiv i64 %mul, 3
+  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %div2
+  %load = load i32, i32* %arrayidx2, align 4
+  %div = udiv i64 %indvars.iv, 3
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %div
+  store i32 %load, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}