Translate power-of-two floor-division into ashr

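A floor division by a constant power of two can be translated into a single
arithmetic shift right (ashr), because ashr rounds towards negative infinity
exactly like a floor division does. This allows us to replace a complex lowering
that requires division operations: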

  %pexp.fdiv_q.0 = sub i64 %21, 128
  %pexp.fdiv_q.1 = add i64 %pexp.fdiv_q.0, 1
  %pexp.fdiv_q.2 = icmp slt i64 %21, 0
  %pexp.fdiv_q.3 = select i1 %pexp.fdiv_q.2, i64 %pexp.fdiv_q.1, i64 %21
  %pexp.fdiv_q.4 = sdiv i64 %pexp.fdiv_q.3, 128

with a simple ashr:

  %polly.fdiv_q.shr = ashr i64 %21, 7

llvm-svn: 238905
This commit is contained in:
Tobias Grosser 2015-06-03 06:31:30 +00:00
parent dc9293d051
commit cb73f150d4
4 changed files with 82 additions and 15 deletions

View File

@ -301,6 +301,11 @@ Value *IslExprBuilder::createOpBin(__isl_take isl_ast_expr *Expr) {
Res = Builder.CreateUDiv(LHS, RHS, "pexp.p_div_q");
break;
case isl_ast_op_fdiv_q: { // Round towards -infty
auto &Int = dyn_cast<ConstantInt>(RHS)->getValue();
if (Int.isPowerOf2()) {
Res = Builder.CreateAShr(LHS, Int.ceilLogBase2(), "polly.fdiv_q.shr");
break;
}
// TODO: Review this code and check that the calculation cannot overflow
// in some corner cases.
//

View File

@ -1,4 +1,5 @@
; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen -S < %s | FileCheck %s
; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen -polly-import-jscop-postfix=pow2 -S < %s | FileCheck %s -check-prefix=POW2
;
; void exprModDiv(float *A, float *B, float *C, long N, long p) {
; for (long i = 0; i < N; i++)
@ -12,34 +13,58 @@
; useful as LLVM will translate urem and udiv operations with power-of-two
; denominators to fast bitwise-AND or shift operations.
; A[i % 128]
; CHECK: %pexp.pdiv_r = urem i64 %polly.indvar, 128
; A[i % 127]
; CHECK: %pexp.pdiv_r = urem i64 %polly.indvar, 127
; CHECK: %polly.access.A6 = getelementptr float, float* %A, i64 %pexp.pdiv_r
; A[i / 128]
; CHECK: %pexp.div = sdiv i64 %polly.indvar, 128
; A[i / 127]
; CHECK: %pexp.div = sdiv i64 %polly.indvar, 127
; CHECK: %polly.access.B8 = getelementptr float, float* %B, i64 %pexp.div
;
; FIXME: Make isl mark this as a udiv expression.
; #define floord(n,d) ((n < 0) ? (n - d + 1) : n) / d
; A[p + 128 * floord(-p - 1, 128) + 128]
; A[p + 127 * floord(-p - 1, 127) + 127]
; CHECK: %20 = sub nsw i64 0, %p
; CHECK: %21 = sub nsw i64 %20, 1
; CHECK: %pexp.fdiv_q.0 = sub i64 %21, 128
; CHECK: %pexp.fdiv_q.0 = sub i64 %21, 127
; CHECK: %pexp.fdiv_q.1 = add i64 %pexp.fdiv_q.0, 1
; CHECK: %pexp.fdiv_q.2 = icmp slt i64 %21, 0
; CHECK: %pexp.fdiv_q.3 = select i1 %pexp.fdiv_q.2, i64 %pexp.fdiv_q.1, i64 %21
; CHECK: %pexp.fdiv_q.4 = sdiv i64 %pexp.fdiv_q.3, 128
; CHECK: %22 = mul nsw i64 128, %pexp.fdiv_q.4
; CHECK: %pexp.fdiv_q.4 = sdiv i64 %pexp.fdiv_q.3, 127
; CHECK: %22 = mul nsw i64 127, %pexp.fdiv_q.4
; CHECK: %23 = add nsw i64 %p, %22
; CHECK: %24 = add nsw i64 %23, 128
; CHECK: %24 = add nsw i64 %23, 127
; CHECK: %polly.access.A10 = getelementptr float, float* %A, i64 %24
; A[p / 128]
; CHECK: %pexp.div12 = sdiv i64 %p, 128
; A[p / 127]
; CHECK: %pexp.div12 = sdiv i64 %p, 127
; CHECK: %polly.access.B13 = getelementptr float, float* %B, i64 %pexp.div12
; A[i % 128]
; POW2: %pexp.pdiv_r = urem i64 %polly.indvar, 128
; POW2: %polly.access.A6 = getelementptr float, float* %A, i64 %pexp.pdiv_r
; A[i / 128]
; POW2: %pexp.div = sdiv i64 %polly.indvar, 128
; POW2: %polly.access.B8 = getelementptr float, float* %B, i64 %pexp.div
;
; FIXME: Make isl mark this as a udiv expression.
; #define floord(n,d) ((n < 0) ? (n - d + 1) : n) / d
; A[p + 128 * floord(-p - 1, 128) + 128]
; POW2: %20 = sub nsw i64 0, %p
; POW2: %21 = sub nsw i64 %20, 1
; POW2: %polly.fdiv_q.shr = ashr i64 %21, 7
; POW2: %22 = mul nsw i64 128, %polly.fdiv_q.shr
; POW2: %23 = add nsw i64 %p, %22
; POW2: %24 = add nsw i64 %23, 128
; POW2: %polly.access.A10 = getelementptr float, float* %A, i64 %24
; A[p / 128]
; POW2: %pexp.div12 = sdiv i64 %p, 128
; POW2: %polly.access.B13 = getelementptr float, float* %B, i64 %pexp.div12
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define void @exprModDiv(float* %A, float* %B, float* %C, i64 %N, i64 %p) {

View File

@ -6,19 +6,19 @@
"accesses" : [
{
"kind" : "read",
"relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_A[i0 % 128] }"
"relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_A[i0 % 127] }"
},
{
"kind" : "read",
"relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_B[i0 / 128] }"
"relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_B[i0 / 127] }"
},
{
"kind" : "read",
"relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_A[p % 128] }"
"relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_A[p % 127] }"
},
{
"kind" : "read",
"relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_B[p / 128] }"
"relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_B[p / 127] }"
},
{
"kind" : "read",

View File

@ -0,0 +1,37 @@
{
"context" : "[N, p] -> { : N >= -9223372036854775808 and N <= 9223372036854775807 and p >= -9223372036854775808 and p <= 9223372036854775807 }",
"name" : "for.cond => for.end",
"statements" : [
{
"accesses" : [
{
"kind" : "read",
"relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_A[i0 % 128] }"
},
{
"kind" : "read",
"relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_B[i0 / 128] }"
},
{
"kind" : "read",
"relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_A[p % 128] }"
},
{
"kind" : "read",
"relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_B[p / 128] }"
},
{
"kind" : "read",
"relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_C[i0] }"
},
{
"kind" : "write",
"relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_C[i0] }"
}
],
"domain" : "[N, p] -> { Stmt_for_body[i0] : i0 >= 0 and N >= 1 and i0 <= -1 + N }",
"name" : "Stmt_for_body",
"schedule" : "[N, p] -> { Stmt_for_body[i0] -> [i0] }"
}
]
}