diff --git a/polly/lib/CodeGen/IslExprBuilder.cpp b/polly/lib/CodeGen/IslExprBuilder.cpp index 60a46531f2e9..27c72486c7e0 100644 --- a/polly/lib/CodeGen/IslExprBuilder.cpp +++ b/polly/lib/CodeGen/IslExprBuilder.cpp @@ -301,6 +301,15 @@ Value *IslExprBuilder::createOpBin(__isl_take isl_ast_expr *Expr) { Res = Builder.CreateUDiv(LHS, RHS, "pexp.p_div_q"); break; case isl_ast_op_fdiv_q: { // Round towards -infty + // Flooring division by a positive power-of-two constant is an arithmetic + // right shift; every other divisor takes the generic lowering below. + if (auto *Const = dyn_cast<ConstantInt>(RHS)) { + auto &Val = Const->getValue(); + if (Val.isPowerOf2() && Val.isNonNegative()) { + Res = Builder.CreateAShr(LHS, Val.ceilLogBase2(), "polly.fdiv_q.shr"); + break; + } + } // TODO: Review code and check that this calculation does not yield // incorrect overflow in some bordercases. // diff --git a/polly/test/Isl/CodeGen/exprModDiv.ll b/polly/test/Isl/CodeGen/exprModDiv.ll index e471e6b1f80f..91d27fabbbc7 100644 --- a/polly/test/Isl/CodeGen/exprModDiv.ll +++ b/polly/test/Isl/CodeGen/exprModDiv.ll @@ -1,4 +1,5 @@ ; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen -S < %s | FileCheck %s +; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen -polly-import-jscop-postfix=pow2 -S < %s | FileCheck %s -check-prefix=POW2 ; ; void exprModDiv(float *A, float *B, float *C, long N, long p) { ; for (long i = 0; i < N; i++) @@ -12,34 +13,58 @@ ; useful as LLVM will translate urem and udiv operations with power-of-two ; denominators to fast bitwise and or shift operations. -; A[i % 128] -; CHECK: %pexp.pdiv_r = urem i64 %polly.indvar, 128 +; A[i % 127] +; CHECK: %pexp.pdiv_r = urem i64 %polly.indvar, 127 ; CHECK: %polly.access.A6 = getelementptr float, float* %A, i64 %pexp.pdiv_r -; A[i / 128] -; CHECK: %pexp.div = sdiv i64 %polly.indvar, 128 +; A[i / 127] +; CHECK: %pexp.div = sdiv i64 %polly.indvar, 127 ; CHECK: %polly.access.B8 = getelementptr float, float* %B, i64 %pexp.div ; ; FIXME: Make isl mark this as an udiv expression. ; #define floord(n,d) ((n < 0) ? 
(n - d + 1) : n) / d -; A[p + 128 * floord(-p - 1, 128) + 128] +; A[p + 127 * floord(-p - 1, 127) + 127] ; CHECK: %20 = sub nsw i64 0, %p ; CHECK: %21 = sub nsw i64 %20, 1 -; CHECK: %pexp.fdiv_q.0 = sub i64 %21, 128 +; CHECK: %pexp.fdiv_q.0 = sub i64 %21, 127 ; CHECK: %pexp.fdiv_q.1 = add i64 %pexp.fdiv_q.0, 1 ; CHECK: %pexp.fdiv_q.2 = icmp slt i64 %21, 0 ; CHECK: %pexp.fdiv_q.3 = select i1 %pexp.fdiv_q.2, i64 %pexp.fdiv_q.1, i64 %21 -; CHECK: %pexp.fdiv_q.4 = sdiv i64 %pexp.fdiv_q.3, 128 -; CHECK: %22 = mul nsw i64 128, %pexp.fdiv_q.4 +; CHECK: %pexp.fdiv_q.4 = sdiv i64 %pexp.fdiv_q.3, 127 +; CHECK: %22 = mul nsw i64 127, %pexp.fdiv_q.4 ; CHECK: %23 = add nsw i64 %p, %22 -; CHECK: %24 = add nsw i64 %23, 128 +; CHECK: %24 = add nsw i64 %23, 127 ; CHECK: %polly.access.A10 = getelementptr float, float* %A, i64 %24 -; A[p / 128] -; CHECK: %pexp.div12 = sdiv i64 %p, 128 +; A[p / 127] +; CHECK: %pexp.div12 = sdiv i64 %p, 127 ; CHECK: %polly.access.B13 = getelementptr float, float* %B, i64 %pexp.div12 +; A[i % 128] +; POW2: %pexp.pdiv_r = urem i64 %polly.indvar, 128 +; POW2: %polly.access.A6 = getelementptr float, float* %A, i64 %pexp.pdiv_r + +; A[i / 128] +; POW2: %pexp.div = sdiv i64 %polly.indvar, 128 +; POW2: %polly.access.B8 = getelementptr float, float* %B, i64 %pexp.div +; +; FIXME: Make isl mark this as an udiv expression. + +; #define floord(n,d) ((n < 0) ? 
(n - d + 1) : n) / d +; A[p + 128 * floord(-p - 1, 128) + 128] +; POW2: %20 = sub nsw i64 0, %p +; POW2: %21 = sub nsw i64 %20, 1 +; POW2: %polly.fdiv_q.shr = ashr i64 %21, 7 +; POW2: %22 = mul nsw i64 128, %polly.fdiv_q.shr +; POW2: %23 = add nsw i64 %p, %22 +; POW2: %24 = add nsw i64 %23, 128 +; POW2: %polly.access.A10 = getelementptr float, float* %A, i64 %24 + +; A[p / 128] +; POW2: %pexp.div12 = sdiv i64 %p, 128 +; POW2: %polly.access.B13 = getelementptr float, float* %B, i64 %pexp.div12 + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define void @exprModDiv(float* %A, float* %B, float* %C, i64 %N, i64 %p) { diff --git a/polly/test/Isl/CodeGen/exprModDiv___%for.cond---%for.end.jscop b/polly/test/Isl/CodeGen/exprModDiv___%for.cond---%for.end.jscop index 5309f8ab5772..066c123fa888 100644 --- a/polly/test/Isl/CodeGen/exprModDiv___%for.cond---%for.end.jscop +++ b/polly/test/Isl/CodeGen/exprModDiv___%for.cond---%for.end.jscop @@ -6,19 +6,19 @@ "accesses" : [ { "kind" : "read", - "relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_A[i0 % 128] }" + "relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_A[i0 % 127] }" }, { "kind" : "read", - "relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_B[i0 / 128] }" + "relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_B[i0 / 127] }" }, { "kind" : "read", - "relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_A[p % 128] }" + "relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_A[p % 127] }" }, { "kind" : "read", - "relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_B[p / 128] }" + "relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_B[p / 127] }" }, { "kind" : "read", diff --git a/polly/test/Isl/CodeGen/exprModDiv___%for.cond---%for.end.jscop.pow2 b/polly/test/Isl/CodeGen/exprModDiv___%for.cond---%for.end.jscop.pow2 new file mode 100644 index 000000000000..5309f8ab5772 --- /dev/null +++ b/polly/test/Isl/CodeGen/exprModDiv___%for.cond---%for.end.jscop.pow2 @@ -0,0 +1,37 @@ +{ + "context" : "[N, p] -> { : 
N >= -9223372036854775808 and N <= 9223372036854775807 and p >= -9223372036854775808 and p <= 9223372036854775807 }", + "name" : "for.cond => for.end", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_A[i0 % 128] }" + }, + { + "kind" : "read", + "relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_B[i0 / 128] }" + }, + { + "kind" : "read", + "relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_A[p % 128] }" + }, + { + "kind" : "read", + "relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_B[p / 128] }" + }, + { + "kind" : "read", + "relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_C[i0] }" + }, + { + "kind" : "write", + "relation" : "[N, p] -> { Stmt_for_body[i0] -> MemRef_C[i0] }" + } + ], + "domain" : "[N, p] -> { Stmt_for_body[i0] : i0 >= 0 and N >= 1 and i0 <= -1 + N }", + "name" : "Stmt_for_body", + "schedule" : "[N, p] -> { Stmt_for_body[i0] -> [i0] }" + } + ] +}