forked from OSchip/llvm-project
[MLIR] Make upper bound implementation exclusive
This CL implements exclusive upper bound behavior as per b/116854378. A follow-up CL will update the semantics of the for loop. PiperOrigin-RevId: 220448963
This commit is contained in:
parent
6f0fb22723
commit
cde8248753
|
@ -37,7 +37,7 @@ using namespace mlir;
|
|||
/// expressible as an affine expression, and nullptr otherwise. The trip count
|
||||
/// expression is simplified before returning.
|
||||
AffineExpr mlir::getTripCountExpr(const ForStmt &forStmt) {
|
||||
// upper_bound - lower_bound + 1
|
||||
// upper_bound - lower_bound
|
||||
int64_t loopSpan;
|
||||
|
||||
int64_t step = forStmt.getStep();
|
||||
|
@ -46,7 +46,7 @@ AffineExpr mlir::getTripCountExpr(const ForStmt &forStmt) {
|
|||
if (forStmt.hasConstantBounds()) {
|
||||
int64_t lb = forStmt.getConstantLowerBound();
|
||||
int64_t ub = forStmt.getConstantUpperBound();
|
||||
loopSpan = ub - lb + 1;
|
||||
loopSpan = ub - lb;
|
||||
} else {
|
||||
auto lbMap = forStmt.getLowerBoundMap();
|
||||
auto ubMap = forStmt.getUpperBoundMap();
|
||||
|
@ -59,11 +59,11 @@ AffineExpr mlir::getTripCountExpr(const ForStmt &forStmt) {
|
|||
if (!forStmt.matchingBoundOperandList())
|
||||
return nullptr;
|
||||
|
||||
// ub_expr - lb_expr + 1
|
||||
// ub_expr - lb_expr
|
||||
AffineExpr lbExpr(lbMap.getResult(0));
|
||||
AffineExpr ubExpr(ubMap.getResult(0));
|
||||
auto loopSpanExpr = simplifyAffineExpr(
|
||||
ubExpr - lbExpr + 1, std::max(lbMap.getNumDims(), ubMap.getNumDims()),
|
||||
ubExpr - lbExpr, std::max(lbMap.getNumDims(), ubMap.getNumDims()),
|
||||
std::max(lbMap.getNumSymbols(), ubMap.getNumSymbols()));
|
||||
auto cExpr = loopSpanExpr.dyn_cast<AffineConstantExpr>();
|
||||
if (!cExpr)
|
||||
|
|
|
@ -264,21 +264,21 @@ UtilResult mlir::stmtBodySkew(ForStmt *forStmt, ArrayRef<uint64_t> delays,
|
|||
assert(d >= 1 &&
|
||||
"Queue expected to be empty when the first block is found");
|
||||
// The interval for which the loop needs to be generated here is:
|
||||
// ( lbDelay, min(lbDelay + tripCount - 1, d - 1) ] and the body of the
|
||||
// [lbDelay, min(lbDelay + tripCount, d)) and the body of the
|
||||
// loop needs to have all statements in stmtQueue in that order.
|
||||
ForStmt *res;
|
||||
if (lbDelay + tripCount - 1 < d - 1) {
|
||||
res = generateLoop(
|
||||
b.getShiftedAffineMap(origLbMap, lbDelay),
|
||||
b.getShiftedAffineMap(origLbMap, lbDelay + tripCount - 1),
|
||||
if (lbDelay + tripCount < d) {
|
||||
res =
|
||||
generateLoop(b.getShiftedAffineMap(origLbMap, lbDelay),
|
||||
b.getShiftedAffineMap(origLbMap, lbDelay + tripCount),
|
||||
stmtGroupQueue, 0, forStmt, &b);
|
||||
// Entire loop for the queued stmt groups generated, empty it.
|
||||
stmtGroupQueue.clear();
|
||||
lbDelay += tripCount;
|
||||
} else {
|
||||
res = generateLoop(b.getShiftedAffineMap(origLbMap, lbDelay),
|
||||
b.getShiftedAffineMap(origLbMap, d - 1),
|
||||
stmtGroupQueue, 0, forStmt, &b);
|
||||
b.getShiftedAffineMap(origLbMap, d), stmtGroupQueue,
|
||||
0, forStmt, &b);
|
||||
lbDelay = d;
|
||||
}
|
||||
if (!prologue && res)
|
||||
|
@ -295,11 +295,11 @@ UtilResult mlir::stmtBodySkew(ForStmt *forStmt, ArrayRef<uint64_t> delays,
|
|||
// Those statements groups left in the queue now need to be processed (FIFO)
|
||||
// and their loops completed.
|
||||
for (unsigned i = 0, e = stmtGroupQueue.size(); i < e; ++i) {
|
||||
uint64_t ubDelay = stmtGroupQueue[i].first + tripCount - 1;
|
||||
uint64_t ubDelay = stmtGroupQueue[i].first + tripCount;
|
||||
epilogue = generateLoop(b.getShiftedAffineMap(origLbMap, lbDelay),
|
||||
b.getShiftedAffineMap(origLbMap, ubDelay),
|
||||
stmtGroupQueue, i, forStmt, &b);
|
||||
lbDelay = ubDelay + 1;
|
||||
lbDelay = ubDelay;
|
||||
if (!prologue)
|
||||
prologue = epilogue;
|
||||
}
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
// RUN: mlir-opt %s -loop-tile | FileCheck %s
|
||||
|
||||
// CHECK: #map0 = (d0) -> (d0 + 31)
|
||||
// CHECK: #map1 = (d0) -> (d0 + 31, 50)
|
||||
// CHECK: #map1 = (d0) -> (d0 + 31, 51)
|
||||
// CHECK-LABEL: mlfunc @loop_tiling()
|
||||
// CHECK-NEXT: for %i0 = 0 to 255 step 32 {
|
||||
// CHECK-NEXT: for %i1 = 0 to 511 step 32 {
|
||||
// CHECK-NEXT: for %i2 = 0 to 1023 step 32 {
|
||||
// CHECK-NEXT: for %i0 = 0 to 256 step 32 {
|
||||
// CHECK-NEXT: for %i1 = 0 to 512 step 32 {
|
||||
// CHECK-NEXT: for %i2 = 0 to 1024 step 32 {
|
||||
// CHECK-NEXT: for %i3 = (d0) -> (d0)(%i0) to #map0(%i0) {
|
||||
// CHECK-NEXT: for %i4 = (d0) -> (d0)(%i1) to #map0(%i1) {
|
||||
// CHECK-NEXT: for %i5 = (d0) -> (d0)(%i2) to #map0(%i2) {
|
||||
|
@ -16,32 +16,32 @@
|
|||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: for %i6 = 0 to 50 step 32 {
|
||||
// CHECK-NEXT: for %i6 = 0 to 51 step 32 {
|
||||
// CHECK-NEXT: for %i7 = (d0) -> (d0)(%i6) to min #map1(%i6) {
|
||||
// CHECK-NEXT: "bar"(%i7, %i7) : (index, index) -> ()
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: for %i8 = 0 to 20 step 32 {
|
||||
// CHECK-NEXT: for %i8 = 0 to 21 step 32 {
|
||||
// CHECK-NEXT: for %i9 = (d0) -> (d0)(%i8) to 20 {
|
||||
// CHECK-NEXT: "foobar"(%i9) : (index) -> ()
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: return
|
||||
mlfunc @loop_tiling() {
|
||||
for %i = 0 to 255 {
|
||||
for %j = 0 to 511 {
|
||||
for %k = 0 to 1023 {
|
||||
for %i = 0 to 256 {
|
||||
for %j = 0 to 512 {
|
||||
for %k = 0 to 1024 {
|
||||
"foo"(%i, %j, %k) : (index, index, index) -> ()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for %x = 0 to 50 {
|
||||
for %x = 0 to 51 {
|
||||
"bar"(%x, %x) : (index, index) -> ()
|
||||
}
|
||||
|
||||
// Intra-tile loop won't need a min expression.
|
||||
for %y = 0 to 20 {
|
||||
for %y = 0 to 21 {
|
||||
"foobar"(%y) : (index) -> ()
|
||||
}
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@ mlfunc @loop_nest_dma() {
|
|||
// CHECK-NEXT: %c128 = constant 128 : index
|
||||
// CHECK-NEXT: %5 = affine_apply #map0(%c0)
|
||||
// CHECK-NEXT: dma_start %2[%c0], %1[%5#0, %c0], %c128, %0[%5#1, %c0_0] : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
|
||||
// CHECK-NEXT: for %i0 = 1 to 7 {
|
||||
// CHECK-NEXT: for %i0 = 1 to 8 {
|
||||
// CHECK-NEXT: %6 = affine_apply #map0(%i0)
|
||||
// CHECK-NEXT: dma_start %2[%i0], %1[%6#0, %i0], %c128, %0[%6#1, %c0_0] : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
|
||||
// CHECK-NEXT: %7 = affine_apply #map1(%i0)
|
||||
|
@ -23,7 +23,7 @@ mlfunc @loop_nest_dma() {
|
|||
// CHECK-NEXT: %10 = load %1[%9, %7] : memref<2x32xf32, 1>
|
||||
// CHECK-NEXT: %11 = "compute"(%10) : (f32) -> f32
|
||||
// CHECK-NEXT: store %11, %1[%9, %7] : memref<2x32xf32, 1>
|
||||
// CHECK-NEXT: for %i1 = 0 to 127 {
|
||||
// CHECK-NEXT: for %i1 = 0 to 128 {
|
||||
// CHECK-NEXT: "do_more_compute"(%7, %i1) : (index, index) -> ()
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: }
|
||||
|
@ -34,7 +34,7 @@ mlfunc @loop_nest_dma() {
|
|||
// CHECK-NEXT: %15 = load %1[%14, %12] : memref<2x32xf32, 1>
|
||||
// CHECK-NEXT: %16 = "compute"(%15) : (f32) -> f32
|
||||
// CHECK-NEXT: store %16, %1[%14, %12] : memref<2x32xf32, 1>
|
||||
// CHECK-NEXT: for %i2 = 0 to 127 {
|
||||
// CHECK-NEXT: for %i2 = 0 to 128 {
|
||||
// CHECK-NEXT: "do_more_compute"(%12, %i2) : (index, index) -> ()
|
||||
// CHECK-NEXT: }
|
||||
// CHECK-NEXT: return
|
||||
|
@ -47,13 +47,13 @@ mlfunc @loop_nest_dma() {
|
|||
%zero = constant 0 : index
|
||||
%num_elts = constant 128 : index
|
||||
|
||||
for %i = 0 to 7 {
|
||||
for %i = 0 to 8 {
|
||||
dma_start %A[%i], %Ah[%i], %num_elts, %tag[%zero] : memref<256 x f32>, memref<32 x f32, 1>, memref<1 x f32>
|
||||
dma_wait %tag[%zero], %num_elts : memref<1 x f32>
|
||||
%v = load %Ah[%i] : memref<32 x f32, (d0) -> (d0), 1>
|
||||
%r = "compute"(%v) : (f32) -> (f32)
|
||||
store %r, %Ah[%i] : memref<32 x f32, (d0) -> (d0), 1>
|
||||
for %j = 0 to 127 {
|
||||
for %j = 0 to 128 {
|
||||
"do_more_compute"(%i, %j) : (index, index) -> ()
|
||||
}
|
||||
}
|
||||
|
@ -75,8 +75,8 @@ mlfunc @loop_dma_nested(%arg0 : memref<512x32xvector<8xf32>, #map0>, %arg1 : mem
|
|||
%5 = alloc() : memref<2xi32>
|
||||
// Prologue for DMA overlap on arg2.
|
||||
// CHECK: dma_start %arg2[
|
||||
// CHECK-NEXT: for %i0 = 1 to 7 {
|
||||
for %i0 = 0 to 7 {
|
||||
// CHECK-NEXT: for %i0 = 1 to 8 {
|
||||
for %i0 = 0 to 8 {
|
||||
%6 = affine_apply #map2(%i0)
|
||||
dma_start %arg2[%6#0, %6#1], %2[%c0, %c0], %num_elts, %5[%c0] : memref<512x32xvector<8xf32>, #map0>, memref<64x4xvector<8xf32>, #map0, 2>, memref<2xi32>
|
||||
dma_wait %5[%c0], %num_elts : memref<2xi32>
|
||||
|
@ -86,8 +86,8 @@ mlfunc @loop_dma_nested(%arg0 : memref<512x32xvector<8xf32>, #map0>, %arg1 : mem
|
|||
// Prologue for DMA overlap on arg0, arg1 nested within i0
|
||||
// CHECK: dma_start %arg0[
|
||||
// CHECK: dma_start %arg1[
|
||||
// CHECK-NEXT for %i1 = 1 to 7 {
|
||||
for %i1 = 0 to 7 {
|
||||
// CHECK-NEXT for %i1 = 1 to 8 {
|
||||
for %i1 = 0 to 8 {
|
||||
%7 = affine_apply #map1(%i0, %i1)
|
||||
%8 = affine_apply #map2(%i1)
|
||||
dma_start %arg0[%7#0, %7#1], %0[%c0, %c0], %num_elts, %3[%c0] : memref<512x32xvector<8xf32>, #map0>, memref<64x4xvector<8xf32>, #map0, 2>, memref<2xi32>
|
||||
|
@ -99,8 +99,8 @@ mlfunc @loop_dma_nested(%arg0 : memref<512x32xvector<8xf32>, #map0>, %arg1 : mem
|
|||
// CHECK: dma_start %arg1[
|
||||
// CHECK: dma_wait %3[
|
||||
// CHECK: dma_wait %2[
|
||||
// CHECK-NEXT: for %i2 = 0 to 3 {
|
||||
for %i2 = 0 to 3 {
|
||||
// CHECK-NEXT: for %i2 = 0 to 4 {
|
||||
for %i2 = 0 to 4 {
|
||||
"foo"() : () -> ()
|
||||
}
|
||||
}
|
||||
|
@ -113,16 +113,16 @@ mlfunc @loop_dma_nested(%arg0 : memref<512x32xvector<8xf32>, #map0>, %arg1 : mem
|
|||
// Within the epilogue for arg2's DMA, we have the DMAs on %arg1, %arg2 nested.
|
||||
// CHECK: dma_start %arg0[
|
||||
// CHECK: dma_start %arg1[
|
||||
// CHECK: for %i4 = 1 to 7 {
|
||||
// CHECK: for %i4 = 1 to 8 {
|
||||
// CHECK: dma_start %arg0[
|
||||
// CHECK: dma_start %arg1[
|
||||
// CHECK: dma_wait %3[
|
||||
// CHECK: dma_wait %2[
|
||||
// CHECK: for %i5 = 0 to 3 {
|
||||
// CHECK: for %i5 = 0 to 4 {
|
||||
// CHECK: "foo"() : () -> ()
|
||||
// CHECK: dma_wait %3[
|
||||
// CHECK: dma_wait %2[
|
||||
// CHECK: for %i6 = 0 to 3 {
|
||||
// CHECK: for %i6 = 0 to 4 {
|
||||
|
||||
// The DMAs below are outgoing DMAs on arg2, not yet overlapped.
|
||||
// CHECK: dma_start %1{{.*}}, %arg2[
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
mlfunc @unroll_jam_imperfect_nest() {
|
||||
// CHECK: %c100 = constant 100 : index
|
||||
// CHECK-NEXT: for %i0 = 0 to 99 step 2 {
|
||||
for %i = 0 to 100 {
|
||||
for %i = 0 to 101 {
|
||||
// CHECK: %0 = "addi32"(%i0, %i0) : (index, index) -> i32
|
||||
// CHECK-NEXT: %1 = affine_apply #map0(%i0)
|
||||
// CHECK-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32
|
||||
|
|
|
@ -42,13 +42,13 @@
|
|||
|
||||
// CHECK-LABEL: mlfunc @loop_nest_simplest() {
|
||||
mlfunc @loop_nest_simplest() {
|
||||
// CHECK: for %i0 = 1 to 100 step 2 {
|
||||
for %i = 1 to 100 step 2 {
|
||||
// CHECK: for %i0 = 0 to 100 step 2 {
|
||||
for %i = 0 to 100 step 2 {
|
||||
// CHECK: %c1_i32 = constant 1 : i32
|
||||
// CHECK-NEXT: %c1_i32_0 = constant 1 : i32
|
||||
// CHECK-NEXT: %c1_i32_1 = constant 1 : i32
|
||||
// CHECK-NEXT: %c1_i32_2 = constant 1 : i32
|
||||
for %j = 1 to 4 {
|
||||
for %j = 0 to 4 {
|
||||
%x = constant 1 : i32
|
||||
}
|
||||
} // CHECK: }
|
||||
|
@ -57,17 +57,17 @@ mlfunc @loop_nest_simplest() {
|
|||
|
||||
// CHECK-LABEL: mlfunc @loop_nest_simple_iv_use() {
|
||||
mlfunc @loop_nest_simple_iv_use() {
|
||||
// CHECK: %c1 = constant 1 : index
|
||||
// CHECK-NEXT: for %i0 = 1 to 100 step 2 {
|
||||
for %i = 1 to 100 step 2 {
|
||||
// CHECK: %0 = "addi32"(%c1, %c1) : (index, index) -> i32
|
||||
// CHECK: %1 = affine_apply #map0(%c1)
|
||||
// CHECK: %c0 = constant 0 : index
|
||||
// CHECK-NEXT: for %i0 = 0 to 100 step 2 {
|
||||
for %i = 0 to 100 step 2 {
|
||||
// CHECK: %0 = "addi32"(%c0, %c0) : (index, index) -> i32
|
||||
// CHECK: %1 = affine_apply #map0(%c0)
|
||||
// CHECK-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32
|
||||
// CHECK: %3 = affine_apply #map1(%c1)
|
||||
// CHECK: %3 = affine_apply #map1(%c0)
|
||||
// CHECK-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> i32
|
||||
// CHECK: %5 = affine_apply #map2(%c1)
|
||||
// CHECK: %5 = affine_apply #map2(%c0)
|
||||
// CHECK-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
|
||||
for %j = 1 to 4 {
|
||||
for %j = 0 to 4 {
|
||||
%x = "addi32"(%j, %j) : (index, index) -> i32
|
||||
}
|
||||
} // CHECK: }
|
||||
|
@ -78,8 +78,8 @@ mlfunc @loop_nest_simple_iv_use() {
|
|||
// CHECK-LABEL: mlfunc @loop_nest_body_def_use() {
|
||||
mlfunc @loop_nest_body_def_use() {
|
||||
// CHECK: %c0 = constant 0 : index
|
||||
// CHECK-NEXT: for %i0 = 1 to 100 step 2 {
|
||||
for %i = 1 to 100 step 2 {
|
||||
// CHECK-NEXT: for %i0 = 0 to 100 step 2 {
|
||||
for %i = 0 to 100 step 2 {
|
||||
// CHECK: %c0_0 = constant 0 : index
|
||||
%c0 = constant 0 : index
|
||||
// CHECK: %0 = affine_apply #map0(%c0)
|
||||
|
@ -93,7 +93,7 @@ mlfunc @loop_nest_body_def_use() {
|
|||
// CHECK-NEXT: %8 = affine_apply #map2(%c0)
|
||||
// CHECK-NEXT: %9 = affine_apply #map0(%8)
|
||||
// CHECK-NEXT: %10 = "addi32"(%9, %c0_0) : (index, index) -> index
|
||||
for %j = 0 to 3 {
|
||||
for %j = 0 to 4 {
|
||||
%x = "affine_apply" (%j) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
%y = "addi32"(%x, %c0) : (index, index) -> index
|
||||
|
@ -104,29 +104,29 @@ mlfunc @loop_nest_body_def_use() {
|
|||
|
||||
// CHECK-LABEL: mlfunc @loop_nest_strided() {
|
||||
mlfunc @loop_nest_strided() {
|
||||
// CHECK: %c3 = constant 3 : index
|
||||
// CHECK-NEXT: %c3_0 = constant 3 : index
|
||||
// CHECK-NEXT: for %i0 = 1 to 100 {
|
||||
for %i = 1 to 100 {
|
||||
// CHECK: %0 = affine_apply #map0(%c3_0)
|
||||
// CHECK: %c2 = constant 2 : index
|
||||
// CHECK-NEXT: %c2_0 = constant 2 : index
|
||||
// CHECK-NEXT: for %i0 = 0 to 100 {
|
||||
for %i = 0 to 100 {
|
||||
// CHECK: %0 = affine_apply #map0(%c2_0)
|
||||
// CHECK-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
|
||||
// CHECK-NEXT: %2 = affine_apply #map1(%c3_0)
|
||||
// CHECK-NEXT: %2 = affine_apply #map1(%c2_0)
|
||||
// CHECK-NEXT: %3 = affine_apply #map0(%2)
|
||||
// CHECK-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> index
|
||||
for %j = 3 to 6 step 2 {
|
||||
for %j = 2 to 6 step 2 {
|
||||
%x = "affine_apply" (%j) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
%y = "addi32"(%x, %x) : (index, index) -> index
|
||||
}
|
||||
// CHECK: %5 = affine_apply #map0(%c3)
|
||||
// CHECK: %5 = affine_apply #map0(%c2)
|
||||
// CHECK-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> index
|
||||
// CHECK-NEXT: %7 = affine_apply #map1(%c3)
|
||||
// CHECK-NEXT: %7 = affine_apply #map1(%c2)
|
||||
// CHECK-NEXT: %8 = affine_apply #map0(%7)
|
||||
// CHECK-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> index
|
||||
// CHECK-NEXT: %10 = affine_apply #map3(%c3)
|
||||
// CHECK-NEXT: %10 = affine_apply #map3(%c2)
|
||||
// CHECK-NEXT: %11 = affine_apply #map0(%10)
|
||||
// CHECK-NEXT: %12 = "addi32"(%11, %11) : (index, index) -> index
|
||||
for %k = 3 to 7 step 2 {
|
||||
for %k = 2 to 7 step 2 {
|
||||
%z = "affine_apply" (%k) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
%w = "addi32"(%z, %z) : (index, index) -> index
|
||||
|
@ -138,8 +138,8 @@ mlfunc @loop_nest_strided() {
|
|||
// CHECK-LABEL: mlfunc @loop_nest_multiple_results() {
|
||||
mlfunc @loop_nest_multiple_results() {
|
||||
// CHECK: %c0 = constant 0 : index
|
||||
// CHECK-NEXT: for %i0 = 1 to 100 {
|
||||
for %i = 1 to 100 {
|
||||
// CHECK-NEXT: for %i0 = 0 to 100 {
|
||||
for %i = 0 to 100 {
|
||||
// CHECK: %0 = affine_apply #map4(%i0, %c0)
|
||||
// CHECK-NEXT: %1 = "addi32"(%0#0, %0#1) : (index, index) -> index
|
||||
// CHECK-NEXT: %2 = affine_apply #map5(%i0, %c0)
|
||||
|
@ -149,7 +149,7 @@ mlfunc @loop_nest_multiple_results() {
|
|||
// CHECK-NEXT: %6 = "addi32"(%5#0, %5#1) : (index, index) -> index
|
||||
// CHECK-NEXT: %7 = affine_apply #map5(%i0, %4)
|
||||
// CHECK-NEXT: %8 = "fma"(%7#0, %7#1, %5#0) : (index, index, index) -> (index, index)
|
||||
for %j = 0 to 1 step 1 {
|
||||
for %j = 0 to 2 step 1 {
|
||||
%x = "affine_apply" (%i, %j) { map: (d0, d1) -> (d0 + 1, d1 + 2) } :
|
||||
(index, index) -> (index, index)
|
||||
%y = "addi32"(%x#0, %x#1) : (index, index) -> index
|
||||
|
@ -165,29 +165,29 @@ mlfunc @loop_nest_multiple_results() {
|
|||
// Imperfect loop nest. Unrolling innermost here yields a perfect nest.
|
||||
// CHECK-LABEL: mlfunc @loop_nest_seq_imperfect(%arg0 : memref<128x128xf32>) {
|
||||
mlfunc @loop_nest_seq_imperfect(%a : memref<128x128xf32>) {
|
||||
// CHECK: %c1 = constant 1 : index
|
||||
// CHECK: %c0 = constant 0 : index
|
||||
// CHECK-NEXT: %c128 = constant 128 : index
|
||||
%c128 = constant 128 : index
|
||||
// CHECK: for %i0 = 1 to 100 {
|
||||
for %i = 1 to 100 {
|
||||
// CHECK: for %i0 = 0 to 100 {
|
||||
for %i = 0 to 100 {
|
||||
// CHECK: %0 = "vld"(%i0) : (index) -> i32
|
||||
%ld = "vld"(%i) : (index) -> i32
|
||||
// CHECK: %1 = affine_apply #map0(%c1)
|
||||
// CHECK-NEXT: %2 = "vmulf"(%c1, %1) : (index, index) -> index
|
||||
// CHECK: %1 = affine_apply #map0(%c0)
|
||||
// CHECK-NEXT: %2 = "vmulf"(%c0, %1) : (index, index) -> index
|
||||
// CHECK-NEXT: %3 = "vaddf"(%2, %2) : (index, index) -> index
|
||||
// CHECK-NEXT: %4 = affine_apply #map0(%c1)
|
||||
// CHECK-NEXT: %4 = affine_apply #map0(%c0)
|
||||
// CHECK-NEXT: %5 = affine_apply #map0(%4)
|
||||
// CHECK-NEXT: %6 = "vmulf"(%4, %5) : (index, index) -> index
|
||||
// CHECK-NEXT: %7 = "vaddf"(%6, %6) : (index, index) -> index
|
||||
// CHECK-NEXT: %8 = affine_apply #map1(%c1)
|
||||
// CHECK-NEXT: %8 = affine_apply #map1(%c0)
|
||||
// CHECK-NEXT: %9 = affine_apply #map0(%8)
|
||||
// CHECK-NEXT: %10 = "vmulf"(%8, %9) : (index, index) -> index
|
||||
// CHECK-NEXT: %11 = "vaddf"(%10, %10) : (index, index) -> index
|
||||
// CHECK-NEXT: %12 = affine_apply #map2(%c1)
|
||||
// CHECK-NEXT: %12 = affine_apply #map2(%c0)
|
||||
// CHECK-NEXT: %13 = affine_apply #map0(%12)
|
||||
// CHECK-NEXT: %14 = "vmulf"(%12, %13) : (index, index) -> index
|
||||
// CHECK-NEXT: %15 = "vaddf"(%14, %14) : (index, index) -> index
|
||||
for %j = 1 to 4 {
|
||||
for %j = 0 to 4 {
|
||||
%x = "affine_apply" (%j) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
%y = "vmulf"(%j, %x) : (index, index) -> index
|
||||
|
@ -203,20 +203,20 @@ mlfunc @loop_nest_seq_imperfect(%a : memref<128x128xf32>) {
|
|||
|
||||
// CHECK-LABEL: mlfunc @loop_nest_seq_multiple() {
|
||||
mlfunc @loop_nest_seq_multiple() {
|
||||
// CHECK: %c1 = constant 1 : index
|
||||
// CHECK-NEXT: %c0 = constant 0 : index
|
||||
// CHECK-NEXT: %0 = affine_apply #map0(%c0)
|
||||
// CHECK: c0 = constant 0 : index
|
||||
// CHECK-NEXT: %c0_0 = constant 0 : index
|
||||
// CHECK-NEXT: %0 = affine_apply #map0(%c0_0)
|
||||
// CHECK-NEXT: "mul"(%0, %0) : (index, index) -> ()
|
||||
// CHECK-NEXT: %1 = affine_apply #map0(%c0)
|
||||
// CHECK-NEXT: %1 = affine_apply #map0(%c0_0)
|
||||
// CHECK-NEXT: %2 = affine_apply #map0(%1)
|
||||
// CHECK-NEXT: "mul"(%2, %2) : (index, index) -> ()
|
||||
// CHECK-NEXT: %3 = affine_apply #map1(%c0)
|
||||
// CHECK-NEXT: %3 = affine_apply #map1(%c0_0)
|
||||
// CHECK-NEXT: %4 = affine_apply #map0(%3)
|
||||
// CHECK-NEXT: "mul"(%4, %4) : (index, index) -> ()
|
||||
// CHECK-NEXT: %5 = affine_apply #map2(%c0)
|
||||
// CHECK-NEXT: %5 = affine_apply #map2(%c0_0)
|
||||
// CHECK-NEXT: %6 = affine_apply #map0(%5)
|
||||
// CHECK-NEXT: "mul"(%6, %6) : (index, index) -> ()
|
||||
for %j = 0 to 3 {
|
||||
for %j = 0 to 4 {
|
||||
%x = "affine_apply" (%j) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
"mul"(%x, %x) : (index, index) -> ()
|
||||
|
@ -224,20 +224,20 @@ mlfunc @loop_nest_seq_multiple() {
|
|||
|
||||
// CHECK: %c99 = constant 99 : index
|
||||
%k = "constant"(){value: 99} : () -> index
|
||||
// CHECK: for %i0 = 1 to 100 step 2 {
|
||||
for %m = 1 to 100 step 2 {
|
||||
// CHECK: %7 = affine_apply #map0(%c1)
|
||||
// CHECK-NEXT: %8 = affine_apply #map6(%c1)[%c99]
|
||||
// CHECK-NEXT: %9 = affine_apply #map0(%c1)
|
||||
// CHECK: for %i0 = 0 to 100 step 2 {
|
||||
for %m = 0 to 100 step 2 {
|
||||
// CHECK: %7 = affine_apply #map0(%c0)
|
||||
// CHECK-NEXT: %8 = affine_apply #map6(%c0)[%c99]
|
||||
// CHECK-NEXT: %9 = affine_apply #map0(%c0)
|
||||
// CHECK-NEXT: %10 = affine_apply #map0(%9)
|
||||
// CHECK-NEXT: %11 = affine_apply #map6(%9)[%c99]
|
||||
// CHECK-NEXT: %12 = affine_apply #map1(%c1)
|
||||
// CHECK-NEXT: %12 = affine_apply #map1(%c0)
|
||||
// CHECK-NEXT: %13 = affine_apply #map0(%12)
|
||||
// CHECK-NEXT: %14 = affine_apply #map6(%12)[%c99]
|
||||
// CHECK-NEXT: %15 = affine_apply #map2(%c1)
|
||||
// CHECK-NEXT: %15 = affine_apply #map2(%c0)
|
||||
// CHECK-NEXT: %16 = affine_apply #map0(%15)
|
||||
// CHECK-NEXT: %17 = affine_apply #map6(%15)[%c99]
|
||||
for %n = 1 to 4 {
|
||||
for %n = 0 to 4 {
|
||||
%y = "affine_apply" (%n) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
%z = "affine_apply" (%n, %k) { map: (d0) [s0] -> (d0 + s0 + 1) } :
|
||||
|
@ -249,16 +249,16 @@ mlfunc @loop_nest_seq_multiple() {
|
|||
|
||||
// SHORT-LABEL: mlfunc @loop_nest_outer_unroll() {
|
||||
mlfunc @loop_nest_outer_unroll() {
|
||||
// SHORT: for %i0 = 1 to 4 {
|
||||
// SHORT: for %i0 = 0 to 4 {
|
||||
// SHORT-NEXT: %0 = affine_apply #map0(%i0)
|
||||
// SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
|
||||
// SHORT-NEXT: }
|
||||
// SHORT-NEXT: for %i1 = 1 to 4 {
|
||||
// SHORT-NEXT: for %i1 = 0 to 4 {
|
||||
// SHORT-NEXT: %2 = affine_apply #map0(%i1)
|
||||
// SHORT-NEXT: %3 = "addi32"(%2, %2) : (index, index) -> index
|
||||
// SHORT-NEXT: }
|
||||
for %i = 1 to 2 {
|
||||
for %j = 1 to 4 {
|
||||
for %i = 0 to 2 {
|
||||
for %j = 0 to 4 {
|
||||
%x = "affine_apply" (%j) { map: (d0) -> (d0 + 1) } :
|
||||
(index) -> (index)
|
||||
%y = "addi32"(%x, %x) : (index, index) -> index
|
||||
|
@ -283,27 +283,27 @@ mlfunc @loop_nest_seq_long() -> i32 {
|
|||
%zero_idx = constant 0 : index
|
||||
|
||||
for %n0 = 0 to 512 {
|
||||
for %n1 = 0 to 7 {
|
||||
for %n1 = 0 to 8 {
|
||||
store %one, %A[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
|
||||
store %two, %B[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
|
||||
store %zero, %C[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
|
||||
}
|
||||
}
|
||||
|
||||
for %i0 = 0 to 1 {
|
||||
for %i1 = 0 to 1 {
|
||||
for %i2 = 0 to 7 {
|
||||
for %i0 = 0 to 2 {
|
||||
for %i1 = 0 to 2 {
|
||||
for %i2 = 0 to 8 {
|
||||
%b2 = "affine_apply" (%i1, %i2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
|
||||
%x = load %B[%i0, %b2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
|
||||
"op1"(%x) : (i32) -> ()
|
||||
}
|
||||
for %j1 = 0 to 7 {
|
||||
for %j2 = 0 to 7 {
|
||||
for %j1 = 0 to 8 {
|
||||
for %j2 = 0 to 8 {
|
||||
%a2 = "affine_apply" (%i1, %j2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
|
||||
%v203 = load %A[%j1, %a2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
|
||||
"op2"(%v203) : (i32) -> ()
|
||||
}
|
||||
for %k2 = 0 to 7 {
|
||||
for %k2 = 0 to 8 {
|
||||
%s0 = "op3"() : () -> i32
|
||||
%c2 = "affine_apply" (%i0, %k2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
|
||||
%s1 = load %C[%j1, %c2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
|
||||
|
@ -320,9 +320,9 @@ mlfunc @loop_nest_seq_long() -> i32 {
|
|||
|
||||
// UNROLL-BY-4-LABEL: mlfunc @unroll_unit_stride_no_cleanup() {
|
||||
mlfunc @unroll_unit_stride_no_cleanup() {
|
||||
// UNROLL-BY-4: for %i0 = 1 to 100 {
|
||||
for %i = 1 to 100 {
|
||||
// UNROLL-BY-4: for [[L1:%i[0-9]+]] = 1 to 8 step 4 {
|
||||
// UNROLL-BY-4: for %i0 = 0 to 100 {
|
||||
for %i = 0 to 100 {
|
||||
// UNROLL-BY-4: for [[L1:%i[0-9]+]] = 0 to 8 step 4 {
|
||||
// UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
|
||||
// UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
|
||||
// UNROLL-BY-4-NEXT: %2 = affine_apply #map{{[0-9]+}}([[L1]])
|
||||
|
@ -335,13 +335,13 @@ mlfunc @unroll_unit_stride_no_cleanup() {
|
|||
// UNROLL-BY-4-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> i32
|
||||
// UNROLL-BY-4-NEXT: %10 = "addi32"(%9, %9) : (i32, i32) -> i32
|
||||
// UNROLL-BY-4-NEXT: }
|
||||
for %j = 1 to 8 {
|
||||
for %j = 0 to 8 {
|
||||
%x = "addi32"(%j, %j) : (index, index) -> i32
|
||||
%y = "addi32"(%x, %x) : (i32, i32) -> i32
|
||||
}
|
||||
// empty loop
|
||||
// UNROLL-BY-4: for %i2 = 1 to 8 {
|
||||
for %k = 1 to 8 {
|
||||
// UNROLL-BY-4: for %i2 = 0 to 8 {
|
||||
for %k = 0 to 8 {
|
||||
}
|
||||
}
|
||||
return
|
||||
|
@ -349,9 +349,9 @@ mlfunc @unroll_unit_stride_no_cleanup() {
|
|||
|
||||
// UNROLL-BY-4-LABEL: mlfunc @unroll_unit_stride_cleanup() {
|
||||
mlfunc @unroll_unit_stride_cleanup() {
|
||||
// UNROLL-BY-4: for %i0 = 1 to 100 {
|
||||
for %i = 1 to 100 {
|
||||
// UNROLL-BY-4: for [[L1:%i[0-9]+]] = 1 to 8 step 4 {
|
||||
// UNROLL-BY-4: for %i0 = 0 to 100 {
|
||||
for %i = 0 to 100 {
|
||||
// UNROLL-BY-4: for [[L1:%i[0-9]+]] = 0 to 7 step 4 {
|
||||
// UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
|
||||
// UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
|
||||
// UNROLL-BY-4-NEXT: %2 = affine_apply #map{{[0-9]+}}([[L1]])
|
||||
|
@ -364,11 +364,11 @@ mlfunc @unroll_unit_stride_cleanup() {
|
|||
// UNROLL-BY-4-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> i32
|
||||
// UNROLL-BY-4-NEXT: %10 = "addi32"(%9, %9) : (i32, i32) -> i32
|
||||
// UNROLL-BY-4-NEXT: }
|
||||
// UNROLL-BY-4-NEXT: for [[L2:%i[0-9]+]] = 9 to 10 {
|
||||
// UNROLL-BY-4-NEXT: for [[L2:%i[0-9]+]] = 8 to 10 {
|
||||
// UNROLL-BY-4-NEXT: %11 = "addi32"([[L2]], [[L2]]) : (index, index) -> i32
|
||||
// UNROLL-BY-4-NEXT: %12 = "addi32"(%11, %11) : (i32, i32) -> i32
|
||||
// UNROLL-BY-4-NEXT: }
|
||||
for %j = 1 to 10 {
|
||||
for %j = 0 to 10 {
|
||||
%x = "addi32"(%j, %j) : (index, index) -> i32
|
||||
%y = "addi32"(%x, %x) : (i32, i32) -> i32
|
||||
}
|
||||
|
@ -378,8 +378,8 @@ mlfunc @unroll_unit_stride_cleanup() {
|
|||
|
||||
// UNROLL-BY-4-LABEL: mlfunc @unroll_non_unit_stride_cleanup() {
|
||||
mlfunc @unroll_non_unit_stride_cleanup() {
|
||||
// UNROLL-BY-4: for %i0 = 1 to 100 {
|
||||
for %i = 1 to 100 {
|
||||
// UNROLL-BY-4: for %i0 = 0 to 100 {
|
||||
for %i = 0 to 100 {
|
||||
// UNROLL-BY-4: for [[L1:%i[0-9]+]] = 2 to 37 step 20 {
|
||||
// UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
|
||||
// UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
|
||||
|
@ -409,8 +409,8 @@ mlfunc @unroll_non_unit_stride_cleanup() {
|
|||
mlfunc @loop_nest_single_iteration_after_unroll(%N: index) {
|
||||
// UNROLL-BY-4: %c0 = constant 0 : index
|
||||
// UNROLL-BY-4: %c4 = constant 4 : index
|
||||
// UNROLL-BY-4: for %i0 = 1 to %arg0 {
|
||||
for %i = 1 to %N {
|
||||
// UNROLL-BY-4: for %i0 = 0 to %arg0 {
|
||||
for %i = 0 to %N {
|
||||
// UNROLL-BY-4: %0 = "addi32"(%c0, %c0) : (index, index) -> i32
|
||||
// UNROLL-BY-4-NEXT: %1 = affine_apply #map0(%c0)
|
||||
// UNROLL-BY-4-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32
|
||||
|
@ -420,7 +420,7 @@ mlfunc @loop_nest_single_iteration_after_unroll(%N: index) {
|
|||
// UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
|
||||
// UNROLL-BY-4-NEXT: %7 = "addi32"(%c4, %c4) : (index, index) -> i32
|
||||
// UNROLL-BY-4-NOT: for
|
||||
for %j = 0 to 4 {
|
||||
for %j = 0 to 5 {
|
||||
%x = "addi32"(%j, %j) : (index, index) -> i32
|
||||
} // UNROLL-BY-4-NOT: }
|
||||
} // UNROLL-BY-4: }
|
||||
|
@ -432,8 +432,8 @@ mlfunc @loop_nest_single_iteration_after_unroll(%N: index) {
|
|||
// No cleanup will be generated here.
|
||||
// UNROLL-BY-4-LABEL: mlfunc @loop_nest_operand1() {
|
||||
mlfunc @loop_nest_operand1() {
|
||||
// UNROLL-BY-4: for %i0 = 1 to 100 step 2 {
|
||||
for %i = 1 to 100 step 2 {
|
||||
// UNROLL-BY-4: for %i0 = 0 to 100 step 2 {
|
||||
for %i = 0 to 100 step 2 {
|
||||
// UNROLL-BY-4: %0 = "foo"() : () -> i32
|
||||
// UNROLL-BY-4: %1 = "foo"() : () -> i32
|
||||
// UNROLL-BY-4: %2 = "foo"() : () -> i32
|
||||
|
@ -448,8 +448,8 @@ mlfunc @loop_nest_operand1() {
|
|||
// No cleanup will be generated here.
|
||||
// UNROLL-BY-4-LABEL: mlfunc @loop_nest_operand2() {
|
||||
mlfunc @loop_nest_operand2() {
|
||||
// UNROLL-BY-4: for %i0 = 1 to 100 step 2 {
|
||||
for %i = 1 to 100 step 2 {
|
||||
// UNROLL-BY-4: for %i0 = 0 to 100 step 2 {
|
||||
for %i = 0 to 100 step 2 {
|
||||
// UNROLL-BY-4: %0 = "foo"() : () -> i32
|
||||
// UNROLL-BY-4: %1 = "foo"() : () -> i32
|
||||
// UNROLL-BY-4: %2 = "foo"() : () -> i32
|
||||
|
@ -465,8 +465,8 @@ mlfunc @loop_nest_operand2() {
|
|||
// factor. The cleanup loop happens to be a single iteration one and is promoted.
|
||||
// UNROLL-BY-4-LABEL: mlfunc @loop_nest_operand3() {
|
||||
mlfunc @loop_nest_operand3() {
|
||||
// UNROLL-BY-4: for %i0 = 1 to 100 step 2 {
|
||||
for %i = 1 to 100 step 2 {
|
||||
// UNROLL-BY-4: for %i0 = 0 to 100 step 2 {
|
||||
for %i = 0 to 100 step 2 {
|
||||
// UNROLL-BY-4: for %i1 = (d0) -> (d0)(%i0) to #map{{[0-9]+}}(%i0) step 4 {
|
||||
// UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
|
||||
// UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
|
||||
|
@ -474,7 +474,7 @@ mlfunc @loop_nest_operand3() {
|
|||
// UNROLL-BY-4-NEXT: %3 = "foo"() : () -> i32
|
||||
// UNROLL-BY-4-NEXT: }
|
||||
// UNROLL-BY-4-NEXT: %4 = "foo"() : () -> i32
|
||||
for %j = (d0) -> (d0) (%i) to (d0) -> (d0 + 8) (%i) {
|
||||
for %j = (d0) -> (d0) (%i) to (d0) -> (d0 + 9) (%i) {
|
||||
%x = "foo"() : () -> i32
|
||||
}
|
||||
} // UNROLL-BY-4: }
|
||||
|
@ -483,9 +483,9 @@ mlfunc @loop_nest_operand3() {
|
|||
|
||||
// UNROLL-BY-4-LABEL: mlfunc @loop_nest_operand4(%arg0 : index) {
|
||||
mlfunc @loop_nest_operand4(%N : index) {
|
||||
// UNROLL-BY-4: for %i0 = 1 to 100 {
|
||||
for %i = 1 to 100 {
|
||||
// UNROLL-BY-4: for %i1 = ()[s0] -> (1)()[%arg0] to #map{{[0-9]+}}()[%arg0] step 4 {
|
||||
// UNROLL-BY-4: for %i0 = 0 to 100 {
|
||||
for %i = 0 to 100 {
|
||||
// UNROLL-BY-4: for %i1 = ()[s0] -> (0)()[%arg0] to #map{{[0-9]+}}()[%arg0] step 4 {
|
||||
// UNROLL-BY-4: %0 = "foo"() : () -> i32
|
||||
// UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
|
||||
// UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
|
||||
|
@ -496,7 +496,7 @@ mlfunc @loop_nest_operand4(%N : index) {
|
|||
// UNROLL-BY-4-NEXT: %4 = "foo"() : () -> i32
|
||||
// UNROLL-BY-4_NEXT: }
|
||||
// Specify the lower bound so that both lb and ub operands match.
|
||||
for %j = ()[s0] -> (1)()[%N] to %N {
|
||||
for %j = ()[s0] -> (0)()[%N] to %N {
|
||||
%x = "foo"() : () -> i32
|
||||
}
|
||||
}
|
||||
|
@ -508,7 +508,7 @@ mlfunc @loop_nest_unroll_full() {
|
|||
// CHECK-NEXT: %0 = "foo"() : () -> i32
|
||||
// CHECK-NEXT: %1 = "bar"() : () -> i32
|
||||
// CHECK-NEXT: return
|
||||
for %i = 0 to 0 {
|
||||
for %i = 0 to 1 {
|
||||
%x = "foo"() : () -> i32
|
||||
%y = "bar"() : () -> i32
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue