[MLIR] Fix tilePerfectlyNested utility for handling non-unit step size

The current implementation of tilePerfectlyNested utility doesn't handle
the non-unit step size. We have added support to perform tiling
correctly even if the step size of the loop to be tiled is non-unit.
Fixes https://bugs.llvm.org/show_bug.cgi?id=49188.

Differential Revision: https://reviews.llvm.org/D97037
This commit is contained in:
Vivek 2021-02-23 00:48:04 +05:30 committed by Uday Bondhugula
parent 1b968467c0
commit 817d343fb0
2 changed files with 33 additions and 9 deletions

View File

@ -848,7 +848,9 @@ constructTiledIndexSetHyperRect(MutableArrayRef<AffineForOp> origLoops,
OperandRange newUbOperands = origLoops[i].getUpperBoundOperands();
newLoops[i].setLowerBound(newLbOperands, origLoops[i].getLowerBoundMap());
newLoops[i].setUpperBound(newUbOperands, origLoops[i].getUpperBoundMap());
newLoops[i].setStep(tileSizes[i]);
// If the step size of original loop is x and tileSize is y then after
// tiling the tile space loops' step size becomes x*y.
newLoops[i].setStep(tileSizes[i] * origLoops[i].getStep());
}
// Bounds for intra-tile loops.
for (unsigned i = 0; i < width; i++) {
@ -858,20 +860,23 @@ constructTiledIndexSetHyperRect(MutableArrayRef<AffineForOp> origLoops,
AffineMap lbMap = b.getDimIdentityMap();
newLoops[width + i].setLowerBound(
/*operands=*/newLoops[i].getInductionVar(), lbMap);
// The step sizes of intra-tile loops is just the original loops' step size.
newLoops[width + i].setStep(origLoops[i].getStep());
// Set the upper bound.
if (mayBeConstantCount && mayBeConstantCount.getValue() < tileSizes[i]) {
// Trip count is less than the tile size: upper bound is lower bound +
// trip count.
AffineMap ubMap =
b.getSingleDimShiftAffineMap(mayBeConstantCount.getValue());
// trip count * stepSize.
AffineMap ubMap = b.getSingleDimShiftAffineMap(
mayBeConstantCount.getValue() * origLoops[i].getStep());
newLoops[width + i].setUpperBound(
/*operands=*/newLoops[i].getInductionVar(), ubMap);
} else if (largestDiv % tileSizes[i] != 0) {
// Intra-tile loop ii goes from i to min(i + tileSize, ub_i).
// Intra-tile loop ii goes from i to min(i + tileSize * stepSize, ub_i).
// Construct the upper bound map; the operands are the original operands
// with 'i' (tile-space loop) appended to it. The new upper bound map is
// the original one with an additional expression i + tileSize appended.
// the original one with an additional expression i + tileSize * stepSize
// appended.
// Add dim operands from original upper bound.
SmallVector<Value, 4> ubOperands;
@ -892,8 +897,8 @@ constructTiledIndexSetHyperRect(MutableArrayRef<AffineForOp> origLoops,
boundExprs.reserve(1 + origUbMap.getNumResults());
AffineExpr dim = b.getAffineDimExpr(origUbMap.getNumDims());
// The new upper bound map is the original one with an additional
// expression i + tileSize appended.
boundExprs.push_back(dim + tileSizes[i]);
// expression i + tileSize * stepSize (of original loop) appended.
boundExprs.push_back(dim + tileSizes[i] * origLoops[i].getStep());
boundExprs.append(origUbMap.getResults().begin(),
origUbMap.getResults().end());
AffineMap ubMap =
@ -903,7 +908,8 @@ constructTiledIndexSetHyperRect(MutableArrayRef<AffineForOp> origLoops,
} else {
// No need of the min expression.
AffineExpr dim = b.getAffineDimExpr(0);
AffineMap ubMap = AffineMap::get(1, 0, dim + tileSizes[i]);
AffineMap ubMap =
AffineMap::get(1, 0, dim + tileSizes[i] * origLoops[i].getStep());
newLoops[width + i].setUpperBound(newLoops[i].getInductionVar(), ubMap);
}
}

View File

@ -174,6 +174,24 @@ func @tile_with_loop_upper_bounds_in_two_symbols(%arg0: memref<?xf32>, %limit: i
// -----
// CHECK-DAG: #[[$ID:.*]] = affine_map<(d0) -> (d0)>
// CHECK-DAG: [[$UBMAP:#map[0-9]+]] = affine_map<(d0)[s0] -> (d0 + 160, s0)>
func @tile_loop_with_non_unit_step(%arg0 : memref<50xf32>, %arg1 : index) {
affine.for %i = 0 to %arg1 step 5 {
affine.load %arg0[%i] : memref<50xf32>
}
return
}
// CHECK-LABEL: func @tile_loop_with_non_unit_step(%arg{{.*}}: memref<50xf32>, %arg{{.*}}: index)
// CHECK: affine.for %[[I:.*]] = 0 to %[[N:.*]] step 160 {
// CHECK-NEXT: affine.for %[[II:.*]] = [[$ID:.*]](%[[I]]) to min
// [[$UBMAP]](%[[I]])[%[[N]]] step 5 {
// CHECK-NEXT: affine.load %arg{{.*}}[%arg{{.*}}] : memref<50xf32>
// -----
func @tile_size_larger_than_trip_count_symbolic_bound(%M: index, %N : index) {
affine.for %i = affine_map<(d0) -> (d0)>(%M) to affine_map<(d0) -> (d0 + 2)>(%M) {
affine.for %j = affine_map<(d0) -> (d0)>(%N) to affine_map<(d0) -> (d0 + 4)>(%N) {