forked from OSchip/llvm-project
[MLIR] Fix tilePerfectlyNested utility for handling non-unit step size
The current implementation of tilePerfectlyNested utility doesn't handle the non-unit step size. We have added support to perform tiling correctly even if the step size of the loop to be tiled is non-unit. Fixes https://bugs.llvm.org/show_bug.cgi?id=49188. Differential Revision: https://reviews.llvm.org/D97037
This commit is contained in:
parent
1b968467c0
commit
817d343fb0
|
@ -848,7 +848,9 @@ constructTiledIndexSetHyperRect(MutableArrayRef<AffineForOp> origLoops,
|
|||
OperandRange newUbOperands = origLoops[i].getUpperBoundOperands();
|
||||
newLoops[i].setLowerBound(newLbOperands, origLoops[i].getLowerBoundMap());
|
||||
newLoops[i].setUpperBound(newUbOperands, origLoops[i].getUpperBoundMap());
|
||||
newLoops[i].setStep(tileSizes[i]);
|
||||
// If the step size of original loop is x and tileSize is y then after
|
||||
// tiling the tile space loops' step size becomes x*y.
|
||||
newLoops[i].setStep(tileSizes[i] * origLoops[i].getStep());
|
||||
}
|
||||
// Bounds for intra-tile loops.
|
||||
for (unsigned i = 0; i < width; i++) {
|
||||
|
@ -858,20 +860,23 @@ constructTiledIndexSetHyperRect(MutableArrayRef<AffineForOp> origLoops,
|
|||
AffineMap lbMap = b.getDimIdentityMap();
|
||||
newLoops[width + i].setLowerBound(
|
||||
/*operands=*/newLoops[i].getInductionVar(), lbMap);
|
||||
// The step sizes of intra-tile loops is just the original loops' step size.
|
||||
newLoops[width + i].setStep(origLoops[i].getStep());
|
||||
|
||||
// Set the upper bound.
|
||||
if (mayBeConstantCount && mayBeConstantCount.getValue() < tileSizes[i]) {
|
||||
// Trip count is less than the tile size: upper bound is lower bound +
|
||||
// trip count.
|
||||
AffineMap ubMap =
|
||||
b.getSingleDimShiftAffineMap(mayBeConstantCount.getValue());
|
||||
// trip count * stepSize.
|
||||
AffineMap ubMap = b.getSingleDimShiftAffineMap(
|
||||
mayBeConstantCount.getValue() * origLoops[i].getStep());
|
||||
newLoops[width + i].setUpperBound(
|
||||
/*operands=*/newLoops[i].getInductionVar(), ubMap);
|
||||
} else if (largestDiv % tileSizes[i] != 0) {
|
||||
// Intra-tile loop ii goes from i to min(i + tileSize, ub_i).
|
||||
// Intra-tile loop ii goes from i to min(i + tileSize * stepSize, ub_i).
|
||||
// Construct the upper bound map; the operands are the original operands
|
||||
// with 'i' (tile-space loop) appended to it. The new upper bound map is
|
||||
// the original one with an additional expression i + tileSize appended.
|
||||
// the original one with an additional expression i + tileSize * stepSize
|
||||
// appended.
|
||||
|
||||
// Add dim operands from original upper bound.
|
||||
SmallVector<Value, 4> ubOperands;
|
||||
|
@ -892,8 +897,8 @@ constructTiledIndexSetHyperRect(MutableArrayRef<AffineForOp> origLoops,
|
|||
boundExprs.reserve(1 + origUbMap.getNumResults());
|
||||
AffineExpr dim = b.getAffineDimExpr(origUbMap.getNumDims());
|
||||
// The new upper bound map is the original one with an additional
|
||||
// expression i + tileSize appended.
|
||||
boundExprs.push_back(dim + tileSizes[i]);
|
||||
// expression i + tileSize * stepSize (of original loop) appended.
|
||||
boundExprs.push_back(dim + tileSizes[i] * origLoops[i].getStep());
|
||||
boundExprs.append(origUbMap.getResults().begin(),
|
||||
origUbMap.getResults().end());
|
||||
AffineMap ubMap =
|
||||
|
@ -903,7 +908,8 @@ constructTiledIndexSetHyperRect(MutableArrayRef<AffineForOp> origLoops,
|
|||
} else {
|
||||
// No need of the min expression.
|
||||
AffineExpr dim = b.getAffineDimExpr(0);
|
||||
AffineMap ubMap = AffineMap::get(1, 0, dim + tileSizes[i]);
|
||||
AffineMap ubMap =
|
||||
AffineMap::get(1, 0, dim + tileSizes[i] * origLoops[i].getStep());
|
||||
newLoops[width + i].setUpperBound(newLoops[i].getInductionVar(), ubMap);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -174,6 +174,24 @@ func @tile_with_loop_upper_bounds_in_two_symbols(%arg0: memref<?xf32>, %limit: i
|
|||
|
||||
// -----
|
||||
|
||||
// CHECK-DAG: #[[$ID:.*]] = affine_map<(d0) -> (d0)>
|
||||
// CHECK-DAG: [[$UBMAP:#map[0-9]+]] = affine_map<(d0)[s0] -> (d0 + 160, s0)>
|
||||
|
||||
func @tile_loop_with_non_unit_step(%arg0 : memref<50xf32>, %arg1 : index) {
|
||||
affine.for %i = 0 to %arg1 step 5 {
|
||||
affine.load %arg0[%i] : memref<50xf32>
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func @tile_loop_with_non_unit_step(%arg{{.*}}: memref<50xf32>, %arg{{.*}}: index)
|
||||
// CHECK: affine.for %[[I:.*]] = 0 to %[[N:.*]] step 160 {
|
||||
// CHECK-NEXT: affine.for %[[II:.*]] = [[$ID:.*]](%[[I]]) to min
|
||||
// [[$UBMAP]](%[[I]])[%[[N]]] step 5 {
|
||||
// CHECK-NEXT: affine.load %arg{{.*}}[%arg{{.*}}] : memref<50xf32>
|
||||
|
||||
// -----
|
||||
|
||||
func @tile_size_larger_than_trip_count_symbolic_bound(%M: index, %N : index) {
|
||||
affine.for %i = affine_map<(d0) -> (d0)>(%M) to affine_map<(d0) -> (d0 + 2)>(%M) {
|
||||
affine.for %j = affine_map<(d0) -> (d0)>(%N) to affine_map<(d0) -> (d0 + 4)>(%N) {
|
||||
|
|
Loading…
Reference in New Issue