[mlir][linalg] Fix incorrect bound calculation for tiling conv

For convolution, the input window dimension's access affine map
is of the form `(d0 * s0 + d1)`, where `d0`/`d1` is the output/
filter window dimension, and `s0` is the stride.

When tiling, https://reviews.llvm.org/D109267 changed how the
way dimensions are acquired. Instead of directly querying using
`*.dim` ops on the original convolution op, we now get it by
applying the access affine map to the loop upper bounds. This
is fine for dimensions having single-dimension affine maps,
like matmul, but not for convolution input. It will cause
incorrect compuation and out of bound. A concrete example, say
we have 1x225x225x3 (NHWC) input, 3x3x3x32 (HWCF) filter, and
1x112x112x3 (NHWC) output with stride 2, (112 * 2 + 3) would be
227, which is different from the correct input window dimension
size 225.

Instead, we should first calculate the max indices for each loop,
and apply the affine map to them, and then plus one to get the
dimension size. Note this makes no difference for matmul-like
ops given they will have `d0 - 1 + 1` effectively.

Reviewed By: nicolasvasilache

Differential Revision: https://reviews.llvm.org/D110849
This commit is contained in:
Lei Zhang 2021-09-30 13:50:44 -04:00
parent b505ed9d31
commit cb2e651800
5 changed files with 34 additions and 15 deletions

View File

@ -637,14 +637,33 @@ Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize
<< ", size: " << size
<< ": make sure in bound with affine.min\n");
AffineExpr dim0, dim1, dim2;
bindDims(builder.getContext(), dim0, dim1, dim2);
// Compute min(size, dim - offset) to avoid out-of-bounds accesses.
AffineMap minMap =
AffineMap::inferFromExprList(
ArrayRef<ArrayRef<AffineExpr>>{{dim0, dim1 - dim2}})
// Get the dimension size for this dimension. We need to first calculate
// the max index and then plus one. This is important because for
// convolution ops, we have its input window dimension's affine map of the
// form `(d0 * s0 + d1)`, where `d0`/`d1 is an output/filter window
// dimension and `s0` is stride. Directly use the dimension size of
// output/filer window dimensions will cause incorrect calculation.
AffineMap minusOneMap =
AffineMap::inferFromExprList({ArrayRef<AffineExpr>{dim0 - 1}})
.front();
Value d = applyMapToValues(builder, loc, m, ubs).front();
AffineMap plusOneMap =
AffineMap::inferFromExprList({ArrayRef<AffineExpr>{dim0 + 1}})
.front();
auto maxIndices = llvm::to_vector<8>(llvm::map_range(ubs, [&](Value ub) {
return makeComposedAffineApply(builder, loc, minusOneMap, {ub})
.getResult();
}));
Value maxIndex = applyMapToValues(builder, loc, m, maxIndices).front();
Value d = makeComposedAffineApply(builder, loc, plusOneMap, {maxIndex});
// Compute min(size, dim - offset) to avoid out-of-bounds accesses.
AffineMap minMap = AffineMap::inferFromExprList(
{ArrayRef<AffineExpr>{dim0, dim1 - dim2}})
.front();
SmallVector<Value, 4> operands{size, d, offset};
fullyComposeAffineMapAndOperands(&minMap, &operands);
canonicalizeMapAndOperands(&minMap, &operands);

View File

@ -233,7 +233,7 @@ builtin.func @fuse_indexed(%arg0: tensor<24x12xi32>,
// -----
// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0, d1) -> (d0 + d1)>
// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1) -> (8, -d0 - d1 + 18)>
// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1) -> (8, -d0 - d1 + 17)>
// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0, d1, d2) -> (d0, -d1 - d2 + 18)>
#map0 = affine_map<(d0, d1) -> (d0, d0 + d1)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
@ -245,13 +245,13 @@ func @fuse_non_rectangular(%arg0: tensor<10x18xf32>,
%cst = constant 0.000000e+00 : f32
%0 = linalg.fill(%cst, %arg0) : f32, tensor<10x18xf32> -> tensor<10x18xf32>
// CHECK: scf.for %[[IV0:[0-9a-zA-Z]*]] =
// CHECK: scf.for %[[IV1:[0-9a-zA-Z]*]] =
// CHECK: scf.for %[[IV0:[0-9a-zA-Z]*]] = %c0 to %c8 step %c4
// CHECK: scf.for %[[IV1:[0-9a-zA-Z]*]] = %c0 to %c10 step %c5
// Compute producer on a hyper rectangular bounding box. Along the second dimenson,
// the offset is set to the sum of the induction variables and the upper bound
// to either eight (sum of the tile sizes) or eighteen (sum of the domain sizes)
// minus the induction variables.
// the offset is set to the sum of the induction variables, and the upper bound
// to either 8 (tile size) or 17 (sum of max indices (9+7) then + 1) minus the
// induction variables.
// CHECK: %[[SUM:.*]] = affine.apply #[[MAP0]](%[[IV1]], %[[IV0]]
// CHECK: %[[TS1:.*]] = affine.min #[[MAP1]](%[[IV1]], %[[IV0]]
// CHECK: %[[UB1:.*]] = affine.min #[[MAP2]](%[[TS1]], %[[IV1]], %[[IV0]]

View File

@ -203,7 +203,7 @@ func @conv_tensors_dynamic(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?x?x?x
// CHECK: #[[BOUND8_MAP_2:.+]] = affine_map<(d0)[s0, s1] -> (-d0 + s0, 8, -d0 + s1)>
// CHECK: #[[BOUND16_MAP:.+]] = affine_map<(d0)[s0] -> (16, -d0 + s0)>
// CHECK: #[[X2_MAP:.+]] = affine_map<(d0) -> (d0 * 2)>
// CHECK: #[[INPUT_BOUND:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * 2 + s0 - 2, d1 * -2 + s0 + s1 * 2)>
// CHECK: #[[INPUT_BOUND:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * 2 + s0 - 2, d1 * -2 + s0 + s1 * 2 - 2)>
// CHECK: #[[BOUND16_MAP_2:.+]] = affine_map<(d0)[s0, s1] -> (-d0 + s0, 16, -d0 + s1)>
// CHECK: #[[BOUND4_MAP:.+]] = affine_map<(d0)[s0] -> (4, -d0 + s0)>
// CHECK: #[[BOUND2_MAP:.+]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>

View File

@ -1,7 +1,7 @@
// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,0,0,4" | FileCheck %s -check-prefix=TILE-23004
// TILE-23004-DAG: #[[$D0x30pS0x10:.*]] = affine_map<(d0) -> (d0 * 30)>
// TILE-23004-DAG: #[[$S0x10p90D0x30pS1:.*]] = affine_map<(d0)[s0, s1] -> (s0 * 10 + 51, d0 * -30 + s0 * 10 + s1 * 30)>
// TILE-23004-DAG: #[[$S0x10p90D0x30pS1:.*]] = affine_map<(d0)[s0, s1] -> (s0 * 10 + 51, d0 * -30 + s0 * 10 + s1 * 30 - 39)>
// TILE-23004-DAG: #[[$strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)>
// TILE-23004-DAG: #[[$bound_map_2:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>
// TILE-23004-DAG: #[[$bound_map_3:.*]] = affine_map<(d0)[s0] -> (3, -d0 + s0)>

View File

@ -1,8 +1,8 @@
// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,4" | FileCheck %s
// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>
// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 2, -d0 + s0 + s1)>
// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 3, -d0 + s0 + s1)>
// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 2, -d0 + s0 + s1 - 1)>
// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 3, -d0 + s0 + s1 - 1)>
// CHECK-DAG: #[[MAP4:.*]] = affine_map<(d0)[s0] -> (3, -d0 + s0)>
// CHECK-DAG: #[[MAP5:.*]] = affine_map<(d0)[s0] -> (4, -d0 + s0)>