From cb2e6518000c7e1c5c2244592457afe4a97827e7 Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Thu, 30 Sep 2021 13:50:44 -0400 Subject: [PATCH] [mlir][linalg] Fix incorrect bound calculation for tiling conv For convolution, the input window dimension's access affine map is of the form `(d0 * s0 + d1)`, where `d0`/`d1` is the output/filter window dimension, and `s0` is the stride. When tiling, https://reviews.llvm.org/D109267 changed the way dimensions are acquired. Instead of directly querying using `*.dim` ops on the original convolution op, we now get them by applying the access affine map to the loop upper bounds. This is fine for dimensions having single-dimension affine maps, like matmul, but not for convolution input. It will cause incorrect computation and out-of-bounds accesses. As a concrete example, say we have 1x225x225x3 (NHWC) input, 3x3x3x32 (HWCF) filter, and 1x112x112x32 (NHWC) output with stride 2, (112 * 2 + 3) would be 227, which is different from the correct input window dimension size 225. Instead, we should first calculate the max indices for each loop, apply the affine map to them, and then add one to get the dimension size. Note this makes no difference for matmul-like ops given they will have `d0 - 1 + 1` effectively. 
Reviewed By: nicolasvasilache Differential Revision: https://reviews.llvm.org/D110849 --- mlir/lib/Dialect/Linalg/Utils/Utils.cpp | 29 +++++++++++++++---- .../Linalg/tile-and-fuse-on-tensors.mlir | 12 ++++---- .../Dialect/Linalg/tile-and-fuse-tensors.mlir | 2 +- mlir/test/Dialect/Linalg/tile-conv.mlir | 2 +- .../test/Dialect/Linalg/tile-simple-conv.mlir | 4 +-- 5 files changed, 34 insertions(+), 15 deletions(-) diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp index b7a2becefa77..7caef6bd399f 100644 --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -637,14 +637,33 @@ Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize << ", size: " << size << ": make sure in bound with affine.min\n"); + AffineExpr dim0, dim1, dim2; bindDims(builder.getContext(), dim0, dim1, dim2); - // Compute min(size, dim - offset) to avoid out-of-bounds accesses. - AffineMap minMap = - AffineMap::inferFromExprList( - ArrayRef>{{dim0, dim1 - dim2}}) + + // Get the dimension size for this dimension. We need to first calculate + // the max index and then plus one. This is important because for + // convolution ops, we have its input window dimension's affine map of the + // form `(d0 * s0 + d1)`, where `d0`/`d1 is an output/filter window + // dimension and `s0` is stride. Directly use the dimension size of + // output/filer window dimensions will cause incorrect calculation. 
+ AffineMap minusOneMap = + AffineMap::inferFromExprList({ArrayRef{dim0 - 1}}) .front(); - Value d = applyMapToValues(builder, loc, m, ubs).front(); + AffineMap plusOneMap = + AffineMap::inferFromExprList({ArrayRef{dim0 + 1}}) + .front(); + auto maxIndices = llvm::to_vector<8>(llvm::map_range(ubs, [&](Value ub) { + return makeComposedAffineApply(builder, loc, minusOneMap, {ub}) + .getResult(); + })); + Value maxIndex = applyMapToValues(builder, loc, m, maxIndices).front(); + Value d = makeComposedAffineApply(builder, loc, plusOneMap, {maxIndex}); + + // Compute min(size, dim - offset) to avoid out-of-bounds accesses. + AffineMap minMap = AffineMap::inferFromExprList( + {ArrayRef{dim0, dim1 - dim2}}) + .front(); SmallVector operands{size, d, offset}; fullyComposeAffineMapAndOperands(&minMap, &operands); canonicalizeMapAndOperands(&minMap, &operands); diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir index 8b430134eb2f..f53e1708986f 100644 --- a/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir +++ b/mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir @@ -233,7 +233,7 @@ builtin.func @fuse_indexed(%arg0: tensor<24x12xi32>, // ----- // CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0, d1) -> (d0 + d1)> -// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1) -> (8, -d0 - d1 + 18)> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1) -> (8, -d0 - d1 + 17)> // CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0, d1, d2) -> (d0, -d1 - d2 + 18)> #map0 = affine_map<(d0, d1) -> (d0, d0 + d1)> #map1 = affine_map<(d0, d1) -> (d0, d1)> @@ -245,13 +245,13 @@ func @fuse_non_rectangular(%arg0: tensor<10x18xf32>, %cst = constant 0.000000e+00 : f32 %0 = linalg.fill(%cst, %arg0) : f32, tensor<10x18xf32> -> tensor<10x18xf32> - // CHECK: scf.for %[[IV0:[0-9a-zA-Z]*]] = - // CHECK: scf.for %[[IV1:[0-9a-zA-Z]*]] = + // CHECK: scf.for %[[IV0:[0-9a-zA-Z]*]] = %c0 to %c8 step %c4 + // CHECK: scf.for %[[IV1:[0-9a-zA-Z]*]] = %c0 to %c10 
step %c5 // Compute producer on a hyper rectangular bounding box. Along the second dimenson, - // the offset is set to the sum of the induction variables and the upper bound - // to either eight (sum of the tile sizes) or eighteen (sum of the domain sizes) - // minus the induction variables. + // the offset is set to the sum of the induction variables, and the upper bound + // to either 8 (tile size) or 17 (sum of max indices (9+7) then + 1) minus the + // induction variables. // CHECK: %[[SUM:.*]] = affine.apply #[[MAP0]](%[[IV1]], %[[IV0]] // CHECK: %[[TS1:.*]] = affine.min #[[MAP1]](%[[IV1]], %[[IV0]] // CHECK: %[[UB1:.*]] = affine.min #[[MAP2]](%[[TS1]], %[[IV1]], %[[IV0]] diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir index 4aef50e6c96b..324da1086af0 100644 --- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir +++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir @@ -203,7 +203,7 @@ func @conv_tensors_dynamic(%input: tensor, %filter: tensor (-d0 + s0, 8, -d0 + s1)> // CHECK: #[[BOUND16_MAP:.+]] = affine_map<(d0)[s0] -> (16, -d0 + s0)> // CHECK: #[[X2_MAP:.+]] = affine_map<(d0) -> (d0 * 2)> -// CHECK: #[[INPUT_BOUND:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * 2 + s0 - 2, d1 * -2 + s0 + s1 * 2)> +// CHECK: #[[INPUT_BOUND:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * 2 + s0 - 2, d1 * -2 + s0 + s1 * 2 - 2)> // CHECK: #[[BOUND16_MAP_2:.+]] = affine_map<(d0)[s0, s1] -> (-d0 + s0, 16, -d0 + s1)> // CHECK: #[[BOUND4_MAP:.+]] = affine_map<(d0)[s0] -> (4, -d0 + s0)> // CHECK: #[[BOUND2_MAP:.+]] = affine_map<(d0)[s0] -> (2, -d0 + s0)> diff --git a/mlir/test/Dialect/Linalg/tile-conv.mlir b/mlir/test/Dialect/Linalg/tile-conv.mlir index eb4124fbb03a..b4ee26a15ba5 100644 --- a/mlir/test/Dialect/Linalg/tile-conv.mlir +++ b/mlir/test/Dialect/Linalg/tile-conv.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,0,0,4" | FileCheck %s -check-prefix=TILE-23004 // TILE-23004-DAG: 
#[[$D0x30pS0x10:.*]] = affine_map<(d0) -> (d0 * 30)> -// TILE-23004-DAG: #[[$S0x10p90D0x30pS1:.*]] = affine_map<(d0)[s0, s1] -> (s0 * 10 + 51, d0 * -30 + s0 * 10 + s1 * 30)> +// TILE-23004-DAG: #[[$S0x10p90D0x30pS1:.*]] = affine_map<(d0)[s0, s1] -> (s0 * 10 + 51, d0 * -30 + s0 * 10 + s1 * 30 - 39)> // TILE-23004-DAG: #[[$strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)> // TILE-23004-DAG: #[[$bound_map_2:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)> // TILE-23004-DAG: #[[$bound_map_3:.*]] = affine_map<(d0)[s0] -> (3, -d0 + s0)> diff --git a/mlir/test/Dialect/Linalg/tile-simple-conv.mlir b/mlir/test/Dialect/Linalg/tile-simple-conv.mlir index b25ad22cd309..9d3e0a5cd745 100644 --- a/mlir/test/Dialect/Linalg/tile-simple-conv.mlir +++ b/mlir/test/Dialect/Linalg/tile-simple-conv.mlir @@ -1,8 +1,8 @@ // RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,4" | FileCheck %s // CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)> -// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 2, -d0 + s0 + s1)> -// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 3, -d0 + s0 + s1)> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 2, -d0 + s0 + s1 - 1)> +// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 3, -d0 + s0 + s1 - 1)> // CHECK-DAG: #[[MAP4:.*]] = affine_map<(d0)[s0] -> (3, -d0 + s0)> // CHECK-DAG: #[[MAP5:.*]] = affine_map<(d0)[s0] -> (4, -d0 + s0)>