From b12e4c17e07b42559fea219034dd162de25df498 Mon Sep 17 00:00:00 2001
From: thomasraoux
Date: Tue, 28 Sep 2021 09:26:41 -0700
Subject: [PATCH] [mlir] Fix bug in FoldSubview with rank reducing subview

Fix how we calculate the new permutation map of the transfer ops.

Differential Revision: https://reviews.llvm.org/D110638
---
 .../MemRef/Transforms/FoldSubViewOps.cpp      |  3 +-
 .../test/Dialect/MemRef/fold-subview-ops.mlir | 40 ++++++++++++++++---
 2 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/mlir/lib/Dialect/MemRef/Transforms/FoldSubViewOps.cpp b/mlir/lib/Dialect/MemRef/Transforms/FoldSubViewOps.cpp
index 17ec4a1ba7fe..cc9580fe86cc 100644
--- a/mlir/lib/Dialect/MemRef/Transforms/FoldSubViewOps.cpp
+++ b/mlir/lib/Dialect/MemRef/Transforms/FoldSubViewOps.cpp
@@ -107,12 +107,11 @@ static AffineMap getPermutationMap(MLIRContext *context,
                                    AffineMap currPermutationMap) {
   llvm::SmallDenseSet<unsigned> unusedDims = subViewOp.getDroppedDims();
   SmallVector<AffineExpr> exprs;
-  unsigned resultIdx = 0;
   int64_t sourceRank = subViewOp.getSourceType().getRank();
   for (auto dim : llvm::seq<int64_t>(0, sourceRank)) {
     if (unusedDims.count(dim))
       continue;
-    exprs.push_back(getAffineDimExpr(resultIdx++, context));
+    exprs.push_back(getAffineDimExpr(dim, context));
   }
   auto resultDimToSourceDimMap = AffineMap::get(sourceRank, 0, exprs, context);
   return currPermutationMap.compose(resultDimToSourceDimMap);
diff --git a/mlir/test/Dialect/MemRef/fold-subview-ops.mlir b/mlir/test/Dialect/MemRef/fold-subview-ops.mlir
index 558b44350af7..d8776ba19433 100644
--- a/mlir/test/Dialect/MemRef/fold-subview-ops.mlir
+++ b/mlir/test/Dialect/MemRef/fold-subview-ops.mlir
@@ -177,7 +177,6 @@ func @fold_vector_transfer_read_with_rank_reduced_subview(
 }
 // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>
 // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (d0 + s0)>
-// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1)>
 // CHECK: func
@fold_vector_transfer_read_with_rank_reduced_subview // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: memref // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index @@ -189,8 +188,7 @@ func @fold_vector_transfer_read_with_rank_reduced_subview( // CHECK-DAG: %[[C0:.+]] = constant 0 : index // CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[MAP1]](%[[ARG5]])[%[[ARG1]]] // CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]](%[[ARG6]])[%[[ARG2]]] -// CHECK: vector.transfer_read %[[ARG0]][%[[C0]], %[[IDX0]], %[[IDX1]]] -// CHECK-SAME: permutation_map = #[[MAP2]] +// CHECK: vector.transfer_read %[[ARG0]][%[[C0]], %[[IDX0]], %[[IDX1]]], %{{.*}} : memref (d0 * s1 + s0 + d1 * s2 + d2 * s3)> // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (d0 + s0)> -// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1)> // CHECK: func @fold_vector_transfer_write_with_rank_reduced_subview // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: memref // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: vector<4xf32> @@ -221,5 +218,36 @@ func @fold_vector_transfer_write_with_rank_reduced_subview( // CHECK-DAG: %[[C0:.+]] = constant 0 : index // CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[MAP1]](%[[ARG6]])[%[[ARG2]]] // CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]](%[[ARG7]])[%[[ARG3]]] -// CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[C0]], %[[IDX0]], %[[IDX1]]] -// CHECK-SAME: permutation_map = #[[MAP2]] +// CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[C0]], %[[IDX0]], %[[IDX1]]] {in_bounds = [true]} : vector<4xf32>, memref, + %arg1 : vector<4xf32>, %arg2: index, %arg3 : index, %arg4 : index, + %arg5: index, %arg6 : index, %arg7 : index) { + %cst = constant 0.0 : f32 + %0 = memref.subview %arg0[%arg2, %arg3, 0] [%arg4, %arg5, 1] [1, 1, 1] + : memref to + memref + vector.transfer_write %arg1, %0[%arg6, %arg7] {in_bounds = [true]} + : vector<4xf32>, memref + return +} +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)> +// CHECK-DAG: #[[MAP1:.+]] = 
affine_map<(d0)[s0] -> (d0 + s0)> +// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1)> +// CHECK: func @fold_vector_transfer_write_with_inner_rank_reduced_subview +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: memref +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: vector<4xf32> +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG5:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG6:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG7:[a-zA-Z0-9]+]]: index +// CHECK-DAG: %[[C0:.+]] = constant 0 : index +// CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[MAP1]](%[[ARG6]])[%[[ARG2]]] +// CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]](%[[ARG7]])[%[[ARG3]]] +// CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[IDX0]], %[[IDX1]], %[[C0]]] +// CHECK-SAME: {in_bounds = [true], permutation_map = #[[MAP2]]} : vector<4xf32>, memref