[mlir] Fix bug in FoldSubview with rank-reducing subview

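Fix how we calculate the new permutation map of the transfer ops when the
subview is rank-reducing: each non-dropped dimension must map to its own
source dimension index instead of a running result counter.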

Differential Revision: https://reviews.llvm.org/D110638
This commit is contained in:
thomasraoux 2021-09-28 09:26:41 -07:00
parent abab0dbde2
commit b12e4c17e0
2 changed files with 35 additions and 8 deletions

View File

@ -107,12 +107,11 @@ static AffineMap getPermutationMap(MLIRContext *context,
AffineMap currPermutationMap) {
llvm::SmallDenseSet<unsigned> unusedDims = subViewOp.getDroppedDims();
SmallVector<AffineExpr> exprs;
unsigned resultIdx = 0;
int64_t sourceRank = subViewOp.getSourceType().getRank();
for (auto dim : llvm::seq<int64_t>(0, sourceRank)) {
if (unusedDims.count(dim))
continue;
exprs.push_back(getAffineDimExpr(resultIdx++, context));
exprs.push_back(getAffineDimExpr(dim, context));
}
auto resultDimToSourceDimMap = AffineMap::get(sourceRank, 0, exprs, context);
return currPermutationMap.compose(resultDimToSourceDimMap);

View File

@ -177,7 +177,6 @@ func @fold_vector_transfer_read_with_rank_reduced_subview(
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (d0 + s0)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1)>
// CHECK: func @fold_vector_transfer_read_with_rank_reduced_subview
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: memref<?x?x?xf32, #[[MAP0]]>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index
@ -189,8 +188,7 @@ func @fold_vector_transfer_read_with_rank_reduced_subview(
// CHECK-DAG: %[[C0:.+]] = constant 0 : index
// CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[MAP1]](%[[ARG5]])[%[[ARG1]]]
// CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]](%[[ARG6]])[%[[ARG2]]]
// CHECK: vector.transfer_read %[[ARG0]][%[[C0]], %[[IDX0]], %[[IDX1]]]
// CHECK-SAME: permutation_map = #[[MAP2]]
// CHECK: vector.transfer_read %[[ARG0]][%[[C0]], %[[IDX0]], %[[IDX1]]], %{{.*}} : memref<?x?x?xf32
// -----
@ -208,7 +206,6 @@ func @fold_vector_transfer_write_with_rank_reduced_subview(
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (d0 + s0)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1)>
// CHECK: func @fold_vector_transfer_write_with_rank_reduced_subview
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: memref<?x?x?xf32, #[[MAP0]]>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: vector<4xf32>
@ -221,5 +218,36 @@ func @fold_vector_transfer_write_with_rank_reduced_subview(
// CHECK-DAG: %[[C0:.+]] = constant 0 : index
// CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[MAP1]](%[[ARG6]])[%[[ARG2]]]
// CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]](%[[ARG7]])[%[[ARG3]]]
// CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[C0]], %[[IDX0]], %[[IDX1]]]
// CHECK-SAME: permutation_map = #[[MAP2]]
// CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[C0]], %[[IDX0]], %[[IDX1]]] {in_bounds = [true]} : vector<4xf32>, memref<?x?x?xf32
// -----
// Test input: a vector.transfer_write into a rank-reducing subview where the
// dropped dimension is the innermost one (the rank-3 source is viewed as a
// rank-2 memref).
//   %arg0        - rank-3 source memref with dynamic offset and strides.
//   %arg1        - the vector<4xf32> value being written.
//   %arg2, %arg3 - subview offsets for the two outer source dimensions.
//   %arg4, %arg5 - subview sizes for the two outer source dimensions.
//   %arg6, %arg7 - indices of the transfer_write into the rank-2 subview.
func @fold_vector_transfer_write_with_inner_rank_reduced_subview(
%arg0 : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>,
%arg1 : vector<4xf32>, %arg2: index, %arg3 : index, %arg4 : index,
%arg5: index, %arg6 : index, %arg7 : index) {
%cst = constant 0.0 : f32
// The innermost source dimension has static offset 0 and static size 1; the
// result type is rank 2, so that unit dimension does not appear in the view.
%0 = memref.subview %arg0[%arg2, %arg3, 0] [%arg4, %arg5, 1] [1, 1, 1]
: memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]> to
memref<?x?xf32, offset: ?, strides: [?, ?]>
// 1-D vector write through the rank-2 view; no explicit permutation_map is
// given on this op.
vector.transfer_write %arg1, %0[%arg6, %arg7] {in_bounds = [true]}
: vector<4xf32>, memref<?x?xf32, offset: ?, strides: [?, ?]>
return
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (d0 + s0)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1)>
// CHECK: func @fold_vector_transfer_write_with_inner_rank_reduced_subview
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: memref<?x?x?xf32, #[[MAP0]]>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: vector<4xf32>
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index
// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index
// CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]]: index
// CHECK-SAME: %[[ARG5:[a-zA-Z0-9]+]]: index
// CHECK-SAME: %[[ARG6:[a-zA-Z0-9]+]]: index
// CHECK-SAME: %[[ARG7:[a-zA-Z0-9]+]]: index
// CHECK-DAG: %[[C0:.+]] = constant 0 : index
// CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[MAP1]](%[[ARG6]])[%[[ARG2]]]
// CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]](%[[ARG7]])[%[[ARG3]]]
// CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[IDX0]], %[[IDX1]], %[[C0]]]
// CHECK-SAME: {in_bounds = [true], permutation_map = #[[MAP2]]} : vector<4xf32>, memref<?x?x?xf32