forked from OSchip/llvm-project
[mlir] Fix bug in FoldSubview with rank-reducing subview
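Fix how the new permutation map of the transfer ops is calculated when folding a rank-reducing subview: each remaining result dimension of the subview is now mapped to its corresponding source dimension, skipping the dropped dimensions.

Differential Revision: https://reviews.llvm.org/D110638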
This commit is contained in:
parent
abab0dbde2
commit
b12e4c17e0
|
@ -107,12 +107,11 @@ static AffineMap getPermutationMap(MLIRContext *context,
|
|||
AffineMap currPermutationMap) {
|
||||
llvm::SmallDenseSet<unsigned> unusedDims = subViewOp.getDroppedDims();
|
||||
SmallVector<AffineExpr> exprs;
|
||||
unsigned resultIdx = 0;
|
||||
int64_t sourceRank = subViewOp.getSourceType().getRank();
|
||||
for (auto dim : llvm::seq<int64_t>(0, sourceRank)) {
|
||||
if (unusedDims.count(dim))
|
||||
continue;
|
||||
exprs.push_back(getAffineDimExpr(resultIdx++, context));
|
||||
exprs.push_back(getAffineDimExpr(dim, context));
|
||||
}
|
||||
auto resultDimToSourceDimMap = AffineMap::get(sourceRank, 0, exprs, context);
|
||||
return currPermutationMap.compose(resultDimToSourceDimMap);
|
||||
|
|
|
@ -177,7 +177,6 @@ func @fold_vector_transfer_read_with_rank_reduced_subview(
|
|||
}
|
||||
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>
|
||||
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (d0 + s0)>
|
||||
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1)>
|
||||
// CHECK: func @fold_vector_transfer_read_with_rank_reduced_subview
|
||||
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: memref<?x?x?xf32, #[[MAP0]]>
|
||||
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index
|
||||
|
@ -189,8 +188,7 @@ func @fold_vector_transfer_read_with_rank_reduced_subview(
|
|||
// CHECK-DAG: %[[C0:.+]] = constant 0 : index
|
||||
// CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[MAP1]](%[[ARG5]])[%[[ARG1]]]
|
||||
// CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]](%[[ARG6]])[%[[ARG2]]]
|
||||
// CHECK: vector.transfer_read %[[ARG0]][%[[C0]], %[[IDX0]], %[[IDX1]]]
|
||||
// CHECK-SAME: permutation_map = #[[MAP2]]
|
||||
// CHECK: vector.transfer_read %[[ARG0]][%[[C0]], %[[IDX0]], %[[IDX1]]], %{{.*}} : memref<?x?x?xf32
|
||||
|
||||
// -----
|
||||
|
||||
|
@ -208,7 +206,6 @@ func @fold_vector_transfer_write_with_rank_reduced_subview(
|
|||
}
|
||||
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>
|
||||
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (d0 + s0)>
|
||||
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1)>
|
||||
// CHECK: func @fold_vector_transfer_write_with_rank_reduced_subview
|
||||
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: memref<?x?x?xf32, #[[MAP0]]>
|
||||
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: vector<4xf32>
|
||||
|
@ -221,5 +218,36 @@ func @fold_vector_transfer_write_with_rank_reduced_subview(
|
|||
// CHECK-DAG: %[[C0:.+]] = constant 0 : index
|
||||
// CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[MAP1]](%[[ARG6]])[%[[ARG2]]]
|
||||
// CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]](%[[ARG7]])[%[[ARG3]]]
|
||||
// CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[C0]], %[[IDX0]], %[[IDX1]]]
|
||||
// CHECK-SAME: permutation_map = #[[MAP2]]
|
||||
// CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[C0]], %[[IDX0]], %[[IDX1]]] {in_bounds = [true]} : vector<4xf32>, memref<?x?x?xf32
|
||||
|
||||
// -----
|
||||
|
||||
func @fold_vector_transfer_write_with_inner_rank_reduced_subview(
|
||||
%arg0 : memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]>,
|
||||
%arg1 : vector<4xf32>, %arg2: index, %arg3 : index, %arg4 : index,
|
||||
%arg5: index, %arg6 : index, %arg7 : index) {
|
||||
%cst = constant 0.0 : f32
|
||||
%0 = memref.subview %arg0[%arg2, %arg3, 0] [%arg4, %arg5, 1] [1, 1, 1]
|
||||
: memref<?x?x?xf32, offset: ?, strides: [?, ?, ?]> to
|
||||
memref<?x?xf32, offset: ?, strides: [?, ?]>
|
||||
vector.transfer_write %arg1, %0[%arg6, %arg7] {in_bounds = [true]}
|
||||
: vector<4xf32>, memref<?x?xf32, offset: ?, strides: [?, ?]>
|
||||
return
|
||||
}
|
||||
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>
|
||||
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (d0 + s0)>
|
||||
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1)>
|
||||
// CHECK: func @fold_vector_transfer_write_with_inner_rank_reduced_subview
|
||||
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: memref<?x?x?xf32, #[[MAP0]]>
|
||||
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: vector<4xf32>
|
||||
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index
|
||||
// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index
|
||||
// CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]]: index
|
||||
// CHECK-SAME: %[[ARG5:[a-zA-Z0-9]+]]: index
|
||||
// CHECK-SAME: %[[ARG6:[a-zA-Z0-9]+]]: index
|
||||
// CHECK-SAME: %[[ARG7:[a-zA-Z0-9]+]]: index
|
||||
// CHECK-DAG: %[[C0:.+]] = constant 0 : index
|
||||
// CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[MAP1]](%[[ARG6]])[%[[ARG2]]]
|
||||
// CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]](%[[ARG7]])[%[[ARG3]]]
|
||||
// CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[IDX0]], %[[IDX1]], %[[C0]]]
|
||||
// CHECK-SAME: {in_bounds = [true], permutation_map = #[[MAP2]]} : vector<4xf32>, memref<?x?x?xf32
|
||||
|
|
Loading…
Reference in New Issue