[mlir][linalg] Add IndexOp support to fusion on tensors.

Until now, fusion on tensors bailed out on producers that have index semantics. This revision shifts all IndexOp results of the tiled producer by the tile offsets of the surrounding loop nest, which makes it possible to fuse producers that use linalg.index.

This revision depends on https://reviews.llvm.org/D109761 and https://reviews.llvm.org/D109766.

Reviewed By: nicolasvasilache

Differential Revision: https://reviews.llvm.org/D109774
Author: Tobias Gysi
Date:   2021-09-20 15:49:15 +00:00
Commit: 7be28d82b4 (parent 644b55d57e)

2 changed files with 45 additions and 4 deletions


@@ -181,6 +181,9 @@ static LinalgOp getTiledProducer(OpBuilder &b, OpResult producerResult,
           .getTypes();
   LinalgOp clonedOp = producerOp.clone(b, loc, resultTypes, tiledOperands);
+
+  // Shift all IndexOp results by the tile offset.
+  addTileLoopIvsToIndexOpResults(b, clonedOp, allIvs);
   return clonedOp;
 }
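The helper addTileLoopIvsToIndexOpResults is introduced by this commit, but its definition is not part of this excerpt. Below is a minimal sketch of what such a helper can look like, assuming allIvs holds one tile loop induction variable per producer loop dimension (null for untiled dimensions). It illustrates the technique only and is not the committed implementation:

#include "llvm/ADT/STLExtras.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"

using namespace mlir;

// Sketch: shift every linalg.index result in the cloned producer by the
// induction variable of the enclosing tile loop for that dimension, i.e.
// replace all uses of `index` with `affine.apply (d0 + d1)(index, iv)`.
static void addTileLoopIvsToIndexOpResults(OpBuilder &b, LinalgOp tiledOp,
                                           ArrayRef<Value> ivs) {
  Block *body = tiledOp.getBlock();
  for (linalg::IndexOp indexOp :
       llvm::make_early_inc_range(body->getOps<linalg::IndexOp>())) {
    // Assumption: untiled dimensions carry no induction variable to add.
    if (!ivs[indexOp.dim()])
      continue;
    OpBuilder::InsertionGuard guard(b);
    b.setInsertionPointAfter(indexOp);
    AffineExpr index, offset;
    bindDims(b.getContext(), index, offset);
    auto applyOp = b.create<AffineApplyOp>(
        indexOp.getLoc(), AffineMap::get(2, 0, index + offset),
        ValueRange{indexOp.getResult(), ivs[indexOp.dim()]});
    // Rewire all uses of the unshifted index except the affine.apply itself.
    indexOp.getResult().replaceAllUsesExcept(applyOp.getResult(),
                                             applyOp.getOperation());
  }
}

The (d0 + d1) map matches the #[[MAP0]] pattern the new test checks for below and leaves the shift foldable by later affine canonicalizations.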
@@ -325,10 +328,6 @@ FailureOr<LinalgOp> TileLoopNest::fuseProducer(OpBuilder &b,
   if (!producerResult || !isa<LinalgOp>(producerResult.getOwner()))
     return failure();
 
-  // TODO: support producers that have index semantics.
-  if (cast<LinalgOp>(producerResult.getOwner()).hasIndexSemantics())
-    return failure();
-
   // Compute the slice dimensions tiled by `tileLoopNest`.
   SmallVector<int64_t> tiledSliceDims =
       getTiledSliceDims(producerResult, rootOpOperand, loopDims);
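With the early bailout removed, a producer that contains linalg.index operations is fused like any other producer. A hypothetical call site (the names b, tileLoopNest, and rootOpOperand are assumptions; only the fuseProducer signature above appears in this diff):

// Try to fuse the producer of rootOpOperand into the tile loop nest; this
// no longer fails merely because the producer has index semantics.
FailureOr<LinalgOp> fused = tileLoopNest.fuseProducer(b, rootOpOperand);
if (failed(fused))
  return failure(); // producer is not fusable for some other reason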


@@ -188,3 +188,45 @@ builtin.func @fuse_input_and_output(%arg0: tensor<24x12xf32>,
   %2 = linalg.matmul ins(%0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%1 : tensor<24x25xf32>) -> tensor<24x25xf32>
   return %2 : tensor<24x25xf32>
 }
+
+// -----
+
+//  CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0, d1) -> (d0 + d1)>
+#map0 = affine_map<(d0, d1) -> (d1, d0)>
+
+//      CHECK: fuse_indexed
+// CHECK-SAME:   %[[ARG1:[0-9a-zA-Z]*]]: tensor<12x25xi32>
+builtin.func @fuse_indexed(%arg0: tensor<24x12xi32>,
+                           %arg1: tensor<12x25xi32>,
+                           %arg2: tensor<24x25xi32>) -> tensor<24x25xi32> {
+  %c0 = constant 0 : index
+  %c12 = constant 12 : index
+  %c25 = constant 25 : index
+  %c24 = constant 24 : index
+  %c4 = constant 4 : index
+  %0 = linalg.generic {indexing_maps = [#map0], iterator_types = ["parallel", "parallel"]} outs(%arg1 : tensor<12x25xi32>) {
+  ^bb0(%arg3: i32):  // no predecessors
+    %6 = linalg.index 0 : index
+    %7 = linalg.index 1 : index
+    %8 = addi %6, %7 : index
+    %9 = index_cast %8 : index to i32
+    linalg.yield %9 : i32
+  } -> tensor<12x25xi32>
+
+  //      CHECK: scf.for %[[IV0:[0-9a-zA-Z]*]] =
+  //      CHECK:   scf.for %[[IV1:[0-9a-zA-Z]*]] =
+  //      CHECK:     scf.for %[[IV2:[0-9a-zA-Z]*]] =
+
+  // Shift the indexes by the slice offsets and swap the offsets due to the
+  // transposed indexing map.
+  //      CHECK:       %[[T1:.*]] = tensor.extract_slice %[[ARG1]]
+  // CHECK-SAME:         %[[IV2]], %[[IV0]]
+  //      CHECK:       linalg.generic {{.*}} outs(%[[T1]]
+  //      CHECK:         %[[IDX0:.*]] = linalg.index 0
+  //      CHECK:         %[[IDX0_SHIFTED:.*]] = affine.apply #[[MAP0]](%[[IDX0]], %[[IV0]])
+  //      CHECK:         %[[IDX1:.*]] = linalg.index 1
+  //      CHECK:         %[[IDX1_SHIFTED:.*]] = affine.apply #[[MAP0]](%[[IDX1]], %[[IV2]])
+  //      CHECK:         %{{.*}} = addi %[[IDX0_SHIFTED]], %[[IDX1_SHIFTED]]
+  %1 = linalg.matmul ins(%arg0, %0 : tensor<24x12xi32>, tensor<12x25xi32>) outs(%arg2 : tensor<24x25xi32>) -> tensor<24x25xi32>
+  return %1 : tensor<24x25xi32>
+}