[mlir] Extract offsets-sizes-strides computation from `makeTiledShape(s)`.

This change separates the computation of the actual subset parameters (offsets,
sizes, strides) from the materialization of the subview/extract_slice. That way,
users can still use the Linalg tiling logic even if they use different
operations to materialize the subsets.
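
As a rough illustration of the intended usage (a sketch, not part of this patch): with the new `computeSliceParameters` entry point added below, a client can compute the offsets/sizes/strides once and then materialize the subset with an op of its own choosing. The helper name `makeTensorTile` is hypothetical, and `tensor.extract_slice` merely stands in for whatever op the client wants to emit.

// Hypothetical client-side helper: reuse the Linalg slice computation but pick
// the materialization op yourself (here: tensor.extract_slice).
// Assumes the declarations from "mlir/Dialect/Linalg/Utils/Utils.h" and
// "mlir/Dialect/Tensor/IR/Tensor.h", and `using namespace mlir;`.
static Value makeTensorTile(OpBuilder &builder, Location loc, Value valueToTile,
                            ArrayRef<OpFoldResult> tileSizes, AffineMap map,
                            ArrayRef<OpFoldResult> lbs,
                            ArrayRef<OpFoldResult> ubs,
                            ArrayRef<OpFoldResult> subShapeSizes) {
  // Step 1: compute the subset parameters only.
  linalg::SliceParameters params = linalg::computeSliceParameters(
      builder, loc, valueToTile, tileSizes, map, lbs, ubs, subShapeSizes,
      /*omitPartialTileCheck=*/false);
  // Step 2: materialize with any op that takes offsets/sizes/strides.
  return builder.create<tensor::ExtractSliceOp>(
      loc, valueToTile, params.offsets, params.sizes, params.strides);
}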

Differential Revision: https://reviews.llvm.org/D131053
Alexander Belyaev 2022-08-04 11:18:04 +02:00
parent 57a9bccec7
commit 56d94b3b90
8 changed files with 168 additions and 87 deletions

@@ -214,6 +214,44 @@ Value materializeOpFoldResult(ImplicitLocOpBuilder &builder,
Value materializeOpFoldResult(OpBuilder &b, Location loc,
OpFoldResult opFoldResult);
/// A struct containing offsets-sizes-strides arguments of the tiled shape.
struct SliceParameters {
SmallVector<OpFoldResult, 3> offsets;
SmallVector<OpFoldResult, 3> sizes;
SmallVector<OpFoldResult, 3> strides;
};
/// Computes SliceParameters for a single `valueToTile`. `omitPartialTileCheck`
/// controls whether to omit the partial/boundary tile condition check in cases
/// where we statically know that it is unnecessary.
SliceParameters
computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile,
ArrayRef<OpFoldResult> tileSizes, AffineMap map,
ArrayRef<OpFoldResult> lbs, ArrayRef<OpFoldResult> ubs,
ArrayRef<OpFoldResult> subShapeSizes,
bool omitPartialTileCheck);
/// Computes SliceParameters for all `valuesToTile` of the given
/// `linalgOp`, assuming `linalgOp` is being fused into a loop
/// nest for tiling with the given induction variables `ivs` and tile sizes
/// `tileSizes`. `sizeBounds` are the iteration space bounds for *all* the
/// implicit loops in `linalgOp`. `omitPartialTileCheck` controls whether to
/// omit the partial/boundary tile condition check in cases where we statically
/// know that it is unnecessary.
///
/// Note that a constant zero in `tileSizes` means no tiling at that implicit
/// loop. The number of non-zero values in `tileSizes` should be equal to the
/// number of values in `ivs`.
///
/// Some of the `valuesToTile` won't be affected by tiling. For these values,
/// llvm::None will be returned.
SmallVector<Optional<SliceParameters>>
computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp,
ValueRange valuesToTile, ArrayRef<OpFoldResult> ivs,
ArrayRef<OpFoldResult> tileSizes,
ArrayRef<OpFoldResult> sizeBounds,
bool omitPartialTileCheck);
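
For illustration only (a sketch, not part of this patch), a caller that drives its own materialization might consume the result as below, forwarding untiled values unchanged and building a slice op of its choice otherwise. `tensor.extract_slice` is just one possible choice, and the surrounding values (`builder`, `loc`, `linalgOp`, `valuesToTile`, `ivs`, `tileSizes`, `sizeBounds`) are assumed to be set up as for `makeTiledShapes`.

SmallVector<Optional<SliceParameters>> allParams =
    computeAllSliceParameters(builder, loc, linalgOp, valuesToTile, ivs,
                              tileSizes, sizeBounds,
                              /*omitPartialTileCheck=*/false);
SmallVector<Value> tiles;
for (auto item : llvm::zip(valuesToTile, allParams)) {
  Value value = std::get<0>(item);
  Optional<SliceParameters> params = std::get<1>(item);
  if (!params.hasValue()) {
    // llvm::None: this operand is not affected by tiling; use it as-is.
    tiles.push_back(value);
    continue;
  }
  // Any op taking offsets/sizes/strides can be materialized here.
  tiles.push_back(builder.create<tensor::ExtractSliceOp>(
      loc, value, params->offsets, params->sizes, params->strides));
}
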
/// Creates an extract_slice/subview op for a single `valueToTile` with
/// `builder`. This new operation extracts a tile of `valueToTile`, starting
/// at offsets `lbs` and with sizes `subShapeSizes`. `omitPartialTileCheck`

@@ -802,28 +802,61 @@ void GenerateLoopNest<scf::ParallelOp>::doit(
assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
}
static Value materializeTiledShape(OpBuilder &builder, Location loc,
Value valueToTile,
const SliceParameters &sliceParams) {
auto shapedType = valueToTile.getType().dyn_cast<ShapedType>();
auto *sliceOp = TypeSwitch<ShapedType, Operation *>(shapedType)
.Case([&](MemRefType) {
return builder.create<memref::SubViewOp>(
loc, valueToTile, sliceParams.offsets,
sliceParams.sizes, sliceParams.strides);
})
.Case([&](RankedTensorType) {
return makeComposedExtractSliceOp(
builder, loc, valueToTile, sliceParams.offsets,
sliceParams.sizes, sliceParams.strides);
})
.Default([](ShapedType) -> Operation * {
llvm_unreachable("Unexpected shaped type");
});
return sliceOp->getResult(0);
}
Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
ArrayRef<OpFoldResult> tileSizes, AffineMap map,
ArrayRef<OpFoldResult> lbs, ArrayRef<OpFoldResult> ubs,
ArrayRef<OpFoldResult> subShapeSizes,
bool omitPartialTileCheck) {
SliceParameters sliceParams =
computeSliceParameters(builder, loc, valueToTile, tileSizes, map, lbs,
ubs, subShapeSizes, omitPartialTileCheck);
return materializeTiledShape(builder, loc, valueToTile, sliceParams);
}
SliceParameters
computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile,
ArrayRef<OpFoldResult> tileSizes, AffineMap map,
ArrayRef<OpFoldResult> lbs, ArrayRef<OpFoldResult> ubs,
ArrayRef<OpFoldResult> subShapeSizes,
bool omitPartialTileCheck) {
auto shapedType = valueToTile.getType().dyn_cast<ShapedType>();
assert(shapedType && "only shaped types can be tiled");
ArrayRef<int64_t> shape = shapedType.getShape();
int64_t rank = shapedType.getRank();
// Construct a new subview / extract_slice for the tile.
SmallVector<OpFoldResult, 4> offsets, sizes, strides;
offsets.reserve(rank);
sizes.reserve(rank);
strides.reserve(rank);
SliceParameters sliceParams;
sliceParams.offsets.reserve(rank);
sliceParams.sizes.reserve(rank);
sliceParams.strides.reserve(rank);
for (unsigned r = 0; r < rank; ++r) {
LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: for dim#" << r);
LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: for dim#" << r);
if (!isTiled(map.getSubMap({r}), tileSizes)) {
offsets.push_back(builder.getIndexAttr(0));
sliceParams.offsets.push_back(builder.getIndexAttr(0));
OpFoldResult dim = createFoldedDimOp(builder, loc, valueToTile, r);
sizes.push_back(dim);
strides.push_back(builder.getIndexAttr(1));
sliceParams.sizes.push_back(dim);
sliceParams.strides.push_back(builder.getIndexAttr(1));
LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
continue;
}
@@ -832,26 +865,27 @@ Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
// Tiling creates a new slice at the proper index, the slice step is 1
// (i.e. the op does not subsample, stepping occurs in the loop).
auto m = map.getSubMap({r});
LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: submap: " << m << "\n");
LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: submap: " << m << "\n");
IRRewriter rewriter(builder);
OpFoldResult offset = makeComposedFoldedAffineApply(rewriter, loc, m, lbs);
offsets.push_back(offset);
sliceParams.offsets.push_back(offset);
OpFoldResult closedIntSize =
makeComposedFoldedAffineApply(rewriter, loc, m, subShapeSizes);
// Resulting size needs to be made half open interval again.
AffineExpr s0 = getAffineSymbolExpr(0, builder.getContext());
OpFoldResult size =
makeComposedFoldedAffineApply(rewriter, loc, s0 + 1, closedIntSize);
LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: raw size: " << size << "\n");
LLVM_DEBUG(llvm::dbgs()
<< "makeTiledShape: new offset: " << offset << "\n");
strides.push_back(builder.getIndexAttr(1));
<< "computeSliceParameters: raw size: " << size << "\n");
LLVM_DEBUG(llvm::dbgs()
<< "computeSliceParameters: new offset: " << offset << "\n");
sliceParams.strides.push_back(builder.getIndexAttr(1));
if (omitPartialTileCheck) {
// We statically know that the partial/boundary tile condition is
// unnecessary.
LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
sizes.push_back(size);
sliceParams.sizes.push_back(size);
continue;
}
@@ -903,22 +937,9 @@ Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
makeComposedFoldedAffineMin(rewriter, loc, minMap, {size, d, offset});
}
LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
sizes.push_back(size);
sliceParams.sizes.push_back(size);
}
auto *sliceOp = TypeSwitch<ShapedType, Operation *>(shapedType)
.Case([&](MemRefType) {
return builder.create<memref::SubViewOp>(
loc, valueToTile, offsets, sizes, strides);
})
.Case([&](RankedTensorType) {
return makeComposedExtractSliceOp(
builder, loc, valueToTile, offsets, sizes, strides);
})
.Default([](ShapedType) -> Operation * {
llvm_unreachable("Unexpected shaped type");
});
return sliceOp->getResult(0);
return sliceParams;
}
SmallVector<OpFoldResult> computeTileOffsets(OpBuilder &b, Location loc,
@@ -1003,12 +1024,12 @@ Value materializeOpFoldResult(OpBuilder &builder, Location loc,
return materializeOpFoldResult(b, opFoldResult);
}
SmallVector<Value> makeTiledShapes(OpBuilder &b, Location loc,
LinalgOp linalgOp, ValueRange valuesToTile,
ArrayRef<OpFoldResult> ivs,
ArrayRef<OpFoldResult> tileSizes,
ArrayRef<OpFoldResult> sizeBounds,
bool omitPartialTileCheck) {
SmallVector<Optional<SliceParameters>>
computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp,
ValueRange valuesToTile, ArrayRef<OpFoldResult> ivs,
ArrayRef<OpFoldResult> tileSizes,
ArrayRef<OpFoldResult> sizeBounds,
bool omitPartialTileCheck) {
assert(ivs.size() == static_cast<size_t>(llvm::count_if(
llvm::make_range(tileSizes.begin(), tileSizes.end()),
[](OpFoldResult v) { return !isZero(v); })) &&
@@ -1016,15 +1037,16 @@ SmallVector<Value> makeTiledShapes(OpBuilder &b, Location loc,
// Construct (potentially temporary) mins and maxes on which to apply maps
// that define tile subshapes.
SmallVector<OpFoldResult> lbs = computeTileOffsets(b, loc, ivs, tileSizes);
SmallVector<OpFoldResult> lbs =
computeTileOffsets(builder, loc, ivs, tileSizes);
SmallVector<OpFoldResult> subShapeSizes =
computeTileSizes(b, loc, tileSizes, sizeBounds);
computeTileSizes(builder, loc, tileSizes, sizeBounds);
assert(static_cast<int64_t>(valuesToTile.size()) ==
linalgOp.getNumInputsAndOutputs() &&
"expected one value to tile for every operand");
SmallVector<Value> tiledShapes;
tiledShapes.reserve(valuesToTile.size());
SmallVector<Optional<SliceParameters>> allSliceParams;
allSliceParams.reserve(valuesToTile.size());
for (OpOperand *opOperand : linalgOp.getInputAndOutputOperands()) {
Value shapedOp = valuesToTile[opOperand->getOperandNumber()];
LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for operand " << shapedOp);
@@ -1035,18 +1057,39 @@ SmallVector<Value> makeTiledShapes(OpBuilder &b, Location loc,
// extract/insert slice pairs make the accessed iteration argument
// subdomains explicit.
if (!isTiled(map, tileSizes) && !linalgOp.isOutputTensor(opOperand)) {
tiledShapes.push_back(shapedOp);
allSliceParams.push_back(llvm::None);
LLVM_DEBUG(llvm::dbgs() << ": not tiled: use shape: "
<< opOperand->get().getType() << "\n");
continue;
}
LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subshape...\n");
tiledShapes.push_back(makeTiledShape(b, loc, shapedOp, tileSizes, map, lbs,
sizeBounds, subShapeSizes,
omitPartialTileCheck));
allSliceParams.push_back(computeSliceParameters(
builder, loc, shapedOp, tileSizes, map, lbs, sizeBounds, subShapeSizes,
omitPartialTileCheck));
}
return allSliceParams;
}
SmallVector<Value> makeTiledShapes(OpBuilder &builder, Location loc,
LinalgOp linalgOp, ValueRange valuesToTile,
ArrayRef<OpFoldResult> ivs,
ArrayRef<OpFoldResult> tileSizes,
ArrayRef<OpFoldResult> sizeBounds,
bool omitPartialTileCheck) {
SmallVector<Optional<SliceParameters>> allSliceParameter =
computeAllSliceParameters(builder, loc, linalgOp, valuesToTile, ivs,
tileSizes, sizeBounds, omitPartialTileCheck);
SmallVector<Value> tiledShapes;
for (auto item : llvm::zip(valuesToTile, allSliceParameter)) {
Value valueToTile = std::get<0>(item);
Optional<SliceParameters> sliceParams = std::get<1>(item);
tiledShapes.push_back(
sliceParams.hasValue()
? materializeTiledShape(builder, loc, valueToTile, *sliceParams)
: valueToTile);
}
return tiledShapes;
}

@@ -16,12 +16,12 @@ func.func @gemm1(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32
// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x
// CHECK: scf.for %[[ARG3:.*]] =
// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]]
// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]]
// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX]]]
// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]]
// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]]
// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX_2]]]
// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]]
// -----
@@ -48,11 +48,11 @@ func.func @gemm2(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32
// CHECK: scf.if %[[INBOUNDS]]
// CHECK: scf.for %[[ARG3:.*]] =
// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]]
// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]]
// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]]
// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]]
// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX_2]]]
// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]]
@@ -106,11 +106,11 @@ func.func @gemm4(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32
// CHECK: scf.if %[[INBOUNDS]]
// CHECK: scf.for %[[ARG3:.*]] =
// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]]
// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]]
// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]]
// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]]
// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX_2]]]
// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]]
@@ -139,9 +139,9 @@ func.func @gemm5(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32
// CHECK: scf.parallel (%[[ARG3:.*]]) = (%[[LBX]]) to (%{{.*}}) step (%[[STEPX]])
// CHECK: scf.for %[[ARG4:.*]] =
// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG4]]]
// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG4]], %[[ARG3]]]
// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[ARG3]]]
// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]]
@@ -166,10 +166,10 @@ func.func @gemm6(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32
// CHECK: %[[STEPY:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSY]]]
// CHECK: scf.parallel (%[[ARG3:.*]]) = (%[[LBY]]) to (%{{.*}}) step (%[[STEPY]])
// CHECK: scf.for %[[ARG4:.*]] =
// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[ARG3]], %[[ARG4]]]
// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG4]], %[[OFFSETX]]]
// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[ARG3]], %[[ARG4]]]
// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG4]], %[[OFFSETX]]]
// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[ARG3]], %[[OFFSETX_2]]]
// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]]

@@ -241,9 +241,9 @@ func.func @conv_tensors_dynamic(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?
// CHECK-NEXT: %[[SIZE_ELEM_OC:.+]] = affine.min #[[BOUND2_MAP]](%[[IV2]])[%[[ELEM_OC]]]
// CHECK-NEXT: %[[OFFSET_OW:.+]] = affine.apply #[[X2_MAP]](%[[IV2]])
// CHECK-NEXT: %[[SIZE_INPUT_W:.+]] = affine.min #[[INPUT_BOUND]](%[[IV2]], %[[SIZE_ELEM_OW]])[%[[FILL_W]], %[[FILTER_W]]]
// CHECK-NEXT: %[[SIZE_ELEM_OW_2:.+]] = affine.min #[[BOUND4_MAP_2]](%[[IV2]])[%[[FILL_W]], %[[ELEM_OW]]]
// CHECK-NEXT: %[[ST_INPUT:.+]] = tensor.extract_slice %[[INPUT]][%[[IV0]], %[[OFFSET_OH]], %[[OFFSET_OW]], 0]
// CHECK-SAME: [%[[SIZE_INPUT_N]], %[[SIZE_INPUT_H]], %[[SIZE_INPUT_W]], %[[INPUT_C]]]
// CHECK-NEXT: %[[SIZE_ELEM_OW_2:.+]] = affine.min #[[BOUND4_MAP_2]](%[[IV2]])[%[[FILL_W]], %[[ELEM_OW]]]
// CHECK-NEXT: scf.for %[[IV3:.+]] = %{{.+}} to %[[ELEM_OC]] step %{{.+}} iter_args(%[[ARG:[a-z0-9]+]]
// CHECK-NEXT: %[[ST_ELEM:.+]] = tensor.extract_slice %[[ELEM]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
// CHECK-SAME: [%[[SIZE_ELEM_N]], %[[SIZE_ELEM_OH]], %[[SIZE_ELEM_OW]], %[[SIZE_ELEM_OC]]]

@@ -26,9 +26,9 @@ func.func @conv(%arg0 : memref<?x?xf32>, %arg1 : memref<?x?xf32>, %arg2 : memref
// CHECK: scf.for %[[ARG4:.*]] = %[[C0]] to %[[T3]] step %[[C3]]
// CHECK: %[[T4:.*]] = affine.min #[[MAP0]](%[[ARG3]])[%[[T2]], %[[T0]]]
// CHECK: %[[T5:.*]] = affine.min #[[MAP1]](%[[ARG4]])[%[[T3]], %[[T1]]]
// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[ARG3]], %[[ARG4]]] [%[[T4]], %[[T5]]]
// CHECK: %[[T6:.*]] = affine.min #[[MAP2]](%[[ARG3]])[%[[T2]]
// CHECK: %[[T7:.*]] = affine.min #[[MAP3]](%[[ARG4]])[%[[T3]]]
// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[ARG3]], %[[ARG4]]] [%[[T4]], %[[T5]]]
// CHECK: %[[SV2:.*]] = memref.subview %[[ARG2]][%[[ARG3]], %[[ARG4]]] [%[[T6]], %[[T7]]]
// CHECK: linalg.conv_2d
// CHECK-SAME: ins(%[[SV1]], %[[ARG1]]

@@ -64,12 +64,12 @@ func.func @matmul_static(%A: tensor<100x200xf32>, %B: tensor<200x300xf32>, %C: t
// CHECK-NOT: affine.min
// CHECK-NOT: affine.max
// CHECK: %[[LB0:.+]] = affine.apply #[[$map2]](%[[IV0]])
// CHECK: %[[LB1:.+]] = affine.apply #[[$map3]](%[[IV1]])
// CHECK: %[[LB0_1:.+]] = affine.apply #[[$map2]](%[[IV0]])
// CHECK: %[[LB1_1:.+]] = affine.apply #[[$map3]](%[[IV1]])
// CHECK: %[[tA:.+]] = tensor.extract_slice %[[A]][%[[LB0]], 0] [10, 200] [1, 1] :
// CHECK: %[[LB1:.+]] = affine.apply #[[$map3]](%[[IV1]])
// CHECK: %[[tB:.+]] = tensor.extract_slice %[[B]][0, %[[LB1]]] [200, %[[TS]]] [1, 1] :
// CHECK: %[[LB0:.+]] = affine.apply #[[$map2]](%[[IV0]])
// CHECK: %[[LB1:.+]] = affine.apply #[[$map3]](%[[IV1]])
// CHECK: %[[tC:.+]] = tensor.extract_slice %[[C]][%[[LB0]], %[[LB1]]] [10, %[[TS]]] [1, 1] :
// CHECK: %[[tC:.+]] = tensor.extract_slice %[[C]][%[[LB0_1]], %[[LB1_1]]] [10, %[[TS]]] [1, 1] :
// CHECK: linalg.matmul
// CHECK: scf.foreach_thread.perform_concurrently
// CHECK-NEXT: tensor.parallel_insert_slice
@@ -155,12 +155,12 @@ func.func @matmul_tile_size_static(%A: tensor<100x200xf32>, %B: tensor<200x300xf
// CHECK-NOT: affine.max
// CHECK-NOT: affine.min
// CHECK: %[[LB0:.+]] = affine.apply #[[$map2]](%[[IV0]])
// CHECK: %[[LB1:.+]] = affine.apply #[[$map3]](%[[IV1]])
// CHECK: %[[LB0_1:.+]] = affine.apply #[[$map2]](%[[IV0]])
// CHECK: %[[LB1_1:.+]] = affine.apply #[[$map3]](%[[IV1]])
// CHECK: %[[tA:.+]] = tensor.extract_slice %[[A]][%[[LB0]], 0] [10, 200] [1, 1] :
// CHECK: %[[LB1:.+]] = affine.apply #[[$map3]](%[[IV1]])
// CHECK: %[[tB:.+]] = tensor.extract_slice %[[B]][0, %[[LB1]]] [200, %[[TS]]] [1, 1] :
// CHECK: %[[LB0:.+]] = affine.apply #[[$map2]](%[[IV0]])
// CHECK: %[[LB1:.+]] = affine.apply #[[$map3]](%[[IV1]])
// CHECK: %[[tC:.+]] = tensor.extract_slice %[[C]][%[[LB0]], %[[LB1]]] [10, %[[TS]]] [1, 1] :
// CHECK: %[[tC:.+]] = tensor.extract_slice %[[C]][%[[LB0_1]], %[[LB1_1]]] [10, %[[TS]]] [1, 1] :
// CHECK: linalg.matmul
// CHECK: scf.foreach_thread.perform_concurrently
// CHECK-NEXT: tensor.parallel_insert_slice

@@ -40,9 +40,9 @@ func.func @matmul(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>,
// TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
// TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
// TILE-2: %[[K:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, #[[$strided2D]]>
// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szM]], %[[K]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
// TILE-2: %[[szK:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
// TILE-2: %[[N:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, #[[$strided2D]]>
// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szM]], %[[K]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
// TILE-2: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szK]], %[[N]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
// TILE-2: linalg.matmul ins(%[[sAi]]{{.*}} outs(%[[sCi]]
@@ -53,9 +53,9 @@ func.func @matmul(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>,
// TILE-02: scf.for %[[J:.*]] = %{{.*}} to %[[N]] step %{{.*}} {
// TILE-02: %[[K:.*]] = memref.dim %{{.*}}, %c0 : memref<?x?xf32, #[[$strided2D]]>
// TILE-02: %[[szN:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[N]]]
// TILE-02: %[[sBj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [%[[K]], %[[szN]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
// TILE-02: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref<?x?xf32, #[[$strided2D]]>
// TILE-02: %[[szK:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[N]]]
// TILE-02: %[[sBj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [%[[K]], %[[szN]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
// TILE-02: %[[sCj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [%[[M]], %[[szK]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
// TILE-02: linalg.matmul ins(%{{.*}}, %[[sBj]]{{.*}} outs(%[[sCj]]
@@ -66,10 +66,10 @@ func.func @matmul(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>,
// TILE-002: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
// TILE-002: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref<?x?xf32, #[[$strided2D]]>
// TILE-002: %[[szK:.*]] = affine.min #[[$bound_map]](%[[K]])[%[[ubK]]]
// TILE-002: %[[sAj:.*]] = memref.subview %{{.*}}[0, %[[K]]] [%[[M]], %[[szK]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
// TILE-002: %[[szK:.*]] = affine.min #[[$bound_map]](%[[K]])[%[[ubK]]]
// TILE-002: %[[szK_1:.*]] = affine.min #[[$bound_map]](%[[K]])[%[[ubK]]]
// TILE-002: %[[N:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, #[[$strided2D]]>
// TILE-002: %[[sBj:.*]] = memref.subview %{{.*}}[%[[K]], 0] [%[[szK]], %[[N]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
// TILE-002: %[[sAj:.*]] = memref.subview %{{.*}}[0, %[[K]]] [%[[M]], %[[szK]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
// TILE-002: %[[sBj:.*]] = memref.subview %{{.*}}[%[[K]], 0] [%[[szK_1]], %[[N]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
// TILE-002: linalg.matmul ins(%[[sAj]], %[[sBj]]{{.*}} outs(%{{.*}}
// TILE-234-LABEL: func @matmul(
@@ -85,13 +85,13 @@ func.func @matmul(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>,
// TILE-234: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubM]]]
// TILE-234: %[[szK:.*]] = affine.min #[[$bound_map_4]](%[[K]])[%[[ubK]]]
// TILE-234: %[[szK_1:.*]] = affine.min #[[$bound_map_4]](%[[K]])[%[[ubK]]]
// TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[ubN]]]
// TILE-234: %[[szM_1:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubM]]]
// TILE-234: %[[szN_1:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[ubN]]]
// TILE-234: %[[sAik:.*]] = memref.subview %{{.*}}[%[[I]], %[[K]]] [%[[szM]], %[[szK]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
// TILE-234: %[[szK:.*]] = affine.min #[[$bound_map_4]](%[[K]])[%[[ubK]]]
// TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[ubN]]]
// TILE-234: %[[sBkj:.*]] = memref.subview %{{.*}}[%[[K]], %[[J]]] [%[[szK]], %[[szN]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubM]]]
// TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[ubN]]]
// TILE-234: %[[sCij:.*]] = memref.subview %{{.*}}[%[[I]], %[[J]]] [%[[szM]], %[[szN]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
// TILE-234: %[[sBkj:.*]] = memref.subview %{{.*}}[%[[K]], %[[J]]] [%[[szK_1]], %[[szN]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
// TILE-234: %[[sCij:.*]] = memref.subview %{{.*}}[%[[I]], %[[J]]] [%[[szM_1]], %[[szN_1]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
//
// TILE-234: linalg.matmul ins(%[[sAik]], %[[sBkj]]{{.*}} outs(%[[sCij]]
@@ -172,8 +172,8 @@ func.func @matvec(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: mem
// TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
// TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
// TILE-2: %[[N:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, #[[$strided2D]]>
// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szM]], %[[N]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
// TILE-2: %[[szN:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szM]], %[[N]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
// TILE-2: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szN]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
// TILE-2: linalg.matvec ins(%[[sAi]], %{{.*}} outs(%[[sCi]]
@@ -187,9 +187,9 @@ func.func @matvec(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: mem
// TILE-02: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[K]] step %{{.*}} {
// TILE-02: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref<?x?xf32, #[[$strided2D]]>
// TILE-02: %[[szN:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[K]]]
// TILE-02: %[[szN_1:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[K]]]
// TILE-02: %[[sAj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [%[[M]], %[[szN]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
// TILE-02: %[[szN:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[K]]]
// TILE-02: %[[sBj:.*]] = memref.subview %{{.*}}[%[[J]]] [%[[szN]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
// TILE-02: %[[sBj:.*]] = memref.subview %{{.*}}[%[[J]]] [%[[szN_1]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
// TILE-02: linalg.matvec ins(%[[sAj]], %[[sBj]]{{.*}} outs(%{{.*}}
// TILE-002-LABEL: func @matvec(
@@ -211,11 +211,11 @@ func.func @matvec(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: mem
// TILE-234: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[K]] step %{{.*}} {
// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[M]]]
// TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[K]]]
// TILE-234: %[[szN_1:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[K]]]
// TILE-234: %[[szM_1:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[M]]]
// TILE-234: %[[sAij:.*]] = memref.subview %{{.*}}[%[[I]], %[[J]]] [%[[szM]], %[[szN]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
// TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[K]]]
// TILE-234: %[[sBj:.*]] = memref.subview %{{.*}}[%[[J]]] [%[[szN]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[M]]]
// TILE-234: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
// TILE-234: %[[sBj:.*]] = memref.subview %{{.*}}[%[[J]]] [%[[szN_1]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
// TILE-234: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM_1]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
//
// TILE-234: linalg.matvec ins(%[[sAij]], %[[sBj]]{{.*}} outs(%[[sCi]]
@@ -231,9 +231,9 @@ func.func @dot(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: memref<?xf3
// TILE-2: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref<?xf32, #[[$strided1D]]>
// TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
// TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
// TILE-2: %[[szM_1:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
// TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
// TILE-2: %[[sBi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
// TILE-2: %[[sBi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM_1]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
// TILE-2: linalg.dot ins(%[[sAi]], %[[sBi]]{{.*}} outs(
// TILE-02-LABEL: func @dot(
@@ -248,9 +248,9 @@ func.func @dot(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: memref<?xf3
// TILE-234: %[[ubK:.*]] = memref.dim %{{.*}}, %c0 : memref<?xf32, #[[$strided1D]]>
// TILE-234: scf.for %[[I:.*]] = %{{.*}} to %[[ubK]] step %{{.*}} {
// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubK]]]
// TILE-234: %[[szM_1:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubK]]]
// TILE-234: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubK]]]
// TILE-234: %[[sBi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
// TILE-234: %[[sBi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM_1]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
// TILE-234: linalg.dot ins(%[[sAi]], %[[sBi]]{{.*}} outs(
func.func @fill_static(%arg0: memref<127x99xf32>, %arg1: f32) {


@@ -102,8 +102,8 @@ func.func @dynamic(%arg0: tensor<100xf32>, %arg1: tensor<100xf32>) -> tensor<100
// CHECK: %[[PARTIAL:.+]] = tensor.insert_slice %[[RES_SLICE_LOW]] into %[[OUT]][0] [%[[SPLIT_LOW]]] [1]
//
// CHECK: %[[SPLIT_HIGH_2:.+]] = affine.apply #[[$MAP_S_MINUS_100]]()[%[[SPLIT_LOW]]]
// CHECK: %[[IN_SLICE_HIGH:.+]] = tensor.extract_slice %[[IN:.+]][%[[SPLIT_LOW]]] [%[[SPLIT_HIGH_2]]] [1] : tensor<100xf32> to tensor<?xf32>
// CHECK: %[[SPLIT_HIGH_3:.+]] = affine.apply #[[$MAP_S_MINUS_100]]()[%[[SPLIT_LOW]]]
// CHECK: %[[IN_SLICE_HIGH:.+]] = tensor.extract_slice %[[IN:.+]][%[[SPLIT_LOW]]] [%[[SPLIT_HIGH_2]]] [1] : tensor<100xf32> to tensor<?xf32>
// CHECK: %[[OUT_SLICE_HIGH:.+]] = tensor.extract_slice %[[PARTIAL:.+]][%[[SPLIT_LOW]]] [%[[SPLIT_HIGH_3]]] [1] : tensor<100xf32> to tensor<?xf32>
// CHECK: %[[RES_SLICE_HIGH:.+]] = linalg.generic
// CHECK: ins(%[[IN_SLICE_HIGH]]