[mlir] Add `distributionTypes` to LinalgTilingOptions.
Differential Revision: https://reviews.llvm.org/D103161
commit 74a89cba8c (parent de9df3f5b9)
@@ -615,6 +615,12 @@ def Linalg_TiledLoopOp : Linalg_Op<"tiled_loop", [
       return getBody()->getArguments().take_back(outputs().size());
     }
 
+    void setDistributionTypes(Builder& b, ArrayRef<StringRef> types) {
+      assert(types.size() == getNumLoops() &&
+             "expected distribution type for every dimension");
+      distribution_typesAttr(b.getStrArrayAttr(types));
+    }
+
     void setLowerBounds(ValueRange lowerBounds) {
       unsigned numLoops = getNumLoops();
       assert(lowerBounds.size() == numLoops &&
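Note (not part of this commit): a minimal usage sketch for the new helper on an
already-built `linalg.tiled_loop`. Only `setDistributionTypes` and
`getNumLoops` come from the op definition above; the include path, the helper
name, and the three example markers are illustrative assumptions.

  #include "mlir/Dialect/Linalg/IR/LinalgOps.h"
  #include "mlir/IR/Builders.h"

  // Tag a 3-loop linalg.tiled_loop with per-loop distribution markers.
  // The marker count must match the loop count (asserted by the new helper).
  static void tagTiledLoop(mlir::OpBuilder &b, mlir::linalg::TiledLoopOp op) {
    mlir::SmallVector<mlir::StringRef, 4> markers = {"block_x", "block_y",
                                                     "none"};
    if (markers.size() == op.getNumLoops())
      op.setDistributionTypes(b, markers);
  }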
@@ -30,7 +30,8 @@ std::unique_ptr<OperationPass<FuncOp>>
 createLinalgTilingToParallelLoopsPass(ArrayRef<int64_t> tileSizes = {});
 
 std::unique_ptr<OperationPass<FuncOp>>
-createLinalgTilingToTiledLoopPass(ArrayRef<int64_t> tileSizes = {});
+createLinalgTilingToTiledLoopPass(ArrayRef<int64_t> tileSizes = {},
+                                  ArrayRef<StringRef> distributionTypes = {});
 
 std::unique_ptr<OperationPass<FuncOp>>
 createLinalgPromotionPass(bool dynamicBuffers, bool useAlloca);
@@ -182,6 +182,9 @@ def LinalgTilingToTiledLoops
   let constructor = "mlir::createLinalgTilingToTiledLoopPass()";
   let options = [
     ListOption<"tileSizes", "linalg-tile-sizes", "int64_t", "Tile sizes",
                "llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated">,
+    ListOption<"distributionTypes", "linalg-distribution-types", "std::string",
+               "DistributionTypes",
+               "llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated">
   ];
   let dependentDialects = [
@@ -493,6 +493,14 @@ struct LinalgTilingOptions {
     return *this;
   }
 
+  /// Specification markers of how to distribute the `linalg.tiled_loop`.
+  SmallVector<StringRef, 2> distributionTypes = {};
+
+  LinalgTilingOptions &setDistributionTypes(ArrayRef<StringRef> types) {
+    distributionTypes.assign(types.begin(), types.end());
+    return *this;
+  }
+
   /// Computation function that returns a padding value to use when padding to
   /// force static sizes. When `paddingValueComputationFunction` is set, padding
   /// operations are introduced, that guarantee the underlying op is statically
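Note (not part of this commit): a sketch of configuring the new option
together with the existing setters, mirroring applyTilingToLoopPatterns
further down; the tile sizes and marker strings are simply the values used in
the updated test, and the function name is hypothetical.

  #include "mlir/Dialect/Linalg/Transforms/Transforms.h"

  // Build tiling options that produce linalg.tiled_loop ops tagged with
  // per-loop distribution types.
  static mlir::linalg::LinalgTilingOptions makeTiledLoopOptions() {
    return mlir::linalg::LinalgTilingOptions()
        .setTileSizes({2, 3, 4})
        .setLoopType(mlir::linalg::LinalgTilingLoopType::TiledLoops)
        .setDistributionTypes({"block_x", "block_y", "none"});
  }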
@@ -247,7 +247,8 @@ struct GenerateLoopNest {
                    function_ref<scf::ValueVector(OpBuilder &, Location,
                                                  ValueRange, ValueRange)>
                        bodyBuilderFn,
-                   Optional<LinalgLoopDistributionOptions> = None);
+                   Optional<LinalgLoopDistributionOptions> = None,
+                   ArrayRef<StringRef> distributionTypes = {});
 };
 
 } // namespace linalg
@@ -278,7 +278,8 @@ tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ValueRange tileSizes,
     return scf::ValueVector(tensorResults.begin(), tensorResults.end());
   };
   GenerateLoopNest<LoopTy>::doit(b, op.getLoc(), loopRanges, op, iteratorTypes,
-                                 tiledLoopBodyBuilder, options.distribution);
+                                 tiledLoopBodyBuilder, options.distribution,
+                                 options.distributionTypes);
 
   // 3. Transform IndexOp results w.r.t. the tiling.
   transformIndexOps(b, res, ivs, loopIndexToRangeIndex);
@@ -428,11 +429,14 @@ static void insertTilingPatterns(RewritePatternSet &patterns,
       >::insert(patterns, options);
 }
 
-static void applyTilingToLoopPatterns(LinalgTilingLoopType loopType,
-                                      FuncOp funcOp,
-                                      ArrayRef<int64_t> tileSizes) {
-  auto options =
-      LinalgTilingOptions().setTileSizes(tileSizes).setLoopType(loopType);
+static void
+applyTilingToLoopPatterns(LinalgTilingLoopType loopType, FuncOp funcOp,
+                          ArrayRef<int64_t> tileSizes,
+                          ArrayRef<StringRef> distributionTypes = {}) {
+  auto options = LinalgTilingOptions()
+                     .setTileSizes(tileSizes)
+                     .setLoopType(loopType)
+                     .setDistributionTypes(distributionTypes);
   MLIRContext *ctx = funcOp.getContext();
   RewritePatternSet patterns(ctx);
   insertTilingPatterns(patterns, options);
@@ -472,11 +476,19 @@ struct LinalgTilingToParallelLoopsPass
 struct LinalgTilingToTiledLoopsPass
     : public LinalgTilingToTiledLoopsBase<LinalgTilingToTiledLoopsPass> {
   LinalgTilingToTiledLoopsPass() = default;
-  LinalgTilingToTiledLoopsPass(ArrayRef<int64_t> sizes) { tileSizes = sizes; }
+  LinalgTilingToTiledLoopsPass(ArrayRef<int64_t> sizes,
+                               ArrayRef<StringRef> types) {
+    tileSizes = sizes;
+    distributionTypes = llvm::to_vector<2>(
+        llvm::map_range(types, [](StringRef ref) { return ref.str(); }));
+  }
 
   void runOnFunction() override {
-    applyTilingToLoopPatterns(LinalgTilingLoopType::TiledLoops, getFunction(),
-                              tileSizes);
+    applyTilingToLoopPatterns(
+        LinalgTilingLoopType::TiledLoops, getFunction(), tileSizes,
+        llvm::to_vector<2>(
+            llvm::map_range(distributionTypes,
+                            [](std::string &str) { return StringRef(str); })));
   }
 };
 
@@ -493,6 +505,8 @@ mlir::createLinalgTilingToParallelLoopsPass(ArrayRef<int64_t> tileSizes) {
 }
 
 std::unique_ptr<OperationPass<FuncOp>>
-mlir::createLinalgTilingToTiledLoopPass(ArrayRef<int64_t> tileSizes) {
-  return std::make_unique<LinalgTilingToTiledLoopsPass>(tileSizes);
+mlir::createLinalgTilingToTiledLoopPass(ArrayRef<int64_t> tileSizes,
+                                        ArrayRef<StringRef> distributionTypes) {
+  return std::make_unique<LinalgTilingToTiledLoopsPass>(tileSizes,
+                                                        distributionTypes);
 }
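Note (not part of this commit): the same configuration through the updated C++
factory, scheduled on a pass manager. `PassManager`, `addNestedPass`, and
`mlir::FuncOp` are standard MLIR APIs of this vintage; the helper name and the
option values (taken from the test below) are illustrative.

  #include "mlir/Dialect/Linalg/Passes.h"
  #include "mlir/IR/BuiltinOps.h"
  #include "mlir/Pass/PassManager.h"

  // Nest the tiled-loop tiling pass, with distribution types, under each
  // function in the module.
  static void addTiledLoopTilingPass(mlir::PassManager &pm) {
    pm.addNestedPass<mlir::FuncOp>(mlir::createLinalgTilingToTiledLoopPass(
        /*tileSizes=*/{2, 3, 4},
        /*distributionTypes=*/{"block_x", "block_y", "none"}));
  }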
@@ -198,7 +198,8 @@ void GenerateLoopNest<scf::ForOp>::doit(
     function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
                                   ValueRange)>
         bodyBuilderFn,
-    Optional<LinalgLoopDistributionOptions> distributionOptions) {
+    Optional<LinalgLoopDistributionOptions> distributionOptions,
+    ArrayRef<StringRef> distributionTypes) {
   auto iterArgInitValues = linalgOp.getOutputTensors();
   // Create procInfo so it dominates loops, if appropriate.
   SmallVector<ProcInfo, 4> procInfo;
@@ -246,7 +247,7 @@ void GenerateLoopNest<AffineForOp>::doit(
     function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
                                   ValueRange)>
         bodyBuilderFn,
-    Optional<LinalgLoopDistributionOptions>) {
+    Optional<LinalgLoopDistributionOptions>, ArrayRef<StringRef>) {
   auto iterArgInitValues = linalgOp.getOutputTensors();
   assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");
   SmallVector<Value, 4> lbs, ubs, steps;
@@ -275,7 +276,8 @@ void GenerateLoopNest<TiledLoopOp>::doit(
     function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
                                   ValueRange)>
         bodyBuilderFn,
-    Optional<LinalgLoopDistributionOptions>) {
+    Optional<LinalgLoopDistributionOptions> distributionOptions,
+    ArrayRef<StringRef> distributionTypes) {
   SmallVector<ProcInfo, 2> procInfo;
   SmallVector<Value, 4> lbs, ubs, steps;
   unpackRanges(loopRanges, lbs, ubs, steps);
@@ -291,6 +293,8 @@ void GenerateLoopNest<TiledLoopOp>::doit(
   auto tiledLoop = b.create<TiledLoopOp>(
       loc, lbs, ubs, steps, linalgOp.getInputs(), linalgOp.getOutputs(),
       b.getArrayAttr(iteratorTypes), wrappedBuilderFn);
+  if (!distributionTypes.empty())
+    tiledLoop.setDistributionTypes(b, distributionTypes);
 
   // Replace inputs/outputs with the corresponding region args.
   auto isInsideTiledLoop = [&](OpOperand &operand) {
@@ -446,7 +450,8 @@ void GenerateLoopNest<scf::ParallelOp>::doit(
     function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
                                   ValueRange)>
         bodyBuilderFn,
-    Optional<LinalgLoopDistributionOptions> distributionOptions) {
+    Optional<LinalgLoopDistributionOptions> distributionOptions,
+    ArrayRef<StringRef> distributionTypes) {
   auto iterArgInitValues = linalgOp.getOutputTensors();
   assert(iterArgInitValues.empty() && "unexpected ParallelOp init values");
   // This function may be passed more iterator types than ranges.
@@ -1,5 +1,5 @@
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,4" -split-input-file | FileCheck %s
-// RUN: mlir-opt %s -linalg-tile-to-tiled-loop="linalg-tile-sizes=2,3,4" -split-input-file | FileCheck %s -check-prefix=TLOOP
+// RUN: mlir-opt %s -linalg-tile-to-tiled-loop="linalg-tile-sizes=2,3,4 linalg-distribution-types=block_x,block_y,none" -split-input-file | FileCheck %s -check-prefix=TLOOP
 
 // CHECK-LABEL: func @matmul_tensors(
 // CHECK-SAME:    %[[TA:[0-9a-z]+]]: tensor<?x?xf32>
@@ -48,7 +48,8 @@ func @matmul_tensors(
 // TLOOP-SAME: step (%[[C2]], %[[C3]], %[[C4]])
 // TLOOP-SAME: ins (%[[A0:.*]] = %[[ARG_0]]: [[TY]], %[[A1:.*]] = %[[ARG_1]]: [[TY]])
 // TLOOP-SAME: outs (%[[A2:.*]] = %[[ARG_2]]: [[TY]])
-// TLOOP-SAME: iterators["parallel", "parallel", "reduction"] {
+// TLOOP-SAME: iterators["parallel", "parallel", "reduction"]
+// TLOOP-SAME: distribution["block_x", "block_y", "none"] {
 
 // TLOOP: %[[SUB_ARG_0:.*]] = subtensor %[[A0]][%[[I]], %[[K]]]
 // TLOOP: %[[SUB_ARG_1:.*]] = subtensor %[[A1]][%[[K]], %[[J]]]
@@ -128,26 +129,4 @@ func @generic_op_tensors(
 // TLOOP-SAME: step (%[[C2]], %[[C3]], %[[C4]])
 // TLOOP-SAME: ins (%{{.*}} = %[[ARG_0]]: [[TY]], %{{.*}} = %[[ARG_1]]: [[TY]])
 // TLOOP-SAME: outs (%{{.*}} = %[[INIT]]: [[TY]])
-
-// -----
-
-func @fill_tensors(%arg0 : index, %arg1 : index, %arg2 : f32) -> tensor<?x?xf32> {
-  %0 = linalg.init_tensor [%arg0, %arg1] : tensor<?x?xf32>
-  %1 = linalg.fill(%0, %arg2) : tensor<?x?xf32>, f32 -> tensor<?x?xf32>
-  return %1 : tensor<?x?xf32>
-}
-// CHECK: func @fill_tensors
-// CHECK: %[[INIT:.+]] = linalg.init_tensor
-// CHECK: %[[RESULT:.+]] = scf.for %[[IV0:[a-zA-z0-9_]+]]
-// CHECK-SAME: iter_args(%[[ARG4:.+]] = %[[INIT]]) -> (tensor<?x?xf32>) {
-// CHECK: %[[YIELD_1:.+]] = scf.for %[[IV1:[a-zA-Z0-9_]+]]
-// CHECK-SAME: iter_args(%[[ARG6:.+]] = %[[ARG4]]) -> (tensor<?x?xf32>) {
-// CHECK: %[[FILL_TILE:.+]] = subtensor %[[ARG6]][%[[IV0]], %[[IV1]]]
-// CHECK: %[[RESULT_TILE:.+]] = linalg.fill(%[[FILL_TILE]], %{{.+}})
-// CHECK: %[[YIELD_2:.+]] = subtensor_insert %[[RESULT_TILE]]
-// CHECK-SAME: into %[[ARG6]][%[[IV0]], %[[IV1]]]
-// CHECK: scf.yield %[[YIELD_2]]
-// CHECK: }
-// CHECK: scf.yield %[[YIELD_1]]
-// CHECK: }
-// CHECK: return %[[RESULT]]
+// TLOOP-SAME: distribution["block_x", "block_y", "none"] {