// File: llvm-project/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
//===- Utils.cpp - Utilities to support the Linalg dialect ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements utilities for the Linalg dialect.
//
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
#include "mlir/Dialect/SCF/EDSC/Builders.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/OpImplementation.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/LoopUtils.h"
using namespace mlir;
using namespace mlir::linalg;
using namespace mlir::scf;
/// Matches a GenericOp whose region is a single scalar integer addition of
/// its two block arguments, i.e. a body of the form `yield (addi %a, %b)`.
/// Returns the recognized binary-op kind, or llvm::None on any mismatch.
Optional<RegionMatcher::BinaryOpKind>
RegionMatcher::matchAsScalarBinaryOp(GenericOp op) {
  auto &region = op.region();
  // The region must consist of exactly one block.
  if (!llvm::hasSingleElement(region))
    return llvm::None;

  Block &body = region.front();
  // Expect exactly two scalar (signless integer or float) block arguments.
  if (body.getNumArguments() != 2)
    return llvm::None;
  if (!body.getArgument(0).getType().isSignlessIntOrFloat() ||
      !body.getArgument(1).getType().isSignlessIntOrFloat())
    return llvm::None;

  // Besides the terminator, the body must hold a single operation.
  if (!llvm::hasSingleElement(body.without_terminator()))
    return llvm::None;

  using mlir::matchers::m_Val;
  auto lhs = m_Val(body.getArgument(0));
  auto rhs = m_Val(body.getArgument(1));
  // Recognize `linalg.yield(addi lhs, rhs)` rooted at the terminator.
  auto yieldOfAdd = m_Op<linalg::YieldOp>(m_Op<AddIOp>(lhs, rhs));
  if (yieldOfAdd.match(&body.getOperations().back()))
    return BinaryOpKind::IAdd;
  return llvm::None;
}
/// Returns true if `attr` is a StringAttr holding the parallel iterator type
/// name; false for any other attribute kind or string value.
bool mlir::linalg::isParallelIteratorType(Attribute attr) {
  auto strAttr = attr.dyn_cast<StringAttr>();
  return strAttr && strAttr.getValue() == getParallelIteratorTypeName();
}
/// Returns true if `attr` is a StringAttr holding the reduction iterator type
/// name; false for any other attribute kind or string value.
bool mlir::linalg::isReductionIteratorType(Attribute attr) {
  auto strAttr = attr.dyn_cast<StringAttr>();
  return strAttr && strAttr.getValue() == getReductionIteratorTypeName();
}
/// Returns true if `attr` is a StringAttr holding the window iterator type
/// name; false for any other attribute kind or string value.
bool mlir::linalg::isWindowIteratorType(Attribute attr) {
  auto strAttr = attr.dyn_cast<StringAttr>();
  return strAttr && strAttr.getValue() == getWindowIteratorTypeName();
}
/// Explicit instantiation of loop nest generator for different loop types.
template struct mlir::linalg::GenerateLoopNest<scf::ForOp>;
template struct mlir::linalg::GenerateLoopNest<scf::ParallelOp>;
template struct mlir::linalg::GenerateLoopNest<AffineForOp>;
/// Given a list of subview ranges, split each `Range` into its three
/// components, appending the offsets to `lbs`, the sizes to `ubs` and the
/// strides to `steps`, preserving the order of `ranges`.
static void unpackRanges(ArrayRef<Range> ranges, SmallVectorImpl<Value> &lbs,
                         SmallVectorImpl<Value> &ubs,
                         SmallVectorImpl<Value> &steps) {
  for (const Range &r : ranges) {
    lbs.push_back(r.offset);
    ubs.push_back(r.size);
    steps.push_back(r.stride);
  }
}
namespace mlir {
namespace linalg {
/// Returns the static shapes of all shaped operands of `linalgOp`,
/// concatenated in operand order into a single flat vector.
SmallVector<int64_t, 8> getStaticShape(LinalgOp linalgOp) {
  SmallVector<int64_t, 8> allSizes;
  for (Value operand : linalgOp.getShapedOperands()) {
    ArrayRef<int64_t> shape = operand.getType().cast<ShapedType>().getShape();
    allSizes.append(shape.begin(), shape.end());
  }
  return allSizes;
}
/// Computes the static loop ranges of `linalgOp` by composing the inverse
/// shapes-to-loops map with the concatenated operand shapes. Returns None when
/// the map is not invertible.
Optional<SmallVector<int64_t, 4>> getStaticLoopRanges(LinalgOp linalgOp) {
  AffineMap invertedMap = linalgOp.getShapesToLoopsMap();
  if (!invertedMap)
    return {};
  SmallVector<int64_t, 8> allShapeSizes = getStaticShape(linalgOp);
  return invertedMap.compose(allShapeSizes);
}
/// If `size` comes from an AffineMinOp and one of the results of the min map
/// is a constant, returns an index attribute holding the smallest such
/// constant. Also handles a plain index ConstantOp and an AffineApplyOp whose
/// single result folds to a constant. Otherwise (or if the bound found is
/// negative) returns nullptr.
IntegerAttr getSmallestBoundingIndex(Value size) {
  Optional<int64_t> bound;
  if (auto affineMinOp = size.getDefiningOp<AffineMinOp>()) {
    // Scan every result of the min map for constant expressions and remember
    // the smallest one.
    for (AffineExpr result : affineMinOp.getAffineMap().getResults()) {
      auto cstExpr = result.dyn_cast<AffineConstantExpr>();
      if (!cstExpr)
        continue;
      int64_t value = cstExpr.getValue();
      if (!bound || value < *bound)
        bound = value;
    }
  } else if (auto constIndexOp = size.getDefiningOp<ConstantOp>()) {
    // A constant of index type is its own bounding value.
    if (constIndexOp.getType().isa<IndexType>())
      bound = constIndexOp.value().cast<IntegerAttr>().getInt();
  } else if (auto affineApplyOp = size.getDefiningOp<AffineApplyOp>()) {
    // An affine.apply whose (single inspected) result is a constant.
    if (auto cExpr = affineApplyOp.getAffineMap()
                         .getResult(0)
                         .dyn_cast<AffineConstantExpr>())
      bound = cExpr.getValue();
  }
  // Only non-negative bounds are meaningful as index attributes.
  if (bound && *bound >= 0)
    return Builder(size.getContext()).getIndexAttr(*bound);
  return nullptr;
}
/// Specialization to build an scf "for" nest.
/// Builds one scf.for per entry in `loopRanges` via the EDSC loop-nest
/// builder, threading `iterArgInitValues` through as loop-carried values.
/// When `distributionOptions` is provided, each generated loop whose
/// distribution method is Cyclic is mapped onto the processor id/count pair
/// produced by the user-supplied `procInfo` callback. `iteratorTypes` is
/// unused by this specialization.
template <>
void GenerateLoopNest<scf::ForOp>::doit(
    ArrayRef<Range> loopRanges, ValueRange iterArgInitValues,
    ArrayRef<Attribute> iteratorTypes,
    function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
    Optional<LinalgLoopDistributionOptions> distributionOptions) {
  // Create procInfo so it dominates loops, if appropriate.
  // The builder/location come from the ambient EDSC scoped context, so the
  // procInfo ops are emitted at the current insertion point, before the nest.
  OpBuilder &builder = edsc::ScopedContext::getBuilderRef();
  Location loc = edsc::ScopedContext::getLocation();
  SmallVector<ProcInfo, 2> procInfo;
  if (distributionOptions.hasValue())
    procInfo = distributionOptions->procInfo(builder, loc, loopRanges);
  // Split the ranges into lower bounds, upper bounds and steps, then build
  // the nest of scf.for ops around `bodyBuilderFn`.
  SmallVector<Value, 4> lbs, ubs, steps;
  unpackRanges(loopRanges, lbs, ubs, steps);
  LoopNest loopNest =
      edsc::loopNestBuilder(lbs, ubs, steps, iterArgInitValues, bodyBuilderFn);
  if (!distributionOptions.hasValue() || loopNest.loops.empty())
    return;
  // Only supports cyclic distribution for now.
  // zip stops at the shortest of loops/procInfo/methods, so loops without a
  // corresponding method or procInfo entry are simply left undistributed.
  for (auto it : llvm::zip(loopNest.loops, procInfo,
                           distributionOptions->distributionMethod))
    if (std::get<2>(it) == DistributionMethod::Cyclic)
      mapLoopToProcessorIds(std::get<0>(it), std::get<1>(it).procId,
                            std::get<1>(it).nprocs);
}
/// Specialization to build an affine "for" nest.
/// Builds one affine.for per entry in `loopRanges`. Affine loops cannot carry
/// iteration arguments (`iterArgInitValues` must be empty) and require
/// compile-time-constant steps, so each step value must be produced by a
/// ConstantIndexOp. `iteratorTypes` and the distribution options are unused
/// by this specialization.
template <>
void GenerateLoopNest<AffineForOp>::doit(
    ArrayRef<Range> loopRanges, ValueRange iterArgInitValues,
    ArrayRef<Attribute> iteratorTypes,
    function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
    Optional<LinalgLoopDistributionOptions>) {
  assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");
  SmallVector<Value, 4> lbs, ubs, steps;
  unpackRanges(loopRanges, lbs, ubs, steps);

  // Affine loops require constant steps: pull the constant out of each step
  // value, asserting if any step is not a constant index op.
  SmallVector<int64_t, 4> constantSteps;
  constantSteps.reserve(steps.size());
  for (Value stepValue : steps) {
    auto stepConstantOp = stepValue.getDefiningOp<ConstantIndexOp>();
    assert(stepConstantOp && "Affine loops require constant steps");
    constantSteps.push_back(stepConstantOp.getValue());
  }

  // Affine nests carry no iter args; adapt the two-argument body callback.
  auto scalarBodyFn = [&](ValueRange ivs) { bodyBuilderFn(ivs, {}); };
  edsc::affineLoopNestBuilder(lbs, ubs, constantSteps, scalarBodyFn);
}
/// Update the `lb`, `ub` and `step` to get per processor `lb`, `ub` and
/// `step`. Cyclic distribution: processor `procId` starts at
/// `lb + procId * step` and advances by `nprocs * step`, so the `nprocs`
/// processors interleave over the original iteration space; `ub` is left
/// unchanged. `builder` and `loc` are unused here — the EDSC arithmetic
/// operators emit ops through the ambient scoped context instead.
static void updateBoundsForCyclicDistribution(OpBuilder &builder, Location loc,
                                              Value procId, Value nprocs,
                                              Value &lb, Value &ub,
                                              Value &step) {
  using edsc::op::operator+;
  using edsc::op::operator*;
  lb = lb + (procId * step);
  step = nprocs * step;
}
/// Generates a loop nest consisting of scf.parallel and scf.for, depending
/// on the `iteratorTypes.` Consecutive parallel loops create a single
/// scf.parallel operation; each sequential loop creates a new scf.for
/// operation. The body of the innermost loop is populated by
/// `bodyBuilderFn` that accepts a range of induction variables for all
/// loops. `ivStorage` is used to store the partial list of induction
/// variables.
/// `distributionMethod`, when non-empty, pairs up positionally with the
/// outermost parallel loops and selects how each one is distributed across
/// processors; loops beyond its length are generated undistributed.
// TODO: this function can be made iterative instead. However, it
// will have at most as many recursive calls as nested loops, which rarely
// exceeds 10.
static void
generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
                         ArrayRef<Attribute> iteratorTypes,
                         function_ref<void(ValueRange)> bodyBuilderFn,
                         SmallVectorImpl<Value> &ivStorage,
                         ArrayRef<DistributionMethod> distributionMethod = {}) {
  assert(lbs.size() == ubs.size());
  assert(lbs.size() == steps.size());
  assert(lbs.size() == iteratorTypes.size());

  // If there are no (more) loops to be generated, generate the body and be
  // done with it.
  if (iteratorTypes.empty())
    return bodyBuilderFn(ivStorage);

  // Find the outermost parallel loops and drop their types from the list.
  unsigned nLoops = iteratorTypes.size();
  unsigned nOuterPar =
      nLoops - iteratorTypes.drop_while(isParallelIteratorType).size();

  // If there are no outer parallel loops, generate one sequential loop and
  // recurse. Note that we wouldn't have dropped anything from `iteratorTypes`
  // in this case.
  if (nOuterPar == 0) {
    edsc::loopNestBuilder(lbs[0], ubs[0], steps[0], [&](Value iv) {
      ivStorage.push_back(iv);
      generateParallelLoopNest(lbs.drop_front(), ubs.drop_front(),
                               steps.drop_front(), iteratorTypes.drop_front(),
                               bodyBuilderFn, ivStorage, distributionMethod);
    });
    return;
  }
  if (distributionMethod.empty()) {
    // Generate a single parallel loop-nest operation for all outermost
    // parallel loops and recurse.
    edsc::OperationBuilder<scf::ParallelOp>(
        lbs.take_front(nOuterPar), ubs.take_front(nOuterPar),
        steps.take_front(nOuterPar),
        [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
          edsc::ScopedContext context(nestedBuilder, nestedLoc);
          ivStorage.append(localIvs.begin(), localIvs.end());
          generateParallelLoopNest(
              lbs.drop_front(nOuterPar), ubs.drop_front(nOuterPar),
              steps.drop_front(nOuterPar), iteratorTypes.drop_front(nOuterPar),
              bodyBuilderFn, ivStorage,
              // Once fewer methods than consumed loops remain, pass an empty
              // list so the remaining loops are generated undistributed.
              (distributionMethod.size() < nOuterPar)
                  ? ArrayRef<DistributionMethod>()
                  : distributionMethod.drop_front(nOuterPar));
        });
    return;
  }

  // Process all consecutive similarly distributed loops simultaneously.
  // `numProcessed` counts the leading outer-parallel loops that share the
  // same distribution method as the first one.
  DistributionMethod methodToUse = distributionMethod[0];
  unsigned numProcessed = 1;
  for (unsigned i = 1; i < nOuterPar && i < distributionMethod.size(); ++i) {
    if (distributionMethod[i] != methodToUse)
      break;
    numProcessed++;
  }

  switch (methodToUse) {
  case DistributionMethod::Cyclic: {
    // Generate a single parallel loop-nest operation for all outermost
    // parallel loops and recurse.
    edsc::OperationBuilder<scf::ParallelOp>(
        lbs.take_front(numProcessed), ubs.take_front(numProcessed),
        steps.take_front(numProcessed),
        [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
          edsc::ScopedContext context(nestedBuilder, nestedLoc);
          ivStorage.append(localIvs.begin(), localIvs.end());
          generateParallelLoopNest(
              lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
              steps.drop_front(numProcessed),
              iteratorTypes.drop_front(numProcessed), bodyBuilderFn, ivStorage,
              (distributionMethod.size() < numProcessed)
                  ? ArrayRef<DistributionMethod>()
                  : distributionMethod.drop_front(numProcessed));
        });
    return;
  }
  case DistributionMethod::CyclicNumProcsGeNumIters: {
    // Check (for the processed loops) that the iteration is in-bounds.
    // Each processor runs at most one iteration per processed loop, so the
    // induction variables are just the (per-processor) lower bounds, guarded
    // by a conjunction of `lb < ub` conditions.
    using edsc::op::slt;
    using edsc::op::operator&&;
    Value cond = slt(lbs[0], ubs[0]);
    for (unsigned i = 1; i < numProcessed; ++i)
      cond = cond && slt(lbs[i], ubs[i]);
    ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
    edsc::conditionBuilder(cond, [&]() {
      generateParallelLoopNest(
          lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
          steps.drop_front(numProcessed),
          iteratorTypes.drop_front(numProcessed), bodyBuilderFn, ivStorage,
          distributionMethod.drop_front(numProcessed));
    });
    return;
  }
  case DistributionMethod::CyclicNumProcsEqNumIters:
    // No check/loops needed here. Set the `%iv` to be the `%lb` and proceed
    // with inner loop generation.
    ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
    generateParallelLoopNest(
        lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
        steps.drop_front(numProcessed), iteratorTypes.drop_front(numProcessed),
        bodyBuilderFn, ivStorage, distributionMethod.drop_front(numProcessed));
    return;
  }
}
/// Specialization for generating a mix of parallel and sequential scf loops.
/// Unpacks `loopRanges` into bounds/steps, rewrites the bounds of distributed
/// parallel loops per the cyclic-distribution scheme when
/// `distributionOptions` is set, and delegates the actual nest construction
/// to generateParallelLoopNest. scf.parallel carries no iter args, so
/// `iterArgInitValues` must be empty.
template <>
void GenerateLoopNest<scf::ParallelOp>::doit(
    ArrayRef<Range> loopRanges, ValueRange iterArgInitValues,
    ArrayRef<Attribute> iteratorTypes,
    function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
    Optional<LinalgLoopDistributionOptions> distributionOptions) {
  assert(iterArgInitValues.empty() && "unexpected ParallelOp init values");
  // This function may be passed more iterator types than ranges.
  assert(iteratorTypes.size() >= loopRanges.size() &&
         "expected iterator type for all ranges");
  iteratorTypes = iteratorTypes.take_front(loopRanges.size());
  SmallVector<Value, 8> lbsStorage, ubsStorage, stepsStorage, ivs;
  unsigned numLoops = iteratorTypes.size();
  ivs.reserve(numLoops);
  lbsStorage.reserve(numLoops);
  ubsStorage.reserve(numLoops);
  stepsStorage.reserve(numLoops);

  // Get the loop lb, ub, and step.
  unpackRanges(loopRanges, lbsStorage, ubsStorage, stepsStorage);

  // Modify the lb, ub, and step based on the distribution options.
  SmallVector<DistributionMethod, 0> distributionMethod;
  if (distributionOptions) {
    auto &options = distributionOptions.getValue();
    OpBuilder &builder = edsc::ScopedContext::getBuilderRef();
    Location loc = edsc::ScopedContext::getLocation();
    distributionMethod.assign(distributionOptions->distributionMethod.begin(),
                              distributionOptions->distributionMethod.end());
    // Collect the ranges of the parallel loops; only those get distributed.
    SmallVector<Range, 2> parallelLoopRanges;
    for (auto iteratorType : enumerate(iteratorTypes)) {
      if (isParallelIteratorType(iteratorType.value()))
        parallelLoopRanges.push_back(loopRanges[iteratorType.index()]);
    }
    // At most as many loops are distributed as there are methods provided.
    if (distributionMethod.size() < parallelLoopRanges.size())
      parallelLoopRanges.resize(distributionMethod.size());
    SmallVector<ProcInfo, 2> procInfo =
        options.procInfo(builder, loc, parallelLoopRanges);
    // Walk the iterator types again, rewriting the bounds of the i-th
    // parallel loop with the `index`-th procId/nprocs pair. Note: bounds are
    // always updated cyclically here, regardless of the distribution method
    // recorded for the loop.
    unsigned index = 0;
    for (auto iteratorType : enumerate(iteratorTypes)) {
      if (index >= procInfo.size())
        break;
      if (isParallelIteratorType(iteratorType.value())) {
        unsigned i = iteratorType.index();
        updateBoundsForCyclicDistribution(builder, loc, procInfo[index].procId,
                                          procInfo[index].nprocs, lbsStorage[i],
                                          ubsStorage[i], stepsStorage[i]);
        index++;
      }
    }
  }
  ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage);
  // scf.parallel carries no iter args; adapt the two-argument callback.
  // NOTE(review): the lambda parameter `ivs` shadows the outer `ivs` storage
  // vector — intentional-looking but worth confirming.
  auto bodyBuilderWithoutIterArgsFn = [&](ValueRange ivs) {
    bodyBuilderFn(ivs, {});
  };
  generateParallelLoopNest(lbs, ubs, steps, iteratorTypes,
                           bodyBuilderWithoutIterArgsFn, ivs,
                           distributionMethod);

  assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
}
} // namespace linalg
} // namespace mlir