[mlir][SCF] NFC - Drop SCF EDSC usage

Drop the SCF dialect EDSC subdirectory and update all uses.

Differential Revision: https://reviews.llvm.org/D102780
Nicolas Vasilache 2021-05-19 15:41:54 +00:00
parent 9383e9c1e6
commit 84a880e1e2
16 changed files with 989 additions and 1472 deletions
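To make the migration pattern concrete, here is a hedged before/after sketch (not part of the diff) of building a simple copy loop nest. The EDSC form relied on an implicit ScopedContext and the MemRefIndexedValue load/store sugar; the replacement threads an explicit OpBuilder and Location through scf::buildLoopNest. All names (b, loc, lbs, ubs, steps, src, dst) are illustrative.

// Assumes: #include "mlir/Dialect/MemRef/IR/MemRef.h"
//          #include "mlir/Dialect/SCF/SCF.h"
static void buildCopyNest(OpBuilder &b, Location loc, ValueRange lbs,
                          ValueRange ubs, ValueRange steps, Value src,
                          Value dst) {
  // Before (EDSC, removed by this commit):
  //   ScopedContext scope(b, loc);
  //   loopNestBuilder(lbs, ubs, steps, [&](ValueRange ivs) {
  //     MemRefIndexedValue from(src), to(dst);
  //     to(ivs) = from(ivs);   // implicit memref.load + memref.store
  //   });
  // After: the builder and location are passed explicitly.
  scf::buildLoopNest(b, loc, lbs, ubs, steps,
                     [&](OpBuilder &b, Location loc, ValueRange ivs) {
                       Value v = b.create<memref::LoadOp>(loc, src, ivs);
                       b.create<memref::StoreOp>(loc, v, dst, ivs);
                     });
}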

@ -89,132 +89,6 @@ Value uge(Value lhs, Value rhs);
} // namespace op
/// Arithmetic operator overloadings.
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::operator+(Value e) {
using op::operator+;
return static_cast<Value>(*this) + e;
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::operator-(Value e) {
using op::operator-;
return static_cast<Value>(*this) - e;
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::operator*(Value e) {
using op::operator*;
return static_cast<Value>(*this) * e;
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::operator/(Value e) {
using op::operator/;
return static_cast<Value>(*this) / e;
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::operator%(Value e) {
using op::operator%;
return static_cast<Value>(*this) % e;
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::operator^(Value e) {
using op::operator^;
return static_cast<Value>(*this) ^ e;
}
/// Assignment-arithmetic operator overloadings.
template <typename Load, typename Store>
Store TemplatedIndexedValue<Load, Store>::operator+=(Value e) {
using op::operator+;
return Store(*this + e, getBase(), indices);
}
template <typename Load, typename Store>
Store TemplatedIndexedValue<Load, Store>::operator-=(Value e) {
using op::operator-;
return Store(*this - e, getBase(), indices);
}
template <typename Load, typename Store>
Store TemplatedIndexedValue<Load, Store>::operator*=(Value e) {
using op::operator*;
return Store(*this * e, getBase(), indices);
}
template <typename Load, typename Store>
Store TemplatedIndexedValue<Load, Store>::operator/=(Value e) {
using op::operator/;
return Store(*this / e, getBase(), indices);
}
template <typename Load, typename Store>
Store TemplatedIndexedValue<Load, Store>::operator%=(Value e) {
using op::operator%;
return Store(*this % e, getBase(), indices);
}
template <typename Load, typename Store>
Store TemplatedIndexedValue<Load, Store>::operator^=(Value e) {
using op::operator^;
return Store(*this ^ e, getBase(), indices);
}
/// Logical operator overloadings.
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::operator&&(Value e) {
using op::operator&&;
return static_cast<Value>(*this) && e;
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::operator||(Value e) {
using op::operator||;
return static_cast<Value>(*this) || e;
}
/// Comparison operator overloadings.
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::eq(Value e) {
return eq(value, e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::ne(Value e) {
return ne(value, e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::slt(Value e) {
using op::slt;
return slt(static_cast<Value>(*this), e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::sle(Value e) {
using op::sle;
return sle(static_cast<Value>(*this), e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::sgt(Value e) {
using op::sgt;
return sgt(static_cast<Value>(*this), e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::sge(Value e) {
using op::sge;
return sge(static_cast<Value>(*this), e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::ult(Value e) {
using op::ult;
return ult(static_cast<Value>(*this), e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::ule(Value e) {
using op::ule;
return ule(static_cast<Value>(*this), e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::ugt(Value e) {
using op::ugt;
return ugt(static_cast<Value>(*this), e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::uge(Value e) {
using op::uge;
return uge(static_cast<Value>(*this), e);
}
} // namespace edsc
} // namespace mlir
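For readers tracking the removed operator sugar: comparisons and logical composition are now written as explicit op creations. A minimal sketch, assuming index or signless-integer operands and illustrative names (b, loc, lhs, rhs), mirroring what the getPaddedInput changes further below do:

Value lt = b.create<CmpIOp>(loc, CmpIPredicate::slt, lhs, rhs);   // was op::slt
Value ge = b.create<CmpIOp>(loc, CmpIPredicate::sge, lhs, rhs);   // was op::sge
Value outOfBounds = b.create<OrOp>(loc, lt, ge);                  // was lt || ge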

@ -21,9 +21,6 @@ using affine_min = ValueBuilder<AffineMinOp>;
using affine_max = ValueBuilder<AffineMaxOp>;
using affine_store = OperationBuilder<AffineStoreOp>;
/// Provide an index notation around affine_load and affine_store.
using AffineIndexedValue = TemplatedIndexedValue<affine_load, affine_store>;
} // namespace intrinsics
} // namespace edsc
} // namespace mlir

@ -244,19 +244,15 @@ struct RegionMatcher {
/// Utility class used to generate nested loops with ranges described by
/// `loopRanges` and loop type described by the `iteratorTypes`. `bodyBuilderFn`
/// is used to generate the body of the innermost loop. It is passed a range
/// of loop induction variables.
/// of loop induction variables and a range of iterArgs.
template <typename LoopTy>
struct GenerateLoopNest {
using IndexedValueTy =
typename std::conditional<std::is_same<LoopTy, AffineForOp>::value,
edsc::intrinsics::AffineIndexedValue,
edsc::intrinsics::MemRefIndexedValue>::type;
static void
doit(ArrayRef<Range> loopRanges, LinalgOp linalgOp,
ArrayRef<Attribute> iteratorTypes,
function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
Optional<LinalgLoopDistributionOptions> = None);
static void doit(OpBuilder &b, Location loc, ArrayRef<Range> loopRanges,
LinalgOp linalgOp, ArrayRef<Attribute> iteratorTypes,
function_ref<scf::ValueVector(OpBuilder &, Location,
ValueRange, ValueRange)>
bodyBuilderFn,
Optional<LinalgLoopDistributionOptions> = None);
};
} // namespace linalg
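A hedged usage sketch of the updated entry point, assuming loopRanges, iteratorTypes, and linalgOp are already available at the call site (this mirrors the rewritten callers in Loops.cpp and Tiling.cpp further below):

SmallVector<Value> allIvs;
GenerateLoopNest<scf::ForOp>::doit(
    b, linalgOp.getLoc(), loopRanges, linalgOp, iteratorTypes,
    [&](OpBuilder &b, Location loc, ValueRange ivs,
        ValueRange iterArgs) -> scf::ValueVector {
      allIvs.append(ivs.begin(), ivs.end());
      // ... emit the loop body using `b` and `loc` ...
      return scf::ValueVector{};
    });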

@ -31,9 +31,6 @@ using memref_tensor_load = ValueBuilder<memref::TensorLoadOp>;
using memref_tensor_store = OperationBuilder<memref::TensorStoreOp>;
using memref_view = ValueBuilder<memref::ViewOp>;
/// Provide an index notation around memref_load and memref_store.
using MemRefIndexedValue =
TemplatedIndexedValue<intrinsics::memref_load, intrinsics::memref_store>;
} // namespace intrinsics
} // namespace edsc
} // namespace mlir

@ -1,56 +0,0 @@
//===- Builders.h - MLIR Declarative Builder Classes ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Provides intuitive composable interfaces for building structured MLIR
// snippets in a declarative fashion.
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_DIALECT_SCF_EDSC_BUILDERS_H_
#define MLIR_DIALECT_SCF_EDSC_BUILDERS_H_
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/EDSC/Builders.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Types.h"
namespace mlir {
namespace edsc {
/// Adapters for building loop nests using the builder and the location stored
/// in ScopedContext. Actual builders are in scf::buildLoopNest.
scf::LoopNest loopNestBuilder(ValueRange lbs, ValueRange ubs,
ValueRange steps,
function_ref<void(ValueRange)> fun = nullptr);
scf::LoopNest loopNestBuilder(Value lb, Value ub, Value step,
function_ref<void(Value)> fun = nullptr);
scf::LoopNest loopNestBuilder(
Value lb, Value ub, Value step, ValueRange iterArgInitValues,
function_ref<scf::ValueVector(Value, ValueRange)> fun = nullptr);
scf::LoopNest loopNestBuilder(
ValueRange lbs, ValueRange ubs, ValueRange steps,
ValueRange iterArgInitValues,
function_ref<scf::ValueVector(ValueRange, ValueRange)> fun = nullptr);
/// Adapters for building if conditions using the builder and the location
/// stored in ScopedContext. 'thenBody' is mandatory, 'elseBody' can be omitted
/// if the condition should not have an 'else' part.
/// When `ifOp` is specified, the scf::IfOp is captured. This is particularly
/// convenient for 0-result conditions.
ValueRange conditionBuilder(TypeRange results, Value condition,
function_ref<scf::ValueVector()> thenBody,
function_ref<scf::ValueVector()> elseBody = nullptr,
scf::IfOp *ifOp = nullptr);
ValueRange conditionBuilder(Value condition, function_ref<void()> thenBody,
function_ref<void()> elseBody = nullptr,
scf::IfOp *ifOp = nullptr);
} // namespace edsc
} // namespace mlir
#endif // MLIR_DIALECT_SCF_EDSC_BUILDERS_H_
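With conditionBuilder gone, an if/else that produces values is built directly on scf::IfOp, yielding in both regions. A hedged sketch assuming a single i32 result and illustrative names (condition, thenValue, elseValue):

auto ifOp = b.create<scf::IfOp>(
    loc, TypeRange{b.getI32Type()}, condition,
    /*thenBuilder=*/[&](OpBuilder &b, Location loc) {
      b.create<scf::YieldOp>(loc, ValueRange{thenValue});
    },
    /*elseBuilder=*/[&](OpBuilder &b, Location loc) {
      b.create<scf::YieldOp>(loc, ValueRange{elseValue});
    });
Value result = ifOp->getResult(0);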

@ -1,24 +0,0 @@
//===- Intrinsics.h - MLIR EDSC Intrinsics for SCF --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM
// Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_DIALECT_SCF_EDSC_INTRINSICS_H_
#define MLIR_DIALECT_SCF_EDSC_INTRINSICS_H_
#include "mlir/Dialect/SCF/EDSC/Builders.h"
namespace mlir {
namespace edsc {
namespace intrinsics {
using loop_yield = OperationBuilder<scf::YieldOp>;
} // namespace intrinsics
} // namespace edsc
} // namespace mlir
#endif // MLIR_DIALECT_SCF_EDSC_INTRINSICS_H_
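The loop_yield alias is subsumed by scf::buildLoopNest, which creates the scf.yield from the scf::ValueVector returned by the body. A hedged sketch with one loop-carried value (init, lb, ub, step are assumed index-typed Values; names illustrative):

scf::LoopNest nest = scf::buildLoopNest(
    b, loc, lb, ub, step, ValueRange{init},
    [&](OpBuilder &b, Location loc, ValueRange ivs,
        ValueRange iterArgs) -> scf::ValueVector {
      Value next = b.create<AddIOp>(loc, iterArgs.front(), ivs.front());
      return {next};   // becomes the scf.yield of the innermost loop
    });
Value total = nest.loops.front()->getResult(0);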

@ -169,172 +169,6 @@ private:
SmallVector<AffineExpr, 4> exprs;
};
/// A TemplatedIndexedValue brings an index notation over the template Load and
/// Store parameters. Assigning to an IndexedValue emits an actual `Store`
/// operation, while converting an IndexedValue to a Value emits an actual
/// `Load` operation.
template <typename Load, typename Store>
class TemplatedIndexedValue {
public:
explicit TemplatedIndexedValue(Value v) : value(v) {}
TemplatedIndexedValue(const TemplatedIndexedValue &rhs) = default;
TemplatedIndexedValue operator()() { return *this; }
/// Returns a new `TemplatedIndexedValue`.
TemplatedIndexedValue operator()(Value index) {
TemplatedIndexedValue res(value);
res.indices.push_back(index);
return res;
}
template <typename... Args>
TemplatedIndexedValue operator()(Value index, Args... indices) {
return TemplatedIndexedValue(value, index).append(indices...);
}
TemplatedIndexedValue operator()(ValueRange indices) {
return TemplatedIndexedValue(value, indices);
}
/// Emits a `store`.
Store operator=(const TemplatedIndexedValue &rhs) {
return Store(rhs, value, indices);
}
Store operator=(Value rhs) { return Store(rhs, value, indices); }
/// Emits a `load` when converting to a Value.
operator Value() const { return Load(value, indices); }
/// Returns the base memref.
Value getBase() const { return value; }
/// Returns the underlying memref.
MemRefType getMemRefType() const {
return value.getType().template cast<MemRefType>();
}
/// Returns the underlying MemRef elemental type cast as `T`.
template <typename T>
T getElementalTypeAs() const {
return value.getType()
.template cast<MemRefType>()
.getElementType()
.template cast<T>();
}
/// Arithmetic operator overloadings.
Value operator+(Value e);
Value operator-(Value e);
Value operator*(Value e);
Value operator/(Value e);
Value operator%(Value e);
Value operator^(Value e);
Value operator+(TemplatedIndexedValue e) {
return *this + static_cast<Value>(e);
}
Value operator-(TemplatedIndexedValue e) {
return *this - static_cast<Value>(e);
}
Value operator*(TemplatedIndexedValue e) {
return *this * static_cast<Value>(e);
}
Value operator/(TemplatedIndexedValue e) {
return *this / static_cast<Value>(e);
}
Value operator%(TemplatedIndexedValue e) {
return *this % static_cast<Value>(e);
}
Value operator^(TemplatedIndexedValue e) {
return *this ^ static_cast<Value>(e);
}
/// Assignment-arithmetic operator overloadings.
Store operator+=(Value e);
Store operator-=(Value e);
Store operator*=(Value e);
Store operator/=(Value e);
Store operator%=(Value e);
Store operator^=(Value e);
Store operator+=(TemplatedIndexedValue e) {
return this->operator+=(static_cast<Value>(e));
}
Store operator-=(TemplatedIndexedValue e) {
return this->operator-=(static_cast<Value>(e));
}
Store operator*=(TemplatedIndexedValue e) {
return this->operator*=(static_cast<Value>(e));
}
Store operator/=(TemplatedIndexedValue e) {
return this->operator/=(static_cast<Value>(e));
}
Store operator%=(TemplatedIndexedValue e) {
return this->operator%=(static_cast<Value>(e));
}
Store operator^=(TemplatedIndexedValue e) {
return this->operator^=(static_cast<Value>(e));
}
/// Logical operator overloadings.
Value operator&&(Value e);
Value operator||(Value e);
Value operator&&(TemplatedIndexedValue e) {
return *this && static_cast<Value>(e);
}
Value operator||(TemplatedIndexedValue e) {
return *this || static_cast<Value>(e);
}
/// Comparison operator overloadings.
Value eq(Value e);
Value ne(Value e);
Value slt(Value e);
Value sle(Value e);
Value sgt(Value e);
Value sge(Value e);
Value ult(Value e);
Value ule(Value e);
Value ugt(Value e);
Value uge(Value e);
Value slt(TemplatedIndexedValue e) {
return slt(*this, static_cast<Value>(e));
}
Value sle(TemplatedIndexedValue e) {
return sle(*this, static_cast<Value>(e));
}
Value sgt(TemplatedIndexedValue e) {
return sgt(*this, static_cast<Value>(e));
}
Value sge(TemplatedIndexedValue e) {
return sge(*this, static_cast<Value>(e));
}
Value ult(TemplatedIndexedValue e) {
return ult(*this, static_cast<Value>(e));
}
Value ule(TemplatedIndexedValue e) {
return ule(*this, static_cast<Value>(e));
}
Value ugt(TemplatedIndexedValue e) {
return ugt(*this, static_cast<Value>(e));
}
Value uge(TemplatedIndexedValue e) {
return uge(*this, static_cast<Value>(e));
}
private:
TemplatedIndexedValue(Value value, ValueRange indices)
: value(value), indices(indices.begin(), indices.end()) {}
TemplatedIndexedValue &append() { return *this; }
template <typename T, typename... Args>
TemplatedIndexedValue &append(T index, Args... indices) {
this->indices.push_back(static_cast<Value>(index));
append(indices...);
return *this;
}
Value value;
SmallVector<Value, 8> indices;
};
} // namespace edsc
} // namespace mlir
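The doc comment above summarizes the sugar being dropped: reads emit a Load, assignments emit a Store. In the lowering code further below, an accumulation that used to read O(oIdx) += F(fIdx) * I(imIdx) therefore becomes explicit loads, arithmetic, and a store. A hedged sketch assuming float memref elements and illustrative names:

static void emitAccumulate(OpBuilder &b, Location loc, Value O, Value F,
                           Value I, ValueRange oIdx, ValueRange fIdx,
                           ValueRange imIdx) {
  Value f = b.create<memref::LoadOp>(loc, F, fIdx);
  Value in = b.create<memref::LoadOp>(loc, I, imIdx);
  Value out = b.create<memref::LoadOp>(loc, O, oIdx);
  Value prod = b.create<MulFOp>(loc, f, in);
  Value sum = b.create<AddFOp>(loc, out, prod);
  b.create<memref::StoreOp>(loc, sum, O, oIdx);
}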

@ -14,7 +14,7 @@
#include "mlir/Dialect/GPU/MemoryPromotion.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
#include "mlir/Dialect/SCF/EDSC/Builders.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/LoopUtils.h"
@ -41,7 +41,7 @@ static StringRef getDimName(unsigned dim) {
/// GPUDialect::getNumWorkgroupDimensions() loops, completing the nest with
/// single-iteration loops. Maps the innermost loops to thread dimensions, in
/// reverse order to enable access coalescing in the innermost loop.
static void insertCopyLoops(OpBuilder &builder, Location loc,
static void insertCopyLoops(OpBuilder &b, Location loc,
MemRefBoundsCapture &bounds, Value from, Value to) {
// Create EDSC handles for bounds.
unsigned rank = bounds.rank();
@ -68,24 +68,24 @@ static void insertCopyLoops(OpBuilder &builder, Location loc,
[](int64_t step) { return std_constant_index(step); });
// Obtain thread identifiers and block sizes, necessary to map to them.
auto indexType = builder.getIndexType();
auto indexType = b.getIndexType();
SmallVector<Value, 3> threadIds, blockDims;
for (unsigned i = 0; i < 3; ++i) {
auto dimName = builder.getStringAttr(getDimName(i));
threadIds.push_back(
builder.create<gpu::ThreadIdOp>(loc, indexType, dimName));
blockDims.push_back(
builder.create<gpu::BlockDimOp>(loc, indexType, dimName));
auto dimName = b.getStringAttr(getDimName(i));
threadIds.push_back(b.create<gpu::ThreadIdOp>(loc, indexType, dimName));
blockDims.push_back(b.create<gpu::BlockDimOp>(loc, indexType, dimName));
}
// Produce the loop nest with copies.
SmallVector<Value, 8> ivs(lbs.size());
loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) {
ivs.assign(loopIvs.begin(), loopIvs.end());
auto activeIvs = llvm::makeArrayRef(ivs).take_back(rank);
MemRefIndexedValue fromHandle(from), toHandle(to);
toHandle(activeIvs) = fromHandle(activeIvs);
});
mlir::scf::buildLoopNest(
b, loc, lbs, ubs, steps,
[&](OpBuilder &b, Location loc, ValueRange loopIvs) {
ivs.assign(loopIvs.begin(), loopIvs.end());
auto activeIvs = llvm::makeArrayRef(ivs).take_back(rank);
Value loaded = b.create<memref::LoadOp>(loc, from, activeIvs);
b.create<memref::StoreOp>(loc, loaded, to, activeIvs);
});
// Map the innermost loops to threads in reverse order.
for (auto en :
@ -142,17 +142,17 @@ static void insertCopies(Region &region, Location loc, Value from, Value to) {
assert(llvm::hasSingleElement(region) &&
"unstructured control flow not supported");
OpBuilder builder(region.getContext());
builder.setInsertionPointToStart(&region.front());
OpBuilder b(region.getContext());
b.setInsertionPointToStart(&region.front());
ScopedContext edscContext(builder, loc);
ScopedContext edscContext(b, loc);
MemRefBoundsCapture fromBoundsCapture(from);
insertCopyLoops(builder, loc, fromBoundsCapture, from, to);
builder.create<gpu::BarrierOp>(loc);
insertCopyLoops(b, loc, fromBoundsCapture, from, to);
b.create<gpu::BarrierOp>(loc);
builder.setInsertionPoint(&region.front().back());
builder.create<gpu::BarrierOp>(loc);
insertCopyLoops(builder, loc, fromBoundsCapture, to, from);
b.setInsertionPoint(&region.front().back());
b.create<gpu::BarrierOp>(loc);
insertCopyLoops(b, loc, fromBoundsCapture, to, from);
}
/// Promotes a function argument to workgroup memory in the given function. The

@ -11,7 +11,6 @@
#include "mlir/Dialect/Linalg/EDSC/Builders.h"
#include "mlir/Dialect/Linalg/EDSC/Intrinsics.h"
#include "mlir/Dialect/Math/EDSC/Intrinsics.h"
#include "mlir/Dialect/SCF/EDSC/Builders.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/IR/AffineExpr.h"

@ -7,16 +7,11 @@
//===----------------------------------------------------------------------===//
#include "PassDetail.h"
#include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
#include "mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
#include "mlir/Dialect/SCF/EDSC/Builders.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/BlockAndValueMapping.h"
@ -27,38 +22,67 @@
#include "llvm/ADT/TypeSwitch.h"
using namespace mlir;
using namespace mlir::edsc;
using namespace mlir::edsc::intrinsics;
using namespace mlir::linalg;
using edsc::op::operator+;
namespace {
/// Helper struct to build simple arithmetic quantities with minimal type
/// inference support.
struct ArithBuilder {
ArithBuilder(OpBuilder &b, Location loc) : b(b), loc(loc) {}
static SmallVector<Value, 8> makeCanonicalAffineApplies(OpBuilder &b,
Location loc,
AffineMap map,
ArrayRef<Value> vals) {
Value select(Value cmp, Value lhs, Value rhs) {
return b.create<SelectOp>(loc, cmp, lhs, rhs);
}
Value slt(Value lhs, Value rhs) {
if (lhs.getType().isa<IntegerType>())
return b.create<CmpIOp>(loc, CmpIPredicate::slt, lhs, rhs);
return b.create<CmpFOp>(loc, CmpFPredicate::OLT, lhs, rhs);
}
Value sgt(Value lhs, Value rhs) {
if (lhs.getType().isa<IntegerType>())
return b.create<CmpIOp>(loc, CmpIPredicate::sgt, lhs, rhs);
return b.create<CmpFOp>(loc, CmpFPredicate::OGT, lhs, rhs);
}
Value add(Value lhs, Value rhs) {
if (lhs.getType().isa<IntegerType>())
return b.create<AddIOp>(loc, lhs, rhs);
return b.create<AddFOp>(loc, lhs, rhs);
}
Value mul(Value lhs, Value rhs) {
if (lhs.getType().isa<IntegerType>())
return b.create<MulIOp>(loc, lhs, rhs);
return b.create<MulFOp>(loc, lhs, rhs);
}
OpBuilder &b;
Location loc;
};
} // namespace
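A brief usage note on the ArithBuilder helper defined above: it dispatches on the operand type, so one call site covers both integer and float elements. For example (illustrative names, not part of the diff):

ArithBuilder ab(b, loc);
Value prod = ab.mul(filterVal, inputVal);             // MulIOp or MulFOp
Value acc = ab.add(outputVal, prod);                  // AddIOp or AddFOp
Value mx = ab.select(ab.sgt(acc, outputVal), acc, outputVal);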
static SmallVector<Value> makeCanonicalAffineApplies(OpBuilder &b, Location loc,
AffineMap map,
ArrayRef<Value> vals) {
if (map.isEmpty())
return {};
assert(map.getNumInputs() == vals.size());
SmallVector<Value, 8> res;
SmallVector<Value> res;
res.reserve(map.getNumResults());
auto dims = map.getNumDims();
for (auto e : map.getResults()) {
auto exprMap = AffineMap::get(dims, map.getNumSymbols(), e);
SmallVector<Value, 4> operands(vals.begin(), vals.end());
SmallVector<Value> operands(vals.begin(), vals.end());
canonicalizeMapAndOperands(&exprMap, &operands);
res.push_back(affine_apply(exprMap, operands));
res.push_back(b.create<AffineApplyOp>(loc, exprMap, operands));
}
return res;
}
template <typename IndexedValueType, typename OpType>
static void inlineRegionAndEmitStore(OpType op, ArrayRef<Value> indexedValues,
ArrayRef<SmallVector<Value, 8>> indexing,
template <typename LoadOpTy, typename StoreOpTy, typename OpType>
static void inlineRegionAndEmitStore(OpBuilder &b, Location loc, OpType op,
ArrayRef<Value> indexedValues,
ArrayRef<SmallVector<Value>> indexing,
ArrayRef<Value> outputBuffers) {
assert(op->getNumRegions() == 1 && "Expected single region op");
auto &b = ScopedContext::getBuilderRef();
auto &block = op->getRegion(0).front();
BlockAndValueMapping map;
map.map(block.getArguments(), indexedValues);
@ -67,26 +91,24 @@ static void inlineRegionAndEmitStore(OpType op, ArrayRef<Value> indexedValues,
map.map(op.getResults(), newOp->getResults());
}
Operation &terminator = block.back();
assert(isa<linalg::YieldOp>(terminator) &&
"expected a yield op in the end of the region");
for (unsigned i = 0, e = terminator.getNumOperands(); i < e; ++i) {
IndexedValueType O(outputBuffers[i]);
O(indexing[i]) = map.lookupOrDefault(terminator.getOperand(i));
Operation *terminator = block.getTerminator();
for (OpOperand &operand : terminator->getOpOperands()) {
Value toStore = map.lookupOrDefault(operand.get());
b.create<StoreOpTy>(loc, toStore, outputBuffers[operand.getOperandNumber()],
indexing[operand.getOperandNumber()]);
}
}
// Returns a pair that contains input indices and output indices of a
// SingleInputPoolingOp `op`.
struct InputAndOutputIndices {
SmallVector<Value, 8> inputs;
SmallVector<Value, 8> outputs;
SmallVector<Value> inputs;
SmallVector<Value> outputs;
};
template <typename SingleInputPoolingOp>
static InputAndOutputIndices getInputAndOutputIndices(ArrayRef<Value> allIvs,
SingleInputPoolingOp op) {
auto &b = ScopedContext::getBuilderRef();
auto loc = ScopedContext::getLocation();
static InputAndOutputIndices
getInputAndOutputIndices(OpBuilder &b, Location loc, ArrayRef<Value> allIvs,
SingleInputPoolingOp op) {
auto mapsRange = op.indexing_maps().template getAsRange<AffineMapAttr>();
auto maps = llvm::to_vector<8>(
llvm::map_range(mapsRange, [](AffineMapAttr a) { return a.getValue(); }));
@ -125,19 +147,18 @@ static InputAndOutputIndices getInputAndOutputIndices(ArrayRef<Value> allIvs,
/// }
/// }
/// ```
template <typename IndexedValueType>
static void emitScalarImplementation(ArrayRef<Value> allIvs,
template <typename LoadOpTy, typename StoreOpTy>
static void emitScalarImplementation(OpBuilder &b, Location loc,
ArrayRef<Value> allIvs,
LinalgOp linalgOp) {
assert(linalgOp.hasBufferSemantics() &&
"expected linalg op with buffer semantics");
auto &b = ScopedContext::getBuilderRef();
auto loc = ScopedContext::getLocation();
unsigned nInputs = linalgOp.getNumInputs();
unsigned nOutputs = linalgOp.getNumOutputs();
SmallVector<Value, 4> indexedValues;
SmallVector<Value> indexedValues;
indexedValues.reserve(nInputs + nOutputs);
auto allIvsPlusDims = SmallVector<Value, 4>(allIvs.begin(), allIvs.end());
auto allIvsPlusDims = SmallVector<Value>(allIvs.begin(), allIvs.end());
// TODO: Avoid the loads if the corresponding argument of the
// region has no uses.
@ -145,46 +166,40 @@ static void emitScalarImplementation(ArrayRef<Value> allIvs,
for (unsigned i = 0; i < nInputs; ++i) {
auto indexing = makeCanonicalAffineApplies(
b, loc, linalgOp.getInputIndexingMap(i), allIvsPlusDims);
// Passing through IndexedValueType emits the proper load operation.
indexedValues.push_back(IndexedValueType(linalgOp.getInput(i))(indexing));
indexedValues.push_back(
b.create<LoadOpTy>(loc, linalgOp.getInput(i), indexing));
}
// 1.b. Emit load from output views.
for (unsigned i = 0; i < nOutputs; ++i) {
auto indexing = makeCanonicalAffineApplies(
b, loc, linalgOp.getOutputIndexingMap(i), allIvsPlusDims);
// Passing through IndexedValueType emits the proper load operation.
indexedValues.push_back(
IndexedValueType(linalgOp.getOutputBuffer(i))(indexing));
b.create<LoadOpTy>(loc, linalgOp.getOutputBuffer(i), indexing));
}
// TODO: When a region inliner exists, use it.
// 2. Inline region, currently only works for a single basic block.
// 3. Emit store.
SmallVector<SmallVector<Value, 8>, 8> indexing;
SmallVector<Value, 8> outputBuffers;
SmallVector<SmallVector<Value>, 8> indexing;
SmallVector<Value> outputBuffers;
for (unsigned i = 0; i < nOutputs; ++i) {
indexing.push_back(makeCanonicalAffineApplies(
b, loc, linalgOp.getOutputIndexingMap(i), allIvsPlusDims));
outputBuffers.push_back(linalgOp.getOutputBuffer(i));
}
inlineRegionAndEmitStore<IndexedValueType>(linalgOp, indexedValues, indexing,
outputBuffers);
inlineRegionAndEmitStore<LoadOpTy, StoreOpTy>(b, loc, linalgOp, indexedValues,
indexing, outputBuffers);
}
// Create a padded view into the given `input` tensor using the 'indices'
// to access the tensor. `skipPadding` lists the dimensions for which no padding
// is needed e.g. the non-spatial dimensions for convolutions.
template <typename IndexedValueType>
Value getPaddedInput(Value input, ArrayRef<Value> indices,
ArrayRef<int> skipPadding, Value padValue) {
// TODO: add a level of indirection to linalg.generic.
IndexedValueType indexedInput(input);
auto *context = ScopedContext::getContext();
Value zeroIndex = std_constant_index(0);
SmallVector<Value, 8> conds;
SmallVector<Value, 8> clampedImIdx;
Value getPaddedInput(OpBuilder &b, Location loc, Value input,
ArrayRef<Value> indices, ArrayRef<int> skipPadding,
Value padValue) {
Value zeroIndex = b.create<ConstantIndexOp>(loc, 0);
SmallVector<Value> conds;
SmallVector<Value> clampedImIdx;
for (auto iter : llvm::enumerate(indices)) {
int idx = iter.index();
auto dim = iter.value();
@ -193,29 +208,33 @@ Value getPaddedInput(Value input, ArrayRef<Value> indices,
continue;
}
using edsc::op::sge;
using edsc::op::slt;
using edsc::op::operator||;
Value leftOutOfBound = slt(dim, zeroIndex);
Value leftOutOfBound =
b.create<CmpIOp>(loc, CmpIPredicate::slt, dim, zeroIndex);
if (conds.empty())
conds.push_back(leftOutOfBound);
else
conds.push_back(conds.back() || leftOutOfBound);
Value rightBound = memref_dim(input, idx);
conds.push_back(conds.back() || (sge(dim, rightBound)));
conds.push_back(b.create<OrOp>(loc, conds.back(), leftOutOfBound));
Value rightBound = b.create<memref::DimOp>(loc, input, idx);
Value rightOutOfBound =
b.create<CmpIOp>(loc, CmpIPredicate::sge, dim, rightBound);
conds.push_back(b.create<OrOp>(loc, conds.back(), rightOutOfBound));
// When padding is involved, the indices will only be shifted to negative,
// so having a max op is enough.
auto maxMap = AffineMap::get(/*dimCount=*/1, 0,
{getAffineDimExpr(/*position=*/0, context),
getAffineConstantExpr(0, context)},
context);
clampedImIdx.push_back(affine_max(dim.getType(), maxMap, ValueRange{dim}));
MLIRContext *ctx = input.getContext();
AffineExpr m = getAffineDimExpr(/*position=*/0, ctx),
zero = getAffineConstantExpr(0, ctx);
AffineMap maxMap =
AffineMap::inferFromExprList(ArrayRef<ArrayRef<AffineExpr>>{{m, zero}})
.front();
clampedImIdx.push_back(b.create<AffineMaxOp>(loc, maxMap, ValueRange{dim}));
}
Value readInput = indexedInput(clampedImIdx);
return conds.empty() ? readInput
: (Value)std_select(conds.back(), padValue, readInput);
Value readInput = b.create<memref::LoadOp>(loc, input, clampedImIdx);
if (conds.empty())
return readInput;
return b.create<SelectOp>(loc, conds.back(), padValue, readInput);
}
namespace {
@ -229,48 +248,47 @@ template <typename OpType> Attribute getPadValueAttr(Type type) {
}
template <> Attribute getPadValueAttr<PoolingMaxOp>(Type type) {
auto &b = ScopedContext::getBuilderRef();
if (auto floatType = type.dyn_cast<FloatType>()) {
return b.getFloatAttr(
floatType,
APFloat::getInf(floatType.getFloatSemantics(), /*Negative*/ true));
return OpBuilder(type.getContext())
.getFloatAttr(floatType, APFloat::getInf(floatType.getFloatSemantics(),
/*Negative*/ true));
}
if (auto intType = type.dyn_cast<IntegerType>()) {
unsigned width = intType.getWidth();
// The select instruction used to lower the PoolingMin uses a signed
// comparison, use a signed constant irrespective of the signedness of the
// integer type.
return b.getIntegerAttr(intType, APInt::getSignedMinValue(width));
return OpBuilder(type.getContext())
.getIntegerAttr(intType, APInt::getSignedMinValue(width));
}
llvm_unreachable("Unsupported data type for PoolingMaxOp");
return {};
}
template <> Attribute getPadValueAttr<PoolingMinOp>(Type type) {
auto &b = ScopedContext::getBuilderRef();
if (auto floatType = type.dyn_cast<FloatType>()) {
return b.getFloatAttr(floatType,
APFloat::getInf(floatType.getFloatSemantics()));
return OpBuilder(type.getContext())
.getFloatAttr(floatType,
APFloat::getInf(floatType.getFloatSemantics()));
}
if (auto intType = type.dyn_cast<IntegerType>()) {
unsigned width = intType.getWidth();
// The select instruction used to lower the PoolingMin uses a signed
// comparison, use a signed constant irrespective of the signedness of the
// integer type.
return b.getIntegerAttr(intType, APInt::getSignedMaxValue(width));
return OpBuilder(type.getContext())
.getIntegerAttr(intType, APInt::getSignedMaxValue(width));
}
llvm_unreachable("Unsupported data type for PoolingMinOp");
return {};
}
template <> Attribute getPadValueAttr<PoolingSumOp>(Type type) {
auto &b = ScopedContext::getBuilderRef();
return b.getZeroAttr(type);
return OpBuilder(type.getContext()).getZeroAttr(type);
}
template <> Attribute getPadValueAttr<ConvOp>(Type type) {
auto &b = ScopedContext::getBuilderRef();
return b.getZeroAttr(type);
return OpBuilder(type.getContext()).getZeroAttr(type);
}
} // namespace
@ -284,38 +302,43 @@ static bool hasPadding(ConvOp convOp) {
return false;
}
template <typename IndexedValueType>
static void emitScalarImplementation(ArrayRef<Value> allIvs, ConvOp convOp) {
template <typename LoadOpTy, typename StoreOpTy>
static void emitScalarImplementation(OpBuilder &b, Location loc,
ArrayRef<Value> allIvs, ConvOp convOp) {
assert(convOp.hasBufferSemantics() &&
"expected linalg op with buffer semantics");
auto &b = ScopedContext::getBuilderRef();
auto loc = ScopedContext::getLocation();
auto mapsRange = convOp.indexing_maps().getAsRange<AffineMapAttr>();
auto maps = llvm::to_vector<8>(
llvm::map_range(mapsRange, [](AffineMapAttr a) { return a.getValue(); }));
SmallVector<Value, 8> fIdx(
makeCanonicalAffineApplies(b, loc, maps[0], allIvs));
SmallVector<Value, 8> imIdx(
makeCanonicalAffineApplies(b, loc, maps[1], allIvs));
SmallVector<Value, 8> oIdx(
makeCanonicalAffineApplies(b, loc, maps[2], allIvs));
SmallVector<Value> fIdx(makeCanonicalAffineApplies(b, loc, maps[0], allIvs));
SmallVector<Value> imIdx(makeCanonicalAffineApplies(b, loc, maps[1], allIvs));
SmallVector<Value> oIdx(makeCanonicalAffineApplies(b, loc, maps[2], allIvs));
IndexedValueType F(convOp.filter()), O(convOp.output());
Value filter = convOp.filter(), output = convOp.output();
// Emit scalar form. Padded conv involves an affine.max in the memory access
// which is not allowed by affine.load. Override to use an MemRefIndexedValue
// when there is non-zero padding.
if (hasPadding(convOp)) {
Type type = convOp.input().getType().cast<MemRefType>().getElementType();
Value padValue = std_constant(type, getPadValueAttr<ConvOp>(type));
Value paddedInput = getPaddedInput<MemRefIndexedValue>(
convOp.input(), imIdx,
/* Only need to pad the window dimensions */
{0, static_cast<int>(imIdx.size()) - 1}, padValue);
O(oIdx) += F(fIdx) * paddedInput;
Value padValue =
b.create<ConstantOp>(loc, type, getPadValueAttr<ConvOp>(type));
Value paddedInput =
getPaddedInput(b, loc, convOp.input(), imIdx,
/* Only need to pad the window dimensions */
{0, static_cast<int>(imIdx.size()) - 1}, padValue);
Value filterVal = b.create<LoadOpTy>(loc, filter, fIdx);
Value mulVal = ArithBuilder(b, loc).mul(filterVal, paddedInput);
Value outputVal = b.create<LoadOpTy>(loc, output, oIdx);
Value addVal = ArithBuilder(b, loc).add(mulVal, outputVal);
b.create<StoreOpTy>(loc, addVal, output, oIdx);
} else {
IndexedValueType I(convOp.input());
O(oIdx) += F(fIdx) * I(imIdx);
Value inputVal = b.create<LoadOpTy>(loc, convOp.input(), imIdx);
Value filterVal = b.create<LoadOpTy>(loc, filter, fIdx);
Value mulVal = ArithBuilder(b, loc).mul(filterVal, inputVal);
Value outputVal = b.create<LoadOpTy>(loc, output, oIdx);
Value addVal = ArithBuilder(b, loc).add(mulVal, outputVal);
b.create<StoreOpTy>(loc, addVal, output, oIdx);
}
}
@ -327,55 +350,62 @@ template <typename PoolingOp> static bool hasPadding(PoolingOp poolingOp) {
return false;
}
template <typename IndexedValueType, typename PoolingOp>
static Value getPoolingInput(PoolingOp op, ArrayRef<Value> inputIndices) {
template <typename LoadOpTy, typename StoreOpTy, typename PoolingOp>
static Value getPoolingInput(OpBuilder &b, Location loc, PoolingOp op,
ArrayRef<Value> inputIndices) {
if (hasPadding(op)) {
Type type =
op.input().getType().template cast<MemRefType>().getElementType();
Value padValue = std_constant(type, getPadValueAttr<PoolingOp>(type));
return getPaddedInput<MemRefIndexedValue>(op.input(), inputIndices,
/*Pad every dimension*/ {},
padValue);
Value padValue =
b.create<ConstantOp>(loc, type, getPadValueAttr<PoolingOp>(type));
return getPaddedInput(b, loc, op.input(), inputIndices,
/*Pad every dimension*/ {}, padValue);
}
IndexedValueType input(op.input());
return input(inputIndices);
return b.create<LoadOpTy>(loc, op.input(), inputIndices);
}
template <typename IndexedValueType, typename OpType>
void emitPoolingMinMaxScalarImplementation(ArrayRef<Value> allIvs, OpType op) {
InputAndOutputIndices indices = getInputAndOutputIndices(allIvs, op);
// Emit scalar form.
IndexedValueType output(op.output());
Value lhs = output(indices.outputs);
Value rhs = getPoolingInput<IndexedValueType>(op, indices.inputs);
using edsc::op::sgt;
using edsc::op::slt;
Value value = std::is_same<OpType, PoolingMinOp>()
? std_select(slt(lhs, rhs), lhs, rhs)
: std_select(sgt(lhs, rhs), lhs, rhs);
output(indices.outputs) = value;
template <typename LoadOpTy, typename StoreOpTy, typename OpType>
void emitPoolingMinMaxScalarImplementation(OpBuilder &b, Location loc,
ArrayRef<Value> allIvs, OpType op) {
InputAndOutputIndices indices = getInputAndOutputIndices(b, loc, allIvs, op);
Value lhs = b.create<LoadOpTy>(loc, op.output(), indices.outputs);
Value rhs = getPoolingInput<LoadOpTy, StoreOpTy>(b, loc, op, indices.inputs);
Value value = llvm::TypeSwitch<Operation *, Value>(op)
.Case([&](PoolingMinOp poolingOp) {
return ArithBuilder(b, loc).select(
ArithBuilder(b, loc).slt(lhs, rhs), lhs, rhs);
})
.Case([&](PoolingMaxOp poolingOp) {
return ArithBuilder(b, loc).select(
ArithBuilder(b, loc).sgt(lhs, rhs), lhs, rhs);
})
.Default([&](auto) { return Value(); });
b.create<StoreOpTy>(loc, value, op.output(), indices.outputs);
}
template <typename IndexedValueType>
static void emitScalarImplementation(ArrayRef<Value> allIvs, PoolingMaxOp op) {
emitPoolingMinMaxScalarImplementation<IndexedValueType, PoolingMaxOp>(allIvs,
op);
template <typename LoadOpTy, typename StoreOpTy>
static void emitScalarImplementation(OpBuilder &b, Location loc,
ArrayRef<Value> allIvs, PoolingMaxOp op) {
emitPoolingMinMaxScalarImplementation<LoadOpTy, StoreOpTy, PoolingMaxOp>(
b, loc, allIvs, op);
}
template <typename IndexedValueType>
static void emitScalarImplementation(ArrayRef<Value> allIvs, PoolingMinOp op) {
emitPoolingMinMaxScalarImplementation<IndexedValueType, PoolingMinOp>(allIvs,
op);
template <typename LoadOpTy, typename StoreOpTy>
static void emitScalarImplementation(OpBuilder &b, Location loc,
ArrayRef<Value> allIvs, PoolingMinOp op) {
emitPoolingMinMaxScalarImplementation<LoadOpTy, StoreOpTy, PoolingMinOp>(
b, loc, allIvs, op);
}
template <typename IndexedValueType>
static void emitScalarImplementation(ArrayRef<Value> allIvs, PoolingSumOp op) {
auto indices = getInputAndOutputIndices(allIvs, op);
IndexedValueType output(op.output());
// Emit scalar form.
output(indices.outputs) +=
getPoolingInput<IndexedValueType>(op, indices.inputs);
template <typename LoadOpTy, typename StoreOpTy>
static void emitScalarImplementation(OpBuilder &b, Location loc,
ArrayRef<Value> allIvs, PoolingSumOp op) {
auto indices = getInputAndOutputIndices(b, loc, allIvs, op);
Value inputVal =
getPoolingInput<LoadOpTy, StoreOpTy>(b, loc, op, indices.inputs);
Value outputVal = b.create<LoadOpTy>(loc, op.output(), indices.outputs);
Value added = ArithBuilder(b, loc).add(outputVal, inputVal);
b.create<StoreOpTy>(loc, added, op.output(), indices.outputs);
}
/// Replace the index operations in the body of the loop nest by the matching
@ -413,8 +443,12 @@ static void replaceIndexOpsByInductionVariables(LinalgOp linalgOp,
template <typename LoopTy>
static Optional<LinalgLoops> linalgOpToLoopsImpl(PatternRewriter &rewriter,
LinalgOp linalgOp) {
using IndexedValueTy = typename GenerateLoopNest<LoopTy>::IndexedValueTy;
ScopedContext scope(rewriter, linalgOp.getLoc());
using LoadOpTy =
typename std::conditional<std::is_same<LoopTy, AffineForOp>::value,
AffineLoadOp, memref::LoadOp>::type;
using StoreOpTy =
typename std::conditional<std::is_same<LoopTy, AffineForOp>::value,
AffineStoreOp, memref::StoreOp>::type;
// Canonicalize indexed_generic operations before lowering them to loops.
if (isa<IndexedGenericOp>(linalgOp))
@ -428,16 +462,18 @@ static Optional<LinalgLoops> linalgOpToLoopsImpl(PatternRewriter &rewriter,
auto loopRanges = linalgOp.createLoopRanges(rewriter, linalgOp.getLoc());
auto iteratorTypes = llvm::to_vector<4>(linalgOp.iterator_types().getValue());
SmallVector<Value, 4> allIvs;
SmallVector<Value> allIvs;
GenerateLoopNest<LoopTy>::doit(
loopRanges, linalgOp, iteratorTypes,
[&](ValueRange ivs, ValueRange iterArgs) -> scf::ValueVector {
rewriter, linalgOp.getLoc(), loopRanges, linalgOp, iteratorTypes,
[&](OpBuilder &b, Location loc, ValueRange ivs,
ValueRange iterArgs) -> scf::ValueVector {
assert(iterArgs.empty() && "unexpected iterArgs");
allIvs.append(ivs.begin(), ivs.end());
llvm::TypeSwitch<Operation *>(linalgOp)
.Case<ConvOp, PoolingMaxOp, PoolingMinOp, PoolingSumOp, LinalgOp>(
[&](auto op) {
emitScalarImplementation<IndexedValueTy>(allIvs, op);
emitScalarImplementation<LoadOpTy, StoreOpTy>(b, loc, allIvs,
op);
})
.Default([&](Operation *op) { assert(false && "unexpected op"); });
return scf::ValueVector{};
@ -499,7 +535,7 @@ struct TiledLoopToSCFPattern : public OpRewritePattern<TiledLoopOp> {
tiledLoop.upperBound(), tiledLoop.step(),
[&](OpBuilder &builder, Location loc, ValueRange ivs) {
// Move body without its terminator.
SmallVector<Value, 16> newBlockArgs;
SmallVector<Value> newBlockArgs;
newBlockArgs.append(ivs.begin(), ivs.end());
newBlockArgs.append(tiledLoop.inputs().begin(),
tiledLoop.inputs().end());

@ -19,7 +19,6 @@
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/EDSC/Builders.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/AffineExpr.h"
@ -225,69 +224,67 @@ tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ValueRange tileSizes,
// 2. Create the tiled loops.
LinalgOp res = op;
SmallVector<Value, 4> ivs, tensorResults;
GenerateLoopNest<LoopTy>::doit(
loopRanges, op, iteratorTypes,
[&](ValueRange localIvs, ValueRange iterArgs) -> scf::ValueVector {
auto &b = ScopedContext::getBuilderRef();
auto loc = ScopedContext::getLocation();
ivs.assign(localIvs.begin(), localIvs.end());
auto tiledLoopBodyBuilder = [&](OpBuilder &b, Location loc,
ValueRange localIvs,
ValueRange iterArgs) -> scf::ValueVector {
ivs.assign(localIvs.begin(), localIvs.end());
// When an `interchangeVector` is present, it has been applied to the
// loop ranges and the iterator types. Apply its inverse to the
// resulting loop `ivs` to match the op definition.
SmallVector<Value, 4> interchangedIvs;
if (!options.interchangeVector.empty())
interchangedIvs = applyMapToValues(b, loc, invPermutationMap, ivs);
else
interchangedIvs.assign(ivs.begin(), ivs.end());
// When an `interchangeVector` is present, it has been applied to the
// loop ranges and the iterator types. Apply its inverse to the
// resulting loop `ivs` to match the op definition.
SmallVector<Value, 4> interchangedIvs;
if (!options.interchangeVector.empty())
interchangedIvs = applyMapToValues(b, loc, invPermutationMap, ivs);
else
interchangedIvs.assign(ivs.begin(), ivs.end());
assert(op.getNumOutputTensors() == iterArgs.size() &&
"num output tensors must match number of loop iter arguments");
assert(op.getNumOutputTensors() == iterArgs.size() &&
"num output tensors must match number of loop iter arguments");
auto operands = llvm::to_vector<4>(op.getInputs());
SmallVector<Value, 4> outputBuffers = op.getOutputBuffers();
// TODO: thanks to simplifying assumption we do not need to worry about
// order of output buffers and tensors: there is only ever one kind.
assert(outputBuffers.empty() || iterArgs.empty());
operands.append(outputBuffers.begin(), outputBuffers.end());
operands.append(iterArgs.begin(), iterArgs.end());
auto sizeBounds =
applyMapToValues(b, loc, shapeSizesToLoopsMap, allShapeSizes);
SmallVector<Value, 4> tiledOperands = makeTiledShapes(
b, loc, op, operands, interchangedIvs, tileSizes, sizeBounds);
auto nonShapedOperands = op.getAssumedNonShapedOperands();
tiledOperands.append(nonShapedOperands.begin(),
nonShapedOperands.end());
auto operands = llvm::to_vector<4>(op.getInputs());
SmallVector<Value, 4> outputBuffers = op.getOutputBuffers();
// TODO: thanks to simplifying assumption we do not need to worry about
// order of output buffers and tensors: there is only ever one kind.
assert(outputBuffers.empty() || iterArgs.empty());
operands.append(outputBuffers.begin(), outputBuffers.end());
operands.append(iterArgs.begin(), iterArgs.end());
auto sizeBounds =
applyMapToValues(b, loc, shapeSizesToLoopsMap, allShapeSizes);
SmallVector<Value, 4> tiledOperands = makeTiledShapes(
b, loc, op, operands, interchangedIvs, tileSizes, sizeBounds);
auto nonShapedOperands = op.getAssumedNonShapedOperands();
tiledOperands.append(nonShapedOperands.begin(), nonShapedOperands.end());
// TODO: use an interface/adaptor to avoid leaking position in
// `tiledOperands`.
SmallVector<Type, 4> resultTensorTypes;
for (OpOperand *opOperand : op.getOutputTensorsOpOperands())
resultTensorTypes.push_back(
tiledOperands[opOperand->getOperandNumber()].getType());
// TODO: use an interface/adaptor to avoid leaking position in
// `tiledOperands`.
SmallVector<Type, 4> resultTensorTypes;
for (OpOperand *opOperand : op.getOutputTensorsOpOperands())
resultTensorTypes.push_back(
tiledOperands[opOperand->getOperandNumber()].getType());
res = op.clone(b, loc, resultTensorTypes, tiledOperands);
res = op.clone(b, loc, resultTensorTypes, tiledOperands);
// Insert a subtensor_insert for each output tensor.
unsigned resultIdx = 0;
for (OpOperand *opOperand : op.getOutputTensorsOpOperands()) {
// TODO: use an interface/adaptor to avoid leaking position in
// `tiledOperands`.
Value outputTensor = tiledOperands[opOperand->getOperandNumber()];
if (auto subtensor = outputTensor.getDefiningOp<SubTensorOp>()) {
tensorResults.push_back(b.create<SubTensorInsertOp>(
loc, subtensor.source().getType(), res->getResult(resultIdx),
subtensor.source(), subtensor.offsets(), subtensor.sizes(),
subtensor.strides(), subtensor.static_offsets(),
subtensor.static_sizes(), subtensor.static_strides()));
} else {
tensorResults.push_back(res->getResult(resultIdx));
}
++resultIdx;
}
return scf::ValueVector(tensorResults.begin(), tensorResults.end());
},
options.distribution);
// Insert a subtensor_insert for each output tensor.
unsigned resultIdx = 0;
for (OpOperand *opOperand : op.getOutputTensorsOpOperands()) {
// TODO: use an interface/adaptor to avoid leaking position in
// `tiledOperands`.
Value outputTensor = tiledOperands[opOperand->getOperandNumber()];
if (auto subtensor = outputTensor.getDefiningOp<SubTensorOp>()) {
tensorResults.push_back(b.create<SubTensorInsertOp>(
loc, subtensor.source().getType(), res->getResult(resultIdx),
subtensor.source(), subtensor.offsets(), subtensor.sizes(),
subtensor.strides(), subtensor.static_offsets(),
subtensor.static_sizes(), subtensor.static_strides()));
} else {
tensorResults.push_back(res->getResult(resultIdx));
}
++resultIdx;
}
return scf::ValueVector(tensorResults.begin(), tensorResults.end());
};
GenerateLoopNest<LoopTy>::doit(b, op.getLoc(), loopRanges, op, iteratorTypes,
tiledLoopBodyBuilder, options.distribution);
// 3. Transform IndexOp results w.r.t. the tiling.
transformIndexOps(b, res, ivs, loopIndexToRangeIndex);

@ -16,7 +16,6 @@
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
#include "mlir/Dialect/SCF/EDSC/Builders.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
@ -197,15 +196,14 @@ IntegerAttr getSmallestBoundingIndex(Value size) {
/// Specialization to build an scf "for" nest.
template <>
void GenerateLoopNest<scf::ForOp>::doit(
ArrayRef<Range> loopRanges, LinalgOp linalgOp,
OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
ArrayRef<Attribute> iteratorTypes,
function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
ValueRange)>
bodyBuilderFn,
Optional<LinalgLoopDistributionOptions> distributionOptions) {
auto iterArgInitValues = linalgOp.getOutputTensors();
// Create procInfo so it dominates loops, if appropriate.
OpBuilder &builder = edsc::ScopedContext::getBuilderRef();
Location loc = edsc::ScopedContext::getLocation();
SmallVector<ProcInfo, 4> procInfo;
SmallVector<DistributionMethod, 0> distributionMethod;
if (distributionOptions.hasValue()) {
@ -219,13 +217,13 @@ void GenerateLoopNest<scf::ForOp>::doit(
distributionMethod = distributionOptions->distributionMethod;
if (distributionMethod.size() < parallelLoopRanges.size())
parallelLoopRanges.resize(distributionMethod.size());
procInfo = distributionOptions->procInfo(builder, loc, parallelLoopRanges);
procInfo = distributionOptions->procInfo(b, loc, parallelLoopRanges);
}
SmallVector<Value, 4> lbs, ubs, steps;
unpackRanges(loopRanges, lbs, ubs, steps);
LoopNest loopNest =
edsc::loopNestBuilder(lbs, ubs, steps, iterArgInitValues, bodyBuilderFn);
LoopNest loopNest = mlir::scf::buildLoopNest(
b, loc, lbs, ubs, steps, iterArgInitValues, bodyBuilderFn);
if (!distributionOptions || loopNest.loops.empty())
return;
@ -246,9 +244,11 @@ void GenerateLoopNest<scf::ForOp>::doit(
/// Specialization to build affine "for" nest.
template <>
void GenerateLoopNest<AffineForOp>::doit(
ArrayRef<Range> loopRanges, LinalgOp linalgOp,
OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
ArrayRef<Attribute> iteratorTypes,
function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
ValueRange)>
bodyBuilderFn,
Optional<LinalgLoopDistributionOptions>) {
auto iterArgInitValues = linalgOp.getOutputTensors();
assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");
@ -264,38 +264,36 @@ void GenerateLoopNest<AffineForOp>::doit(
constantSteps.push_back(op.getValue());
}
auto bodyBuilderWithoutIterArgsFn = [&](ValueRange ivs) {
bodyBuilderFn(ivs, {});
};
edsc::affineLoopNestBuilder(lbs, ubs, constantSteps,
bodyBuilderWithoutIterArgsFn);
mlir::buildAffineLoopNest(b, loc, lbs, ubs, constantSteps,
[&](OpBuilder &b, Location loc, ValueRange ivs) {
bodyBuilderFn(b, loc, ivs, {});
});
}
/// Specialization to build a linalg.tiled_loop
template <>
void GenerateLoopNest<TiledLoopOp>::doit(
ArrayRef<Range> loopRanges, LinalgOp linalgOp,
OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
ArrayRef<Attribute> iteratorTypes,
function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
ValueRange)>
bodyBuilderFn,
Optional<LinalgLoopDistributionOptions>) {
OpBuilder &builder = edsc::ScopedContext::getBuilderRef();
Location loc = edsc::ScopedContext::getLocation();
SmallVector<ProcInfo, 2> procInfo;
SmallVector<Value, 4> lbs, ubs, steps;
unpackRanges(loopRanges, lbs, ubs, steps);
auto wrappedBuilderFn = [&](OpBuilder &nestedBuilder, Location nestedLoc,
ValueRange ivs, ValueRange inputs,
ValueRange outputs) {
ScopedContext context(nestedBuilder, nestedLoc);
scf::ValueVector results = bodyBuilderFn(ivs, linalgOp.getOutputTensors());
scf::ValueVector results = bodyBuilderFn(nestedBuilder, nestedLoc, ivs,
linalgOp.getOutputTensors());
nestedBuilder.create<linalg::YieldOp>(nestedLoc, results);
};
auto tiledLoop = builder.create<TiledLoopOp>(
auto tiledLoop = b.create<TiledLoopOp>(
loc, lbs, ubs, steps, linalgOp.getInputs(), linalgOp.getOutputs(),
builder.getArrayAttr(iteratorTypes), wrappedBuilderFn);
b.getArrayAttr(iteratorTypes), wrappedBuilderFn);
// Replace inputs/outputs with the corresponding region args.
auto isInsideTiledLoop = [&](OpOperand &operand) {
@ -310,9 +308,9 @@ void GenerateLoopNest<TiledLoopOp>::doit(
}
/// Update the `lb`, `ub` and `step` to get per processor `lb`, `ub` and `step`.
void updateBoundsForCyclicDistribution(OpBuilder &builder, Location loc,
Value procId, Value nprocs, Value &lb,
Value &ub, Value &step) {
void updateBoundsForCyclicDistribution(OpBuilder &b, Location loc, Value procId,
Value nprocs, Value &lb, Value &ub,
Value &step) {
using edsc::op::operator+;
using edsc::op::operator*;
lb = lb + (procId * step);
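For reference, the same lower-bound update spelled without the edsc operator sugar would look like the following hedged sketch (index-typed operands; the diff keeps the operator form here):

Value scaled = b.create<MulIOp>(loc, procId, step);
lb = b.create<AddIOp>(loc, lb, scaled);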
@ -329,20 +327,22 @@ void updateBoundsForCyclicDistribution(OpBuilder &builder, Location loc,
// TODO: this function can be made iterative instead. However, it
// will have at most as many recursive calls as nested loops, which rarely
// exceeds 10.
static void
generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
ArrayRef<Attribute> iteratorTypes,
function_ref<void(ValueRange)> bodyBuilderFn,
SmallVectorImpl<Value> &ivStorage,
ArrayRef<DistributionMethod> distributionMethod = {}) {
static void generateParallelLoopNest(
OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs,
ValueRange steps, ArrayRef<Attribute> iteratorTypes,
function_ref<void(OpBuilder &, Location, ValueRange)> bodyBuilderFn,
SmallVectorImpl<Value> &ivStorage,
ArrayRef<DistributionMethod> distributionMethod = {}) {
assert(lbs.size() == ubs.size());
assert(lbs.size() == steps.size());
assert(lbs.size() == iteratorTypes.size());
// If there are no (more) loops to be generated, generate the body and be
// done with it.
if (iteratorTypes.empty())
return bodyBuilderFn(ivStorage);
if (iteratorTypes.empty()) {
bodyBuilderFn(b, loc, ivStorage);
return;
}
// Find the outermost parallel loops and drop their types from the list.
unsigned nLoops = iteratorTypes.size();
@ -353,27 +353,29 @@ generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
// recurse. Note that we wouldn't have dropped anything from `iteratorTypes`
// in this case.
if (nOuterPar == 0) {
edsc::loopNestBuilder(lbs[0], ubs[0], steps[0], [&](Value iv) {
ivStorage.push_back(iv);
generateParallelLoopNest(lbs.drop_front(), ubs.drop_front(),
steps.drop_front(), iteratorTypes.drop_front(),
bodyBuilderFn, ivStorage, distributionMethod);
});
LoopNest singleLoop = buildLoopNest(
b, loc, lbs.take_front(), ubs.take_front(), steps.take_front(),
[&](OpBuilder &b, Location loc, ValueRange ivs) {
ivStorage.append(ivs.begin(), ivs.end());
generateParallelLoopNest(b, loc, lbs.drop_front(), ubs.drop_front(),
steps.drop_front(),
iteratorTypes.drop_front(), bodyBuilderFn,
ivStorage, distributionMethod);
});
return;
}
if (distributionMethod.empty()) {
// Generate a single parallel loop-nest operation for all outermost
// parallel loops and recurse.
edsc::OperationBuilder<scf::ParallelOp>(
lbs.take_front(nOuterPar), ubs.take_front(nOuterPar),
b.create<scf::ParallelOp>(
loc, lbs.take_front(nOuterPar), ubs.take_front(nOuterPar),
steps.take_front(nOuterPar),
[&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
edsc::ScopedContext context(nestedBuilder, nestedLoc);
ivStorage.append(localIvs.begin(), localIvs.end());
generateParallelLoopNest(
lbs.drop_front(nOuterPar), ubs.drop_front(nOuterPar),
steps.drop_front(nOuterPar), iteratorTypes.drop_front(nOuterPar),
bodyBuilderFn, ivStorage,
nestedBuilder, nestedLoc, lbs.drop_front(nOuterPar),
ubs.drop_front(nOuterPar), steps.drop_front(nOuterPar),
iteratorTypes.drop_front(nOuterPar), bodyBuilderFn, ivStorage,
(distributionMethod.size() < nOuterPar)
? ArrayRef<DistributionMethod>()
: distributionMethod.drop_front(nOuterPar));
@ -394,15 +396,14 @@ generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
case DistributionMethod::Cyclic: {
// Generate a single parallel loop-nest operation for all outermost
// parallel loops and recurse.
edsc::OperationBuilder<scf::ParallelOp>(
lbs.take_front(numProcessed), ubs.take_front(numProcessed),
b.create<scf::ParallelOp>(
loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
steps.take_front(numProcessed),
[&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
edsc::ScopedContext context(nestedBuilder, nestedLoc);
ivStorage.append(localIvs.begin(), localIvs.end());
generateParallelLoopNest(
lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
steps.drop_front(numProcessed),
nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
iteratorTypes.drop_front(numProcessed), bodyBuilderFn, ivStorage,
(distributionMethod.size() < numProcessed)
? ArrayRef<DistributionMethod>()
@ -418,12 +419,13 @@ generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
for (unsigned i = 1; i < numProcessed; ++i)
cond = cond && slt(lbs[i], ubs[i]);
ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
edsc::conditionBuilder(cond, [&]() {
b.create<scf::IfOp>(loc, cond, [&](OpBuilder &b, Location loc) {
generateParallelLoopNest(
lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
steps.drop_front(numProcessed),
iteratorTypes.drop_front(numProcessed), bodyBuilderFn, ivStorage,
distributionMethod.drop_front(numProcessed));
b.create<scf::YieldOp>(loc, ValueRange{});
});
return;
}
@ -432,7 +434,7 @@ generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
// with inner loop generation.
ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
generateParallelLoopNest(
lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
steps.drop_front(numProcessed), iteratorTypes.drop_front(numProcessed),
bodyBuilderFn, ivStorage, distributionMethod.drop_front(numProcessed));
return;
@ -442,9 +444,11 @@ generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
/// Specialization for generating a mix of parallel and sequential scf loops.
template <>
void GenerateLoopNest<scf::ParallelOp>::doit(
ArrayRef<Range> loopRanges, LinalgOp linalgOp,
OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
ArrayRef<Attribute> iteratorTypes,
function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
ValueRange)>
bodyBuilderFn,
Optional<LinalgLoopDistributionOptions> distributionOptions) {
auto iterArgInitValues = linalgOp.getOutputTensors();
assert(iterArgInitValues.empty() && "unexpected ParallelOp init values");
@ -466,7 +470,7 @@ void GenerateLoopNest<scf::ParallelOp>::doit(
SmallVector<DistributionMethod, 0> distributionMethod;
if (distributionOptions) {
auto &options = distributionOptions.getValue();
OpBuilder &builder = edsc::ScopedContext::getBuilderRef();
OpBuilder &b = edsc::ScopedContext::getBuilderRef();
Location loc = edsc::ScopedContext::getLocation();
distributionMethod.assign(distributionOptions->distributionMethod.begin(),
distributionOptions->distributionMethod.end());
@ -478,14 +482,14 @@ void GenerateLoopNest<scf::ParallelOp>::doit(
if (distributionMethod.size() < parallelLoopRanges.size())
parallelLoopRanges.resize(distributionMethod.size());
SmallVector<ProcInfo, 2> procInfo =
options.procInfo(builder, loc, parallelLoopRanges);
options.procInfo(b, loc, parallelLoopRanges);
unsigned index = 0;
for (auto iteratorType : enumerate(iteratorTypes)) {
if (index >= procInfo.size())
break;
if (isParallelIteratorType(iteratorType.value())) {
unsigned i = iteratorType.index();
updateBoundsForCyclicDistribution(builder, loc, procInfo[index].procId,
updateBoundsForCyclicDistribution(b, loc, procInfo[index].procId,
procInfo[index].nprocs, lbsStorage[i],
ubsStorage[i], stepsStorage[i]);
index++;
@ -493,17 +497,17 @@ void GenerateLoopNest<scf::ParallelOp>::doit(
}
}
ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage);
auto bodyBuilderWithoutIterArgsFn = [&](ValueRange ivs) {
bodyBuilderFn(ivs, {});
};
generateParallelLoopNest(lbs, ubs, steps, iteratorTypes,
bodyBuilderWithoutIterArgsFn, ivs,
distributionMethod);
generateParallelLoopNest(
b, loc, lbs, ubs, steps, iteratorTypes,
[&](OpBuilder &b, Location loc, ValueRange ivs) {
bodyBuilderFn(b, loc, ivs, {});
},
ivs, distributionMethod);
assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
}
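Callers of GenerateLoopNest<scf::ParallelOp>::doit now pass the builder and location explicitly, and the body callback itself receives an OpBuilder and Location. A hedged sketch of a call site under the updated signature, assuming the includes above plus the Linalg utils header (variable names are assumptions, not taken from the patch):

#include "mlir/Dialect/Linalg/Utils/Utils.h"

// Illustrative only: invoking the updated doit entry point.
void callDoitSketch(OpBuilder &b, Location loc, ArrayRef<Range> loopRanges,
                    linalg::LinalgOp linalgOp,
                    ArrayRef<Attribute> iteratorTypes) {
  linalg::GenerateLoopNest<scf::ParallelOp>::doit(
      b, loc, loopRanges, linalgOp, iteratorTypes,
      [&](OpBuilder &b, Location loc, ValueRange ivs,
          ValueRange iterArgs) -> scf::ValueVector {
        // Emit the loop body with `b`/`loc`; parallel nests carry no iter args.
        return scf::ValueVector();
      },
      /*distributionOptions=*/llvm::None);
}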
SmallVector<Value, 4> makeTiledShapes(OpBuilder &builder, Location loc,
SmallVector<Value, 4> makeTiledShapes(OpBuilder &b, Location loc,
LinalgOp linalgOp,
ArrayRef<Value> tiledOperands,
ValueRange ivs, ValueRange tileSizes,
@ -529,7 +533,7 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &builder, Location loc,
LLVM_DEBUG(llvm::dbgs() << "size: " << subShapeSizes.back() << "\n");
}
MLIRContext *context = builder.getContext();
MLIRContext *context = b.getContext();
SmallVector<Value, 4> tiledShapes;
tiledShapes.reserve(tiledOperands.size());
for (auto en : llvm::enumerate(tiledOperands)) {
@ -555,10 +559,10 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &builder, Location loc,
for (unsigned r = 0; r < rank; ++r) {
LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for dim#" << r);
if (!isTiled(map.getSubMap({r}), tileSizes)) {
offsets.push_back(builder.getIndexAttr(0));
offsets.push_back(b.getIndexAttr(0));
Value dim = memref_dim(shapedOp, r).value;
sizes.push_back(dim);
strides.push_back(builder.getIndexAttr(1));
strides.push_back(b.getIndexAttr(1));
LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
continue;
}
@ -568,10 +572,9 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &builder, Location loc,
// (i.e. the op does not subsample, stepping occurs in the loop).
auto m = map.getSubMap({r});
LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: submap: " << map << "\n");
auto offset = applyMapToValues(builder, loc, m, lbs).front();
auto offset = applyMapToValues(b, loc, m, lbs).front();
offsets.push_back(offset);
auto closedIntSize =
applyMapToValues(builder, loc, m, subShapeSizes).front();
auto closedIntSize = applyMapToValues(b, loc, m, subShapeSizes).front();
// Resulting size needs to be made half open interval again.
auto size = closedIntSize + std_constant_index(1);
LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: raw size: " << size << "\n");
@ -589,27 +592,29 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &builder, Location loc,
AffineExpr dim0, dim1, dim2;
bindDims(context, dim0, dim1, dim2);
// Compute min(size, dim - offset) to avoid out-of-bounds accesses.
auto minMap = AffineMap::get(
/*dimCount=*/3, /*symbolCount=*/0, {dim0, dim1 - dim2}, context);
Value d = memref_dim(shapedOp, r);
AffineMap minMap =
AffineMap::inferFromExprList(
ArrayRef<ArrayRef<AffineExpr>>{{dim0, dim1 - dim2}})
.front();
Value d = b.create<memref::DimOp>(loc, shapedOp, r);
SmallVector<Value, 4> operands{size, d, offset};
fullyComposeAffineMapAndOperands(&minMap, &operands);
size = affine_min(builder.getIndexType(), minMap, operands);
size = b.create<AffineMinOp>(loc, b.getIndexType(), minMap, operands);
}
sizes.push_back(size);
LLVM_DEBUG(llvm::dbgs()
<< "makeTiledShapes: new offset: " << offset << "\n");
LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: new size: " << size << "\n");
strides.push_back(builder.getIndexAttr(1));
strides.push_back(b.getIndexAttr(1));
}
if (shapedType.isa<MemRefType>())
tiledShapes.push_back(builder.create<memref::SubViewOp>(
loc, shapedOp, offsets, sizes, strides));
tiledShapes.push_back(
b.create<memref::SubViewOp>(loc, shapedOp, offsets, sizes, strides));
else
tiledShapes.push_back(
builder.create<SubTensorOp>(loc, shapedOp, offsets, sizes, strides));
b.create<SubTensorOp>(loc, shapedOp, offsets, sizes, strides));
}
return tiledShapes;
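The clamp in the loop above computes min(size, dim - offset) so a tile never reads past the end of the shaped operand. A standalone sketch of just that computation, assuming the affine headers are available (the helper name is invented):

#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"

// Illustrative only: emit affine.min (d0, d1 - d2) applied to (size, dim, offset).
static Value clampTileSizeSketch(OpBuilder &b, Location loc, Value size,
                                 Value dim, Value offset) {
  AffineExpr d0, d1, d2;
  bindDims(b.getContext(), d0, d1, d2);
  AffineMap minMap = AffineMap::inferFromExprList(
                         ArrayRef<ArrayRef<AffineExpr>>{{d0, d1 - d2}})
                         .front();
  SmallVector<Value, 4> operands{size, dim, offset};
  fullyComposeAffineMapAndOperands(&minMap, &operands);
  return b.create<AffineMinOp>(loc, b.getIndexType(), minMap, operands);
}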

View File

@ -1,6 +1,5 @@
add_mlir_dialect_library(MLIRSCF
SCF.cpp
EDSC/Builders.cpp
ADDITIONAL_HEADER_DIRS
${MLIR_MAIN_INCLUDE_DIR}/mlir/LoopOps
@ -9,7 +8,6 @@ add_mlir_dialect_library(MLIRSCF
MLIRSCFOpsIncGen
LINK_LIBS PUBLIC
MLIREDSC
MLIRIR
MLIRLoopLikeInterface
MLIRMemRef

View File

@ -1,135 +0,0 @@
//===- Builders.cpp - MLIR Declarative Builder Classes --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/SCF/EDSC/Builders.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
using namespace mlir;
using namespace mlir::edsc;
mlir::scf::LoopNest
mlir::edsc::loopNestBuilder(ValueRange lbs, ValueRange ubs, ValueRange steps,
function_ref<void(ValueRange)> fun) {
// Delegates actual construction to scf::buildLoopNest by wrapping `fun` into
// the expected function interface.
assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
return mlir::scf::buildLoopNest(
ScopedContext::getBuilderRef(), ScopedContext::getLocation(), lbs, ubs,
steps, [&](OpBuilder &builder, Location loc, ValueRange ivs) {
ScopedContext context(builder, loc);
if (fun)
fun(ivs);
});
}
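With this wrapper gone, call sites use scf::buildLoopNest directly and receive the builder and location in the body callback. A minimal sketch, assuming the SCF headers from the sketches above (the helper name is invented):

// Illustrative only: direct use of scf::buildLoopNest, the API the removed
// loopNestBuilder overloads delegated to.
static scf::LoopNest buildNestSketch(OpBuilder &b, Location loc, ValueRange lbs,
                                     ValueRange ubs, ValueRange steps) {
  return scf::buildLoopNest(
      b, loc, lbs, ubs, steps,
      [](OpBuilder &nested, Location nestedLoc, ValueRange ivs) {
        // Loop body goes here; induction variables arrive in `ivs`.
        (void)ivs;
      });
}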
mlir::scf::LoopNest
mlir::edsc::loopNestBuilder(Value lb, Value ub, Value step,
function_ref<void(Value)> fun) {
// Delegates to the ValueRange-based version by wrapping the lambda.
auto wrapper = [&](ValueRange ivs) {
assert(ivs.size() == 1);
if (fun)
fun(ivs[0]);
};
return loopNestBuilder(ValueRange(lb), ValueRange(ub), ValueRange(step),
wrapper);
}
mlir::scf::LoopNest mlir::edsc::loopNestBuilder(
Value lb, Value ub, Value step, ValueRange iterArgInitValues,
function_ref<scf::ValueVector(Value, ValueRange)> fun) {
// Delegates actual construction to scf::buildLoopNest by wrapping `fun` into
// the expected function interface.
assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
return mlir::scf::buildLoopNest(
ScopedContext::getBuilderRef(), ScopedContext::getLocation(), lb, ub,
step, iterArgInitValues,
[&](OpBuilder &builder, Location loc, ValueRange ivs, ValueRange args) {
assert(ivs.size() == 1 && "expected one induction variable");
ScopedContext context(builder, loc);
if (fun)
return fun(ivs[0], args);
return scf::ValueVector(iterArgInitValues.begin(),
iterArgInitValues.end());
});
}
mlir::scf::LoopNest mlir::edsc::loopNestBuilder(
ValueRange lbs, ValueRange ubs, ValueRange steps,
ValueRange iterArgInitValues,
function_ref<scf::ValueVector(ValueRange, ValueRange)> fun) {
// Delegates actual construction to scf::buildLoopNest by wrapping `fun` into
// the expected function interface.
assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
return mlir::scf::buildLoopNest(
ScopedContext::getBuilderRef(), ScopedContext::getLocation(), lbs, ubs,
steps, iterArgInitValues,
[&](OpBuilder &builder, Location loc, ValueRange ivs, ValueRange args) {
ScopedContext context(builder, loc);
if (fun)
return fun(ivs, args);
return scf::ValueVector(iterArgInitValues.begin(),
iterArgInitValues.end());
});
}
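The iter-args overloads map onto the corresponding scf::buildLoopNest form, whose body returns the values carried into the next iteration. A hedged sketch under the same headers as above (the helper name is invented):

// Illustrative only: loop nest with loop-carried values built directly.
static scf::LoopNest buildNestWithIterArgsSketch(OpBuilder &b, Location loc,
                                                 ValueRange lbs, ValueRange ubs,
                                                 ValueRange steps,
                                                 ValueRange iterArgInitValues) {
  return scf::buildLoopNest(
      b, loc, lbs, ubs, steps, iterArgInitValues,
      [](OpBuilder &nested, Location nestedLoc, ValueRange ivs,
         ValueRange args) -> scf::ValueVector {
        // A real body would update the carried values; here they pass through.
        return scf::ValueVector(args.begin(), args.end());
      });
}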
static std::function<void(OpBuilder &, Location)>
wrapIfBody(function_ref<scf::ValueVector()> body, TypeRange expectedTypes) {
(void)expectedTypes;
return [=](OpBuilder &builder, Location loc) {
ScopedContext context(builder, loc);
scf::ValueVector returned = body();
assert(ValueRange(returned).getTypes() == expectedTypes &&
"'if' body builder returned values of unexpected type");
builder.create<scf::YieldOp>(loc, returned);
};
}
ValueRange
mlir::edsc::conditionBuilder(TypeRange results, Value condition,
function_ref<scf::ValueVector()> thenBody,
function_ref<scf::ValueVector()> elseBody,
scf::IfOp *ifOp) {
assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
assert(thenBody && "thenBody is mandatory");
auto newOp = ScopedContext::getBuilderRef().create<scf::IfOp>(
ScopedContext::getLocation(), results, condition,
wrapIfBody(thenBody, results), wrapIfBody(elseBody, results));
if (ifOp)
*ifOp = newOp;
return newOp.getResults();
}
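Result-carrying conditionals are now built as scf.if with explicit yields in both regions. A hedged sketch of that replacement pattern, same headers as above (the helper name is invented):

// Illustrative only: scf.if with one result and explicit yields, replacing the
// results-returning edsc::conditionBuilder.
static Value selectLikeSketch(OpBuilder &b, Location loc, Type resultType,
                              Value cond, Value thenVal, Value elseVal) {
  auto ifOp = b.create<scf::IfOp>(
      loc, TypeRange{resultType}, cond,
      [&](OpBuilder &nested, Location nestedLoc) {
        nested.create<scf::YieldOp>(nestedLoc, thenVal);
      },
      [&](OpBuilder &nested, Location nestedLoc) {
        nested.create<scf::YieldOp>(nestedLoc, elseVal);
      });
  return ifOp.getResult(0);
}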
static std::function<void(OpBuilder &, Location)>
wrapZeroResultIfBody(function_ref<void()> body) {
return [=](OpBuilder &builder, Location loc) {
ScopedContext context(builder, loc);
body();
builder.create<scf::YieldOp>(loc);
};
}
ValueRange mlir::edsc::conditionBuilder(Value condition,
function_ref<void()> thenBody,
function_ref<void()> elseBody,
scf::IfOp *ifOp) {
assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
assert(thenBody && "thenBody is mandatory");
auto newOp = ScopedContext::getBuilderRef().create<scf::IfOp>(
ScopedContext::getLocation(), condition, wrapZeroResultIfBody(thenBody),
elseBody ? llvm::function_ref<void(OpBuilder &, Location)>(
wrapZeroResultIfBody(elseBody))
: llvm::function_ref<void(OpBuilder &, Location)>(nullptr));
if (ifOp)
*ifOp = newOp;
return {};
}

View File

@ -24,18 +24,18 @@ func @matmul(%arg0: memref<?xi8>, %M: index, %N: index, %K: index) {
// CHECK-SAME: [[M:arg[0-9]+]]: index
// CHECK-SAME: [[N:arg[0-9]+]]: index
// CHECK-SAME: [[K:arg[0-9]+]]: index
// CHECK: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
// CHECK: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
// CHECK: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
// CHECK: affine.for %{{.*}} = 0 to %{{.*}} {
// CHECK: affine.for %{{.*}} = 0 to %{{.*}} {
// CHECK: affine.for %{{.*}} = 0 to %{{.*}} {
// CHECK-DAG: %[[a:.*]] = affine.load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
// CHECK-DAG: %[[b:.*]] = affine.load %[[B]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
// CHECK: %[[A:.*]] = memref.view %{{.*}} : memref<?xi8> to memref<?x?xf32>
// CHECK: %[[B:.*]] = memref.view %{{.*}} : memref<?xi8> to memref<?x?xf32>
// CHECK: %[[C:.*]] = memref.view %{{.*}} : memref<?xi8> to memref<?x?xf32>
// CHECK: affine.for
// CHECK: affine.for
// CHECK: affine.for
// CHECK-DAG: %[[a:.*]] = affine.load %[[A]]{{.*}} : memref<?x?xf32>
// CHECK-DAG: %[[b:.*]] = affine.load %[[B]]{{.*}} : memref<?x?xf32>
// CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
// CHECK-DAG: %[[c:.*]] = affine.load %[[C]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
// CHECK-DAG: %[[c:.*]] = affine.load %[[C]]{{.*}} : memref<?x?xf32>
// CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32
// CHECK: affine.store %[[res]], %[[C]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
// CHECK: affine.store %[[res]], %[[C]]{{.*}} : memref<?x?xf32>
func @conv_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg2: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>) {
linalg.conv(%arg0, %arg1, %arg2) {strides = [2]}: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>
@ -49,12 +49,12 @@ func @conv_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1:
// CHECK: %[[K:.*]] = memref.dim %arg0, %c2 : memref<?x?x?xf32, #[[$strided3D]]>
// CHECK: %[[B:.*]] = memref.dim %arg1, %c0 : memref<?x?x?xf32, #[[$strided3D]]>
// CHECK: %[[X0:.*]] = memref.dim %arg2, %c1 : memref<?x?x?xf32, #[[$strided3D]]>
// CHECK: affine.for %{{.*}} = 0 to %[[B]] {
// CHECK: affine.for %{{.*}} = 0 to %[[X0]] {
// CHECK: affine.for %{{.*}} = 0 to %[[K]] {
// CHECK: affine.for %{{.*}} = 0 to %[[Q]] {
// CHECK: affine.for %{{.*}} = 0 to %[[Z0]] {
// CHECK: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}})
// CHECK: affine.for {{.*}}0 to %[[B]] {
// CHECK: affine.for {{.*}}0 to %[[X0]] {
// CHECK: affine.for {{.*}}0 to %[[K]] {
// CHECK: affine.for {{.*}}0 to %[[Q]] {
// CHECK: affine.for {{.*}}0 to %[[Z0]] {
// CHECK: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]]{{.*}}
// No padding needed here; only affine loads.
// CHECK-NEXT: affine.load
// CHECK-NEXT: affine.load
@ -78,26 +78,26 @@ func @conv_padding(%arg0: memref<?x?x?x?xf32>,
// CHECK: %[[B:.*]] = memref.dim %arg1, %c0 : memref<?x?x?x?xf32>
// CHECK: %[[X0:.*]] = memref.dim %arg2, %c1 : memref<?x?x?x?xf32>
// CHECK: %[[X1:.*]] = memref.dim %arg2, %c2 : memref<?x?x?x?xf32>
// CHECK: affine.for %{{.*}} = 0 to %[[B]] {
// CHECK: affine.for %{{.*}} = 0 to %[[X0]] {
// CHECK: affine.for %{{.*}} = 0 to %[[X1]] {
// CHECK: affine.for %{{.*}} = 0 to %[[K]] {
// CHECK: affine.for %{{.*}} = 0 to %[[Q]] {
// CHECK: affine.for %{{.*}} = 0 to %[[Z0]] {
// CHECK: affine.for %{{.*}} = 0 to %[[Z1]] {
// CHECK: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
// CHECK: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
// CHECK: affine.for {{.*}}0 to %[[B]] {
// CHECK: affine.for {{.*}}0 to %[[X0]] {
// CHECK: affine.for {{.*}}0 to %[[X1]] {
// CHECK: affine.for {{.*}}0 to %[[K]] {
// CHECK: affine.for {{.*}}0 to %[[Q]] {
// CHECK: affine.for {{.*}}0 to %[[Z0]] {
// CHECK: affine.for {{.*}}0 to %[[Z1]] {
// CHECK: %[[SUM0:.*]] = affine.apply #{{.*}}
// CHECK: %[[SUM1:.*]] = affine.apply #{{.*}}
// CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[SUM0]])
// CHECK: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[SUM1]])
// Padded conv involves an affine.max in the memory access and this is not
// allowed by affine.load. Use memref.load in such cases.
// CHECK: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref<?x?x?x?xf32>
// CHECK: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32
// CHECK: %{{.*}} = affine.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
// CHECK: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32
// CHECK: %{{.*}} = affine.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
// CHECK: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
// CHECK: affine.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
// CHECK: memref.load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref<?x?x?x?xf32>
// CHECK: select {{.*}} : f32
// CHECK: affine.load
// CHECK: mulf {{.*}} : f32
// CHECK: affine.load
// CHECK: addf {{.*}} : f32
// CHECK: affine.store
//----------------------------------------------------------------------------//
// Named ops to loops.
@ -115,10 +115,10 @@ func @named_batch_matmul(%A: memref<?x?x?xf32>, %B: memref<?x?x?xf32>, %C: memre
// CHECK: %[[M:.*]] = memref.dim %[[mA]], %c1 : memref<?x?x?xf32>
// CHECK: %[[K:.*]] = memref.dim %[[mA]], %c2 : memref<?x?x?xf32>
// CHECK: %[[N:.*]] = memref.dim %[[mB]], %c2 : memref<?x?x?xf32>
// CHECK: affine.for %[[b:.*]] = 0 to %[[B]] {
// CHECK: affine.for %[[m:.*]] = 0 to %[[M]] {
// CHECK: affine.for %[[n:.*]] = 0 to %[[N]] {
// CHECK: affine.for %[[k:.*]] = 0 to %[[K]] {
// CHECK: affine.for %[[b:.*]] = {{.*}}0 to %[[B]] {
// CHECK: affine.for %[[m:.*]] = {{.*}}0 to %[[M]] {
// CHECK: affine.for %[[n:.*]] = {{.*}}0 to %[[N]] {
// CHECK: affine.for %[[k:.*]] = {{.*}}0 to %[[K]] {
// CHECK: %[[va:.*]] = affine.load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref<?x?x?xf32>
// CHECK: %[[vb:.*]] = affine.load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref<?x?x?xf32>
// CHECK: %[[vc:.*]] = affine.load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32>

File diff suppressed because it is too large.