[mlir][SCF] NFC - Drop SCF EDSC usage

Drop the SCF dialect EDSC subdirectory and update all uses.

Differential Revision: https://reviews.llvm.org/D102780
commit 84a880e1e2
parent 9383e9c1e6
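The mechanical pattern applied throughout the diff below is the same everywhere: drop the ScopedContext-based EDSC helpers (loopNestBuilder, TemplatedIndexedValue and friends) and call the explicit OpBuilder/Location entry points instead. A minimal before/after sketch of that migration, modeled on the GPU memory-promotion change in this diff; the wrapper function and the src/dst/bounds names are illustrative, not part of the patch:

    #include "mlir/Dialect/MemRef/IR/MemRef.h"
    #include "mlir/Dialect/SCF/SCF.h"

    using namespace mlir;

    // Before (EDSC): builder and location were implicit in a ScopedContext.
    //   ScopedContext scope(builder, loc);
    //   loopNestBuilder(lbs, ubs, steps, [&](ValueRange ivs) {
    //     MemRefIndexedValue from(src), to(dst);
    //     to(ivs) = from(ivs);  // indexed-value sugar for load + store
    //   });

    // After: the builder and location are threaded explicitly through the
    // body callback, and the load/store ops are created directly.
    static void buildCopyLoops(OpBuilder &b, Location loc, Value src, Value dst,
                               ValueRange lbs, ValueRange ubs, ValueRange steps) {
      scf::buildLoopNest(b, loc, lbs, ubs, steps,
                         [&](OpBuilder &b, Location loc, ValueRange ivs) {
                           Value v = b.create<memref::LoadOp>(loc, src, ivs);
                           b.create<memref::StoreOp>(loc, v, dst, ivs);
                         });
    }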
@@ -89,132 +89,6 @@ Value uge(Value lhs, Value rhs);
} // namespace op

/// Arithmetic operator overloadings.
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::operator+(Value e) {
  using op::operator+;
  return static_cast<Value>(*this) + e;
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::operator-(Value e) {
  using op::operator-;
  return static_cast<Value>(*this) - e;
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::operator*(Value e) {
  using op::operator*;
  return static_cast<Value>(*this) * e;
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::operator/(Value e) {
  using op::operator/;
  return static_cast<Value>(*this) / e;
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::operator%(Value e) {
  using op::operator%;
  return static_cast<Value>(*this) % e;
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::operator^(Value e) {
  using op::operator^;
  return static_cast<Value>(*this) ^ e;
}

/// Assignment-arithmetic operator overloadings.
template <typename Load, typename Store>
Store TemplatedIndexedValue<Load, Store>::operator+=(Value e) {
  using op::operator+;
  return Store(*this + e, getBase(), indices);
}
template <typename Load, typename Store>
Store TemplatedIndexedValue<Load, Store>::operator-=(Value e) {
  using op::operator-;
  return Store(*this - e, getBase(), indices);
}
template <typename Load, typename Store>
Store TemplatedIndexedValue<Load, Store>::operator*=(Value e) {
  using op::operator*;
  return Store(*this * e, getBase(), indices);
}
template <typename Load, typename Store>
Store TemplatedIndexedValue<Load, Store>::operator/=(Value e) {
  using op::operator/;
  return Store(*this / e, getBase(), indices);
}
template <typename Load, typename Store>
Store TemplatedIndexedValue<Load, Store>::operator%=(Value e) {
  using op::operator%;
  return Store(*this % e, getBase(), indices);
}
template <typename Load, typename Store>
Store TemplatedIndexedValue<Load, Store>::operator^=(Value e) {
  using op::operator^;
  return Store(*this ^ e, getBase(), indices);
}

/// Logical operator overloadings.
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::operator&&(Value e) {
  using op::operator&&;
  return static_cast<Value>(*this) && e;
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::operator||(Value e) {
  using op::operator||;
  return static_cast<Value>(*this) || e;
}

/// Comparison operator overloadings.
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::eq(Value e) {
  return eq(value, e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::ne(Value e) {
  return ne(value, e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::slt(Value e) {
  using op::slt;
  return slt(static_cast<Value>(*this), e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::sle(Value e) {
  using op::sle;
  return sle(static_cast<Value>(*this), e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::sgt(Value e) {
  using op::sgt;
  return sgt(static_cast<Value>(*this), e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::sge(Value e) {
  using op::sge;
  return sge(static_cast<Value>(*this), e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::ult(Value e) {
  using op::ult;
  return ult(static_cast<Value>(*this), e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::ule(Value e) {
  using op::ule;
  return ule(static_cast<Value>(*this), e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::ugt(Value e) {
  using op::ugt;
  return ugt(static_cast<Value>(*this), e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::uge(Value e) {
  using op::uge;
  return uge(static_cast<Value>(*this), e);
}

} // namespace edsc
} // namespace mlir
@@ -21,9 +21,6 @@ using affine_min = ValueBuilder<AffineMinOp>;
using affine_max = ValueBuilder<AffineMaxOp>;
using affine_store = OperationBuilder<AffineStoreOp>;

/// Provide an index notation around affine_load and affine_store.
using AffineIndexedValue = TemplatedIndexedValue<affine_load, affine_store>;

} // namespace intrinsics
} // namespace edsc
} // namespace mlir
@@ -244,19 +244,15 @@ struct RegionMatcher {
/// Utility class used to generate nested loops with ranges described by
/// `loopRanges` and loop type described by the `iteratorTypes`. `bodyBuilderFn`
/// is used to generate the body of the innermost loop. It is passed a range
/// of loop induction variables.
/// of loop induction variables and a range of iterArgs.
template <typename LoopTy>
struct GenerateLoopNest {
  using IndexedValueTy =
      typename std::conditional<std::is_same<LoopTy, AffineForOp>::value,
                                edsc::intrinsics::AffineIndexedValue,
                                edsc::intrinsics::MemRefIndexedValue>::type;

  static void
  doit(ArrayRef<Range> loopRanges, LinalgOp linalgOp,
       ArrayRef<Attribute> iteratorTypes,
       function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
       Optional<LinalgLoopDistributionOptions> = None);
  static void doit(OpBuilder &b, Location loc, ArrayRef<Range> loopRanges,
                   LinalgOp linalgOp, ArrayRef<Attribute> iteratorTypes,
                   function_ref<scf::ValueVector(OpBuilder &, Location,
                                                 ValueRange, ValueRange)>
                       bodyBuilderFn,
                   Optional<LinalgLoopDistributionOptions> = None);
};

} // namespace linalg
@@ -31,9 +31,6 @@ using memref_tensor_load = ValueBuilder<memref::TensorLoadOp>;
using memref_tensor_store = OperationBuilder<memref::TensorStoreOp>;
using memref_view = ValueBuilder<memref::ViewOp>;

/// Provide an index notation around memref_load and memref_store.
using MemRefIndexedValue =
    TemplatedIndexedValue<intrinsics::memref_load, intrinsics::memref_store>;
} // namespace intrinsics
} // namespace edsc
} // namespace mlir
@@ -1,56 +0,0 @@
//===- Builders.h - MLIR Declarative Builder Classes ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Provides intuitive composable interfaces for building structured MLIR
// snippets in a declarative fashion.
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_SCF_EDSC_BUILDERS_H_
#define MLIR_DIALECT_SCF_EDSC_BUILDERS_H_

#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/EDSC/Builders.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Types.h"

namespace mlir {
namespace edsc {

/// Adapters for building loop nests using the builder and the location stored
/// in ScopedContext. Actual builders are in scf::buildLoopNest.
scf::LoopNest loopNestBuilder(ValueRange lbs, ValueRange ubs,
                              ValueRange steps,
                              function_ref<void(ValueRange)> fun = nullptr);
scf::LoopNest loopNestBuilder(Value lb, Value ub, Value step,
                              function_ref<void(Value)> fun = nullptr);
scf::LoopNest loopNestBuilder(
    Value lb, Value ub, Value step, ValueRange iterArgInitValues,
    function_ref<scf::ValueVector(Value, ValueRange)> fun = nullptr);
scf::LoopNest loopNestBuilder(
    ValueRange lbs, ValueRange ubs, ValueRange steps,
    ValueRange iterArgInitValues,
    function_ref<scf::ValueVector(ValueRange, ValueRange)> fun = nullptr);

/// Adapters for building if conditions using the builder and the location
/// stored in ScopedContext. 'thenBody' is mandatory, 'elseBody' can be omitted
/// if the condition should not have an 'else' part.
/// When `ifOp` is specified, the scf::IfOp is captured. This is particularly
/// convenient for 0-result conditions.
ValueRange conditionBuilder(TypeRange results, Value condition,
                            function_ref<scf::ValueVector()> thenBody,
                            function_ref<scf::ValueVector()> elseBody = nullptr,
                            scf::IfOp *ifOp = nullptr);
ValueRange conditionBuilder(Value condition, function_ref<void()> thenBody,
                            function_ref<void()> elseBody = nullptr,
                            scf::IfOp *ifOp = nullptr);

} // namespace edsc
} // namespace mlir

#endif // MLIR_DIALECT_SCF_EDSC_BUILDERS_H_
@@ -1,24 +0,0 @@
//===- Intrinsics.h - MLIR EDSC Intrinsics for SCF --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM
// Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_DIALECT_SCF_EDSC_INTRINSICS_H_
#define MLIR_DIALECT_SCF_EDSC_INTRINSICS_H_

#include "mlir/Dialect/SCF/EDSC/Builders.h"

namespace mlir {
namespace edsc {
namespace intrinsics {

using loop_yield = OperationBuilder<scf::YieldOp>;

} // namespace intrinsics
} // namespace edsc
} // namespace mlir

#endif // MLIR_DIALECT_SCF_EDSC_INTRINSICS_H_
@@ -169,172 +169,6 @@ private:
  SmallVector<AffineExpr, 4> exprs;
};

/// A TemplatedIndexedValue brings an index notation over the template Load and
/// Store parameters. Assigning to an IndexedValue emits an actual `Store`
/// operation, while converting an IndexedValue to a Value emits an actual
/// `Load` operation.
template <typename Load, typename Store>
class TemplatedIndexedValue {
public:
  explicit TemplatedIndexedValue(Value v) : value(v) {}

  TemplatedIndexedValue(const TemplatedIndexedValue &rhs) = default;

  TemplatedIndexedValue operator()() { return *this; }
  /// Returns a new `TemplatedIndexedValue`.
  TemplatedIndexedValue operator()(Value index) {
    TemplatedIndexedValue res(value);
    res.indices.push_back(index);
    return res;
  }
  template <typename... Args>
  TemplatedIndexedValue operator()(Value index, Args... indices) {
    return TemplatedIndexedValue(value, index).append(indices...);
  }
  TemplatedIndexedValue operator()(ValueRange indices) {
    return TemplatedIndexedValue(value, indices);
  }

  /// Emits a `store`.
  Store operator=(const TemplatedIndexedValue &rhs) {
    return Store(rhs, value, indices);
  }
  Store operator=(Value rhs) { return Store(rhs, value, indices); }

  /// Emits a `load` when converting to a Value.
  operator Value() const { return Load(value, indices); }

  /// Returns the base memref.
  Value getBase() const { return value; }

  /// Returns the underlying memref.
  MemRefType getMemRefType() const {
    return value.getType().template cast<MemRefType>();
  }

  /// Returns the underlying MemRef elemental type cast as `T`.
  template <typename T>
  T getElementalTypeAs() const {
    return value.getType()
        .template cast<MemRefType>()
        .getElementType()
        .template cast<T>();
  }

  /// Arithmetic operator overloadings.
  Value operator+(Value e);
  Value operator-(Value e);
  Value operator*(Value e);
  Value operator/(Value e);
  Value operator%(Value e);
  Value operator^(Value e);
  Value operator+(TemplatedIndexedValue e) {
    return *this + static_cast<Value>(e);
  }
  Value operator-(TemplatedIndexedValue e) {
    return *this - static_cast<Value>(e);
  }
  Value operator*(TemplatedIndexedValue e) {
    return *this * static_cast<Value>(e);
  }
  Value operator/(TemplatedIndexedValue e) {
    return *this / static_cast<Value>(e);
  }
  Value operator%(TemplatedIndexedValue e) {
    return *this % static_cast<Value>(e);
  }
  Value operator^(TemplatedIndexedValue e) {
    return *this ^ static_cast<Value>(e);
  }

  /// Assignment-arithmetic operator overloadings.
  Store operator+=(Value e);
  Store operator-=(Value e);
  Store operator*=(Value e);
  Store operator/=(Value e);
  Store operator%=(Value e);
  Store operator^=(Value e);
  Store operator+=(TemplatedIndexedValue e) {
    return this->operator+=(static_cast<Value>(e));
  }
  Store operator-=(TemplatedIndexedValue e) {
    return this->operator-=(static_cast<Value>(e));
  }
  Store operator*=(TemplatedIndexedValue e) {
    return this->operator*=(static_cast<Value>(e));
  }
  Store operator/=(TemplatedIndexedValue e) {
    return this->operator/=(static_cast<Value>(e));
  }
  Store operator%=(TemplatedIndexedValue e) {
    return this->operator%=(static_cast<Value>(e));
  }
  Store operator^=(TemplatedIndexedValue e) {
    return this->operator^=(static_cast<Value>(e));
  }

  /// Logical operator overloadings.
  Value operator&&(Value e);
  Value operator||(Value e);
  Value operator&&(TemplatedIndexedValue e) {
    return *this && static_cast<Value>(e);
  }
  Value operator||(TemplatedIndexedValue e) {
    return *this || static_cast<Value>(e);
  }

  /// Comparison operator overloadings.
  Value eq(Value e);
  Value ne(Value e);
  Value slt(Value e);
  Value sle(Value e);
  Value sgt(Value e);
  Value sge(Value e);
  Value ult(Value e);
  Value ule(Value e);
  Value ugt(Value e);
  Value uge(Value e);
  Value slt(TemplatedIndexedValue e) {
    return slt(*this, static_cast<Value>(e));
  }
  Value sle(TemplatedIndexedValue e) {
    return sle(*this, static_cast<Value>(e));
  }
  Value sgt(TemplatedIndexedValue e) {
    return sgt(*this, static_cast<Value>(e));
  }
  Value sge(TemplatedIndexedValue e) {
    return sge(*this, static_cast<Value>(e));
  }
  Value ult(TemplatedIndexedValue e) {
    return ult(*this, static_cast<Value>(e));
  }
  Value ule(TemplatedIndexedValue e) {
    return ule(*this, static_cast<Value>(e));
  }
  Value ugt(TemplatedIndexedValue e) {
    return ugt(*this, static_cast<Value>(e));
  }
  Value uge(TemplatedIndexedValue e) {
    return uge(*this, static_cast<Value>(e));
  }

private:
  TemplatedIndexedValue(Value value, ValueRange indices)
      : value(value), indices(indices.begin(), indices.end()) {}

  TemplatedIndexedValue &append() { return *this; }

  template <typename T, typename... Args>
  TemplatedIndexedValue &append(T index, Args... indices) {
    this->indices.push_back(static_cast<Value>(index));
    append(indices...);
    return *this;
  }
  Value value;
  SmallVector<Value, 8> indices;
};

} // namespace edsc
} // namespace mlir
@@ -14,7 +14,7 @@
#include "mlir/Dialect/GPU/MemoryPromotion.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
#include "mlir/Dialect/SCF/EDSC/Builders.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/LoopUtils.h"
@@ -41,7 +41,7 @@ static StringRef getDimName(unsigned dim) {
/// GPUDialect::getNumWorkgroupDimensions() loops, completing the nest with
/// single-iteration loops. Maps the innermost loops to thread dimensions, in
/// reverse order to enable access coalescing in the innermost loop.
static void insertCopyLoops(OpBuilder &builder, Location loc,
static void insertCopyLoops(OpBuilder &b, Location loc,
                            MemRefBoundsCapture &bounds, Value from, Value to) {
  // Create EDSC handles for bounds.
  unsigned rank = bounds.rank();
@@ -68,24 +68,24 @@ static void insertCopyLoops(OpBuilder &builder, Location loc,
      [](int64_t step) { return std_constant_index(step); });

  // Obtain thread identifiers and block sizes, necessary to map to them.
  auto indexType = builder.getIndexType();
  auto indexType = b.getIndexType();
  SmallVector<Value, 3> threadIds, blockDims;
  for (unsigned i = 0; i < 3; ++i) {
    auto dimName = builder.getStringAttr(getDimName(i));
    threadIds.push_back(
        builder.create<gpu::ThreadIdOp>(loc, indexType, dimName));
    blockDims.push_back(
        builder.create<gpu::BlockDimOp>(loc, indexType, dimName));
    auto dimName = b.getStringAttr(getDimName(i));
    threadIds.push_back(b.create<gpu::ThreadIdOp>(loc, indexType, dimName));
    blockDims.push_back(b.create<gpu::BlockDimOp>(loc, indexType, dimName));
  }

  // Produce the loop nest with copies.
  SmallVector<Value, 8> ivs(lbs.size());
  loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) {
    ivs.assign(loopIvs.begin(), loopIvs.end());
    auto activeIvs = llvm::makeArrayRef(ivs).take_back(rank);
    MemRefIndexedValue fromHandle(from), toHandle(to);
    toHandle(activeIvs) = fromHandle(activeIvs);
  });
  mlir::scf::buildLoopNest(
      b, loc, lbs, ubs, steps,
      [&](OpBuilder &b, Location loc, ValueRange loopIvs) {
        ivs.assign(loopIvs.begin(), loopIvs.end());
        auto activeIvs = llvm::makeArrayRef(ivs).take_back(rank);
        Value loaded = b.create<memref::LoadOp>(loc, from, activeIvs);
        b.create<memref::StoreOp>(loc, loaded, to, activeIvs);
      });

  // Map the innermost loops to threads in reverse order.
  for (auto en :
@@ -142,17 +142,17 @@ static void insertCopies(Region &region, Location loc, Value from, Value to) {
  assert(llvm::hasSingleElement(region) &&
         "unstructured control flow not supported");

  OpBuilder builder(region.getContext());
  builder.setInsertionPointToStart(&region.front());
  OpBuilder b(region.getContext());
  b.setInsertionPointToStart(&region.front());

  ScopedContext edscContext(builder, loc);
  ScopedContext edscContext(b, loc);
  MemRefBoundsCapture fromBoundsCapture(from);
  insertCopyLoops(builder, loc, fromBoundsCapture, from, to);
  builder.create<gpu::BarrierOp>(loc);
  insertCopyLoops(b, loc, fromBoundsCapture, from, to);
  b.create<gpu::BarrierOp>(loc);

  builder.setInsertionPoint(&region.front().back());
  builder.create<gpu::BarrierOp>(loc);
  insertCopyLoops(builder, loc, fromBoundsCapture, to, from);
  b.setInsertionPoint(&region.front().back());
  b.create<gpu::BarrierOp>(loc);
  insertCopyLoops(b, loc, fromBoundsCapture, to, from);
}

/// Promotes a function argument to workgroup memory in the given function. The
@@ -11,7 +11,6 @@
#include "mlir/Dialect/Linalg/EDSC/Builders.h"
#include "mlir/Dialect/Linalg/EDSC/Intrinsics.h"
#include "mlir/Dialect/Math/EDSC/Intrinsics.h"
#include "mlir/Dialect/SCF/EDSC/Builders.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/IR/AffineExpr.h"
@@ -7,16 +7,11 @@
//===----------------------------------------------------------------------===//

#include "PassDetail.h"
#include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
#include "mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
#include "mlir/Dialect/SCF/EDSC/Builders.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/BlockAndValueMapping.h"
@@ -27,38 +22,67 @@
#include "llvm/ADT/TypeSwitch.h"

using namespace mlir;
using namespace mlir::edsc;
using namespace mlir::edsc::intrinsics;
using namespace mlir::linalg;

using edsc::op::operator+;
namespace {
/// Helper struct to build simple arithmetic quantities with minimal type
/// inference support.
struct ArithBuilder {
  ArithBuilder(OpBuilder &b, Location loc) : b(b), loc(loc) {}

  static SmallVector<Value, 8> makeCanonicalAffineApplies(OpBuilder &b,
                                                          Location loc,
                                                          AffineMap map,
                                                          ArrayRef<Value> vals) {
  Value select(Value cmp, Value lhs, Value rhs) {
    return b.create<SelectOp>(loc, cmp, lhs, rhs);
  }
  Value slt(Value lhs, Value rhs) {
    if (lhs.getType().isa<IntegerType>())
      return b.create<CmpIOp>(loc, CmpIPredicate::slt, lhs, rhs);
    return b.create<CmpFOp>(loc, CmpFPredicate::OLT, lhs, rhs);
  }
  Value sgt(Value lhs, Value rhs) {
    if (lhs.getType().isa<IntegerType>())
      return b.create<CmpIOp>(loc, CmpIPredicate::sgt, lhs, rhs);
    return b.create<CmpFOp>(loc, CmpFPredicate::OGT, lhs, rhs);
  }
  Value add(Value lhs, Value rhs) {
    if (lhs.getType().isa<IntegerType>())
      return b.create<AddIOp>(loc, lhs, rhs);
    return b.create<AddFOp>(loc, lhs, rhs);
  }
  Value mul(Value lhs, Value rhs) {
    if (lhs.getType().isa<IntegerType>())
      return b.create<MulIOp>(loc, lhs, rhs);
    return b.create<MulFOp>(loc, lhs, rhs);
  }

  OpBuilder &b;
  Location loc;
};
} // namespace

static SmallVector<Value> makeCanonicalAffineApplies(OpBuilder &b, Location loc,
                                                     AffineMap map,
                                                     ArrayRef<Value> vals) {
  if (map.isEmpty())
    return {};

  assert(map.getNumInputs() == vals.size());
  SmallVector<Value, 8> res;
  SmallVector<Value> res;
  res.reserve(map.getNumResults());
  auto dims = map.getNumDims();
  for (auto e : map.getResults()) {
    auto exprMap = AffineMap::get(dims, map.getNumSymbols(), e);
    SmallVector<Value, 4> operands(vals.begin(), vals.end());
    SmallVector<Value> operands(vals.begin(), vals.end());
    canonicalizeMapAndOperands(&exprMap, &operands);
    res.push_back(affine_apply(exprMap, operands));
    res.push_back(b.create<AffineApplyOp>(loc, exprMap, operands));
  }
  return res;
}

template <typename IndexedValueType, typename OpType>
static void inlineRegionAndEmitStore(OpType op, ArrayRef<Value> indexedValues,
                                     ArrayRef<SmallVector<Value, 8>> indexing,
template <typename LoadOpTy, typename StoreOpTy, typename OpType>
static void inlineRegionAndEmitStore(OpBuilder &b, Location loc, OpType op,
                                     ArrayRef<Value> indexedValues,
                                     ArrayRef<SmallVector<Value>> indexing,
                                     ArrayRef<Value> outputBuffers) {
  assert(op->getNumRegions() == 1 && "Expected single region op");
  auto &b = ScopedContext::getBuilderRef();
  auto &block = op->getRegion(0).front();
  BlockAndValueMapping map;
  map.map(block.getArguments(), indexedValues);
@@ -67,26 +91,24 @@ static void inlineRegionAndEmitStore(OpType op, ArrayRef<Value> indexedValues,
    map.map(op.getResults(), newOp->getResults());
  }

  Operation &terminator = block.back();
  assert(isa<linalg::YieldOp>(terminator) &&
         "expected a yield op in the end of the region");
  for (unsigned i = 0, e = terminator.getNumOperands(); i < e; ++i) {
    IndexedValueType O(outputBuffers[i]);
    O(indexing[i]) = map.lookupOrDefault(terminator.getOperand(i));
  Operation *terminator = block.getTerminator();
  for (OpOperand &operand : terminator->getOpOperands()) {
    Value toStore = map.lookupOrDefault(operand.get());
    b.create<StoreOpTy>(loc, toStore, outputBuffers[operand.getOperandNumber()],
                        indexing[operand.getOperandNumber()]);
  }
}

// Returns a pair that contains input indices and output indices of a
// SingleInputPoolingOp `op`.
struct InputAndOutputIndices {
  SmallVector<Value, 8> inputs;
  SmallVector<Value, 8> outputs;
  SmallVector<Value> inputs;
  SmallVector<Value> outputs;
};
template <typename SingleInputPoolingOp>
static InputAndOutputIndices getInputAndOutputIndices(ArrayRef<Value> allIvs,
                                                      SingleInputPoolingOp op) {
  auto &b = ScopedContext::getBuilderRef();
  auto loc = ScopedContext::getLocation();
static InputAndOutputIndices
getInputAndOutputIndices(OpBuilder &b, Location loc, ArrayRef<Value> allIvs,
                         SingleInputPoolingOp op) {
  auto mapsRange = op.indexing_maps().template getAsRange<AffineMapAttr>();
  auto maps = llvm::to_vector<8>(
      llvm::map_range(mapsRange, [](AffineMapAttr a) { return a.getValue(); }));
@@ -125,19 +147,18 @@ static InputAndOutputIndices getInputAndOutputIndices(ArrayRef<Value> allIvs,
/// }
/// }
/// ```
template <typename IndexedValueType>
static void emitScalarImplementation(ArrayRef<Value> allIvs,
template <typename LoadOpTy, typename StoreOpTy>
static void emitScalarImplementation(OpBuilder &b, Location loc,
                                     ArrayRef<Value> allIvs,
                                     LinalgOp linalgOp) {
  assert(linalgOp.hasBufferSemantics() &&
         "expected linalg op with buffer semantics");
  auto &b = ScopedContext::getBuilderRef();
  auto loc = ScopedContext::getLocation();
  unsigned nInputs = linalgOp.getNumInputs();
  unsigned nOutputs = linalgOp.getNumOutputs();
  SmallVector<Value, 4> indexedValues;
  SmallVector<Value> indexedValues;
  indexedValues.reserve(nInputs + nOutputs);

  auto allIvsPlusDims = SmallVector<Value, 4>(allIvs.begin(), allIvs.end());
  auto allIvsPlusDims = SmallVector<Value>(allIvs.begin(), allIvs.end());

  // TODO: Avoid the loads if the corresponding argument of the
  // region has no uses.
@@ -145,46 +166,40 @@ static void emitScalarImplementation(ArrayRef<Value> allIvs,
  for (unsigned i = 0; i < nInputs; ++i) {
    auto indexing = makeCanonicalAffineApplies(
        b, loc, linalgOp.getInputIndexingMap(i), allIvsPlusDims);
    // Passing through IndexedValueType emits the proper load operation.
    indexedValues.push_back(IndexedValueType(linalgOp.getInput(i))(indexing));
    indexedValues.push_back(
        b.create<LoadOpTy>(loc, linalgOp.getInput(i), indexing));
  }
  // 1.b. Emit load from output views.
  for (unsigned i = 0; i < nOutputs; ++i) {
    auto indexing = makeCanonicalAffineApplies(
        b, loc, linalgOp.getOutputIndexingMap(i), allIvsPlusDims);
    // Passing through IndexedValueType emits the proper load operation.
    indexedValues.push_back(
        IndexedValueType(linalgOp.getOutputBuffer(i))(indexing));
        b.create<LoadOpTy>(loc, linalgOp.getOutputBuffer(i), indexing));
  }

  // TODO: When a region inliner exists, use it.
  // 2. Inline region, currently only works for a single basic block.
  // 3. Emit store.
  SmallVector<SmallVector<Value, 8>, 8> indexing;
  SmallVector<Value, 8> outputBuffers;
  SmallVector<SmallVector<Value>, 8> indexing;
  SmallVector<Value> outputBuffers;
  for (unsigned i = 0; i < nOutputs; ++i) {
    indexing.push_back(makeCanonicalAffineApplies(
        b, loc, linalgOp.getOutputIndexingMap(i), allIvsPlusDims));
    outputBuffers.push_back(linalgOp.getOutputBuffer(i));
  }
  inlineRegionAndEmitStore<IndexedValueType>(linalgOp, indexedValues, indexing,
                                             outputBuffers);
  inlineRegionAndEmitStore<LoadOpTy, StoreOpTy>(b, loc, linalgOp, indexedValues,
                                                indexing, outputBuffers);
}

// Create a padded view into the given `input` tensor using the 'indices'
// to access the tensor. `skipPadding` lists the dimensions for which no padding
// is needed e.g. the non-spatial dimensions for convolutions.
template <typename IndexedValueType>
Value getPaddedInput(Value input, ArrayRef<Value> indices,
                     ArrayRef<int> skipPadding, Value padValue) {
  // TODO: add a level of indirection to linalg.generic.

  IndexedValueType indexedInput(input);

  auto *context = ScopedContext::getContext();
  Value zeroIndex = std_constant_index(0);
  SmallVector<Value, 8> conds;
  SmallVector<Value, 8> clampedImIdx;
Value getPaddedInput(OpBuilder &b, Location loc, Value input,
                     ArrayRef<Value> indices, ArrayRef<int> skipPadding,
                     Value padValue) {
  Value zeroIndex = b.create<ConstantIndexOp>(loc, 0);
  SmallVector<Value> conds;
  SmallVector<Value> clampedImIdx;
  for (auto iter : llvm::enumerate(indices)) {
    int idx = iter.index();
    auto dim = iter.value();
@@ -193,29 +208,33 @@ Value getPaddedInput(Value input, ArrayRef<Value> indices,
      continue;
    }

    using edsc::op::sge;
    using edsc::op::slt;
    using edsc::op::operator||;
    Value leftOutOfBound = slt(dim, zeroIndex);
    Value leftOutOfBound =
        b.create<CmpIOp>(loc, CmpIPredicate::slt, dim, zeroIndex);
    if (conds.empty())
      conds.push_back(leftOutOfBound);
    else
      conds.push_back(conds.back() || leftOutOfBound);
    Value rightBound = memref_dim(input, idx);
    conds.push_back(conds.back() || (sge(dim, rightBound)));
      conds.push_back(b.create<OrOp>(loc, conds.back(), leftOutOfBound));
    Value rightBound = b.create<memref::DimOp>(loc, input, idx);
    Value rightOutOfBound =
        b.create<CmpIOp>(loc, CmpIPredicate::sge, dim, rightBound);
    conds.push_back(b.create<OrOp>(loc, conds.back(), rightOutOfBound));

    // When padding is involved, the indices will only be shifted to negative,
    // so having a max op is enough.
    auto maxMap = AffineMap::get(/*dimCount=*/1, 0,
                                 {getAffineDimExpr(/*position=*/0, context),
                                  getAffineConstantExpr(0, context)},
                                 context);
    clampedImIdx.push_back(affine_max(dim.getType(), maxMap, ValueRange{dim}));
    MLIRContext *ctx = input.getContext();
    AffineExpr m = getAffineDimExpr(/*position=*/0, ctx),
               zero = getAffineConstantExpr(0, ctx);
    AffineMap maxMap =
        AffineMap::inferFromExprList(ArrayRef<ArrayRef<AffineExpr>>{{m, zero}})
            .front();
    clampedImIdx.push_back(b.create<AffineMaxOp>(loc, maxMap, ValueRange{dim}));
  }

  Value readInput = indexedInput(clampedImIdx);
  return conds.empty() ? readInput
                       : (Value)std_select(conds.back(), padValue, readInput);
  Value readInput = b.create<memref::LoadOp>(loc, input, clampedImIdx);
  if (conds.empty())
    return readInput;

  return b.create<SelectOp>(loc, conds.back(), padValue, readInput);
}

namespace {
@@ -229,48 +248,47 @@ template <typename OpType> Attribute getPadValueAttr(Type type) {
}

template <> Attribute getPadValueAttr<PoolingMaxOp>(Type type) {
  auto &b = ScopedContext::getBuilderRef();
  if (auto floatType = type.dyn_cast<FloatType>()) {
    return b.getFloatAttr(
        floatType,
        APFloat::getInf(floatType.getFloatSemantics(), /*Negative*/ true));
    return OpBuilder(type.getContext())
        .getFloatAttr(floatType, APFloat::getInf(floatType.getFloatSemantics(),
                                                 /*Negative*/ true));
  }
  if (auto intType = type.dyn_cast<IntegerType>()) {
    unsigned width = intType.getWidth();
    // The select instruction used to lower the PoolingMin uses a signed
    // comparison, use a signed constant irrespective of the signedness of the
    // integer type.
    return b.getIntegerAttr(intType, APInt::getSignedMinValue(width));
    return OpBuilder(type.getContext())
        .getIntegerAttr(intType, APInt::getSignedMinValue(width));
  }
  llvm_unreachable("Unsupported data type for PoolingMaxOp");
  return {};
}

template <> Attribute getPadValueAttr<PoolingMinOp>(Type type) {
  auto &b = ScopedContext::getBuilderRef();
  if (auto floatType = type.dyn_cast<FloatType>()) {
    return b.getFloatAttr(floatType,
                          APFloat::getInf(floatType.getFloatSemantics()));
    return OpBuilder(type.getContext())
        .getFloatAttr(floatType,
                      APFloat::getInf(floatType.getFloatSemantics()));
  }
  if (auto intType = type.dyn_cast<IntegerType>()) {
    unsigned width = intType.getWidth();
    // The select instruction used to lower the PoolingMin uses a signed
    // comparison, use a signed constant irrespective of the signedness of the
    // integer type.
    return b.getIntegerAttr(intType, APInt::getSignedMaxValue(width));
    return OpBuilder(type.getContext())
        .getIntegerAttr(intType, APInt::getSignedMaxValue(width));
  }
  llvm_unreachable("Unsupported data type for PoolingMinOp");
  return {};
}

template <> Attribute getPadValueAttr<PoolingSumOp>(Type type) {
  auto &b = ScopedContext::getBuilderRef();
  return b.getZeroAttr(type);
  return OpBuilder(type.getContext()).getZeroAttr(type);
}

template <> Attribute getPadValueAttr<ConvOp>(Type type) {
  auto &b = ScopedContext::getBuilderRef();
  return b.getZeroAttr(type);
  return OpBuilder(type.getContext()).getZeroAttr(type);
}

} // namespace
@@ -284,38 +302,43 @@ static bool hasPadding(ConvOp convOp) {
  return false;
}

template <typename IndexedValueType>
static void emitScalarImplementation(ArrayRef<Value> allIvs, ConvOp convOp) {
template <typename LoadOpTy, typename StoreOpTy>
static void emitScalarImplementation(OpBuilder &b, Location loc,
                                     ArrayRef<Value> allIvs, ConvOp convOp) {
  assert(convOp.hasBufferSemantics() &&
         "expected linalg op with buffer semantics");
  auto &b = ScopedContext::getBuilderRef();
  auto loc = ScopedContext::getLocation();
  auto mapsRange = convOp.indexing_maps().getAsRange<AffineMapAttr>();
  auto maps = llvm::to_vector<8>(
      llvm::map_range(mapsRange, [](AffineMapAttr a) { return a.getValue(); }));
  SmallVector<Value, 8> fIdx(
      makeCanonicalAffineApplies(b, loc, maps[0], allIvs));
  SmallVector<Value, 8> imIdx(
      makeCanonicalAffineApplies(b, loc, maps[1], allIvs));
  SmallVector<Value, 8> oIdx(
      makeCanonicalAffineApplies(b, loc, maps[2], allIvs));
  SmallVector<Value> fIdx(makeCanonicalAffineApplies(b, loc, maps[0], allIvs));
  SmallVector<Value> imIdx(makeCanonicalAffineApplies(b, loc, maps[1], allIvs));
  SmallVector<Value> oIdx(makeCanonicalAffineApplies(b, loc, maps[2], allIvs));

  IndexedValueType F(convOp.filter()), O(convOp.output());
  Value filter = convOp.filter(), output = convOp.output();

  // Emit scalar form. Padded conv involves an affine.max in the memory access
  // which is not allowed by affine.load. Override to use an MemRefIndexedValue
  // when there is non-zero padding.
  if (hasPadding(convOp)) {
    Type type = convOp.input().getType().cast<MemRefType>().getElementType();
    Value padValue = std_constant(type, getPadValueAttr<ConvOp>(type));
    Value paddedInput = getPaddedInput<MemRefIndexedValue>(
        convOp.input(), imIdx,
        /* Only need to pad the window dimensions */
        {0, static_cast<int>(imIdx.size()) - 1}, padValue);
    O(oIdx) += F(fIdx) * paddedInput;
    Value padValue =
        b.create<ConstantOp>(loc, type, getPadValueAttr<ConvOp>(type));
    Value paddedInput =
        getPaddedInput(b, loc, convOp.input(), imIdx,
                       /* Only need to pad the window dimensions */
                       {0, static_cast<int>(imIdx.size()) - 1}, padValue);
    Value filterVal = b.create<LoadOpTy>(loc, filter, fIdx);
    Value mulVal = ArithBuilder(b, loc).mul(filterVal, paddedInput);
    Value outputVal = b.create<LoadOpTy>(loc, output, oIdx);
    Value addVal = ArithBuilder(b, loc).add(mulVal, outputVal);
    b.create<StoreOpTy>(loc, addVal, output, oIdx);
  } else {
    IndexedValueType I(convOp.input());
    O(oIdx) += F(fIdx) * I(imIdx);
    Value inputVal = b.create<LoadOpTy>(loc, convOp.input(), imIdx);
    Value filterVal = b.create<LoadOpTy>(loc, filter, fIdx);
    Value mulVal = ArithBuilder(b, loc).mul(filterVal, inputVal);
    Value outputVal = b.create<LoadOpTy>(loc, output, oIdx);
    Value addVal = ArithBuilder(b, loc).add(mulVal, outputVal);
    b.create<StoreOpTy>(loc, addVal, output, oIdx);
  }
}
@@ -327,55 +350,62 @@ template <typename PoolingOp> static bool hasPadding(PoolingOp poolingOp) {
  return false;
}

template <typename IndexedValueType, typename PoolingOp>
static Value getPoolingInput(PoolingOp op, ArrayRef<Value> inputIndices) {
template <typename LoadOpTy, typename StoreOpTy, typename PoolingOp>
static Value getPoolingInput(OpBuilder &b, Location loc, PoolingOp op,
                             ArrayRef<Value> inputIndices) {
  if (hasPadding(op)) {
    Type type =
        op.input().getType().template cast<MemRefType>().getElementType();
    Value padValue = std_constant(type, getPadValueAttr<PoolingOp>(type));
    return getPaddedInput<MemRefIndexedValue>(op.input(), inputIndices,
                                              /*Pad every dimension*/ {},
                                              padValue);
    Value padValue =
        b.create<ConstantOp>(loc, type, getPadValueAttr<PoolingOp>(type));
    return getPaddedInput(b, loc, op.input(), inputIndices,
                          /*Pad every dimension*/ {}, padValue);
  }
  IndexedValueType input(op.input());
  return input(inputIndices);
  return b.create<LoadOpTy>(loc, op.input(), inputIndices);
}

template <typename IndexedValueType, typename OpType>
void emitPoolingMinMaxScalarImplementation(ArrayRef<Value> allIvs, OpType op) {
  InputAndOutputIndices indices = getInputAndOutputIndices(allIvs, op);
  // Emit scalar form.
  IndexedValueType output(op.output());
  Value lhs = output(indices.outputs);
  Value rhs = getPoolingInput<IndexedValueType>(op, indices.inputs);
  using edsc::op::sgt;
  using edsc::op::slt;
  Value value = std::is_same<OpType, PoolingMinOp>()
                    ? std_select(slt(lhs, rhs), lhs, rhs)
                    : std_select(sgt(lhs, rhs), lhs, rhs);
  output(indices.outputs) = value;
template <typename LoadOpTy, typename StoreOpTy, typename OpType>
void emitPoolingMinMaxScalarImplementation(OpBuilder &b, Location loc,
                                           ArrayRef<Value> allIvs, OpType op) {
  InputAndOutputIndices indices = getInputAndOutputIndices(b, loc, allIvs, op);
  Value lhs = b.create<LoadOpTy>(loc, op.output(), indices.outputs);
  Value rhs = getPoolingInput<LoadOpTy, StoreOpTy>(b, loc, op, indices.inputs);
  Value value = llvm::TypeSwitch<Operation *, Value>(op)
                    .Case([&](PoolingMinOp poolingOp) {
                      return ArithBuilder(b, loc).select(
                          ArithBuilder(b, loc).slt(lhs, rhs), lhs, rhs);
                    })
                    .Case([&](PoolingMaxOp poolingOp) {
                      return ArithBuilder(b, loc).select(
                          ArithBuilder(b, loc).sgt(lhs, rhs), lhs, rhs);
                    })
                    .Default([&](auto) { return Value(); });
  b.create<StoreOpTy>(loc, value, op.output(), indices.outputs);
}

template <typename IndexedValueType>
static void emitScalarImplementation(ArrayRef<Value> allIvs, PoolingMaxOp op) {
  emitPoolingMinMaxScalarImplementation<IndexedValueType, PoolingMaxOp>(allIvs,
                                                                        op);
template <typename LoadOpTy, typename StoreOpTy>
static void emitScalarImplementation(OpBuilder &b, Location loc,
                                     ArrayRef<Value> allIvs, PoolingMaxOp op) {
  emitPoolingMinMaxScalarImplementation<LoadOpTy, StoreOpTy, PoolingMaxOp>(
      b, loc, allIvs, op);
}

template <typename IndexedValueType>
static void emitScalarImplementation(ArrayRef<Value> allIvs, PoolingMinOp op) {
  emitPoolingMinMaxScalarImplementation<IndexedValueType, PoolingMinOp>(allIvs,
                                                                        op);
template <typename LoadOpTy, typename StoreOpTy>
static void emitScalarImplementation(OpBuilder &b, Location loc,
                                     ArrayRef<Value> allIvs, PoolingMinOp op) {
  emitPoolingMinMaxScalarImplementation<LoadOpTy, StoreOpTy, PoolingMinOp>(
      b, loc, allIvs, op);
}

template <typename IndexedValueType>
static void emitScalarImplementation(ArrayRef<Value> allIvs, PoolingSumOp op) {
  auto indices = getInputAndOutputIndices(allIvs, op);
  IndexedValueType output(op.output());

  // Emit scalar form.
  output(indices.outputs) +=
      getPoolingInput<IndexedValueType>(op, indices.inputs);
template <typename LoadOpTy, typename StoreOpTy>
static void emitScalarImplementation(OpBuilder &b, Location loc,
                                     ArrayRef<Value> allIvs, PoolingSumOp op) {
  auto indices = getInputAndOutputIndices(b, loc, allIvs, op);
  Value inputVal =
      getPoolingInput<LoadOpTy, StoreOpTy>(b, loc, op, indices.inputs);
  Value outputVal = b.create<LoadOpTy>(loc, op.output(), indices.outputs);
  Value added = ArithBuilder(b, loc).add(outputVal, inputVal);
  b.create<StoreOpTy>(loc, added, op.output(), indices.outputs);
}

/// Replace the index operations in the body of the loop nest by the matching
@@ -413,8 +443,12 @@ static void replaceIndexOpsByInductionVariables(LinalgOp linalgOp,
template <typename LoopTy>
static Optional<LinalgLoops> linalgOpToLoopsImpl(PatternRewriter &rewriter,
                                                 LinalgOp linalgOp) {
  using IndexedValueTy = typename GenerateLoopNest<LoopTy>::IndexedValueTy;
  ScopedContext scope(rewriter, linalgOp.getLoc());
  using LoadOpTy =
      typename std::conditional<std::is_same<LoopTy, AffineForOp>::value,
                                AffineLoadOp, memref::LoadOp>::type;
  using StoreOpTy =
      typename std::conditional<std::is_same<LoopTy, AffineForOp>::value,
                                AffineStoreOp, memref::StoreOp>::type;

  // Canonicalize indexed_generic operations before lowering them to loops.
  if (isa<IndexedGenericOp>(linalgOp))
@@ -428,16 +462,18 @@ static Optional<LinalgLoops> linalgOpToLoopsImpl(PatternRewriter &rewriter,
  auto loopRanges = linalgOp.createLoopRanges(rewriter, linalgOp.getLoc());
  auto iteratorTypes = llvm::to_vector<4>(linalgOp.iterator_types().getValue());

  SmallVector<Value, 4> allIvs;
  SmallVector<Value> allIvs;
  GenerateLoopNest<LoopTy>::doit(
      loopRanges, linalgOp, iteratorTypes,
      [&](ValueRange ivs, ValueRange iterArgs) -> scf::ValueVector {
      rewriter, linalgOp.getLoc(), loopRanges, linalgOp, iteratorTypes,
      [&](OpBuilder &b, Location loc, ValueRange ivs,
          ValueRange iterArgs) -> scf::ValueVector {
        assert(iterArgs.empty() && "unexpected iterArgs");
        allIvs.append(ivs.begin(), ivs.end());
        llvm::TypeSwitch<Operation *>(linalgOp)
            .Case<ConvOp, PoolingMaxOp, PoolingMinOp, PoolingSumOp, LinalgOp>(
                [&](auto op) {
                  emitScalarImplementation<IndexedValueTy>(allIvs, op);
                  emitScalarImplementation<LoadOpTy, StoreOpTy>(b, loc, allIvs,
                                                                op);
                })
            .Default([&](Operation *op) { assert(false && "unexpected op"); });
        return scf::ValueVector{};
@@ -499,7 +535,7 @@ struct TiledLoopToSCFPattern : public OpRewritePattern<TiledLoopOp> {
        tiledLoop.upperBound(), tiledLoop.step(),
        [&](OpBuilder &builder, Location loc, ValueRange ivs) {
          // Move body without its terminator.
          SmallVector<Value, 16> newBlockArgs;
          SmallVector<Value> newBlockArgs;
          newBlockArgs.append(ivs.begin(), ivs.end());
          newBlockArgs.append(tiledLoop.inputs().begin(),
                              tiledLoop.inputs().end());
@@ -19,7 +19,6 @@
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/EDSC/Builders.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/AffineExpr.h"
@@ -225,69 +224,67 @@ tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ValueRange tileSizes,
  // 2. Create the tiled loops.
  LinalgOp res = op;
  SmallVector<Value, 4> ivs, tensorResults;
  GenerateLoopNest<LoopTy>::doit(
      loopRanges, op, iteratorTypes,
      [&](ValueRange localIvs, ValueRange iterArgs) -> scf::ValueVector {
        auto &b = ScopedContext::getBuilderRef();
        auto loc = ScopedContext::getLocation();
        ivs.assign(localIvs.begin(), localIvs.end());
  auto tiledLoopBodyBuilder = [&](OpBuilder &b, Location loc,
                                  ValueRange localIvs,
                                  ValueRange iterArgs) -> scf::ValueVector {
    ivs.assign(localIvs.begin(), localIvs.end());

        // When an `interchangeVector` is present, it has been applied to the
        // loop ranges and the iterator types. Apply its inverse to the
        // resulting loop `ivs` to match the op definition.
        SmallVector<Value, 4> interchangedIvs;
        if (!options.interchangeVector.empty())
          interchangedIvs = applyMapToValues(b, loc, invPermutationMap, ivs);
        else
          interchangedIvs.assign(ivs.begin(), ivs.end());
    // When an `interchangeVector` is present, it has been applied to the
    // loop ranges and the iterator types. Apply its inverse to the
    // resulting loop `ivs` to match the op definition.
    SmallVector<Value, 4> interchangedIvs;
    if (!options.interchangeVector.empty())
      interchangedIvs = applyMapToValues(b, loc, invPermutationMap, ivs);
    else
      interchangedIvs.assign(ivs.begin(), ivs.end());

        assert(op.getNumOutputTensors() == iterArgs.size() &&
               "num output tensors must match number of loop iter arguments");
    assert(op.getNumOutputTensors() == iterArgs.size() &&
           "num output tensors must match number of loop iter arguments");

        auto operands = llvm::to_vector<4>(op.getInputs());
        SmallVector<Value, 4> outputBuffers = op.getOutputBuffers();
        // TODO: thanks to simplifying assumption we do not need to worry about
        // order of output buffers and tensors: there is only ever one kind.
        assert(outputBuffers.empty() || iterArgs.empty());
        operands.append(outputBuffers.begin(), outputBuffers.end());
        operands.append(iterArgs.begin(), iterArgs.end());
        auto sizeBounds =
            applyMapToValues(b, loc, shapeSizesToLoopsMap, allShapeSizes);
        SmallVector<Value, 4> tiledOperands = makeTiledShapes(
            b, loc, op, operands, interchangedIvs, tileSizes, sizeBounds);
        auto nonShapedOperands = op.getAssumedNonShapedOperands();
        tiledOperands.append(nonShapedOperands.begin(),
                             nonShapedOperands.end());
    auto operands = llvm::to_vector<4>(op.getInputs());
    SmallVector<Value, 4> outputBuffers = op.getOutputBuffers();
    // TODO: thanks to simplifying assumption we do not need to worry about
    // order of output buffers and tensors: there is only ever one kind.
    assert(outputBuffers.empty() || iterArgs.empty());
    operands.append(outputBuffers.begin(), outputBuffers.end());
    operands.append(iterArgs.begin(), iterArgs.end());
    auto sizeBounds =
        applyMapToValues(b, loc, shapeSizesToLoopsMap, allShapeSizes);
    SmallVector<Value, 4> tiledOperands = makeTiledShapes(
        b, loc, op, operands, interchangedIvs, tileSizes, sizeBounds);
    auto nonShapedOperands = op.getAssumedNonShapedOperands();
    tiledOperands.append(nonShapedOperands.begin(), nonShapedOperands.end());

        // TODO: use an interface/adaptor to avoid leaking position in
        // `tiledOperands`.
        SmallVector<Type, 4> resultTensorTypes;
        for (OpOperand *opOperand : op.getOutputTensorsOpOperands())
          resultTensorTypes.push_back(
              tiledOperands[opOperand->getOperandNumber()].getType());
    // TODO: use an interface/adaptor to avoid leaking position in
    // `tiledOperands`.
    SmallVector<Type, 4> resultTensorTypes;
    for (OpOperand *opOperand : op.getOutputTensorsOpOperands())
      resultTensorTypes.push_back(
          tiledOperands[opOperand->getOperandNumber()].getType());

        res = op.clone(b, loc, resultTensorTypes, tiledOperands);
    res = op.clone(b, loc, resultTensorTypes, tiledOperands);

        // Insert a subtensor_insert for each output tensor.
        unsigned resultIdx = 0;
        for (OpOperand *opOperand : op.getOutputTensorsOpOperands()) {
          // TODO: use an interface/adaptor to avoid leaking position in
          // `tiledOperands`.
          Value outputTensor = tiledOperands[opOperand->getOperandNumber()];
          if (auto subtensor = outputTensor.getDefiningOp<SubTensorOp>()) {
            tensorResults.push_back(b.create<SubTensorInsertOp>(
                loc, subtensor.source().getType(), res->getResult(resultIdx),
                subtensor.source(), subtensor.offsets(), subtensor.sizes(),
                subtensor.strides(), subtensor.static_offsets(),
                subtensor.static_sizes(), subtensor.static_strides()));
          } else {
            tensorResults.push_back(res->getResult(resultIdx));
          }
          ++resultIdx;
        }
        return scf::ValueVector(tensorResults.begin(), tensorResults.end());
      },
      options.distribution);
    // Insert a subtensor_insert for each output tensor.
    unsigned resultIdx = 0;
    for (OpOperand *opOperand : op.getOutputTensorsOpOperands()) {
      // TODO: use an interface/adaptor to avoid leaking position in
      // `tiledOperands`.
      Value outputTensor = tiledOperands[opOperand->getOperandNumber()];
      if (auto subtensor = outputTensor.getDefiningOp<SubTensorOp>()) {
        tensorResults.push_back(b.create<SubTensorInsertOp>(
            loc, subtensor.source().getType(), res->getResult(resultIdx),
            subtensor.source(), subtensor.offsets(), subtensor.sizes(),
            subtensor.strides(), subtensor.static_offsets(),
            subtensor.static_sizes(), subtensor.static_strides()));
      } else {
        tensorResults.push_back(res->getResult(resultIdx));
      }
      ++resultIdx;
    }
    return scf::ValueVector(tensorResults.begin(), tensorResults.end());
  };
  GenerateLoopNest<LoopTy>::doit(b, op.getLoc(), loopRanges, op, iteratorTypes,
                                 tiledLoopBodyBuilder, options.distribution);

  // 3. Transform IndexOp results w.r.t. the tiling.
  transformIndexOps(b, res, ivs, loopIndexToRangeIndex);
@@ -16,7 +16,6 @@
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
#include "mlir/Dialect/SCF/EDSC/Builders.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
@@ -197,15 +196,14 @@ IntegerAttr getSmallestBoundingIndex(Value size) {
/// Specialization to build an scf "for" nest.
template <>
void GenerateLoopNest<scf::ForOp>::doit(
    ArrayRef<Range> loopRanges, LinalgOp linalgOp,
    OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
    ArrayRef<Attribute> iteratorTypes,
    function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
    function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
                                  ValueRange)>
        bodyBuilderFn,
    Optional<LinalgLoopDistributionOptions> distributionOptions) {
  auto iterArgInitValues = linalgOp.getOutputTensors();
  // Create procInfo so it dominates loops, if appropriate.
  OpBuilder &builder = edsc::ScopedContext::getBuilderRef();
  Location loc = edsc::ScopedContext::getLocation();

  SmallVector<ProcInfo, 4> procInfo;
  SmallVector<DistributionMethod, 0> distributionMethod;
  if (distributionOptions.hasValue()) {
@@ -219,13 +217,13 @@ void GenerateLoopNest<scf::ForOp>::doit(
    distributionMethod = distributionOptions->distributionMethod;
    if (distributionMethod.size() < parallelLoopRanges.size())
      parallelLoopRanges.resize(distributionMethod.size());
    procInfo = distributionOptions->procInfo(builder, loc, parallelLoopRanges);
    procInfo = distributionOptions->procInfo(b, loc, parallelLoopRanges);
  }

  SmallVector<Value, 4> lbs, ubs, steps;
  unpackRanges(loopRanges, lbs, ubs, steps);
  LoopNest loopNest =
      edsc::loopNestBuilder(lbs, ubs, steps, iterArgInitValues, bodyBuilderFn);
  LoopNest loopNest = mlir::scf::buildLoopNest(
      b, loc, lbs, ubs, steps, iterArgInitValues, bodyBuilderFn);

  if (!distributionOptions || loopNest.loops.empty())
    return;
@@ -246,9 +244,11 @@ void GenerateLoopNest<scf::ForOp>::doit(
/// Specialization to build affine "for" nest.
template <>
void GenerateLoopNest<AffineForOp>::doit(
    ArrayRef<Range> loopRanges, LinalgOp linalgOp,
    OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
    ArrayRef<Attribute> iteratorTypes,
    function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
    function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
                                  ValueRange)>
        bodyBuilderFn,
    Optional<LinalgLoopDistributionOptions>) {
  auto iterArgInitValues = linalgOp.getOutputTensors();
  assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");
@@ -264,38 +264,36 @@ void GenerateLoopNest<AffineForOp>::doit(
     constantSteps.push_back(op.getValue());
   }

-  auto bodyBuilderWithoutIterArgsFn = [&](ValueRange ivs) {
-    bodyBuilderFn(ivs, {});
-  };
-  edsc::affineLoopNestBuilder(lbs, ubs, constantSteps,
-                              bodyBuilderWithoutIterArgsFn);
+  mlir::buildAffineLoopNest(b, loc, lbs, ubs, constantSteps,
+                            [&](OpBuilder &b, Location loc, ValueRange ivs) {
+                              bodyBuilderFn(b, loc, ivs, {});
+                            });
 }

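The affine specialization follows the same pattern: `edsc::affineLoopNestBuilder` is replaced by `mlir::buildAffineLoopNest`, whose body callback also receives the nested builder and location. A small sketch, assuming the `ArrayRef<int64_t>` step overload; names are illustrative:

// Sketch: emit an affine.for nest with constant steps; the callback builds
// the body with the nested builder. `emitAffineNest` is a hypothetical name.
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/IR/Builders.h"

using namespace mlir;

static void emitAffineNest(OpBuilder &b, Location loc, ValueRange lbs,
                           ValueRange ubs, ArrayRef<int64_t> constantSteps) {
  buildAffineLoopNest(b, loc, lbs, ubs, constantSteps,
                      [&](OpBuilder &nested, Location nestedLoc,
                          ValueRange ivs) {
                        // Body IR goes here, e.g. loads/stores indexed by ivs.
                      });
}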
 /// Specialization to build an linalg.tiled_loop
 template <>
 void GenerateLoopNest<TiledLoopOp>::doit(
-    ArrayRef<Range> loopRanges, LinalgOp linalgOp,
+    OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
     ArrayRef<Attribute> iteratorTypes,
-    function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
+    function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
+                                  ValueRange)>
+        bodyBuilderFn,
     Optional<LinalgLoopDistributionOptions>) {
-  OpBuilder &builder = edsc::ScopedContext::getBuilderRef();
-  Location loc = edsc::ScopedContext::getLocation();
   SmallVector<ProcInfo, 2> procInfo;

   SmallVector<Value, 4> lbs, ubs, steps;
   unpackRanges(loopRanges, lbs, ubs, steps);

   auto wrappedBuilderFn = [&](OpBuilder &nestedBuilder, Location nestedLoc,
                               ValueRange ivs, ValueRange inputs,
                               ValueRange outputs) {
-    ScopedContext context(nestedBuilder, nestedLoc);
-    scf::ValueVector results = bodyBuilderFn(ivs, linalgOp.getOutputTensors());
+    scf::ValueVector results = bodyBuilderFn(nestedBuilder, nestedLoc, ivs,
+                                             linalgOp.getOutputTensors());
     nestedBuilder.create<linalg::YieldOp>(nestedLoc, results);
   };

-  auto tiledLoop = builder.create<TiledLoopOp>(
+  auto tiledLoop = b.create<TiledLoopOp>(
       loc, lbs, ubs, steps, linalgOp.getInputs(), linalgOp.getOutputs(),
-      builder.getArrayAttr(iteratorTypes), wrappedBuilderFn);
+      b.getArrayAttr(iteratorTypes), wrappedBuilderFn);

   // Replace inputs/outputs with the corresponding region args.
   auto isInsideTiledLoop = [&](OpOperand &operand) {
@@ -310,9 +308,9 @@ void GenerateLoopNest<TiledLoopOp>::doit(
 }

 /// Update the `lb`, `ub` and `step` to get per processor `lb`, `ub` and `step`.
-void updateBoundsForCyclicDistribution(OpBuilder &builder, Location loc,
-                                       Value procId, Value nprocs, Value &lb,
-                                       Value &ub, Value &step) {
+void updateBoundsForCyclicDistribution(OpBuilder &b, Location loc, Value procId,
+                                       Value nprocs, Value &lb, Value &ub,
+                                       Value &step) {
   using edsc::op::operator+;
   using edsc::op::operator*;
   lb = lb + (procId * step);
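`updateBoundsForCyclicDistribution` still computes the per-processor bounds with the Standard-dialect EDSC operator overloads (only the SCF EDSC layer is dropped here): `lb` becomes `lb + procId * step` and `step` becomes `nprocs * step`. For reference, a sketch of the same computation written with explicit builder calls; this is not what the patch does, only the EDSC-free equivalent under the Standard-dialect ops of this revision:

// Sketch (not in the patch): per-processor cyclic distribution with
// explicit ops instead of EDSC operator overloads:
//   lb   <- lb + procId * step
//   step <- nprocs * step
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/Builders.h"

using namespace mlir;

static void updateBoundsCyclic(OpBuilder &b, Location loc, Value procId,
                               Value nprocs, Value &lb, Value &step) {
  Value scaled = b.create<MulIOp>(loc, procId, step);
  lb = b.create<AddIOp>(loc, lb, scaled);
  step = b.create<MulIOp>(loc, nprocs, step);
}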
@@ -329,20 +327,22 @@ void updateBoundsForCyclicDistribution(OpBuilder &builder, Location loc,
 // TODO: this function can be made iterative instead. However, it
 // will have at most as many recursive calls as nested loops, which rarely
 // exceeds 10.
-static void
-generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
-                         ArrayRef<Attribute> iteratorTypes,
-                         function_ref<void(ValueRange)> bodyBuilderFn,
-                         SmallVectorImpl<Value> &ivStorage,
-                         ArrayRef<DistributionMethod> distributionMethod = {}) {
+static void generateParallelLoopNest(
+    OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs,
+    ValueRange steps, ArrayRef<Attribute> iteratorTypes,
+    function_ref<void(OpBuilder &, Location, ValueRange)> bodyBuilderFn,
+    SmallVectorImpl<Value> &ivStorage,
+    ArrayRef<DistributionMethod> distributionMethod = {}) {
   assert(lbs.size() == ubs.size());
   assert(lbs.size() == steps.size());
   assert(lbs.size() == iteratorTypes.size());

   // If there are no (more) loops to be generated, generate the body and be
   // done with it.
-  if (iteratorTypes.empty())
-    return bodyBuilderFn(ivStorage);
+  if (iteratorTypes.empty()) {
+    bodyBuilderFn(b, loc, ivStorage);
+    return;
+  }

   // Find the outermost parallel loops and drop their types from the list.
   unsigned nLoops = iteratorTypes.size();
@@ -353,27 +353,29 @@ generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
   // recurse. Note that we wouldn't have dropped anything from `iteratorTypes`
   // in this case.
   if (nOuterPar == 0) {
-    edsc::loopNestBuilder(lbs[0], ubs[0], steps[0], [&](Value iv) {
-      ivStorage.push_back(iv);
-      generateParallelLoopNest(lbs.drop_front(), ubs.drop_front(),
-                               steps.drop_front(), iteratorTypes.drop_front(),
-                               bodyBuilderFn, ivStorage, distributionMethod);
-    });
+    LoopNest singleLoop = buildLoopNest(
+        b, loc, lbs.take_front(), ubs.take_front(), steps.take_front(),
+        [&](OpBuilder &b, Location loc, ValueRange ivs) {
+          ivStorage.append(ivs.begin(), ivs.end());
+          generateParallelLoopNest(b, loc, lbs.drop_front(), ubs.drop_front(),
+                                   steps.drop_front(),
+                                   iteratorTypes.drop_front(), bodyBuilderFn,
+                                   ivStorage, distributionMethod);
+        });
     return;
   }
   if (distributionMethod.empty()) {
     // Generate a single parallel loop-nest operation for all outermost
     // parallel loops and recurse.
-    edsc::OperationBuilder<scf::ParallelOp>(
-        lbs.take_front(nOuterPar), ubs.take_front(nOuterPar),
+    b.create<scf::ParallelOp>(
+        loc, lbs.take_front(nOuterPar), ubs.take_front(nOuterPar),
         steps.take_front(nOuterPar),
         [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
-          edsc::ScopedContext context(nestedBuilder, nestedLoc);
           ivStorage.append(localIvs.begin(), localIvs.end());
           generateParallelLoopNest(
-              lbs.drop_front(nOuterPar), ubs.drop_front(nOuterPar),
-              steps.drop_front(nOuterPar), iteratorTypes.drop_front(nOuterPar),
-              bodyBuilderFn, ivStorage,
+              nestedBuilder, nestedLoc, lbs.drop_front(nOuterPar),
+              ubs.drop_front(nOuterPar), steps.drop_front(nOuterPar),
+              iteratorTypes.drop_front(nOuterPar), bodyBuilderFn, ivStorage,
               (distributionMethod.size() < nOuterPar)
                   ? ArrayRef<DistributionMethod>()
                   : distributionMethod.drop_front(nOuterPar));
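The outermost parallel loops are now created directly with `b.create<scf::ParallelOp>`; the body callback receives the nested builder and location, so no `edsc::ScopedContext` has to be installed before recursing. A reduced sketch of that pattern, with illustrative names:

// Sketch: create the outermost scf.parallel directly; inner loop generation
// would recurse from inside the callback, as the patch does.
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/IR/Builders.h"

using namespace mlir;

static void emitOuterParallel(OpBuilder &b, Location loc, ValueRange lbs,
                              ValueRange ubs, ValueRange steps) {
  b.create<scf::ParallelOp>(
      loc, lbs, ubs, steps,
      [&](OpBuilder &nested, Location nestedLoc, ValueRange ivs) {
        // Recurse into the remaining (non-parallel) dimensions here,
        // building with `nested` at `nestedLoc`.
      });
}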
@@ -394,15 +396,14 @@ generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
   case DistributionMethod::Cyclic: {
     // Generate a single parallel loop-nest operation for all outermost
     // parallel loops and recurse.
-    edsc::OperationBuilder<scf::ParallelOp>(
-        lbs.take_front(numProcessed), ubs.take_front(numProcessed),
+    b.create<scf::ParallelOp>(
+        loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
         steps.take_front(numProcessed),
         [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
-          edsc::ScopedContext context(nestedBuilder, nestedLoc);
           ivStorage.append(localIvs.begin(), localIvs.end());
           generateParallelLoopNest(
-              lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
-              steps.drop_front(numProcessed),
+              nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
+              ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
               iteratorTypes.drop_front(numProcessed), bodyBuilderFn, ivStorage,
               (distributionMethod.size() < numProcessed)
                   ? ArrayRef<DistributionMethod>()
@@ -418,12 +419,13 @@ generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
     for (unsigned i = 1; i < numProcessed; ++i)
       cond = cond && slt(lbs[i], ubs[i]);
     ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
-    edsc::conditionBuilder(cond, [&]() {
+    b.create<scf::IfOp>(loc, cond, [&](OpBuilder &b, Location loc) {
       generateParallelLoopNest(
-          lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
+          b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
           steps.drop_front(numProcessed),
           iteratorTypes.drop_front(numProcessed), bodyBuilderFn, ivStorage,
           distributionMethod.drop_front(numProcessed));
+      b.create<scf::YieldOp>(loc, ValueRange{});
     });
     return;
   }
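The guarded tail case swaps `edsc::conditionBuilder` for a directly built `scf.if`; note that the then-region callback emits its own `scf.yield`, matching the explicit yield added above. A reduced sketch, with an illustrative helper name:

// Sketch: zero-result scf.if built with an explicit builder; the callback
// creates the terminator itself, hence the explicit scf.yield.
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/IR/Builders.h"

using namespace mlir;

static void emitGuardedRegion(OpBuilder &b, Location loc, Value cond) {
  b.create<scf::IfOp>(loc, cond, [&](OpBuilder &then, Location thenLoc) {
    // ... guarded IR, e.g. the sequential remainder loops ...
    then.create<scf::YieldOp>(thenLoc, ValueRange{});
  });
}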
@@ -432,7 +434,7 @@ generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
   // with inner loop generation.
   ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
   generateParallelLoopNest(
-      lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
+      b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
       steps.drop_front(numProcessed), iteratorTypes.drop_front(numProcessed),
       bodyBuilderFn, ivStorage, distributionMethod.drop_front(numProcessed));
   return;
@@ -442,9 +444,11 @@ generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
 /// Specialization for generating a mix of parallel and sequential scf loops.
 template <>
 void GenerateLoopNest<scf::ParallelOp>::doit(
-    ArrayRef<Range> loopRanges, LinalgOp linalgOp,
+    OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
     ArrayRef<Attribute> iteratorTypes,
-    function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
+    function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
+                                  ValueRange)>
+        bodyBuilderFn,
     Optional<LinalgLoopDistributionOptions> distributionOptions) {
   auto iterArgInitValues = linalgOp.getOutputTensors();
   assert(iterArgInitValues.empty() && "unexpected ParallelOp init values");
@@ -466,7 +470,7 @@ void GenerateLoopNest<scf::ParallelOp>::doit(
   SmallVector<DistributionMethod, 0> distributionMethod;
   if (distributionOptions) {
     auto &options = distributionOptions.getValue();
-    OpBuilder &builder = edsc::ScopedContext::getBuilderRef();
+    OpBuilder &b = edsc::ScopedContext::getBuilderRef();
     Location loc = edsc::ScopedContext::getLocation();
     distributionMethod.assign(distributionOptions->distributionMethod.begin(),
                               distributionOptions->distributionMethod.end());
@@ -478,14 +482,14 @@ void GenerateLoopNest<scf::ParallelOp>::doit(
     if (distributionMethod.size() < parallelLoopRanges.size())
       parallelLoopRanges.resize(distributionMethod.size());
     SmallVector<ProcInfo, 2> procInfo =
-        options.procInfo(builder, loc, parallelLoopRanges);
+        options.procInfo(b, loc, parallelLoopRanges);
     unsigned index = 0;
     for (auto iteratorType : enumerate(iteratorTypes)) {
       if (index >= procInfo.size())
         break;
       if (isParallelIteratorType(iteratorType.value())) {
         unsigned i = iteratorType.index();
-        updateBoundsForCyclicDistribution(builder, loc, procInfo[index].procId,
+        updateBoundsForCyclicDistribution(b, loc, procInfo[index].procId,
                                           procInfo[index].nprocs, lbsStorage[i],
                                           ubsStorage[i], stepsStorage[i]);
         index++;
@@ -493,17 +497,17 @@ void GenerateLoopNest<scf::ParallelOp>::doit(
     }
   }
   ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage);
-  auto bodyBuilderWithoutIterArgsFn = [&](ValueRange ivs) {
-    bodyBuilderFn(ivs, {});
-  };
-  generateParallelLoopNest(lbs, ubs, steps, iteratorTypes,
-                           bodyBuilderWithoutIterArgsFn, ivs,
-                           distributionMethod);
+  generateParallelLoopNest(
+      b, loc, lbs, ubs, steps, iteratorTypes,
+      [&](OpBuilder &b, Location loc, ValueRange ivs) {
+        bodyBuilderFn(b, loc, ivs, {});
+      },
+      ivs, distributionMethod);

   assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
 }

-SmallVector<Value, 4> makeTiledShapes(OpBuilder &builder, Location loc,
+SmallVector<Value, 4> makeTiledShapes(OpBuilder &b, Location loc,
                                       LinalgOp linalgOp,
                                       ArrayRef<Value> tiledOperands,
                                       ValueRange ivs, ValueRange tileSizes,
@@ -529,7 +533,7 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &builder, Location loc,
     LLVM_DEBUG(llvm::dbgs() << "size: " << subShapeSizes.back() << "\n");
   }

-  MLIRContext *context = builder.getContext();
+  MLIRContext *context = b.getContext();
   SmallVector<Value, 4> tiledShapes;
   tiledShapes.reserve(tiledOperands.size());
   for (auto en : llvm::enumerate(tiledOperands)) {
@@ -555,10 +559,10 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &builder, Location loc,
     for (unsigned r = 0; r < rank; ++r) {
       LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for dim#" << r);
       if (!isTiled(map.getSubMap({r}), tileSizes)) {
-        offsets.push_back(builder.getIndexAttr(0));
+        offsets.push_back(b.getIndexAttr(0));
         Value dim = memref_dim(shapedOp, r).value;
         sizes.push_back(dim);
-        strides.push_back(builder.getIndexAttr(1));
+        strides.push_back(b.getIndexAttr(1));
         LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
         continue;
       }
@@ -568,10 +572,9 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &builder, Location loc,
       // (i.e. the op does not subsample, stepping occurs in the loop).
       auto m = map.getSubMap({r});
       LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: submap: " << map << "\n");
-      auto offset = applyMapToValues(builder, loc, m, lbs).front();
+      auto offset = applyMapToValues(b, loc, m, lbs).front();
       offsets.push_back(offset);
-      auto closedIntSize =
-          applyMapToValues(builder, loc, m, subShapeSizes).front();
+      auto closedIntSize = applyMapToValues(b, loc, m, subShapeSizes).front();
       // Resulting size needs to be made half open interval again.
       auto size = closedIntSize + std_constant_index(1);
       LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: raw size: " << size << "\n");
@@ -589,27 +592,29 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &builder, Location loc,
         AffineExpr dim0, dim1, dim2;
         bindDims(context, dim0, dim1, dim2);
         // Compute min(size, dim - offset) to avoid out-of-bounds accesses.
-        auto minMap = AffineMap::get(
-            /*dimCount=*/3, /*symbolCount=*/0, {dim0, dim1 - dim2}, context);
-        Value d = memref_dim(shapedOp, r);
+        AffineMap minMap =
+            AffineMap::inferFromExprList(
+                ArrayRef<ArrayRef<AffineExpr>>{{dim0, dim1 - dim2}})
+                .front();
+        Value d = b.create<memref::DimOp>(loc, shapedOp, r);
         SmallVector<Value, 4> operands{size, d, offset};
         fullyComposeAffineMapAndOperands(&minMap, &operands);
-        size = affine_min(builder.getIndexType(), minMap, operands);
+        size = b.create<AffineMinOp>(loc, b.getIndexType(), minMap, operands);
       }

       sizes.push_back(size);
       LLVM_DEBUG(llvm::dbgs()
                  << "makeTiledShapes: new offset: " << offset << "\n");
       LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: new size: " << size << "\n");
-      strides.push_back(builder.getIndexAttr(1));
+      strides.push_back(b.getIndexAttr(1));
     }

     if (shapedType.isa<MemRefType>())
-      tiledShapes.push_back(builder.create<memref::SubViewOp>(
-          loc, shapedOp, offsets, sizes, strides));
+      tiledShapes.push_back(
+          b.create<memref::SubViewOp>(loc, shapedOp, offsets, sizes, strides));
     else
       tiledShapes.push_back(
-          builder.create<SubTensorOp>(loc, shapedOp, offsets, sizes, strides));
+          b.create<SubTensorOp>(loc, shapedOp, offsets, sizes, strides));
   }

   return tiledShapes;
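The tile-size clamp `min(size, dim - offset)` is now materialized with an explicit `AffineMinOp` (and `memref::DimOp`) instead of the `affine_min`/`memref_dim` EDSC intrinsics. A sketch of the same construction, mirroring the added lines above; the helper name is illustrative:

// Sketch: clamp a tile size to min(size, dim(shapedOp, r) - offset) with an
// explicit builder, as the patch now does inside makeTiledShapes.
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
#include "llvm/ADT/SmallVector.h"

using namespace mlir;

static Value clampTileSize(OpBuilder &b, Location loc, Value size,
                           Value shapedOp, unsigned r, Value offset) {
  AffineExpr dim0, dim1, dim2;
  bindDims(b.getContext(), dim0, dim1, dim2);
  // The map {dim0, dim1 - dim2} is applied to (size, d, offset).
  AffineMap minMap = AffineMap::inferFromExprList(
                         ArrayRef<ArrayRef<AffineExpr>>{{dim0, dim1 - dim2}})
                         .front();
  Value d = b.create<memref::DimOp>(loc, shapedOp, r);
  SmallVector<Value, 4> operands{size, d, offset};
  fullyComposeAffineMapAndOperands(&minMap, &operands);
  return b.create<AffineMinOp>(loc, b.getIndexType(), minMap, operands);
}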
@@ -1,6 +1,5 @@
 add_mlir_dialect_library(MLIRSCF
   SCF.cpp
-  EDSC/Builders.cpp

   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/LoopOps
@@ -9,7 +8,6 @@ add_mlir_dialect_library(MLIRSCF
   MLIRSCFOpsIncGen

   LINK_LIBS PUBLIC
-  MLIREDSC
   MLIRIR
   MLIRLoopLikeInterface
   MLIRMemRef
@@ -1,135 +0,0 @@
-//===- Builders.cpp - MLIR Declarative Builder Classes --------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/SCF/EDSC/Builders.h"
-#include "mlir/Dialect/SCF/SCF.h"
-#include "mlir/IR/AffineExpr.h"
-#include "mlir/IR/AffineMap.h"
-
-using namespace mlir;
-using namespace mlir::edsc;
-
-mlir::scf::LoopNest
-mlir::edsc::loopNestBuilder(ValueRange lbs, ValueRange ubs, ValueRange steps,
-                            function_ref<void(ValueRange)> fun) {
-  // Delegates actual construction to scf::buildLoopNest by wrapping `fun` into
-  // the expected function interface.
-  assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
-  return mlir::scf::buildLoopNest(
-      ScopedContext::getBuilderRef(), ScopedContext::getLocation(), lbs, ubs,
-      steps, [&](OpBuilder &builder, Location loc, ValueRange ivs) {
-        ScopedContext context(builder, loc);
-        if (fun)
-          fun(ivs);
-      });
-}
-
-mlir::scf::LoopNest
-mlir::edsc::loopNestBuilder(Value lb, Value ub, Value step,
-                            function_ref<void(Value)> fun) {
-  // Delegates to the ValueRange-based version by wrapping the lambda.
-  auto wrapper = [&](ValueRange ivs) {
-    assert(ivs.size() == 1);
-    if (fun)
-      fun(ivs[0]);
-  };
-  return loopNestBuilder(ValueRange(lb), ValueRange(ub), ValueRange(step),
-                         wrapper);
-}
-
-mlir::scf::LoopNest mlir::edsc::loopNestBuilder(
-    Value lb, Value ub, Value step, ValueRange iterArgInitValues,
-    function_ref<scf::ValueVector(Value, ValueRange)> fun) {
-  // Delegates actual construction to scf::buildLoopNest by wrapping `fun` into
-  // the expected function interface.
-  assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
-  return mlir::scf::buildLoopNest(
-      ScopedContext::getBuilderRef(), ScopedContext::getLocation(), lb, ub,
-      step, iterArgInitValues,
-      [&](OpBuilder &builder, Location loc, ValueRange ivs, ValueRange args) {
-        assert(ivs.size() == 1 && "expected one induction variable");
-        ScopedContext context(builder, loc);
-        if (fun)
-          return fun(ivs[0], args);
-        return scf::ValueVector(iterArgInitValues.begin(),
-                                iterArgInitValues.end());
-      });
-}
-
-mlir::scf::LoopNest mlir::edsc::loopNestBuilder(
-    ValueRange lbs, ValueRange ubs, ValueRange steps,
-    ValueRange iterArgInitValues,
-    function_ref<scf::ValueVector(ValueRange, ValueRange)> fun) {
-  // Delegates actual construction to scf::buildLoopNest by wrapping `fun` into
-  // the expected function interface.
-  assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
-  return mlir::scf::buildLoopNest(
-      ScopedContext::getBuilderRef(), ScopedContext::getLocation(), lbs, ubs,
-      steps, iterArgInitValues,
-      [&](OpBuilder &builder, Location loc, ValueRange ivs, ValueRange args) {
-        ScopedContext context(builder, loc);
-        if (fun)
-          return fun(ivs, args);
-        return scf::ValueVector(iterArgInitValues.begin(),
-                                iterArgInitValues.end());
-      });
-}
-
-static std::function<void(OpBuilder &, Location)>
-wrapIfBody(function_ref<scf::ValueVector()> body, TypeRange expectedTypes) {
-  (void)expectedTypes;
-  return [=](OpBuilder &builder, Location loc) {
-    ScopedContext context(builder, loc);
-    scf::ValueVector returned = body();
-    assert(ValueRange(returned).getTypes() == expectedTypes &&
-           "'if' body builder returned values of unexpected type");
-    builder.create<scf::YieldOp>(loc, returned);
-  };
-}
-
-ValueRange
-mlir::edsc::conditionBuilder(TypeRange results, Value condition,
-                             function_ref<scf::ValueVector()> thenBody,
-                             function_ref<scf::ValueVector()> elseBody,
-                             scf::IfOp *ifOp) {
-  assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
-  assert(thenBody && "thenBody is mandatory");
-
-  auto newOp = ScopedContext::getBuilderRef().create<scf::IfOp>(
-      ScopedContext::getLocation(), results, condition,
-      wrapIfBody(thenBody, results), wrapIfBody(elseBody, results));
-  if (ifOp)
-    *ifOp = newOp;
-  return newOp.getResults();
-}
-
-static std::function<void(OpBuilder &, Location)>
-wrapZeroResultIfBody(function_ref<void()> body) {
-  return [=](OpBuilder &builder, Location loc) {
-    ScopedContext context(builder, loc);
-    body();
-    builder.create<scf::YieldOp>(loc);
-  };
-}
-
-ValueRange mlir::edsc::conditionBuilder(Value condition,
-                                        function_ref<void()> thenBody,
-                                        function_ref<void()> elseBody,
-                                        scf::IfOp *ifOp) {
-  assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
-  assert(thenBody && "thenBody is mandatory");
-
-  auto newOp = ScopedContext::getBuilderRef().create<scf::IfOp>(
-      ScopedContext::getLocation(), condition, wrapZeroResultIfBody(thenBody),
-      elseBody ? llvm::function_ref<void(OpBuilder &, Location)>(
-                     wrapZeroResultIfBody(elseBody))
-               : llvm::function_ref<void(OpBuilder &, Location)>(nullptr));
-  if (ifOp)
-    *ifOp = newOp;
-  return {};
-}
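The deleted helpers were thin adapters over `scf::buildLoopNest` and `scf::IfOp`; call sites now build these ops directly. As one example, a hedged sketch of the result-carrying `scf.if` pattern that stands in for the removed `conditionBuilder` overload with results (names are illustrative, not part of the patch):

// Sketch: an scf.if returning `resultTypes`, with both branches yielding
// values explicitly; the direct-builder equivalent of the removed
// result-carrying edsc::conditionBuilder.
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/IR/Builders.h"

using namespace mlir;

static ValueRange emitSelectLike(OpBuilder &b, Location loc,
                                 TypeRange resultTypes, Value cond,
                                 ValueRange thenVals, ValueRange elseVals) {
  auto ifOp = b.create<scf::IfOp>(
      loc, resultTypes, cond,
      [&](OpBuilder &nested, Location nestedLoc) {
        nested.create<scf::YieldOp>(nestedLoc, thenVals);
      },
      [&](OpBuilder &nested, Location nestedLoc) {
        nested.create<scf::YieldOp>(nestedLoc, elseVals);
      });
  return ifOp.getResults();
}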
@@ -24,18 +24,18 @@ func @matmul(%arg0: memref<?xi8>, %M: index, %N: index, %K: index) {
 // CHECK-SAME: [[M:arg[0-9]+]]: index
 // CHECK-SAME: [[N:arg[0-9]+]]: index
 // CHECK-SAME: [[K:arg[0-9]+]]: index
-// CHECK: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
-// CHECK: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
-// CHECK: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
-// CHECK: affine.for %{{.*}} = 0 to %{{.*}} {
-// CHECK: affine.for %{{.*}} = 0 to %{{.*}} {
-// CHECK: affine.for %{{.*}} = 0 to %{{.*}} {
-// CHECK-DAG: %[[a:.*]] = affine.load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
-// CHECK-DAG: %[[b:.*]] = affine.load %[[B]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECK: %[[A:.*]] = memref.view %{{.*}} : memref<?xi8> to memref<?x?xf32>
+// CHECK: %[[B:.*]] = memref.view %{{.*}} : memref<?xi8> to memref<?x?xf32>
+// CHECK: %[[C:.*]] = memref.view %{{.*}} : memref<?xi8> to memref<?x?xf32>
+// CHECK: affine.for
+// CHECK: affine.for
+// CHECK: affine.for
+// CHECK-DAG: %[[a:.*]] = affine.load %[[A]]{{.*}} : memref<?x?xf32>
+// CHECK-DAG: %[[b:.*]] = affine.load %[[B]]{{.*}} : memref<?x?xf32>
 // CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
-// CHECK-DAG: %[[c:.*]] = affine.load %[[C]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECK-DAG: %[[c:.*]] = affine.load %[[C]]{{.*}} : memref<?x?xf32>
 // CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32
-// CHECK: affine.store %[[res]], %[[C]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECK: affine.store %[[res]], %[[C]]{{.*}} : memref<?x?xf32>

 func @conv_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg2: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>) {
   linalg.conv(%arg0, %arg1, %arg2) {strides = [2]}: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>

@@ -49,12 +49,12 @@ func @conv_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1:
 // CHECK: %[[K:.*]] = memref.dim %arg0, %c2 : memref<?x?x?xf32, #[[$strided3D]]>
 // CHECK: %[[B:.*]] = memref.dim %arg1, %c0 : memref<?x?x?xf32, #[[$strided3D]]>
 // CHECK: %[[X0:.*]] = memref.dim %arg2, %c1 : memref<?x?x?xf32, #[[$strided3D]]>
-// CHECK: affine.for %{{.*}} = 0 to %[[B]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[X0]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[K]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[Q]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[Z0]] {
-// CHECK: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}})
+// CHECK: affine.for {{.*}}0 to %[[B]] {
+// CHECK: affine.for {{.*}}0 to %[[X0]] {
+// CHECK: affine.for {{.*}}0 to %[[K]] {
+// CHECK: affine.for {{.*}}0 to %[[Q]] {
+// CHECK: affine.for {{.*}}0 to %[[Z0]] {
+// CHECK: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]]{{.*}}
 // No padding needed here; only affine loads.
 // CHECK-NEXT: affine.load
 // CHECK-NEXT: affine.load

@@ -78,26 +78,26 @@ func @conv_padding(%arg0: memref<?x?x?x?xf32>,
 // CHECK: %[[B:.*]] = memref.dim %arg1, %c0 : memref<?x?x?x?xf32>
 // CHECK: %[[X0:.*]] = memref.dim %arg2, %c1 : memref<?x?x?x?xf32>
 // CHECK: %[[X1:.*]] = memref.dim %arg2, %c2 : memref<?x?x?x?xf32>
-// CHECK: affine.for %{{.*}} = 0 to %[[B]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[X0]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[X1]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[K]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[Q]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[Z0]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[Z1]] {
-// CHECK: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
-// CHECK: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
+// CHECK: affine.for {{.*}}0 to %[[B]] {
+// CHECK: affine.for {{.*}}0 to %[[X0]] {
+// CHECK: affine.for {{.*}}0 to %[[X1]] {
+// CHECK: affine.for {{.*}}0 to %[[K]] {
+// CHECK: affine.for {{.*}}0 to %[[Q]] {
+// CHECK: affine.for {{.*}}0 to %[[Z0]] {
+// CHECK: affine.for {{.*}}0 to %[[Z1]] {
+// CHECK: %[[SUM0:.*]] = affine.apply #{{.*}}
+// CHECK: %[[SUM1:.*]] = affine.apply #{{.*}}
 // CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[SUM0]])
 // CHECK: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[SUM1]])
 // Padded conv involves an affine.max in the memory access and this is not
 // allowed by affine.load. Use memref.load in such cases.
-// CHECK: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref<?x?x?x?xf32>
-// CHECK: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32
-// CHECK: %{{.*}} = affine.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
-// CHECK: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32
-// CHECK: %{{.*}} = affine.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
-// CHECK: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
-// CHECK: affine.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
+// CHECK: memref.load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref<?x?x?x?xf32>
+// CHECK: select {{.*}} : f32
+// CHECK: affine.load
+// CHECK: mulf {{.*}} : f32
+// CHECK: affine.load
+// CHECK: addf {{.*}} : f32
+// CHECK: affine.store

 //----------------------------------------------------------------------------//
 // Named ops to loops.

@@ -115,10 +115,10 @@ func @named_batch_matmul(%A: memref<?x?x?xf32>, %B: memref<?x?x?xf32>, %C: memre
 // CHECK: %[[M:.*]] = memref.dim %[[mA]], %c1 : memref<?x?x?xf32>
 // CHECK: %[[K:.*]] = memref.dim %[[mA]], %c2 : memref<?x?x?xf32>
 // CHECK: %[[N:.*]] = memref.dim %[[mB]], %c2 : memref<?x?x?xf32>
-// CHECK: affine.for %[[b:.*]] = 0 to %[[B]] {
-// CHECK: affine.for %[[m:.*]] = 0 to %[[M]] {
-// CHECK: affine.for %[[n:.*]] = 0 to %[[N]] {
-// CHECK: affine.for %[[k:.*]] = 0 to %[[K]] {
+// CHECK: affine.for %[[b:.*]] = {{.*}}0 to %[[B]] {
+// CHECK: affine.for %[[m:.*]] = {{.*}}0 to %[[M]] {
+// CHECK: affine.for %[[n:.*]] = {{.*}}0 to %[[N]] {
+// CHECK: affine.for %[[k:.*]] = {{.*}}0 to %[[K]] {
 // CHECK: %[[va:.*]] = affine.load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref<?x?x?xf32>
 // CHECK: %[[vb:.*]] = affine.load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref<?x?x?xf32>
 // CHECK: %[[vc:.*]] = affine.load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32>
File diff suppressed because it is too large