[mlir][MemRef] NFC - Drop MemRef EDSC usage
Drop the MemRef dialect EDSC subdirectory and update all uses.

Differential Revision: https://reviews.llvm.org/D102868
parent 3eb12b0ae1
commit e3cf7c88c4
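The mechanical pattern behind most hunks below: EDSC intrinsics such as memref_load resolved their insertion point and Location from an ambient ScopedContext, whereas the replacement spells both out through an OpBuilder. A minimal before/after sketch (illustrative only; `b`, `loc`, `buffer`, and `ivs` are assumed to be in scope):

    // Before: EDSC intrinsics pick up builder and location implicitly.
    Value v = memref_load(buffer, ivs);
    memref_store(v, buffer, ivs);

    // After: builder and location are passed explicitly at each call site.
    Value v = b.create<memref::LoadOp>(loc, buffer, ivs);
    b.create<memref::StoreOp>(loc, v, buffer, ivs);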
@@ -12,7 +12,6 @@
 #include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
 #include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h"
 #include "mlir/Dialect/Linalg/IR/LinalgOps.h"
-#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
@@ -1,86 +0,0 @@
-//===- Intrinsics.h - MLIR EDSC Intrinsics for MemRefOps --------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#ifndef MLIR_DIALECT_MEMREF_EDSC_INTRINSICS_H_
-#define MLIR_DIALECT_MEMREF_EDSC_INTRINSICS_H_
-
-#include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/Dialect/StandardOps/EDSC/Builders.h"
-#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
-#include "mlir/EDSC/Builders.h"
-
-#include "llvm/ADT/SmallVector.h"
-
-namespace mlir {
-namespace edsc {
-namespace intrinsics {
-
-using memref_alloc = ValueBuilder<memref::AllocOp>;
-using memref_alloca = ValueBuilder<memref::AllocaOp>;
-using memref_cast = ValueBuilder<memref::CastOp>;
-using memref_dealloc = OperationBuilder<memref::DeallocOp>;
-using memref_dim = ValueBuilder<memref::DimOp>;
-using memref_load = ValueBuilder<memref::LoadOp>;
-using memref_store = OperationBuilder<memref::StoreOp>;
-using memref_sub_view = ValueBuilder<memref::SubViewOp>;
-using memref_tensor_load = ValueBuilder<memref::TensorLoadOp>;
-using memref_tensor_store = OperationBuilder<memref::TensorStoreOp>;
-using memref_view = ValueBuilder<memref::ViewOp>;
-
-} // namespace intrinsics
-} // namespace edsc
-} // namespace mlir
-
-static inline ::llvm::SmallVector<mlir::Value, 8>
-getMemRefSizes(mlir::Value memRef) {
-  using namespace mlir;
-  using namespace mlir::edsc;
-  using namespace mlir::edsc::intrinsics;
-  mlir::MemRefType memRefType = memRef.getType().cast<mlir::MemRefType>();
-  assert(isStrided(memRefType) && "Expected strided MemRef type");
-
-  SmallVector<mlir::Value, 8> res;
-  res.reserve(memRefType.getShape().size());
-  const auto &shape = memRefType.getShape();
-  for (unsigned idx = 0, n = shape.size(); idx < n; ++idx) {
-    if (shape[idx] == -1)
-      res.push_back(memref_dim(memRef, idx));
-    else
-      res.push_back(std_constant_index(shape[idx]));
-  }
-  return res;
-}
-
-namespace mlir {
-namespace edsc {
-
-/// A MemRefBoundsCapture represents the information required to step through a
-/// MemRef. It has placeholders for non-contiguous tensors that fit within the
-/// Fortran subarray model.
-/// At the moment it can only capture a MemRef with an identity layout map.
-// TODO: Support MemRefs with layoutMaps.
-class MemRefBoundsCapture : public BoundsCapture {
-public:
-  explicit MemRefBoundsCapture(Value v) {
-    auto memrefSizeValues = getMemRefSizes(v);
-    for (auto s : memrefSizeValues) {
-      lbs.push_back(intrinsics::std_constant_index(0));
-      ubs.push_back(s);
-      steps.push_back(1);
-    }
-  }
-
-  unsigned fastestVarying() const { return rank() - 1; }
-
-private:
-  Value base;
-};
-
-} // namespace edsc
-} // namespace mlir
-
-#endif // MLIR_DIALECT_MEMREF_EDSC_INTRINSICS_H_
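Nothing replaces this deleted header one-for-one; each caller now materializes the bounds it needs. Roughly, the static/dynamic split in getMemRefSizes collapses into createOrFold, which folds memref.dim on a statically known dimension to a constant on its own. A hedged sketch of the equivalent logic with a plain builder (the function name and signature are illustrative, not part of this commit):

    // Collect one size Value per dimension of a ranked memref.
    // createOrFold is expected to fold static dimensions to constants.
    SmallVector<Value, 8> getMemRefSizes(OpBuilder &b, Location loc,
                                         Value memRef) {
      auto type = memRef.getType().cast<MemRefType>();
      SmallVector<Value, 8> sizes;
      for (int64_t i = 0, rank = type.getRank(); i < rank; ++i)
        sizes.push_back(b.createOrFold<memref::DimOp>(loc, memRef, i));
      return sizes;
    }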
@@ -63,7 +63,7 @@ public:
   /// Create an operation of specific op type at the current insertion point and
   /// location.
   template <typename OpTy, typename... Args>
-  OpTy create(Args &&... args) {
+  OpTy create(Args &&...args) {
     return OpBuilder::create<OpTy>(curLoc, std::forward<Args>(args)...);
   }
 
@@ -71,7 +71,7 @@ public:
   /// and immediately try to fold it. This functions populates 'results' with
   /// the results after folding the operation.
   template <typename OpTy, typename... Args>
-  void createOrFold(llvm::SmallVectorImpl<Value> &results, Args &&... args) {
+  void createOrFold(llvm::SmallVectorImpl<Value> &results, Args &&...args) {
     OpBuilder::createOrFold<OpTy>(results, curLoc, std::forward<Args>(args)...);
   }
 
@@ -79,7 +79,7 @@ public:
   template <typename OpTy, typename... Args>
   typename std::enable_if<OpTy::template hasTrait<mlir::OpTrait::OneResult>(),
                           Value>::type
-  createOrFold(Args &&... args) {
+  createOrFold(Args &&...args) {
     return OpBuilder::createOrFold<OpTy>(curLoc, std::forward<Args>(args)...);
   }
 
@@ -87,7 +87,7 @@ public:
   template <typename OpTy, typename... Args>
   typename std::enable_if<OpTy::template hasTrait<mlir::OpTrait::ZeroResult>(),
                           OpTy>::type
-  createOrFold(Args &&... args) {
+  createOrFold(Args &&...args) {
     return OpBuilder::createOrFold<OpTy>(curLoc, std::forward<Args>(args)...);
   }
 
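The `&&... args` to `&&...args` tweaks above are incidental reformatting; the relevant piece is ImplicitLocOpBuilder itself, which this commit uses as the replacement for EDSC's ScopedContext: it captures one Location and forwards it to every create/createOrFold call. A short usage sketch under that assumption (`loc`, `block`, and `buffer` are illustrative names):

    // Pin the location once; subsequent creates omit it.
    ImplicitLocOpBuilder b = ImplicitLocOpBuilder::atBlockBegin(loc, &block);
    Value zero = b.create<ConstantIndexOp>(0);
    Value size = b.createOrFold<memref::DimOp>(buffer, zero);
    b.create<gpu::BarrierOp>(); // zero-result ops work the same way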
@@ -13,9 +13,9 @@
 
 #include "mlir/Dialect/GPU/MemoryPromotion.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
-#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
 #include "mlir/Dialect/SCF/SCF.h"
-#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
+#include "mlir/IR/ImplicitLocOpBuilder.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/LoopUtils.h"
 
@@ -41,45 +41,46 @@ static StringRef getDimName(unsigned dim) {
 /// GPUDialect::getNumWorkgroupDimensions() loops, completing the nest with
 /// single-iteration loops. Maps the innermost loops to thread dimensions, in
 /// reverse order to enable access coalescing in the innermost loop.
-static void insertCopyLoops(OpBuilder &b, Location loc,
-                            MemRefBoundsCapture &bounds, Value from, Value to) {
-  // Create EDSC handles for bounds.
-  unsigned rank = bounds.rank();
+static void insertCopyLoops(ImplicitLocOpBuilder &b, Value from, Value to) {
+  auto memRefType = from.getType().cast<MemRefType>();
+  auto rank = memRefType.getRank();
+
   SmallVector<Value, 4> lbs, ubs, steps;
+  Value zero = b.create<ConstantIndexOp>(0);
+  Value one = b.create<ConstantIndexOp>(1);
 
   // Make sure we have enough loops to use all thread dimensions, these trivial
   // loops should be outermost and therefore inserted first.
   if (rank < GPUDialect::getNumWorkgroupDimensions()) {
     unsigned extraLoops = GPUDialect::getNumWorkgroupDimensions() - rank;
-    Value zero = std_constant_index(0);
-    Value one = std_constant_index(1);
     lbs.resize(extraLoops, zero);
     ubs.resize(extraLoops, one);
     steps.resize(extraLoops, one);
   }
 
   // Add existing bounds.
-  lbs.append(bounds.getLbs().begin(), bounds.getLbs().end());
-  ubs.append(bounds.getUbs().begin(), bounds.getUbs().end());
-
-  // Emit constant operations for steps.
+  lbs.append(rank, zero);
+  ubs.reserve(lbs.size());
   steps.reserve(lbs.size());
-  llvm::transform(bounds.getSteps(), std::back_inserter(steps),
-                  [](int64_t step) { return std_constant_index(step); });
+  for (auto idx = 0; idx < rank; ++idx) {
+    ubs.push_back(
+        b.createOrFold<memref::DimOp>(from, b.create<ConstantIndexOp>(idx)));
+    steps.push_back(one);
+  }
 
   // Obtain thread identifiers and block sizes, necessary to map to them.
   auto indexType = b.getIndexType();
   SmallVector<Value, 3> threadIds, blockDims;
   for (unsigned i = 0; i < 3; ++i) {
     auto dimName = b.getStringAttr(getDimName(i));
-    threadIds.push_back(b.create<gpu::ThreadIdOp>(loc, indexType, dimName));
-    blockDims.push_back(b.create<gpu::BlockDimOp>(loc, indexType, dimName));
+    threadIds.push_back(b.create<gpu::ThreadIdOp>(indexType, dimName));
+    blockDims.push_back(b.create<gpu::BlockDimOp>(indexType, dimName));
   }
 
   // Produce the loop nest with copies.
   SmallVector<Value, 8> ivs(lbs.size());
   mlir::scf::buildLoopNest(
-      b, loc, lbs, ubs, steps,
+      b, b.getLoc(), lbs, ubs, steps,
       [&](OpBuilder &b, Location loc, ValueRange loopIvs) {
         ivs.assign(loopIvs.begin(), loopIvs.end());
         auto activeIvs = llvm::makeArrayRef(ivs).take_back(rank);
@@ -142,17 +143,13 @@ static void insertCopies(Region &region, Location loc, Value from, Value to) {
   assert(llvm::hasSingleElement(region) &&
          "unstructured control flow not supported");
 
-  OpBuilder b(region.getContext());
-  b.setInsertionPointToStart(&region.front());
-
-  ScopedContext edscContext(b, loc);
-  MemRefBoundsCapture fromBoundsCapture(from);
-  insertCopyLoops(b, loc, fromBoundsCapture, from, to);
-  b.create<gpu::BarrierOp>(loc);
+  auto b = ImplicitLocOpBuilder::atBlockBegin(loc, &region.front());
+  insertCopyLoops(b, from, to);
+  b.create<gpu::BarrierOp>();
 
   b.setInsertionPoint(&region.front().back());
-  b.create<gpu::BarrierOp>(loc);
-  insertCopyLoops(b, loc, fromBoundsCapture, to, from);
+  b.create<gpu::BarrierOp>();
+  insertCopyLoops(b, to, from);
 }
 
 /// Promotes a function argument to workgroup memory in the given function. The
@@ -18,7 +18,6 @@
 #include "mlir/Dialect/Linalg/Passes.h"
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Dialect/Linalg/Utils/Utils.h"
-#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
@@ -114,13 +113,13 @@ getShapeDefiningLoopRange(LinalgOp op, unsigned loopDepth,
 /// Fuses the producer by cloning the `producer`. The `fusedLoopsAndRanges`
 /// provides the loop range information for the fused loops. The rest are
 /// obtained from the producer itself, since they are not tiled + fused.
-static LinalgOp fuse(OpBuilder &builder, LinalgOp producer,
+static LinalgOp fuse(OpBuilder &b, LinalgOp producer,
                      const DenseMap<unsigned, Range> &fusedLoopsAndRanges) {
   SmallVector<Value, 8> ivs, tileSizes, sizeBounds;
   SmallVector<Range, 8> loopRanges;
-  auto zero = std_constant_index(0);
-  auto one = std_constant_index(1);
   Location loc = producer.getLoc();
+  auto zero = b.create<ConstantIndexOp>(loc, 0);
+  auto one = b.create<ConstantIndexOp>(loc, 1);
 
   for (unsigned i = 0, e = producer.getNumLoops(); i < e; ++i) {
     auto it = fusedLoopsAndRanges.find(i);
@@ -133,7 +132,8 @@ static LinalgOp fuse(OpBuilder &builder, LinalgOp producer,
                              << loopRanges.back() << "\n");
     } else {
       auto shapeDim = getShapeDefiningLoopRange(producer, i);
-      Value dim = memref_dim(shapeDim.shape, shapeDim.dimension);
+      Value dim = b.createOrFold<memref::DimOp>(loc, shapeDim.shape,
+                                                shapeDim.dimension);
       tileSizes.push_back(zero);
       sizeBounds.push_back(dim);
       loopRanges.push_back(Range{zero, dim, one});
@@ -147,8 +147,8 @@ static LinalgOp fuse(OpBuilder &builder, LinalgOp producer,
 
   // Compute subranges for all tensor input/output operands.
   auto tiledOperands = llvm::to_vector<4>(producer.getShapedOperands());
-  clonedShapes.append(makeTiledShapes(builder, loc, producer, tiledOperands,
-                                      ivs, tileSizes, sizeBounds));
+  clonedShapes.append(makeTiledShapes(b, loc, producer, tiledOperands, ivs,
+                                      tileSizes, sizeBounds));
 
   // Append the other operands.
   auto operands = producer.getAssumedNonShapedOperands();
@@ -172,7 +172,7 @@ static LinalgOp fuse(OpBuilder &builder, LinalgOp producer,
         staticStridesVector));
   }
 
-  Operation *clonedOp = producer.clone(builder, loc, resultTypes, clonedShapes);
+  Operation *clonedOp = producer.clone(b, loc, resultTypes, clonedShapes);
   // When the producer has index semantics, we have to transform the indices of
   // the producer according to the tiling of the consumer, i.e. offset them by
   // the values computed in `loopRanges`.
@@ -184,11 +184,11 @@ static LinalgOp fuse(OpBuilder &builder, LinalgOp producer,
   // Shift all indices by the tile offset.
   Block &block = clonedOp->getRegion(0).front();
   for (IndexOp indexOp : block.getOps<IndexOp>()) {
-    OpBuilder::InsertionGuard g(builder);
-    builder.setInsertionPointAfter(indexOp);
+    OpBuilder::InsertionGuard g(b);
+    b.setInsertionPointAfter(indexOp);
     AffineExpr index, offset;
-    bindDims(builder.getContext(), index, offset);
-    AffineApplyOp applyOp = builder.create<AffineApplyOp>(
+    bindDims(b.getContext(), index, offset);
+    AffineApplyOp applyOp = b.create<AffineApplyOp>(
         indexOp.getLoc(), index + offset,
         ValueRange{indexOp.getResult(), loopRanges[indexOp.dim()].offset});
     indexOp.getResult().replaceAllUsesExcept(applyOp, applyOp);
@@ -770,17 +770,18 @@ FusableOpDependencesTy mlir::linalg::findAllFusableDependences(
 
 /// Tile the fused loops in the root operation, by setting the tile sizes for
 /// all other loops to zero (those will be tiled later).
-static Optional<TiledLinalgOp> tileRootOperation(
-    OpBuilder &builder, LinalgOp op, ArrayRef<Value> tileSizeVector,
-    const LinalgTilingOptions &options, const std::set<unsigned> &fusedLoops) {
+static Optional<TiledLinalgOp>
+tileRootOperation(OpBuilder &b, LinalgOp op, ArrayRef<Value> tileSizeVector,
+                  const LinalgTilingOptions &options,
+                  const std::set<unsigned> &fusedLoops) {
   SmallVector<Value, 4> tileSizes(tileSizeVector.begin(), tileSizeVector.end());
-  auto zero = std_constant_index(0);
+  auto zero = b.create<ConstantIndexOp>(op.getLoc(), 0);
   for (unsigned i = 0, e = tileSizes.size(); i != e; ++i)
     if (!fusedLoops.count(i))
       tileSizes[i] = zero;
   LinalgTilingOptions tileFusedLoopsOptions = options;
   tileFusedLoopsOptions.setTileSizes(tileSizes);
-  return tileLinalgOp(builder, op, tileFusedLoopsOptions);
+  return tileLinalgOp(b, op, tileFusedLoopsOptions);
 }
 
 /// Fuse the operations in `fusionCandidates` with `tiledOp`. Latter is expected
@@ -788,19 +789,19 @@ static Optional<TiledLinalgOp> tileRootOperation(
 /// `fusionCandidates`, i.e. move the operation within the inter-tile loops of
 /// `tiledOp`.
 static SmallVector<LinalgOp, 1>
-fuseOperations(OpBuilder &builder, LinalgOp rootOp, TiledLinalgOp tiledLinalgOp,
+fuseOperations(OpBuilder &b, LinalgOp rootOp, TiledLinalgOp tiledLinalgOp,
                ArrayRef<LinalgOp> fusionCandidates,
                const FusableOpDependencesTy &fusableDependences,
                const std::set<unsigned> &fusedLoops) {
   LinalgOp tiledOp = tiledLinalgOp.op;
-  OpBuilder::InsertionGuard guard(builder);
-  builder.setInsertionPoint(tiledOp);
+  OpBuilder::InsertionGuard guard(b);
+  b.setInsertionPoint(tiledOp);
 
   DenseMap<unsigned, Range> fusedLoopsAndRanges;
   for (unsigned loop : fusedLoops) {
     ShapeDimension shapeDim = getShapeDefiningLoopRange(tiledOp, loop, true);
     fusedLoopsAndRanges[loop] = getRangeFromOperandShape(
-        builder, tiledOp.getLoc(), shapeDim.shape, shapeDim.dimension);
+        b, tiledOp.getLoc(), shapeDim.shape, shapeDim.dimension);
   }
 
   SmallVector<LinalgOp, 1> fusedOps(fusionCandidates.size());
@@ -808,13 +809,12 @@ fuseOperations(OpBuilder &builder, LinalgOp rootOp, TiledLinalgOp tiledLinalgOp,
   origOpToFusedOp[rootOp.getOperation()] = tiledOp;
   for (auto candidate : enumerate(llvm::reverse(fusionCandidates))) {
     LinalgOp origOp = candidate.value();
-    LinalgOp fusedOp = fuse(builder, origOp, fusedLoopsAndRanges);
+    LinalgOp fusedOp = fuse(b, origOp, fusedLoopsAndRanges);
     origOpToFusedOp[origOp.getOperation()] = fusedOp;
     fusedOps[fusionCandidates.size() - candidate.index() - 1] = fusedOp;
 
-    // Prepare the builder for the next insertion point.
-    auto guard =
-        llvm::make_scope_exit([&]() { builder.setInsertionPoint(fusedOp); });
+    // Prepare the b for the next insertion point.
+    auto guard = llvm::make_scope_exit([&]() { b.setInsertionPoint(fusedOp); });
     if (!origOp.hasTensorSemantics())
       continue;
 
@@ -860,7 +860,7 @@ fuseOperations(OpBuilder &builder, LinalgOp rootOp, TiledLinalgOp tiledLinalgOp,
 
 template <typename LoopType>
 static Optional<TiledAndFusedLinalgOps>
-tileAndFuseLinalgOpsImpl(OpBuilder &builder, ArrayRef<LinalgOp> ops,
+tileAndFuseLinalgOpsImpl(OpBuilder &b, ArrayRef<LinalgOp> ops,
                          const LinalgDependenceGraph &dependenceGraph,
                          const LinalgTilingOptions &tilingOptions) {
   if (ops.size() < 2)
@@ -884,9 +884,9 @@ tileAndFuseLinalgOpsImpl(OpBuilder &builder, ArrayRef<LinalgOp> ops,
     return llvm::None;
   }
 
-  OpBuilder::InsertionGuard guard(builder);
-  builder.setInsertionPoint(rootOp);
-  ScopedContext scope(builder, rootOp.getLoc());
+  OpBuilder::InsertionGuard guard(b);
+  b.setInsertionPoint(rootOp);
+  ScopedContext scope(b, rootOp.getLoc());
 
   // Find all the producers.
   LLVM_DEBUG(llvm::dbgs() << "findAllFusableDependences\n");
@@ -911,9 +911,9 @@ tileAndFuseLinalgOpsImpl(OpBuilder &builder, ArrayRef<LinalgOp> ops,
 
   // Tile the fused loops in the last operation in the list.
   SmallVector<Value, 4> tileSizeVector =
-      tilingOptions.tileSizeComputationFunction(builder, rootOp);
+      tilingOptions.tileSizeComputationFunction(b, rootOp);
   Optional<TiledLinalgOp> tiledRootOp = tileRootOperation(
-      builder, rootOp, tileSizeVector, tilingOptions, ret.fusedLoopDims);
+      b, rootOp, tileSizeVector, tilingOptions, ret.fusedLoopDims);
   if (!tiledRootOp) {
     rootOp.emitRemark("failed to tile the fused loops");
     return llvm::None;
@@ -922,24 +922,23 @@ tileAndFuseLinalgOpsImpl(OpBuilder &builder, ArrayRef<LinalgOp> ops,
   ret.fusedLoops.assign(tiledRootOp->loops.begin(), tiledRootOp->loops.end());
 
   // Fuse the other operations into the fused inter-tile loops produced above.
-  ret.fusedProducers =
-      fuseOperations(builder, rootOp, *tiledRootOp, ops.drop_back(),
-                     fusableDependences, ret.fusedLoopDims);
+  ret.fusedProducers = fuseOperations(b, rootOp, *tiledRootOp, ops.drop_back(),
+                                      fusableDependences, ret.fusedLoopDims);
 
   return ret;
 }
 
 Optional<TiledAndFusedLinalgOps>
-mlir::linalg::tileAndFuseLinalgOps(OpBuilder &builder, ArrayRef<LinalgOp> ops,
+mlir::linalg::tileAndFuseLinalgOps(OpBuilder &b, ArrayRef<LinalgOp> ops,
                                    const LinalgDependenceGraph &dependenceGraph,
                                    const LinalgTilingOptions &tilingOptions) {
   switch (tilingOptions.loopType) {
   case LinalgTilingLoopType::Loops:
-    return tileAndFuseLinalgOpsImpl<scf::ForOp>(builder, ops, dependenceGraph,
+    return tileAndFuseLinalgOpsImpl<scf::ForOp>(b, ops, dependenceGraph,
                                                 tilingOptions);
   case LinalgTilingLoopType::ParallelLoops:
-    return tileAndFuseLinalgOpsImpl<scf::ParallelOp>(
-        builder, ops, dependenceGraph, tilingOptions);
+    return tileAndFuseLinalgOpsImpl<scf::ParallelOp>(b, ops, dependenceGraph,
+                                                     tilingOptions);
   default:;
   }
   return llvm::None;
@@ -16,7 +16,6 @@
 #include "mlir/Dialect/Linalg/Passes.h"
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Dialect/Linalg/Utils/Utils.h"
-#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
@@ -75,8 +74,8 @@ makeTiledLoopRanges(OpBuilder &b, Location loc, AffineMap map,
   // Create a new range with the applied tile sizes.
   SmallVector<Range, 4> res;
   for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx)
-    res.push_back(
-        Range{std_constant_index(0), shapeSizes[idx], tileSizes[idx]});
+    res.push_back(Range{b.create<ConstantIndexOp>(loc, 0), shapeSizes[idx],
+                        tileSizes[idx]});
   return std::make_tuple(res, loopIndexToRangeIndex);
 }
 
@@ -330,7 +329,7 @@ Optional<TiledLinalgOp> static tileLinalgOpImpl(
   SmallVector<Value, 4> tileSizeVector =
       options.tileSizeComputationFunction(b, op);
   if (tileSizeVector.size() < nLoops) {
-    auto zero = std_constant_index(0);
+    auto zero = b.create<ConstantIndexOp>(op.getLoc(), 0);
     tileSizeVector.append(nLoops - tileSizeVector.size(), zero);
   }
 
@@ -525,10 +525,11 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &b, Location loc,
   for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
     LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n");
     bool isTiled = !isZero(tileSizes[idx]);
-    lbs.push_back(isTiled ? ivs[idxIvs++] : (Value)std_constant_index(0));
+    lbs.push_back(isTiled ? ivs[idxIvs++]
+                          : (Value)b.create<ConstantIndexOp>(loc, 0));
     // Before composing, we need to make range a closed interval.
     Value size = isTiled ? tileSizes[idx] : sizeBounds[idx];
-    subShapeSizes.push_back(size - std_constant_index(1));
+    subShapeSizes.push_back(size - b.create<ConstantIndexOp>(loc, 1));
     LLVM_DEBUG(llvm::dbgs() << "lb: " << lbs.back() << "\n");
     LLVM_DEBUG(llvm::dbgs() << "size: " << subShapeSizes.back() << "\n");
   }
@@ -560,7 +561,7 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &b, Location loc,
     LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for dim#" << r);
     if (!isTiled(map.getSubMap({r}), tileSizes)) {
       offsets.push_back(b.getIndexAttr(0));
-      Value dim = memref_dim(shapedOp, r).value;
+      Value dim = b.createOrFold<memref::DimOp>(loc, shapedOp, r);
      sizes.push_back(dim);
       strides.push_back(b.getIndexAttr(1));
       LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
@@ -576,7 +577,7 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &b, Location loc,
       offsets.push_back(offset);
       auto closedIntSize = applyMapToValues(b, loc, m, subShapeSizes).front();
       // Resulting size needs to be made half open interval again.
-      auto size = closedIntSize + std_constant_index(1);
+      auto size = closedIntSize + b.create<ConstantIndexOp>(loc, 1);
       LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: raw size: " << size << "\n");
 
       // The size of the subview / subtensor should be trimmed to avoid