[mlir][MemRef] NFC - Drop MemRef EDSC usage

Drop the MemRef dialect EDSC subdirectory and update all uses.

Differential Revision: https://reviews.llvm.org/D102868
Nicolas Vasilache 2021-05-20 20:05:55 +00:00
parent 3eb12b0ae1
commit e3cf7c88c4
7 changed files with 71 additions and 162 deletions
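
In essence, every EDSC intrinsic call becomes an explicit builder call that carries a Location. A minimal sketch of the recurring rewrite, using illustrative names (the helper below is not part of the diff):

#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/Builders.h"

using namespace mlir;

// Sketch only: EDSC wrappers such as memref_dim / std_constant_index, which
// needed an ambient ScopedContext, become OpBuilder calls with an explicit
// Location.
static Value dimOrConstant(OpBuilder &b, Location loc, Value memRef,
                           unsigned idx, int64_t staticSize) {
  // Was: shape[idx] == -1 ? memref_dim(memRef, idx)
  //                       : std_constant_index(shape[idx]);
  if (staticSize == -1)
    return b.createOrFold<memref::DimOp>(loc, memRef, idx);
  return b.create<ConstantIndexOp>(loc, staticSize);
}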

View File

@ -12,7 +12,6 @@
#include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
#include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"

View File

@ -1,86 +0,0 @@
//===- Intrinsics.h - MLIR EDSC Intrinsics for MemRefOps --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_DIALECT_MEMREF_EDSC_INTRINSICS_H_
#define MLIR_DIALECT_MEMREF_EDSC_INTRINSICS_H_
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/StandardOps/EDSC/Builders.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/EDSC/Builders.h"
#include "llvm/ADT/SmallVector.h"
namespace mlir {
namespace edsc {
namespace intrinsics {
using memref_alloc = ValueBuilder<memref::AllocOp>;
using memref_alloca = ValueBuilder<memref::AllocaOp>;
using memref_cast = ValueBuilder<memref::CastOp>;
using memref_dealloc = OperationBuilder<memref::DeallocOp>;
using memref_dim = ValueBuilder<memref::DimOp>;
using memref_load = ValueBuilder<memref::LoadOp>;
using memref_store = OperationBuilder<memref::StoreOp>;
using memref_sub_view = ValueBuilder<memref::SubViewOp>;
using memref_tensor_load = ValueBuilder<memref::TensorLoadOp>;
using memref_tensor_store = OperationBuilder<memref::TensorStoreOp>;
using memref_view = ValueBuilder<memref::ViewOp>;
} // namespace intrinsics
} // namespace edsc
} // namespace mlir
static inline ::llvm::SmallVector<mlir::Value, 8>
getMemRefSizes(mlir::Value memRef) {
using namespace mlir;
using namespace mlir::edsc;
using namespace mlir::edsc::intrinsics;
mlir::MemRefType memRefType = memRef.getType().cast<mlir::MemRefType>();
assert(isStrided(memRefType) && "Expected strided MemRef type");
SmallVector<mlir::Value, 8> res;
res.reserve(memRefType.getShape().size());
const auto &shape = memRefType.getShape();
for (unsigned idx = 0, n = shape.size(); idx < n; ++idx) {
if (shape[idx] == -1)
res.push_back(memref_dim(memRef, idx));
else
res.push_back(std_constant_index(shape[idx]));
}
return res;
}
namespace mlir {
namespace edsc {
/// A MemRefBoundsCapture represents the information required to step through a
/// MemRef. It has placeholders for non-contiguous tensors that fit within the
/// Fortran subarray model.
/// At the moment it can only capture a MemRef with an identity layout map.
// TODO: Support MemRefs with layoutMaps.
class MemRefBoundsCapture : public BoundsCapture {
public:
explicit MemRefBoundsCapture(Value v) {
auto memrefSizeValues = getMemRefSizes(v);
for (auto s : memrefSizeValues) {
lbs.push_back(intrinsics::std_constant_index(0));
ubs.push_back(s);
steps.push_back(1);
}
}
unsigned fastestVarying() const { return rank() - 1; }
private:
Value base;
};
} // namespace edsc
} // namespace mlir
#endif // MLIR_DIALECT_MEMREF_EDSC_INTRINSICS_H_
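
With this header gone, the few callers rebuild the loop bounds directly from the MemRefType, as the GPU memory promotion change below does. A minimal sketch of that replacement, assuming a ranked MemRef value (the helper name is illustrative):

#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/ImplicitLocOpBuilder.h"
#include "llvm/ADT/SmallVector.h"

using namespace mlir;

// Roughly what MemRefBoundsCapture used to provide: lower bounds of 0, upper
// bounds of dim(memRef, i), and unit steps, built with plain builder calls.
static void appendLoopBounds(ImplicitLocOpBuilder &b, Value memRef,
                             SmallVectorImpl<Value> &lbs,
                             SmallVectorImpl<Value> &ubs,
                             SmallVectorImpl<Value> &steps) {
  auto type = memRef.getType().cast<MemRefType>();
  Value zero = b.create<ConstantIndexOp>(0);
  Value one = b.create<ConstantIndexOp>(1);
  for (int64_t i = 0, rank = type.getRank(); i < rank; ++i) {
    lbs.push_back(zero);
    ubs.push_back(
        b.createOrFold<memref::DimOp>(memRef, b.create<ConstantIndexOp>(i)));
    steps.push_back(one);
  }
}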

View File

@ -63,7 +63,7 @@ public:
/// Create an operation of a specific op type at the current insertion point and
/// location.
template <typename OpTy, typename... Args>
OpTy create(Args &&... args) {
OpTy create(Args &&...args) {
return OpBuilder::create<OpTy>(curLoc, std::forward<Args>(args)...);
}
@ -71,7 +71,7 @@ public:
/// and immediately try to fold it. This function populates 'results' with
/// the results after folding the operation.
template <typename OpTy, typename... Args>
void createOrFold(llvm::SmallVectorImpl<Value> &results, Args &&... args) {
void createOrFold(llvm::SmallVectorImpl<Value> &results, Args &&...args) {
OpBuilder::createOrFold<OpTy>(results, curLoc, std::forward<Args>(args)...);
}
@ -79,7 +79,7 @@ public:
template <typename OpTy, typename... Args>
typename std::enable_if<OpTy::template hasTrait<mlir::OpTrait::OneResult>(),
Value>::type
createOrFold(Args &&... args) {
createOrFold(Args &&...args) {
return OpBuilder::createOrFold<OpTy>(curLoc, std::forward<Args>(args)...);
}
@ -87,7 +87,7 @@ public:
template <typename OpTy, typename... Args>
typename std::enable_if<OpTy::template hasTrait<mlir::OpTrait::ZeroResult>(),
OpTy>::type
createOrFold(Args &&... args) {
createOrFold(Args &&...args) {
return OpBuilder::createOrFold<OpTy>(curLoc, std::forward<Args>(args)...);
}
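
For reference, the variadic forwarding above lets call sites omit the Location entirely. A minimal usage sketch (the helper name is hypothetical):

#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/ImplicitLocOpBuilder.h"

using namespace mlir;

// `create` and `createOrFold` forward to OpBuilder with the stored location,
// so no `loc` argument appears at the call sites.
static Value firstDimSize(ImplicitLocOpBuilder &b, Value memRef) {
  Value zero = b.create<ConstantIndexOp>(0);
  return b.createOrFold<memref::DimOp>(memRef, zero);
}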

View File

@ -13,9 +13,9 @@
#include "mlir/Dialect/GPU/MemoryPromotion.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/IR/ImplicitLocOpBuilder.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/LoopUtils.h"
@ -41,45 +41,46 @@ static StringRef getDimName(unsigned dim) {
/// GPUDialect::getNumWorkgroupDimensions() loops, completing the nest with
/// single-iteration loops. Maps the innermost loops to thread dimensions, in
/// reverse order to enable access coalescing in the innermost loop.
static void insertCopyLoops(OpBuilder &b, Location loc,
MemRefBoundsCapture &bounds, Value from, Value to) {
// Create EDSC handles for bounds.
unsigned rank = bounds.rank();
static void insertCopyLoops(ImplicitLocOpBuilder &b, Value from, Value to) {
auto memRefType = from.getType().cast<MemRefType>();
auto rank = memRefType.getRank();
SmallVector<Value, 4> lbs, ubs, steps;
Value zero = b.create<ConstantIndexOp>(0);
Value one = b.create<ConstantIndexOp>(1);
// Make sure we have enough loops to use all thread dimensions; these trivial
// loops should be outermost and therefore inserted first.
if (rank < GPUDialect::getNumWorkgroupDimensions()) {
unsigned extraLoops = GPUDialect::getNumWorkgroupDimensions() - rank;
Value zero = std_constant_index(0);
Value one = std_constant_index(1);
lbs.resize(extraLoops, zero);
ubs.resize(extraLoops, one);
steps.resize(extraLoops, one);
}
// Add existing bounds.
lbs.append(bounds.getLbs().begin(), bounds.getLbs().end());
ubs.append(bounds.getUbs().begin(), bounds.getUbs().end());
// Emit constant operations for steps.
lbs.append(rank, zero);
ubs.reserve(lbs.size());
steps.reserve(lbs.size());
llvm::transform(bounds.getSteps(), std::back_inserter(steps),
[](int64_t step) { return std_constant_index(step); });
for (auto idx = 0; idx < rank; ++idx) {
ubs.push_back(
b.createOrFold<memref::DimOp>(from, b.create<ConstantIndexOp>(idx)));
steps.push_back(one);
}
// Obtain thread identifiers and block sizes, necessary to map to them.
auto indexType = b.getIndexType();
SmallVector<Value, 3> threadIds, blockDims;
for (unsigned i = 0; i < 3; ++i) {
auto dimName = b.getStringAttr(getDimName(i));
threadIds.push_back(b.create<gpu::ThreadIdOp>(loc, indexType, dimName));
blockDims.push_back(b.create<gpu::BlockDimOp>(loc, indexType, dimName));
threadIds.push_back(b.create<gpu::ThreadIdOp>(indexType, dimName));
blockDims.push_back(b.create<gpu::BlockDimOp>(indexType, dimName));
}
// Produce the loop nest with copies.
SmallVector<Value, 8> ivs(lbs.size());
mlir::scf::buildLoopNest(
b, loc, lbs, ubs, steps,
b, b.getLoc(), lbs, ubs, steps,
[&](OpBuilder &b, Location loc, ValueRange loopIvs) {
ivs.assign(loopIvs.begin(), loopIvs.end());
auto activeIvs = llvm::makeArrayRef(ivs).take_back(rank);
@ -142,17 +143,13 @@ static void insertCopies(Region &region, Location loc, Value from, Value to) {
assert(llvm::hasSingleElement(region) &&
"unstructured control flow not supported");
OpBuilder b(region.getContext());
b.setInsertionPointToStart(&region.front());
ScopedContext edscContext(b, loc);
MemRefBoundsCapture fromBoundsCapture(from);
insertCopyLoops(b, loc, fromBoundsCapture, from, to);
b.create<gpu::BarrierOp>(loc);
auto b = ImplicitLocOpBuilder::atBlockBegin(loc, &region.front());
insertCopyLoops(b, from, to);
b.create<gpu::BarrierOp>();
b.setInsertionPoint(&region.front().back());
b.create<gpu::BarrierOp>(loc);
insertCopyLoops(b, loc, fromBoundsCapture, to, from);
b.create<gpu::BarrierOp>();
insertCopyLoops(b, to, from);
}
/// Promotes a function argument to workgroup memory in the given function. The

View File

@ -18,7 +18,6 @@
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
@ -114,13 +113,13 @@ getShapeDefiningLoopRange(LinalgOp op, unsigned loopDepth,
/// Fuses the producer by cloning the `producer`. The `fusedLoopsAndRanges`
/// provides the loop range information for the fused loops. The rest are
/// obtained from the producer itself, since they are not tiled + fused.
static LinalgOp fuse(OpBuilder &builder, LinalgOp producer,
static LinalgOp fuse(OpBuilder &b, LinalgOp producer,
const DenseMap<unsigned, Range> &fusedLoopsAndRanges) {
SmallVector<Value, 8> ivs, tileSizes, sizeBounds;
SmallVector<Range, 8> loopRanges;
auto zero = std_constant_index(0);
auto one = std_constant_index(1);
Location loc = producer.getLoc();
auto zero = b.create<ConstantIndexOp>(loc, 0);
auto one = b.create<ConstantIndexOp>(loc, 1);
for (unsigned i = 0, e = producer.getNumLoops(); i < e; ++i) {
auto it = fusedLoopsAndRanges.find(i);
@ -133,7 +132,8 @@ static LinalgOp fuse(OpBuilder &builder, LinalgOp producer,
<< loopRanges.back() << "\n");
} else {
auto shapeDim = getShapeDefiningLoopRange(producer, i);
Value dim = memref_dim(shapeDim.shape, shapeDim.dimension);
Value dim = b.createOrFold<memref::DimOp>(loc, shapeDim.shape,
shapeDim.dimension);
tileSizes.push_back(zero);
sizeBounds.push_back(dim);
loopRanges.push_back(Range{zero, dim, one});
@ -147,8 +147,8 @@ static LinalgOp fuse(OpBuilder &builder, LinalgOp producer,
// Compute subranges for all tensor input/output operands.
auto tiledOperands = llvm::to_vector<4>(producer.getShapedOperands());
clonedShapes.append(makeTiledShapes(builder, loc, producer, tiledOperands,
ivs, tileSizes, sizeBounds));
clonedShapes.append(makeTiledShapes(b, loc, producer, tiledOperands, ivs,
tileSizes, sizeBounds));
// Append the other operands.
auto operands = producer.getAssumedNonShapedOperands();
@ -172,7 +172,7 @@ static LinalgOp fuse(OpBuilder &builder, LinalgOp producer,
staticStridesVector));
}
Operation *clonedOp = producer.clone(builder, loc, resultTypes, clonedShapes);
Operation *clonedOp = producer.clone(b, loc, resultTypes, clonedShapes);
// When the producer has index semantics, we have to transform the indices of
// the producer according to the tiling of the consumer, i.e. offset them by
// the values computed in `loopRanges`.
@ -184,11 +184,11 @@ static LinalgOp fuse(OpBuilder &builder, LinalgOp producer,
// Shift all indices by the tile offset.
Block &block = clonedOp->getRegion(0).front();
for (IndexOp indexOp : block.getOps<IndexOp>()) {
OpBuilder::InsertionGuard g(builder);
builder.setInsertionPointAfter(indexOp);
OpBuilder::InsertionGuard g(b);
b.setInsertionPointAfter(indexOp);
AffineExpr index, offset;
bindDims(builder.getContext(), index, offset);
AffineApplyOp applyOp = builder.create<AffineApplyOp>(
bindDims(b.getContext(), index, offset);
AffineApplyOp applyOp = b.create<AffineApplyOp>(
indexOp.getLoc(), index + offset,
ValueRange{indexOp.getResult(), loopRanges[indexOp.dim()].offset});
indexOp.getResult().replaceAllUsesExcept(applyOp, applyOp);
@ -770,17 +770,18 @@ FusableOpDependencesTy mlir::linalg::findAllFusableDependences(
/// Tile the fused loops in the root operation, by setting the tile sizes for
/// all other loops to zero (those will be tiled later).
static Optional<TiledLinalgOp> tileRootOperation(
OpBuilder &builder, LinalgOp op, ArrayRef<Value> tileSizeVector,
const LinalgTilingOptions &options, const std::set<unsigned> &fusedLoops) {
static Optional<TiledLinalgOp>
tileRootOperation(OpBuilder &b, LinalgOp op, ArrayRef<Value> tileSizeVector,
const LinalgTilingOptions &options,
const std::set<unsigned> &fusedLoops) {
SmallVector<Value, 4> tileSizes(tileSizeVector.begin(), tileSizeVector.end());
auto zero = std_constant_index(0);
auto zero = b.create<ConstantIndexOp>(op.getLoc(), 0);
for (unsigned i = 0, e = tileSizes.size(); i != e; ++i)
if (!fusedLoops.count(i))
tileSizes[i] = zero;
LinalgTilingOptions tileFusedLoopsOptions = options;
tileFusedLoopsOptions.setTileSizes(tileSizes);
return tileLinalgOp(builder, op, tileFusedLoopsOptions);
return tileLinalgOp(b, op, tileFusedLoopsOptions);
}
/// Fuse the operations in `fusionCandidates` with `tiledOp`. The latter is expected
@ -788,19 +789,19 @@ static Optional<TiledLinalgOp> tileRootOperation(
/// `fusionCandidates`, i.e. move the operation within the inter-tile loops of
/// `tiledOp`.
static SmallVector<LinalgOp, 1>
fuseOperations(OpBuilder &builder, LinalgOp rootOp, TiledLinalgOp tiledLinalgOp,
fuseOperations(OpBuilder &b, LinalgOp rootOp, TiledLinalgOp tiledLinalgOp,
ArrayRef<LinalgOp> fusionCandidates,
const FusableOpDependencesTy &fusableDependences,
const std::set<unsigned> &fusedLoops) {
LinalgOp tiledOp = tiledLinalgOp.op;
OpBuilder::InsertionGuard guard(builder);
builder.setInsertionPoint(tiledOp);
OpBuilder::InsertionGuard guard(b);
b.setInsertionPoint(tiledOp);
DenseMap<unsigned, Range> fusedLoopsAndRanges;
for (unsigned loop : fusedLoops) {
ShapeDimension shapeDim = getShapeDefiningLoopRange(tiledOp, loop, true);
fusedLoopsAndRanges[loop] = getRangeFromOperandShape(
builder, tiledOp.getLoc(), shapeDim.shape, shapeDim.dimension);
b, tiledOp.getLoc(), shapeDim.shape, shapeDim.dimension);
}
SmallVector<LinalgOp, 1> fusedOps(fusionCandidates.size());
@ -808,13 +809,12 @@ fuseOperations(OpBuilder &builder, LinalgOp rootOp, TiledLinalgOp tiledLinalgOp,
origOpToFusedOp[rootOp.getOperation()] = tiledOp;
for (auto candidate : enumerate(llvm::reverse(fusionCandidates))) {
LinalgOp origOp = candidate.value();
LinalgOp fusedOp = fuse(builder, origOp, fusedLoopsAndRanges);
LinalgOp fusedOp = fuse(b, origOp, fusedLoopsAndRanges);
origOpToFusedOp[origOp.getOperation()] = fusedOp;
fusedOps[fusionCandidates.size() - candidate.index() - 1] = fusedOp;
// Prepare the builder for the next insertion point.
auto guard =
llvm::make_scope_exit([&]() { builder.setInsertionPoint(fusedOp); });
// Prepare the builder for the next insertion point.
auto guard = llvm::make_scope_exit([&]() { b.setInsertionPoint(fusedOp); });
if (!origOp.hasTensorSemantics())
continue;
@ -860,7 +860,7 @@ fuseOperations(OpBuilder &builder, LinalgOp rootOp, TiledLinalgOp tiledLinalgOp,
template <typename LoopType>
static Optional<TiledAndFusedLinalgOps>
tileAndFuseLinalgOpsImpl(OpBuilder &builder, ArrayRef<LinalgOp> ops,
tileAndFuseLinalgOpsImpl(OpBuilder &b, ArrayRef<LinalgOp> ops,
const LinalgDependenceGraph &dependenceGraph,
const LinalgTilingOptions &tilingOptions) {
if (ops.size() < 2)
@ -884,9 +884,9 @@ tileAndFuseLinalgOpsImpl(OpBuilder &builder, ArrayRef<LinalgOp> ops,
return llvm::None;
}
OpBuilder::InsertionGuard guard(builder);
builder.setInsertionPoint(rootOp);
ScopedContext scope(builder, rootOp.getLoc());
OpBuilder::InsertionGuard guard(b);
b.setInsertionPoint(rootOp);
ScopedContext scope(b, rootOp.getLoc());
// Find all the producers.
LLVM_DEBUG(llvm::dbgs() << "findAllFusableDependences\n");
@ -911,9 +911,9 @@ tileAndFuseLinalgOpsImpl(OpBuilder &builder, ArrayRef<LinalgOp> ops,
// Tile the fused loops in the last operation in the list.
SmallVector<Value, 4> tileSizeVector =
tilingOptions.tileSizeComputationFunction(builder, rootOp);
tilingOptions.tileSizeComputationFunction(b, rootOp);
Optional<TiledLinalgOp> tiledRootOp = tileRootOperation(
builder, rootOp, tileSizeVector, tilingOptions, ret.fusedLoopDims);
b, rootOp, tileSizeVector, tilingOptions, ret.fusedLoopDims);
if (!tiledRootOp) {
rootOp.emitRemark("failed to tile the fused loops");
return llvm::None;
@ -922,24 +922,23 @@ tileAndFuseLinalgOpsImpl(OpBuilder &builder, ArrayRef<LinalgOp> ops,
ret.fusedLoops.assign(tiledRootOp->loops.begin(), tiledRootOp->loops.end());
// Fuse the other operations into the fused inter-tile loops produced above.
ret.fusedProducers =
fuseOperations(builder, rootOp, *tiledRootOp, ops.drop_back(),
fusableDependences, ret.fusedLoopDims);
ret.fusedProducers = fuseOperations(b, rootOp, *tiledRootOp, ops.drop_back(),
fusableDependences, ret.fusedLoopDims);
return ret;
}
Optional<TiledAndFusedLinalgOps>
mlir::linalg::tileAndFuseLinalgOps(OpBuilder &builder, ArrayRef<LinalgOp> ops,
mlir::linalg::tileAndFuseLinalgOps(OpBuilder &b, ArrayRef<LinalgOp> ops,
const LinalgDependenceGraph &dependenceGraph,
const LinalgTilingOptions &tilingOptions) {
switch (tilingOptions.loopType) {
case LinalgTilingLoopType::Loops:
return tileAndFuseLinalgOpsImpl<scf::ForOp>(builder, ops, dependenceGraph,
return tileAndFuseLinalgOpsImpl<scf::ForOp>(b, ops, dependenceGraph,
tilingOptions);
case LinalgTilingLoopType::ParallelLoops:
return tileAndFuseLinalgOpsImpl<scf::ParallelOp>(
builder, ops, dependenceGraph, tilingOptions);
return tileAndFuseLinalgOpsImpl<scf::ParallelOp>(b, ops, dependenceGraph,
tilingOptions);
default:;
}
return llvm::None;

View File

@ -16,7 +16,6 @@
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
@ -75,8 +74,8 @@ makeTiledLoopRanges(OpBuilder &b, Location loc, AffineMap map,
// Create a new range with the applied tile sizes.
SmallVector<Range, 4> res;
for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx)
res.push_back(
Range{std_constant_index(0), shapeSizes[idx], tileSizes[idx]});
res.push_back(Range{b.create<ConstantIndexOp>(loc, 0), shapeSizes[idx],
tileSizes[idx]});
return std::make_tuple(res, loopIndexToRangeIndex);
}
@ -330,7 +329,7 @@ Optional<TiledLinalgOp> static tileLinalgOpImpl(
SmallVector<Value, 4> tileSizeVector =
options.tileSizeComputationFunction(b, op);
if (tileSizeVector.size() < nLoops) {
auto zero = std_constant_index(0);
auto zero = b.create<ConstantIndexOp>(op.getLoc(), 0);
tileSizeVector.append(nLoops - tileSizeVector.size(), zero);
}

View File

@ -525,10 +525,11 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &b, Location loc,
for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n");
bool isTiled = !isZero(tileSizes[idx]);
lbs.push_back(isTiled ? ivs[idxIvs++] : (Value)std_constant_index(0));
lbs.push_back(isTiled ? ivs[idxIvs++]
: (Value)b.create<ConstantIndexOp>(loc, 0));
// Before composing, we need to make range a closed interval.
Value size = isTiled ? tileSizes[idx] : sizeBounds[idx];
subShapeSizes.push_back(size - std_constant_index(1));
subShapeSizes.push_back(size - b.create<ConstantIndexOp>(loc, 1));
LLVM_DEBUG(llvm::dbgs() << "lb: " << lbs.back() << "\n");
LLVM_DEBUG(llvm::dbgs() << "size: " << subShapeSizes.back() << "\n");
}
@ -560,7 +561,7 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &b, Location loc,
LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for dim#" << r);
if (!isTiled(map.getSubMap({r}), tileSizes)) {
offsets.push_back(b.getIndexAttr(0));
Value dim = memref_dim(shapedOp, r).value;
Value dim = b.createOrFold<memref::DimOp>(loc, shapedOp, r);
sizes.push_back(dim);
strides.push_back(b.getIndexAttr(1));
LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
@ -576,7 +577,7 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &b, Location loc,
offsets.push_back(offset);
auto closedIntSize = applyMapToValues(b, loc, m, subShapeSizes).front();
// Resulting size needs to be made half open interval again.
auto size = closedIntSize + std_constant_index(1);
auto size = closedIntSize + b.create<ConstantIndexOp>(loc, 1);
LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: raw size: " << size << "\n");
// The size of the subview / subtensor should be trimmed to avoid