[mlir][Linalg] Add comprehensive bufferization support for TiledLoopOp (14/n)
Differential Revision: https://reviews.llvm.org/D105335
Parent: dadedc99e9
Commit: ad0050c607
@@ -296,13 +296,13 @@ static InPlaceSpec getInPlace(BlockArgument bbArg) {
       return InPlaceSpec::None;
     return inplaceAttr.getValue() ? InPlaceSpec::True : InPlaceSpec::False;
   }
-  // Interestingly, scf::ForOp's bbArg can **always** be viewed inplace from the
-  // perspective of ops nested under it:
+  // Interestingly, scf::ForOp's and TiledLoop's bbArg can **always** be viewed
+  // inplace from the perspective of ops nested under:
   // 1. Either the matching iter operand is not bufferized inplace and an
   //    alloc + optional copy makes the bbArg itself inplaceable.
   // 2. Or the matching iter operand is bufferized inplace and bbArg just
   //    bufferizes to that too.
-  if (auto forOp = dyn_cast<scf::ForOp>(bbArg.getOwner()->getParentOp()))
+  if (isa<scf::ForOp, TiledLoopOp>(bbArg.getOwner()->getParentOp()))
    return InPlaceSpec::True;
   // Unknown cases.
   return InPlaceSpec::None;
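For intuition, a minimal sketch of the situation this comment describes (illustrative IR, not part of the patch): ops nested under the loop only ever see the block argument %out, so from their perspective it is always inplaceable; whether the outer operand %C is reused directly or first copied into a fresh allocation (cases 1. and 2. above) is decided at the loop boundary.

func @bbarg_always_inplace_from_inside(%A : tensor<8xf32>, %C : tensor<8xf32>) -> tensor<8xf32> {
  %c0 = constant 0 : index
  %c4 = constant 4 : index
  %c8 = constant 8 : index
  %r = linalg.tiled_loop (%i) = (%c0) to (%c8) step (%c4)
       ins (%in = %A : tensor<8xf32>)
       outs (%out = %C : tensor<8xf32>)
       iterators["parallel"]
  {
    // Nested ops use %out, never %C directly.
    %t = tensor.extract_slice %in[%i] [4] [1] : tensor<8xf32> to tensor<4xf32>
    %u = tensor.insert_slice %t into %out[%i] [4] [1] : tensor<4xf32> into tensor<8xf32>
    linalg.yield %u : tensor<8xf32>
  }
  return %r : tensor<8xf32>
}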
@@ -359,19 +359,28 @@ static bool hasKnownBufferizationAliasingBehavior(Operation *op) {
       isa<CallOpInterface,
           tensor::CastOp,
           ConstantOp,
+          ExtractSliceOp,
           scf::ForOp,
+          InsertSliceOp,
           InitTensorOp,
           LinalgOp,
           ReturnOp,
-          ExtractSliceOp,
-          InsertSliceOp,
+          TiledLoopOp,
           VectorTransferOpInterface,
+          linalg::YieldOp,
           scf::YieldOp>(op)
       // clang-format on
       || (none_of(op->getResultTypes(), isaTensor) &&
           none_of(op->getOperandTypes(), isaTensor));
 }
 
 /// Return the OpResult that may bufferize into the same buffer as `opOperand`
 /// when the op is bufferized inplace.
 /// Return null if no such result exists.
+static OpResult getInplaceableOpResult(TiledLoopOp op, OpOperand &opOperand) {
+  return op.getTiedOpResult(opOperand);
+}
+
+/// Return the OpResult that may bufferize into the same buffer as `opOperand`
+/// when the op is bufferized inplace.
+/// Return null if no such result exists.
@@ -441,8 +450,9 @@ static OpResult getInplaceableOpResult(OpOperand &opOperand) {
       // result(s).
       .Case<tensor::CastOp,
             scf::ForOp,
-            LinalgOp,
             InsertSliceOp,
+            LinalgOp,
+            TiledLoopOp,
             VectorTransferOpInterface>(
           [&](auto op) { return getInplaceableOpResult(op, opOperand); })
       // ExtractSliceOp is special, when bufferized inplace it just returns an
@@ -469,18 +479,23 @@ static Optional<OpOperand *> getAliasingOpOperand(OpResult result) {
   return TypeSwitch<Operation *, OpOperand *>(result.getDefiningOp())
       .Case([&](tensor::CastOp op) { return &op->getOpOperand(0); })
       .Case([&](ConstantOp op) { return &op->getOpOperand(0); })
-      .Case([&](LinalgOp op) {
-        return op.getOutputTensorOperands()[result.getResultNumber()];
-      })
       .Case([&](ExtractSliceOp op) { return &op->getOpOperand(0); })
-      .Case([&](InsertSliceOp op) { return &op->getOpOperand(1); })
-      .Case([&](vector::TransferWriteOp op) { return &op->getOpOperand(1); })
       // In the case of scf::ForOp, this currently assumes the iter_args / yield
       // are 1-1. This may fail and is verified at the end.
       // TODO: update this.
       .Case([&](scf::ForOp op) {
         return &op.getIterOpOperands()[result.getResultNumber()];
       })
+      .Case([&](InsertSliceOp op) { return &op->getOpOperand(1); })
+      .Case([&](LinalgOp op) {
+        return op.getOutputTensorOperands()[result.getResultNumber()];
+      })
+      .Case([&](TiledLoopOp op) {
+        // TODO: TiledLoopOp helper method to avoid leaking impl details.
+        return &op->getOpOperand(op.getNumControlOperands() +
+                                 op.getNumInputs() + result.getResultNumber());
+      })
+      .Case([&](vector::TransferWriteOp op) { return &op->getOpOperand(1); })
       .Default([&](Operation *op) {
         op->dump();
         llvm_unreachable("unexpected defining op");
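To make the TiledLoopOp case concrete, consider the tiled_dot test added at the end of this patch: with a single induction variable there are 3 control operands (lower bound, upper bound, step), followed by the 3 ins operands and the single outs operand, so result #0 aliases operand #6, the outs tensor %c. Annotated excerpt (body elided; the operand numbering comments are added here purely for illustration):

// operands #0..#2 : %c0, %0, %c3        <- control operands (lbs, ubs, steps)
// operands #3..#5 : %A, %effecting, %B  <- ins
// operand  #6     : %c                  <- outs, i.e. getNumControlOperands() +
//                                          getNumInputs() + 0, aliasing result #0
%1 = linalg.tiled_loop (%arg3) = (%c0) to (%0) step (%c3)
     ins (%arg4 = %A: tensor<?xf32>, %use = %effecting : memref<?xf32>, %arg5 = %B: tensor<?xf32>)
     outs (%arg6 = %c: tensor<f32>)
     iterators["reduction"] { ... }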
@@ -528,6 +543,10 @@ static bool bufferizesToMemoryRead(OpOperand &opOperand) {
   // matching bbArg may.
   if (isa<scf::ForOp>(opOperand.getOwner()))
     return false;
+  // TiledLoop alone doesn't bufferize to a memory read, one of the uses of its
+  // matching bbArg may.
+  if (isa<TiledLoopOp>(opOperand.getOwner()))
+    return false;
   // CallOpInterface alone doesn't bufferize to a memory read, one of the uses
   // of the matching bbArg may. It is the responsibility of the caller to
   // inspect bbArgs. In the absence of a BufferizationAliasInfo, we need to be
@@ -1340,11 +1359,10 @@ createNewAllocDeallocPairForShapedValue(OpBuilder &b, Location loc,
 /// When allocating a new buffer, analyze whether `op` want to read form that
 /// buffer. In such a case, insert a copy to ensure the newly allocated buffer
 /// is properly initialiazed.
-static LogicalResult
-allocateBuffersForResults(OpBuilder &b, Location loc, LinalgOp op,
-                          SmallVectorImpl<Value> &resultBuffers,
-                          BlockAndValueMapping &bvm,
-                          BufferizationAliasInfo &aliasInfo) {
+static void allocateBuffersForResults(OpBuilder &b, Location loc, LinalgOp op,
+                                      SmallVectorImpl<Value> &resultBuffers,
+                                      BlockAndValueMapping &bvm,
+                                      BufferizationAliasInfo &aliasInfo) {
   // Take a guard before anything else.
   OpBuilder::InsertionGuard g(b);
 
@@ -1360,8 +1378,7 @@ allocateBuffersForResults(OpBuilder &b, Location loc, LinalgOp op,
     OpResult opResult = getInplaceableOpResult(*opOperand);
     if (getInPlace(opResult) == InPlaceSpec::True) {
       Value v = lookup(bvm, output);
-      if (!v)
-        return failure();
+      assert(v && "missing buffer");
       resultBuffers.push_back(v);
       continue;
     }
@@ -1375,17 +1392,13 @@ allocateBuffersForResults(OpBuilder &b, Location loc, LinalgOp op,
 
     // Additionally, if the output buffer is used, clone its value for now.
     if (op.payloadUsesValueFromOperand(opOperand)) {
-      if (Value v = lookup(bvm, output))
-        b.create<CopyOp>(loc, v, alloc);
-      else
-        return failure();
+      Value v = lookup(bvm, output);
+      b.create<CopyOp>(loc, v, alloc);
     }
   }
 
   if (op->getNumResults())
     map(bvm, op->getResults(), resultBuffers);
-
-  return success();
 }
 
 /// Generic conversion for any LinalgOp on tensors.
@@ -1398,7 +1411,7 @@ static LogicalResult bufferize(OpBuilder &b, LinalgOp op,
   // Ensure op has only tensors. Allow mixed tensor-buffer mode on a per-need
   // basis.
   if (!op.hasTensorSemantics())
-    return failure();
+    return op->emitError() << "op does not have tensor semantics";
 
   b.setInsertionPoint(op);
   Location loc = op.getLoc();
@@ -1410,14 +1423,11 @@ static LogicalResult bufferize(OpBuilder &b, LinalgOp op,
       continue;
     }
     newInputBuffers.push_back(lookup(bvm, opOperand->get()));
-    if (!newInputBuffers.back())
-      return failure();
+    assert(newInputBuffers.back() && "missing buffer");
   }
   SmallVector<Value> newOutputBuffers;
   // Try to allocate new buffers depending on op's inplace semantics.
-  if (failed(allocateBuffersForResults(b, loc, op, newOutputBuffers, bvm,
-                                       aliasInfo)))
-    return failure();
+  allocateBuffersForResults(b, loc, op, newOutputBuffers, bvm, aliasInfo);
 
   // Clone the newly bufferized op.
   SmallVector<Value> newOperands = newInputBuffers;
@@ -1608,8 +1618,8 @@ static LogicalResult bufferize(OpBuilder &b, ConstantOp constantOp,
                                BlockAndValueMapping &bvm,
                                BufferizationAliasInfo &aliasInfo,
                                GlobalCreator &globalCreator) {
-  if (!constantOp.getType().dyn_cast<RankedTensorType>())
-    return failure();
+  assert(constantOp.getType().dyn_cast<RankedTensorType>() &&
+         "not a constant ranked tensor");
 
   // Take a guard before anything else.
   OpBuilder::InsertionGuard g(b);
@@ -1629,11 +1639,15 @@ static LogicalResult bufferize(OpBuilder &b, ConstantOp constantOp,
 static LogicalResult bufferize(OpBuilder &b, tensor::DimOp dimOp,
                                BlockAndValueMapping &bvm,
                                BufferizationAliasInfo &aliasInfo) {
+  // Take a guard before anything else.
+  OpBuilder::InsertionGuard g(b);
+  b.setInsertionPoint(dimOp);
+
   if (dimOp.source().getType().isa<RankedTensorType>()) {
     Value v = lookup(bvm, dimOp.source());
-    if (!v)
-      return failure();
-    dimOp.sourceMutable().assign(v);
+    assert(v && "missing buffer");
+    dimOp.result().replaceAllUsesWith(
+        b.create<memref::DimOp>(dimOp.getLoc(), v, dimOp.index()));
   }
   return success();
 }
@@ -1649,10 +1663,12 @@ static LogicalResult bufferize(OpBuilder &b, scf::ForOp forOp,
   // Otherwise alloc and copy.
   b.setInsertionPoint(forOp);
   for (OpResult opResult : forOp->getResults()) {
+    if (!opResult.getType().isa<TensorType>())
+      continue;
     // TODO: Atm we bail on unranked TensorType because we don't know how to
     // alloc an UnrankedMemRefType + its underlying ranked MemRefType.
-    if (!opResult.getType().isa<RankedTensorType>())
-      return failure();
+    assert(opResult.getType().isa<RankedTensorType>() &&
+           "unsupported unranked tensor");
     OpOperand &opOperand = forOp.getOpOperandForResult(opResult);
     Value operand = opOperand.get();
     Value operandBuffer = lookup(bvm, operand);
@@ -1730,8 +1746,7 @@ static LogicalResult bufferize(OpBuilder &b, ReturnOp returnOp,
     if (!tensorType)
       continue;
     Value v = lookup(bvm, operand.get());
-    if (!v)
-      return failure();
+    assert(v && "missing buffer for result");
     Value returnTensor = b.create<memref::TensorLoadOp>(returnOp.getLoc(), v);
     operand.set(returnTensor);
     aliasInfo.insertNewBufferEquivalence(returnTensor, v);
@@ -1740,6 +1755,135 @@ static LogicalResult bufferize(OpBuilder &b, ReturnOp returnOp,
   return success();
 }
 
+/// Bufferization for TiledLoopOp..
+static LogicalResult bufferize(OpBuilder &b, TiledLoopOp tiledLoopOp,
+                               BlockAndValueMapping &bvm,
+                               BufferizationAliasInfo &aliasInfo) {
+  // Allocate output buffers if needed, forward output tensor args to the
+  // terminator.
+  Operation *yieldOp = tiledLoopOp.getBody()->getTerminator();
+  Block *body = tiledLoopOp.getBody();
+
+  // Take copies of the old input and output operands, so we can insert inplace
+  // easily.
+  auto oldInputs = llvm::to_vector<4>(tiledLoopOp.inputs());
+  auto oldOutputs = llvm::to_vector<4>(tiledLoopOp.outputs());
+
+  int numLoops = tiledLoopOp.getNumLoops();
+  int numControlOperands = tiledLoopOp.getNumControlOperands();
+
+  // Add buffers for outputs and the corresponding block arguments.
+  // Keep separate iterators to increment without further leaking impl. details.
+  // Start with outputs to avoid interference from new input buffers.
+  int numNewOutputBuffers = 0;
+  int resultIndex = 0;
+  int oldOutputBBArgIndex = numLoops + oldInputs.size();
+  int nextOutputBBArgIndex = numLoops + oldInputs.size() + oldOutputs.size();
+  int nextOutputOperandIndex =
+      numControlOperands + oldInputs.size() + oldOutputs.size();
+  for (Value oldOutputTensor : oldOutputs) {
+    if (!oldOutputTensor.getType().isa<TensorType>()) {
+      // Skip and increment the old bbarg index only.
+      ++oldOutputBBArgIndex;
+      // Do not increment resultIndex as only tensors are returned.
+      // TODO: better interface to avoid leaking such impl details.
+      continue;
+    }
+
+    assert(oldOutputTensor.getType().isa<RankedTensorType>() &&
+           "bufferizable output must be a ranked tensor");
+
+    Value outputBuffer = lookup(bvm, oldOutputTensor);
+    const OpResult &opResult = tiledLoopOp->getResult(resultIndex);
+    OpOperand &yieldOperand = yieldOp->getOpOperand(resultIndex);
+    // If the result is not inplaceable, need to allocate a copy for it.
+    if (getInPlace(opResult) != InPlaceSpec::True) {
+      auto loc = tiledLoopOp.getLoc();
+      Value alloc = createNewAllocDeallocPairForShapedValue(
+          b, loc, oldOutputTensor, aliasInfo);
+      // If the tensor comes from `linalg::InitTensorOp`, the value is
+      // unitialized and we do not need to copy.
+      // TODO: "matching bbArg does not bufferize to a read" is a more general
+      // check.
+      if (!oldOutputTensor.getDefiningOp<linalg::InitTensorOp>()) {
+        b.setInsertionPointAfter(alloc.getDefiningOp());
+        b.create<linalg::CopyOp>(loc, outputBuffer, alloc);
+      }
+      outputBuffer = alloc;
+    }
+    // Insert mapping and aliasing info.
+    aliasInfo.createAliasInfoEntry(outputBuffer);
+    aliasInfo.insertNewBufferEquivalence(opResult, outputBuffer);
+    map(bvm, opResult, outputBuffer);
+
+    // Insert new operand and bbArg.
+    tiledLoopOp->insertOperands(nextOutputOperandIndex, outputBuffer);
+    BlockArgument newBufferBBArg =
+        body->insertArgument(nextOutputBBArgIndex, outputBuffer.getType());
+    BlockArgument oldTensorBBArg = body->getArgument(oldOutputBBArgIndex);
+    // Insert mapping and aliasing info.
+    aliasInfo.createAliasInfoEntry(newBufferBBArg);
+    aliasInfo.insertNewBufferEquivalence(oldTensorBBArg, newBufferBBArg);
+    map(bvm, oldTensorBBArg, newBufferBBArg);
+
+    // Set operand of `linalg.yield` to the bbArg so it just canonicalizes away
+    // later.
+    yieldOperand.set(oldTensorBBArg);
+
+    // Increment indices.
+    ++numNewOutputBuffers;
+    ++resultIndex;
+    ++oldOutputBBArgIndex;
+    ++nextOutputBBArgIndex;
+    ++nextOutputOperandIndex;
+  }
+
+  // Add buffers for inputs and the corresponding block arguments.
+  // Keep separate iterators to increment without further leaking impl. details.
+  int numNewInputBuffers = 0;
+  int oldInputBBArgIndex = numLoops;
+  int nextInputBBArgIndex = numLoops + oldInputs.size();
+  int nextInputOperandIndex = numControlOperands + oldInputs.size();
+  for (Value oldInputTensor : oldInputs) {
+    if (!oldInputTensor.getType().isa<TensorType>()) {
+      // Skip and increment the old bbarg index only.
+      ++oldInputBBArgIndex;
+      continue;
+    }
+
+    Value inputBuffer = lookup(bvm, oldInputTensor);
+    assert(inputBuffer && " missing buffer for operand");
+
+    // Insert new operand and bbArg.
+    tiledLoopOp->insertOperands(nextInputOperandIndex, inputBuffer);
+    BlockArgument newBufferBBArg =
+        body->insertArgument(nextInputBBArgIndex, inputBuffer.getType());
+    BlockArgument oldTensorBBArg = body->getArgument(oldInputBBArgIndex);
+
+    // Insert mapping and aliasing info.
+    aliasInfo.createAliasInfoEntry(newBufferBBArg);
+    aliasInfo.insertNewBufferEquivalence(oldTensorBBArg, newBufferBBArg);
+    map(bvm, oldTensorBBArg, newBufferBBArg);
+
+    // Increment indices.
+    ++numNewInputBuffers;
+    ++oldInputBBArgIndex;
+    ++nextInputBBArgIndex;
+    ++nextInputOperandIndex;
+  }
+
+  // Update segment sizes.
+  // TODO: Helper method to avoid leaking impl details.
+  tiledLoopOp->setAttr(
+      TiledLoopOp::getOperandSegmentSizeAttr(),
+      b.getI32VectorAttr(
+          {numLoops, numLoops, numLoops,
+           static_cast<int>(oldInputs.size()) + numNewInputBuffers,
+           static_cast<int>(oldOutputs.size()) + numNewOutputBuffers}));
+
+  return success();
+}
+
 /// Bufferize ExtractSliceOp to subview with optional alloc + copy depending on
 /// whether or not it is marked inplaceable.
 /// Note that `getInplaceableOpResult` on a ExtractSliceOp always returns null.
@@ -1871,8 +2015,7 @@ static LogicalResult bufferize(OpBuilder &b, VectorTransferOpInterface op,
   /// op.source().
   if (auto readOp = dyn_cast<vector::TransferReadOp>(op.getOperation())) {
     Value v = lookup(bvm, op.source());
-    if (!v)
-      return failure();
+    assert(v && "missing buffer");
     readOp.sourceMutable().assign(v);
     return success();
   }
@@ -1891,8 +2034,7 @@ static LogicalResult bufferize(OpBuilder &b, VectorTransferOpInterface op,
     // InPlace write will result in memref.tensor_load(x) which must
     // canonicalize away with one of it uses.
     newInputBuffer = lookup(bvm, writeOp.source());
-    if (!newInputBuffer)
-      return failure();
+    assert(newInputBuffer && "missing buffer");
   }
 
   // Create a new transfer_write on buffer that doesn't have a return value.
@@ -1933,6 +2075,22 @@ static LogicalResult bufferize(OpBuilder &b, scf::YieldOp yieldOp,
   return success();
 }
 
+/// Bufferization for linalg::YieldOp either does not involve tensors or just
+/// results in later canonicalization. In either case it does nothing.
+static LogicalResult bufferize(OpBuilder &b, linalg::YieldOp yieldOp,
+                               BlockAndValueMapping &bvm,
+                               BufferizationAliasInfo &aliasInfo) {
+  // Take a guard before anything else.
+  OpBuilder::InsertionGuard g(b);
+  b.setInsertionPoint(yieldOp);
+  // No tensors -> success.
+  if (!llvm::any_of(yieldOp.getOperandTypes(), isaTensor))
+    return success();
+  // linalg::YieldOp nested under TiledLoop must just canonicalize.
+  if (yieldOp->getParentOfType<TiledLoopOp>())
+    return success();
+  llvm_unreachable("unexpected yieldOp");
+}
 //===----------------------------------------------------------------------===//
 // Bufferization analyses.
 //===----------------------------------------------------------------------===//
@@ -2043,7 +2201,7 @@ bufferizationSanityCheck(scf::YieldOp yieldOp,
                          const BufferizationAliasInfo &aliasInfo) {
   auto parentForOp = yieldOp->getParentOfType<scf::ForOp>();
   if (!parentForOp)
-    return failure();
+    return yieldOp->emitError() << "not nested under ForOp";
 
   for (OpOperand &operand : yieldOp->getOpOperands()) {
     OpResult matchingForOpResult =
@@ -2057,11 +2215,10 @@ bufferizationSanityCheck(scf::YieldOp yieldOp,
         parentForOp.getRegionIterArgForOpOperand(machingForOpOperand);
     if (!aliasInfo.areEquivalentBufferizedValues(matchingForOpIterArg,
                                                  operand.get())) {
-      yieldOp->emitError()
-          << "Yield operand #" << operand.getOperandNumber()
-          << " does not bufferize to an equivalent buffer to the matching"
-          << " enclosing scf::for operand -> Fail the pass\n";
-      return failure();
+      return yieldOp->emitError()
+             << "Yield operand #" << operand.getOperandNumber()
+             << " does not bufferize to an equivalent buffer to the matching"
+             << " enclosing scf::for operand -> Fail the pass\n";
     }
   }
 
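A hypothetical example of IR this check rejects (sketch, not taken from the test suite): the value yielded by the scf.for body bufferizes to a fresh allocation rather than to the buffer of the iter_arg %bb, so the two are not equivalent and the pass fails.

func @yield_not_equivalent(%A : tensor<8xf32>) -> tensor<8xf32> {
  %c0 = constant 0 : index
  %c1 = constant 1 : index
  %c8 = constant 8 : index
  %r = scf.for %i = %c0 to %c8 step %c1 iter_args(%bb = %A) -> (tensor<8xf32>) {
    // %t is backed by a brand new buffer, unrelated to the one backing %bb.
    %t = linalg.init_tensor [8] : tensor<8xf32>
    scf.yield %t : tensor<8xf32>
  }
  return %r : tensor<8xf32>
}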
@@ -2150,10 +2307,10 @@ static LogicalResult bufferizeFuncOpInternals(
   // Walk in PreOrder to ensure ops with regions are handled before their body.
   // Since walk has to be PreOrder, we need to erase ops that require it
   // separately: this is the case for CallOp
-  // clang-format off
   SmallVector<Operation *> toErase;
-  WalkResult result =
-      funcOp.walk<WalkOrder::PreOrder>([&](Operation *op) -> WalkResult {
+  // clang-format off
+  WalkResult result = funcOp.walk<WalkOrder::PreOrder>([&](Operation *op)
+      -> WalkResult {
         WalkResult result =
             TypeSwitch<Operation *, LogicalResult>(op)
                 // Skip BufferCast and TensorLoad ops.
@@ -2161,13 +2318,15 @@ static LogicalResult bufferizeFuncOpInternals(
                      memref::TensorLoadOp>([&](auto) { return success(); })
                 .Case<tensor::CastOp,
                       tensor::DimOp,
+                      ExtractSliceOp,
                       scf::ForOp,
                       InitTensorOp,
+                      InsertSliceOp,
                       LinalgOp,
                       ReturnOp,
-                      ExtractSliceOp,
-                      InsertSliceOp,
+                      TiledLoopOp,
                       VectorTransferOpInterface,
+                      linalg::YieldOp,
                       scf::YieldOp>([&](auto op) {
                   LDBG("Begin bufferize:\n" << op << '\n');
                   return bufferize(b, op, bvm, aliasInfo);
@@ -2182,23 +2341,23 @@ static LogicalResult bufferizeFuncOpInternals(
                   LDBG("Begin bufferize:\n" << op << '\n');
                   return bufferize(b, op, bvm, aliasInfo, globalCreator);
                 })
-                .Default([&](Operation *op) {
+                .Default([&](Operation *op) -> LogicalResult {
                   auto isaTensor = [](Type t) { return t.isa<TensorType>(); };
                   if (any_of(op->getOperandTypes(), isaTensor) ||
                       any_of(op->getResultTypes(), isaTensor))
-                    return failure();
+                    return op->emitError() << "unsupported op with tensors";
                   return success();
                 });
-        // clang-format on
 
-        // Register post-walk erasure, if necessary.
-        if (isa<CallOpInterface>(op))
-          if (llvm::any_of(op->getOperandTypes(), isaTensor) ||
-              llvm::any_of(op->getResultTypes(), isaTensor))
-            toErase.push_back(op);
+    // Register post-walk erasure, if necessary.
+    if (isa<CallOpInterface>(op))
+      if (llvm::any_of(op->getOperandTypes(), isaTensor) ||
+          llvm::any_of(op->getResultTypes(), isaTensor))
+        toErase.push_back(op);
 
-        return result;
-      });
+    return result;
+  });
+  // clang-format on
   LDBG("End BufferizeFuncOpInternals:\n" << funcOp << '\n');
 
   for (Operation *op : toErase)

@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize -split-input-file -verify-diagnostics
+// RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-module-bufferize -split-input-file -verify-diagnostics
 
 func private @foo() -> tensor<?xf32>
 
@@ -85,3 +85,25 @@ func @extract_slice_fun(%A : tensor<?xf32> {linalg.inplaceable = true})
   // expected-error @+1 {{buffer result #0 not produced by an alloc}}
   return %r0: tensor<4xf32>
 }
+
+// -----
+
+func @scf_yield(%b : i1, %A : tensor<4xf32>, %B : tensor<4xf32>) -> tensor<4xf32>
+{
+  %r = scf.if %b -> (tensor<4xf32>) {
+    // expected-error @+1 {{not nested under ForOp}}
+    scf.yield %A : tensor<4xf32>
+  } else {
+    scf.yield %B : tensor<4xf32>
+  }
+  return %r: tensor<4xf32>
+}
+
+// -----
+
+func @unknown_op(%A : tensor<4xf32>) -> tensor<4xf32>
+{
+  // expected-error @+1 {{unsupported op with tensors}}
+  %r = "marklar"(%A) : (tensor<4xf32>) -> (tensor<4xf32>)
+  return %r: tensor<4xf32>
+}
@@ -498,3 +498,60 @@ func @main() {
 
 // CHECK: func private @print_memref_f32(memref<*xf32>)
 func private @print_memref_f32(tensor<*xf32>)
+
+// -----
+
+func private @some_use(memref<?xf32>)
+
+#TILE_MAP = affine_map<(d0)[s0] -> (3, -d0 + s0)>
+
+// CHECK-DAG: #[[$DYN_0D_MAP:.*]] = affine_map<()[s0] -> (s0)>
+// CHECK-DAG: #[[$DYN_1D_MAP:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
+// CHECK-DAG: #[[$TILE_MAP:.*]] = affine_map<(d0)[s0] -> (3, -d0 + s0)>
+
+// CHECK: func @tiled_dot(
+// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$DYN_1D_MAP]]>
+// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<?xf32, #[[$DYN_1D_MAP]]>
+// CHECK-SAME: %[[c:[a-zA-Z0-9]*]]: memref<f32, #[[$DYN_0D_MAP]]>
+func @tiled_dot(%A: tensor<?xf32>, %B: tensor<?xf32>, %c: tensor<f32> {linalg.inplaceable = true},
+                %effecting: memref<?xf32>) -> tensor<f32> {
+  %c3 = constant 3 : index
+  %c0 = constant 0 : index
+
+  // CHECK: %[[M:.*]] = memref.dim %[[A]], {{.*}} : memref<?xf32, #[[$DYN_1D_MAP:.*]]>
+  %0 = tensor.dim %A, %c0 : tensor<?xf32>
+
+  // CHECK: linalg.tiled_loop {{.*}} to (%[[M]]) {{.*}} %[[A]]{{.*}}%[[B]]{{.*}}outs{{.*}}%[[c]]
+  %1 = linalg.tiled_loop (%arg3) = (%c0) to (%0) step (%c3)
+       ins (%arg4 = %A: tensor<?xf32>, %use = %effecting : memref<?xf32>, %arg5 = %B: tensor<?xf32>)
+       outs (%arg6 = %c: tensor<f32>)
+       iterators["reduction"]
+  {
+    // CHECK-NOT: alloc
+
+    %2 = tensor.dim %arg4, %c0 : tensor<?xf32>
+    %3 = affine.min #TILE_MAP(%arg3)[%2]
+
+    // CHECK: %[[SV_A:.*]] = memref.subview {{.*}}
+    %4 = tensor.extract_slice %arg4[%arg3] [%3] [1] : tensor<?xf32> to tensor<?xf32>
+    %5 = tensor.dim %arg5, %c0 : tensor<?xf32>
+    %6 = affine.min #TILE_MAP(%arg3)[%5]
+
+    // CHECK: %[[SV_B:.*]] = memref.subview {{.*}}
+    %7 = tensor.extract_slice %arg5[%arg3] [%6] [1] : tensor<?xf32> to tensor<?xf32>
+
+    // CHECK: linalg.dot ins(%[[SV_A]], %[[SV_B]] : memref<?xf32, #[[$DYN_1D_MAP:.*]]>, memref<?xf32, #[[$DYN_1D_MAP:.*]]>) outs(%{{.*}} : memref<f32, #[[$DYN_0D_MAP]]>)
+    %8 = linalg.dot ins(%4, %7 : tensor<?xf32>, tensor<?xf32>) outs(%arg6 : tensor<f32>) -> tensor<f32>
+
+    // CHECK: call @some_use(%{{.*}}) : (memref<?xf32>) -> ()
+    call @some_use(%use) : (memref<?xf32>) -> ()
+
+    linalg.yield %8 : tensor<f32>
+    // CHECK: linalg.yield
+    // CHECK-NOT: tensor
+  }
+
+  // CHECK: return
+  // CHECK-NOT: tensor
+  return %1 : tensor<f32>
+}