[mlir][linalg][bufferize][NFC] Analyze OpOperands instead of OpResults

With this change, the analysis takes a look at OpOperands instead of OpResults. OpOperands can bufferize out-of-place (even if they have no aliasing OpResults). The analysis no longer cares about OpResults.

Previously, only OpResults could bufferize out-of-place, so OpOperands that have no aliasing OpResults were never copied by Comprehensive Bufferize. This does not fit well with the new CallOp bufferization that is introduced in a subsequent change. In essence, called FuncOps can then be treated as "black boxes" that may read/write to any bbArg, even if they do not return anything.

Differential Revision: https://reviews.llvm.org/D115706
This commit is contained in:
Matthias Springer 2022-01-08 00:56:13 +09:00
parent e56a9c9b5b
commit 8e2b6aac32
11 changed files with 527 additions and 467 deletions

View File

@ -204,10 +204,10 @@ public:
/// Set the inPlace bufferization spec to true.
/// Merge result's and operand's aliasing sets and iterate to a fixed point.
void bufferizeInPlace(OpResult result, OpOperand &operand);
void bufferizeInPlace(OpOperand &operand, BufferizationState &state);
/// Set the inPlace bufferization spec to false.
void bufferizeOutOfPlace(OpResult result);
void bufferizeOutOfPlace(OpOperand &operand);
/// Return true if `v1` and `v2` bufferize to equivalent buffers.
bool areEquivalentBufferizedValues(Value v1, Value v2) const {
@ -234,10 +234,10 @@ public:
void applyOnAliases(Value v, function_ref<void(Value)> fun) const;
/// Mark a value as in-place bufferized.
void markInPlace(OpResult v) { inplaceBufferized.insert(v); }
void markInPlace(OpOperand &o) { inplaceBufferized.insert(&o); }
/// Return `true` if a value was marked as in-place bufferized.
bool isInPlace(OpResult opResult) const;
bool isInPlace(OpOperand &opOperand) const;
private:
/// llvm::EquivalenceClasses wants comparable elements. This comparator uses
@ -255,7 +255,7 @@ private:
EquivalenceClassRangeType getAliases(Value v) const;
/// Set of all OpResults that were decided to bufferize in-place.
llvm::DenseSet<OpResult> inplaceBufferized;
llvm::DenseSet<OpOperand *> inplaceBufferized;
/// Auxiliary structure to store all the values a given value may alias with.
/// Alias information is "may be" conservative: In the presence of branches, a
@ -382,7 +382,7 @@ public:
Value lookupBuffer(RewriterBase &rewriter, Value tensor) const;
/// Return `true` if the given OpResult has been decided to bufferize inplace.
bool isInPlace(OpResult opResult) const;
bool isInPlace(OpOperand &opOperand) const;
/// Return the result buffer (memref) for a given OpResult (tensor). Allocate
/// a new buffer and copy over data from the existing buffer if out-of-place

View File

@ -104,18 +104,14 @@ def BufferizableOpInterface : OpInterface<"BufferizableOpInterface"> {
>,
InterfaceMethod<
/*desc=*/[{
Return `true` if the given OpResult must bufferize in-place with its
corresponding aliasing OpOperand. Alias sets and inplace attributes
will be set up accordingly before making any other bufferization
decisions. This method will never be called on OpResults that do not
have a tensor type.
Note: This method may not return `true` if the given OpResult does not
have an aliasing OpOperand.
Return `true` if the given OpOperand must bufferize in-place. Alias
sets and inplace attributes will be set up accordingly before making
any other bufferization decisions. This method will never be called on
OpOperands that do not have a tensor type.
}],
/*retType=*/"bool",
/*methodName=*/"mustBufferizeInPlace",
/*args=*/(ins "OpResult":$opResult,
/*args=*/(ins "OpOperand &":$opOperand,
"const BufferizationState &":$state),
/*methodBody=*/"",
/*defaultImplementation=*/[{

View File

@ -129,27 +129,22 @@ void BufferizationAliasInfo::insertNewBufferEquivalence(Value newValue,
}
/// Return `true` if a value was marked as in-place bufferized.
bool BufferizationAliasInfo::isInPlace(OpResult opResult) const {
return inplaceBufferized.contains(opResult);
bool BufferizationAliasInfo::isInPlace(OpOperand &operand) const {
return inplaceBufferized.contains(&operand);
}
/// Set the inPlace bufferization spec to true.
void BufferizationAliasInfo::bufferizeInPlace(OpResult result,
OpOperand &operand) {
LLVM_DEBUG(llvm::dbgs() << "bufferizeInPlace: ");
LLVM_DEBUG(result.print(llvm::dbgs()));
markInPlace(result);
aliasInfo.unionSets(result, operand.get());
void BufferizationAliasInfo::bufferizeInPlace(OpOperand &operand,
BufferizationState &state) {
markInPlace(operand);
if (OpResult result = state.getAliasingOpResult(operand))
aliasInfo.unionSets(result, operand.get());
}
/// Set the inPlace bufferization spec to false.
void BufferizationAliasInfo::bufferizeOutOfPlace(OpResult result) {
LLVM_DEBUG(llvm::dbgs() << "bufferizeOutOfPlace: ");
LLVM_DEBUG(result.print(llvm::dbgs()));
if (inplaceBufferized.contains(result))
inplaceBufferized.erase(result);
void BufferizationAliasInfo::bufferizeOutOfPlace(OpOperand &operand) {
assert(!inplaceBufferized.contains(&operand) &&
"OpOperand was already decided to bufferize inplace");
}
/// Apply `fun` to all the members of the equivalence class of `v`.
@ -339,16 +334,13 @@ mlir::linalg::comprehensive_bufferize::BufferizationState::BufferizationState(
op->walk([&](BufferizableOpInterface bufferizableOp) {
if (!options.isOpAllowed(bufferizableOp))
return WalkResult::skip();
for (OpResult opResult : bufferizableOp->getOpResults()) {
if (opResult.getType().isa<TensorType>())
if (bufferizableOp.mustBufferizeInPlace(opResult, *this)) {
SmallVector<OpOperand *> operands =
bufferizableOp.getAliasingOpOperand(opResult, *this);
assert(!operands.empty() &&
"expected that OpResult has aliasing OpOperand");
for (OpOperand *operand : operands)
aliasInfo.unionAliasSets(operand->get(), opResult);
aliasInfo.markInPlace(opResult);
for (OpOperand &opOperand : bufferizableOp->getOpOperands()) {
if (opOperand.get().getType().isa<TensorType>())
if (bufferizableOp.mustBufferizeInPlace(opOperand, *this)) {
if (OpResult opResult =
bufferizableOp.getAliasingOpResult(opOperand, *this))
aliasInfo.unionAliasSets(opOperand.get(), opResult);
aliasInfo.markInPlace(opOperand);
}
}
return WalkResult::advance();
@ -380,7 +372,7 @@ mlir::linalg::comprehensive_bufferize::BufferizationState::getResultBuffer(
return FailureOr<Value>(op->emitError("result buffer is ambiguous"));
// If bufferizing out-of-place, allocate a new buffer.
if (!aliasInfo.isInPlace(result)) {
if (!aliasInfo.isInPlace(*opOperand)) {
// Ops with multiple aliasing operands can currently not bufferize
// out-of-place.
assert(
@ -624,8 +616,8 @@ Value mlir::linalg::comprehensive_bufferize::BufferizationState::lookupBuffer(
}
bool mlir::linalg::comprehensive_bufferize::BufferizationState::isInPlace(
OpResult opResult) const {
return aliasInfo.isInPlace(opResult);
OpOperand &opOperand) const {
return aliasInfo.isInPlace(opOperand);
}
MemRefType mlir::linalg::comprehensive_bufferize::getContiguousMemRefType(

View File

@ -48,6 +48,19 @@ struct ToMemrefOpInterface
return true;
}
bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
const BufferizationState &state) const {
// It is unknown whether the resulting MemRef will be written or not.
return true;
}
bool mustBufferizeInPlace(Operation *op, OpOperand &opOperand,
const BufferizationState &state) const {
// ToMemrefOps always bufferize inplace.
// TODO: Remove ToMemrefOps from the analysis.
return true;
}
OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand,
const BufferizationState &state) const {
return OpResult();

View File

@ -74,24 +74,25 @@ static bool isaTensor(Type t) { return t.isa<TensorType>(); }
//===----------------------------------------------------------------------===//
/// Attribute marker to specify op results that can be bufferized inPlace.
constexpr StringLiteral kInPlaceResultsAttrName = "__inplace_results_attr__";
constexpr StringLiteral kInPlaceResultsAttrName = "__inplace_operands_attr__";
/// Mark whether OpResult can actually be bufferized inplace.
/// If `inPlace` is `true`, the use-def chain analysis has guaranteed that no
/// subsequent write would occur to the bufferized tensor value (i.e. the result
/// can be bufferized inplace).
static void setInPlaceOpResult(OpResult opResult, bool inPlace) {
if (!opResult)
return;
Operation *op = opResult.getOwner();
/// Mark whether OpOperand will be bufferized inplace.
static void setInPlaceOpOperand(OpOperand &opOperand, bool inPlace) {
Operation *op = opOperand.getOwner();
auto attr =
op->getAttr(kInPlaceResultsAttrName).dyn_cast_or_null<ArrayAttr>();
SmallVector<StringRef> inPlaceVector =
attr ? SmallVector<StringRef>(
llvm::to_vector<4>(attr.getAsValueRange<StringAttr>()))
: SmallVector<StringRef>(op->getNumResults(), "false");
inPlaceVector[opResult.getResultNumber()] = inPlace ? "true" : "false";
SmallVector<StringRef> inPlaceVector;
if (attr) {
inPlaceVector = SmallVector<StringRef>(
llvm::to_vector<4>(attr.getAsValueRange<StringAttr>()));
} else {
inPlaceVector = SmallVector<StringRef>(op->getNumOperands(), "none");
for (OpOperand &opOperand : op->getOpOperands())
if (opOperand.get().getType().isa<TensorType>())
inPlaceVector[opOperand.getOperandNumber()] = "false";
}
inPlaceVector[opOperand.getOperandNumber()] = inPlace ? "true" : "false";
op->setAttr(kInPlaceResultsAttrName,
OpBuilder(op).getStrArrayAttr(inPlaceVector));
}
@ -104,21 +105,11 @@ static void setInPlaceOpResult(OpResult opResult, bool inPlace) {
static bool isInplaceMemoryWrite(OpOperand &opOperand,
const BufferizationAliasInfo &aliasInfo,
BufferizationState &state) {
// The analysis does not know what happens to the result of a ToMemrefOp, so
// we assume that it is written to.
// TODO: This is a conservative implementation. This rule will have to be
// relaxed for partial bufferization.
if (isa<bufferization::ToMemrefOp>(opOperand.getOwner()))
return true;
// OpOperands without an aliasing OpResult do not write.
OpResult opResult = state.getAliasingOpResult(opOperand);
if (!opResult)
return false;
// OpOperands that do not bufferize to a memory write do not write in-place.
if (!state.bufferizesToMemoryWrite(opOperand))
return false;
// Check current bufferization decisions.
return aliasInfo.isInPlace(opResult);
return aliasInfo.isInPlace(opOperand);
}
/// Return true if, under current bufferization decisions, the buffer of `value`
@ -128,8 +119,8 @@ static bool aliasesNonWritableBuffer(Value value,
BufferizationState &state) {
bool foundNonWritableBuffer = false;
aliasInfo.applyOnAliases(value, [&](Value v) {
// Query BufferizableOpInterface to see if the OpResult is writable.
// TODO: Out-of-place bufferized OpResult could be considered writable.
// Query BufferizableOpInterface to see if the value is writable.
// TODO: Out-of-place bufferized value could be considered writable.
if (auto bufferizableOp = state.getOptions().dynCastBufferizableOp(v))
if (bufferizableOp && bufferizableOp.isWritable(v, state))
return;
@ -309,8 +300,8 @@ static bool hasReadAfterWriteInterference(
return false;
}
/// Return true if bufferizing result inplace would create a conflict. A read R
/// and a write W of the same alias set is a conflict if inplace bufferization
/// Return true if bufferizing `operand` inplace would create a conflict. A read
/// R and a write W of the same alias set is a conflict if inplace bufferization
/// of W changes the value read by R to a value different from the one that
/// would be expected by tracing back R's origin through SSA use-def chains.
/// A conflict can only be introduced by a new alias and/or an inplace
@ -338,21 +329,10 @@ static bool hasReadAfterWriteInterference(
/// Note: If `checkConsistencyOnly`, this function may be called with a null
/// OpResult. In that case, only the consistency of bufferization decisions
/// involving aliases of the given OpOperand are checked.
bool wouldCreateReadAfterWriteInterference(
OpOperand &operand, OpResult result, const DominanceInfo &domInfo,
BufferizationState &state, const BufferizationAliasInfo &aliasInfo,
static bool wouldCreateReadAfterWriteInterference(
OpOperand &operand, const DominanceInfo &domInfo, BufferizationState &state,
const BufferizationAliasInfo &aliasInfo,
bool checkConsistencyOnly = false) {
#ifndef NDEBUG
if (result) {
SmallVector<OpOperand *> opOperands = state.getAliasingOpOperand(result);
assert(llvm::find(opOperands, &operand) != opOperands.end() &&
"operand and result do not match");
} else {
assert(checkConsistencyOnly &&
"result not provided, can only check consistency");
}
#endif // NDEBUG
// Helper function to iterate on aliases of `root` and capture the reads.
auto getAliasingReads = [&](DenseSet<OpOperand *> &res, Value root) {
aliasInfo.applyOnAliases(root, [&](Value alias) {
@ -376,11 +356,11 @@ bool wouldCreateReadAfterWriteInterference(
// Collect reads and writes of all aliases of OpOperand and OpResult.
DenseSet<OpOperand *> usesRead, usesWrite;
getAliasingReads(usesRead, operand.get());
if (result)
getAliasingReads(usesRead, result);
getAliasingInplaceWrites(usesWrite, operand.get());
if (result)
if (OpResult result = state.getAliasingOpResult(operand)) {
getAliasingReads(usesRead, result);
getAliasingInplaceWrites(usesWrite, result);
}
if (!checkConsistencyOnly && state.bufferizesToMemoryWrite(operand))
usesWrite.insert(&operand);
@ -388,18 +368,12 @@ bool wouldCreateReadAfterWriteInterference(
aliasInfo);
}
/// Return true if bufferizing `opOperand` inplace with `opResult` would create
/// a write to a non-writable buffer.
/// Return true if bufferizing `opOperand` inplace would create a write to a
/// non-writable buffer.
static bool
wouldCreateWriteToNonWritableBuffer(OpOperand &opOperand, OpResult opResult,
wouldCreateWriteToNonWritableBuffer(OpOperand &opOperand,
const BufferizationAliasInfo &aliasInfo,
BufferizationState &state) {
#ifndef NDEBUG
SmallVector<OpOperand *> opOperands = state.getAliasingOpOperand(opResult);
assert(llvm::find(opOperands, &opOperand) != opOperands.end() &&
"operand and result do not match");
#endif // NDEBUG
// Certain buffers are not writeable:
// 1. A function bbArg that is not inplaceable or
// 2. A constant op.
@ -409,9 +383,12 @@ wouldCreateWriteToNonWritableBuffer(OpOperand &opOperand, OpResult opResult,
return false;
// This is a problem only if the buffer is written to via some alias.
bool hasWrite = aliasesInPlaceWrite(opResult, aliasInfo, state) ||
aliasesInPlaceWrite(opOperand.get(), aliasInfo, state) ||
bool hasWrite = aliasesInPlaceWrite(opOperand.get(), aliasInfo, state) ||
state.bufferizesToMemoryWrite(opOperand);
if (OpResult opResult = state.getAliasingOpResult(opOperand))
hasWrite |= aliasesInPlaceWrite(opResult, aliasInfo, state);
return hasWrite;
}
@ -419,30 +396,23 @@ wouldCreateWriteToNonWritableBuffer(OpOperand &opOperand, OpResult opResult,
// Bufferization analyses.
//===----------------------------------------------------------------------===//
/// Determine if `operand` can be bufferized in-place with `result`.
/// Determine if `operand` can be bufferized in-place.
static LogicalResult bufferizableInPlaceAnalysisImpl(
OpOperand &operand, OpResult result, BufferizationAliasInfo &aliasInfo,
OpOperand &operand, BufferizationAliasInfo &aliasInfo,
BufferizationState &state, const DominanceInfo &domInfo) {
#ifndef NDEBUG
SmallVector<OpOperand *> opOperands = state.getAliasingOpOperand(result);
assert(llvm::find(opOperands, &operand) != opOperands.end() &&
"operand and result do not match");
#endif // NDEBUG
bool foundInterference =
wouldCreateWriteToNonWritableBuffer(operand, result, aliasInfo, state) ||
wouldCreateReadAfterWriteInterference(operand, result, domInfo, state,
aliasInfo);
wouldCreateWriteToNonWritableBuffer(operand, aliasInfo, state) ||
wouldCreateReadAfterWriteInterference(operand, domInfo, state, aliasInfo);
if (foundInterference)
aliasInfo.bufferizeOutOfPlace(result);
aliasInfo.bufferizeOutOfPlace(operand);
else
aliasInfo.bufferizeInPlace(result, operand);
aliasInfo.bufferizeInPlace(operand, state);
return success();
}
/// Analyze the `ops` to determine which OpResults are inplaceable. Walk ops in
/// Analyze the `ops` to determine which OpOperands are inplaceable. Walk ops in
/// reverse and bufferize ops greedily. This is a good starter heuristic.
///
/// Even if an op does not read or write, it may still create an alias when
@ -478,11 +448,9 @@ static LogicalResult inPlaceAnalysis(SmallVector<Operation *> &ops,
for (OpOperand &opOperand : op->getOpOperands())
if (opOperand.get().getType().isa<TensorType>())
if (auto bufferizableOp = state.getOptions().dynCastBufferizableOp(op))
if (OpResult opResult =
bufferizableOp.getAliasingOpResult(opOperand, state))
if (failed(bufferizableInPlaceAnalysisImpl(
opOperand, opResult, aliasInfo, state, domInfo)))
return failure();
if (failed(bufferizableInPlaceAnalysisImpl(opOperand, aliasInfo,
state, domInfo)))
return failure();
return success();
}
@ -520,15 +488,12 @@ static void equivalenceAnalysis(SmallVector<Operation *> &ops,
if (auto bufferizableOp = state.getOptions().dynCastBufferizableOp(op))
for (OpResult opResult : op->getOpResults())
if (opResult.getType().isa<TensorType>())
if (aliasInfo.isInPlace(opResult)) {
SmallVector<OpOperand *> opOperands =
bufferizableOp.getAliasingOpOperand(opResult, state);
if (!opOperands.empty())
for (OpOperand *opOperand :
bufferizableOp.getAliasingOpOperand(opResult, state))
if (state.isInPlace(*opOperand))
if (bufferizableOp.bufferRelation(opResult, aliasInfo, state) ==
BufferRelation::Equivalent)
for (OpOperand *opOperand : opOperands)
aliasInfo.unionEquivalenceClasses(opResult, opOperand->get());
}
aliasInfo.unionEquivalenceClasses(opResult, opOperand->get());
}
/// Analyze equivalence of tied OpResult/OpOperand pairs of all ops contained
@ -559,15 +524,12 @@ checkAliasInfoConsistency(Operation *op, const DominanceInfo &domInfo,
if (auto bufferizableOp = options.dynCastBufferizableOp(op))
for (OpOperand &opOperand : op->getOpOperands())
if (opOperand.get().getType().isa<TensorType>()) {
OpResult opResult =
bufferizableOp.getAliasingOpResult(opOperand, state);
if (wouldCreateReadAfterWriteInterference(
opOperand, opResult, domInfo, state, aliasInfo,
opOperand, domInfo, state, aliasInfo,
/*checkConsistencyOnly=*/true)) {
// This error can happen for two reasons. Either the input IR
// already has a read-after-write conflict. Or certain
// "mustBufferizeInPlace" interface methods are implemented
// incorrectly.
// This error can happen if certain "mustBufferizeInPlace" interface
// methods are implemented incorrectly, such that the IR already has
// a RaW conflict before making any bufferization decisions.
inconsistentOp = op;
return WalkResult::interrupt();
}
@ -576,10 +538,6 @@ checkAliasInfoConsistency(Operation *op, const DominanceInfo &domInfo,
});
if (walkResult.wasInterrupted())
// This can currently happen in one situation: When a tensor is passed into
// a ToMemrefOp and read by another op consecutively. ToMemrefOps are
// currently handled conservatively. Once a tensor is passed into a
// ToMemrefOp, it may longer be read.
return inconsistentOp->emitError("input IR has RaW conflict");
return success();
}
@ -587,11 +545,13 @@ checkAliasInfoConsistency(Operation *op, const DominanceInfo &domInfo,
/// Annotate the IR with the result of the analysis. For testing/debugging only.
static void
annotateOpsWithBufferizationMarkers(Operation *op,
const BufferizationAliasInfo &aliasInfo) {
const BufferizationAliasInfo &aliasInfo,
BufferizationState &state) {
op->walk([&](Operation *op) {
for (OpResult opResult : op->getResults())
if (opResult.getType().isa<TensorType>())
setInPlaceOpResult(opResult, aliasInfo.isInPlace(opResult));
if (auto bufferizableOp = state.getOptions().dynCastBufferizableOp(op))
for (OpOperand &opOperand : op->getOpOperands())
if (opOperand.get().getType().isa<TensorType>())
setInPlaceOpOperand(opOperand, aliasInfo.isInPlace(opOperand));
});
}
@ -688,7 +648,7 @@ LogicalResult mlir::linalg::comprehensive_bufferize::runComprehensiveBufferize(
// Annotate operations if we only want to report the analysis.
if (options.testAnalysisOnly) {
annotateOpsWithBufferizationMarkers(op, aliasInfo);
annotateOpsWithBufferizationMarkers(op, aliasInfo, state);
return success();
}

View File

@ -397,6 +397,14 @@ struct YieldOpInterface
return OpResult();
}
bool mustBufferizeInPlace(Operation *op, OpOperand &opOperand,
const BufferizationState &state) const {
// Yield operands always bufferize inplace. Otherwise, an alloc + copy
// may be generated inside the block. We should not return/yield allocations
// when possible.
return true;
}
LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
const BufferizationState &state) const {
auto yieldOp = cast<linalg::YieldOp>(op);
@ -447,22 +455,26 @@ mlir::linalg::comprehensive_bufferize::linalg_ext::InitTensorEliminationStep::
WalkResult status = op->walk([&](Operation *op) {
for (OpOperand &operand : op->getOpOperands()) {
// Skip operands that do not bufferize inplace.
if (!aliasInfo.isInPlace(operand))
continue;
// Is this a matching OpOperand?
if (!anchorMatchFunc(operand))
continue;
SetVector<Value> maybeInitTensor =
state.findValueInReverseUseDefChain(operand.get(), [&](Value val) {
// Continue traversal until this function returns true.
OpResult opResult = val.dyn_cast<OpResult>();
if (!opResult)
return true;
if (!aliasInfo.isInPlace(opResult))
return true;
// Only equivalent tensors are supported at the moment.
// TODO: Support cases such as extract_slice(init_tensor).
SmallVector<OpOperand *> opOperands =
state.getAliasingOpOperand(opResult);
if (!llvm::all_of(opOperands, [&](OpOperand *operand) {
return aliasInfo.isInPlace(*operand);
}))
return true;
// Only equivalent tensors are supported at the moment.
// TODO: Support cases such as extract_slice(init_tensor)
return !llvm::all_of(opOperands, [&](OpOperand *operand) {
return aliasInfo.areEquivalentBufferizedValues(operand->get(),
opResult);
@ -542,7 +554,7 @@ LogicalResult mlir::linalg::comprehensive_bufferize::linalg_ext::
if (!insertSliceOp)
return false;
// Only inplace bufferized InsertSliceOps are eligible.
if (!aliasInfo.isInPlace(insertSliceOp->getOpResult(0)))
if (!aliasInfo.isInPlace(insertSliceOp->getOpOperand(1) /*dest*/))
return false;
return &operand == &insertSliceOp->getOpOperand(0) /*source*/;
},

View File

@ -590,6 +590,11 @@ struct CallOpInterface
return true;
}
bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
const BufferizationState &state) const {
return false;
}
OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand,
const BufferizationState &state) const {
// CallOpInterface is special, it needs to wait for the callee to be

View File

@ -42,14 +42,6 @@ struct ExecuteRegionOpInterface
return {&yieldOp->getOpOperand(resultNum)};
}
bool mustBufferizeInPlace(Operation *op, OpResult opResult,
const BufferizationState &state) const {
// ExecuteRegionOp results always bufferize in-place. Since they have no
// OpOperands, they are mostly ignored by the analysis once alias sets are
// set up.
return true;
}
// TODO: For better bufferization results, this could return `true` only if
// there is a memory write in the region.
bool isMemoryWrite(Operation *op, OpResult opResult,
@ -129,13 +121,6 @@ struct IfOpInterface
return true;
}
bool mustBufferizeInPlace(Operation *op, OpResult opResult,
const BufferizationState &state) const {
// IfOp results always bufferize in-place. Since they have no OpOperands,
// they are mostly ignored by the analysis once alias sets are set up.
return true;
}
LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
const BufferizationState &state) const {
auto ifOp = cast<scf::IfOp>(op);
@ -430,9 +415,21 @@ struct YieldOpInterface
OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand,
const BufferizationState &state) const {
if (isa<scf::IfOp>(op->getParentOp()))
return op->getParentOp()->getResult(opOperand.getOperandNumber());
if (isa<scf::ExecuteRegionOp>(op->getParentOp()))
return op->getParentOp()->getResult(opOperand.getOperandNumber());
return OpResult();
}
bool mustBufferizeInPlace(Operation *op, OpOperand &opOperand,
const BufferizationState &state) const {
// Yield operands always bufferize inplace. Otherwise, an alloc + copy
// may be generated inside the block. We should not return/yield allocations
// when possible.
return true;
}
LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
const BufferizationState &state) const {
auto yieldOp = cast<scf::YieldOp>(op);

View File

@ -149,7 +149,7 @@ struct ExtractSliceOpInterface
extractSliceOp.result().getType().cast<RankedTensorType>();
// If not inplaceable, alloc.
bool inplace = state.isInPlace(extractSliceOp->getResult(0));
bool inplace = state.isInPlace(extractSliceOp->getOpOperand(0));
Value alloc;
if (!inplace) {
FailureOr<Value> allocOrFailure =

View File

@ -17,14 +17,15 @@ func @fill_extract_matmul_1234(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -43,14 +44,15 @@ func @fill_extract_matmul_1243(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -68,14 +70,15 @@ func @fill_extract_matmul_1324(%arg0: tensor<518x518xf32> {linalg.buffer_layout
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -93,14 +96,15 @@ func @fill_extract_matmul_1342(%arg0: tensor<518x518xf32> {linalg.buffer_layout
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -118,15 +122,15 @@ func @fill_extract_matmul_1423(%arg0: tensor<518x518xf32> {linalg.buffer_layout
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -144,15 +148,15 @@ func @fill_extract_matmul_1432(%arg0: tensor<518x518xf32> {linalg.buffer_layout
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -171,14 +175,15 @@ func @fill_extract_matmul_2134(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -197,14 +202,15 @@ func @fill_extract_matmul_2143(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -223,15 +229,15 @@ func @fill_extract_matmul_2314(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -250,14 +256,15 @@ func @fill_extract_matmul_2341(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -276,14 +283,15 @@ func @fill_extract_matmul_2413(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -302,14 +310,15 @@ func @fill_extract_matmul_2431(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -328,14 +337,15 @@ func @fill_extract_matmul_3124(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["false"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -354,14 +364,15 @@ func @fill_extract_matmul_3142(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["false"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -380,14 +391,15 @@ func @fill_extract_matmul_3214(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["false"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -406,14 +418,15 @@ func @fill_extract_matmul_3241(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["false"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -432,14 +445,15 @@ func @fill_extract_matmul_3412(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["false"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -458,14 +472,15 @@ func @fill_extract_matmul_3421(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["false"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -484,15 +499,15 @@ func @fill_extract_matmul_4123(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["false"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -511,14 +526,15 @@ func @fill_extract_matmul_4132(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["false"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -537,15 +553,15 @@ func @fill_extract_matmul_4213(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["false"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -564,15 +580,15 @@ func @fill_extract_matmul_4231(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["false"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -591,14 +607,15 @@ func @fill_extract_matmul_4312(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["false"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@ -617,14 +634,15 @@ func @fill_extract_matmul_4321(
%cst_0 = arith.constant 1.000000e+00 : f32
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: {__inplace_results_attr__ = ["false"]}
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
// CHECK: {__inplace_operands_attr__ = ["false"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
%5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}