forked from OSchip/llvm-project
[mlir][linalg][bufferize][NFC] Analyze OpOperands instead of OpResults
With this change, the analysis takes a look at OpOperands instead of OpResults. OpOperands can bufferize out-of-place (even if they have no aliasing OpResults). The analysis does no longer care about OpResults. Previously, only OpResults could bufferize out-of-place, so OpOperands that have no aliasing OpResults were never copied by Comprehensive Bufferize. This does not fit well with the new CallOp bufferization that is introduced in a subsequent change. In essence, called FuncOps can then be treated as "black boxes" that may read/write to any bbArg, even if they do not return anything. Differential Revision: https://reviews.llvm.org/D115706
This commit is contained in:
parent
e56a9c9b5b
commit
8e2b6aac32
|
@ -204,10 +204,10 @@ public:
|
|||
|
||||
/// Set the inPlace bufferization spec to true.
|
||||
/// Merge result's and operand's aliasing sets and iterate to a fixed point.
|
||||
void bufferizeInPlace(OpResult result, OpOperand &operand);
|
||||
void bufferizeInPlace(OpOperand &operand, BufferizationState &state);
|
||||
|
||||
/// Set the inPlace bufferization spec to false.
|
||||
void bufferizeOutOfPlace(OpResult result);
|
||||
void bufferizeOutOfPlace(OpOperand &operand);
|
||||
|
||||
/// Return true if `v1` and `v2` bufferize to equivalent buffers.
|
||||
bool areEquivalentBufferizedValues(Value v1, Value v2) const {
|
||||
|
@ -234,10 +234,10 @@ public:
|
|||
void applyOnAliases(Value v, function_ref<void(Value)> fun) const;
|
||||
|
||||
/// Mark a value as in-place bufferized.
|
||||
void markInPlace(OpResult v) { inplaceBufferized.insert(v); }
|
||||
void markInPlace(OpOperand &o) { inplaceBufferized.insert(&o); }
|
||||
|
||||
/// Return `true` if a value was marked as in-place bufferized.
|
||||
bool isInPlace(OpResult opResult) const;
|
||||
bool isInPlace(OpOperand &opOperand) const;
|
||||
|
||||
private:
|
||||
/// llvm::EquivalenceClasses wants comparable elements. This comparator uses
|
||||
|
@ -255,7 +255,7 @@ private:
|
|||
EquivalenceClassRangeType getAliases(Value v) const;
|
||||
|
||||
/// Set of all OpResults that were decided to bufferize in-place.
|
||||
llvm::DenseSet<OpResult> inplaceBufferized;
|
||||
llvm::DenseSet<OpOperand *> inplaceBufferized;
|
||||
|
||||
/// Auxiliary structure to store all the values a given value may alias with.
|
||||
/// Alias information is "may be" conservative: In the presence of branches, a
|
||||
|
@ -382,7 +382,7 @@ public:
|
|||
Value lookupBuffer(RewriterBase &rewriter, Value tensor) const;
|
||||
|
||||
/// Return `true` if the given OpResult has been decided to bufferize inplace.
|
||||
bool isInPlace(OpResult opResult) const;
|
||||
bool isInPlace(OpOperand &opOperand) const;
|
||||
|
||||
/// Return the result buffer (memref) for a given OpResult (tensor). Allocate
|
||||
/// a new buffer and copy over data from the existing buffer if out-of-place
|
||||
|
|
|
@ -104,18 +104,14 @@ def BufferizableOpInterface : OpInterface<"BufferizableOpInterface"> {
|
|||
>,
|
||||
InterfaceMethod<
|
||||
/*desc=*/[{
|
||||
Return `true` if the given OpResult must bufferize in-place with its
|
||||
corresponding aliasing OpOperand. Alias sets and inplace attributes
|
||||
will be set up accordingly before making any other bufferization
|
||||
decisions. This method will never be called on OpResults that do not
|
||||
have a tensor type.
|
||||
|
||||
Note: This method may not return `true` if the given OpResult does not
|
||||
have an aliasing OpOperand.
|
||||
Return `true` if the given OpOperand must bufferize in-place. Alias
|
||||
sets and inplace attributes will be set up accordingly before making
|
||||
any other bufferization decisions. This method will never be called on
|
||||
OpOperands that do not have a tensor type.
|
||||
}],
|
||||
/*retType=*/"bool",
|
||||
/*methodName=*/"mustBufferizeInPlace",
|
||||
/*args=*/(ins "OpResult":$opResult,
|
||||
/*args=*/(ins "OpOperand &":$opOperand,
|
||||
"const BufferizationState &":$state),
|
||||
/*methodBody=*/"",
|
||||
/*defaultImplementation=*/[{
|
||||
|
|
|
@ -129,27 +129,22 @@ void BufferizationAliasInfo::insertNewBufferEquivalence(Value newValue,
|
|||
}
|
||||
|
||||
/// Return `true` if a value was marked as in-place bufferized.
|
||||
bool BufferizationAliasInfo::isInPlace(OpResult opResult) const {
|
||||
return inplaceBufferized.contains(opResult);
|
||||
bool BufferizationAliasInfo::isInPlace(OpOperand &operand) const {
|
||||
return inplaceBufferized.contains(&operand);
|
||||
}
|
||||
|
||||
/// Set the inPlace bufferization spec to true.
|
||||
void BufferizationAliasInfo::bufferizeInPlace(OpResult result,
|
||||
OpOperand &operand) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "bufferizeInPlace: ");
|
||||
LLVM_DEBUG(result.print(llvm::dbgs()));
|
||||
|
||||
markInPlace(result);
|
||||
aliasInfo.unionSets(result, operand.get());
|
||||
void BufferizationAliasInfo::bufferizeInPlace(OpOperand &operand,
|
||||
BufferizationState &state) {
|
||||
markInPlace(operand);
|
||||
if (OpResult result = state.getAliasingOpResult(operand))
|
||||
aliasInfo.unionSets(result, operand.get());
|
||||
}
|
||||
|
||||
/// Set the inPlace bufferization spec to false.
|
||||
void BufferizationAliasInfo::bufferizeOutOfPlace(OpResult result) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "bufferizeOutOfPlace: ");
|
||||
LLVM_DEBUG(result.print(llvm::dbgs()));
|
||||
|
||||
if (inplaceBufferized.contains(result))
|
||||
inplaceBufferized.erase(result);
|
||||
void BufferizationAliasInfo::bufferizeOutOfPlace(OpOperand &operand) {
|
||||
assert(!inplaceBufferized.contains(&operand) &&
|
||||
"OpOperand was already decided to bufferize inplace");
|
||||
}
|
||||
|
||||
/// Apply `fun` to all the members of the equivalence class of `v`.
|
||||
|
@ -339,16 +334,13 @@ mlir::linalg::comprehensive_bufferize::BufferizationState::BufferizationState(
|
|||
op->walk([&](BufferizableOpInterface bufferizableOp) {
|
||||
if (!options.isOpAllowed(bufferizableOp))
|
||||
return WalkResult::skip();
|
||||
for (OpResult opResult : bufferizableOp->getOpResults()) {
|
||||
if (opResult.getType().isa<TensorType>())
|
||||
if (bufferizableOp.mustBufferizeInPlace(opResult, *this)) {
|
||||
SmallVector<OpOperand *> operands =
|
||||
bufferizableOp.getAliasingOpOperand(opResult, *this);
|
||||
assert(!operands.empty() &&
|
||||
"expected that OpResult has aliasing OpOperand");
|
||||
for (OpOperand *operand : operands)
|
||||
aliasInfo.unionAliasSets(operand->get(), opResult);
|
||||
aliasInfo.markInPlace(opResult);
|
||||
for (OpOperand &opOperand : bufferizableOp->getOpOperands()) {
|
||||
if (opOperand.get().getType().isa<TensorType>())
|
||||
if (bufferizableOp.mustBufferizeInPlace(opOperand, *this)) {
|
||||
if (OpResult opResult =
|
||||
bufferizableOp.getAliasingOpResult(opOperand, *this))
|
||||
aliasInfo.unionAliasSets(opOperand.get(), opResult);
|
||||
aliasInfo.markInPlace(opOperand);
|
||||
}
|
||||
}
|
||||
return WalkResult::advance();
|
||||
|
@ -380,7 +372,7 @@ mlir::linalg::comprehensive_bufferize::BufferizationState::getResultBuffer(
|
|||
return FailureOr<Value>(op->emitError("result buffer is ambiguous"));
|
||||
|
||||
// If bufferizing out-of-place, allocate a new buffer.
|
||||
if (!aliasInfo.isInPlace(result)) {
|
||||
if (!aliasInfo.isInPlace(*opOperand)) {
|
||||
// Ops with multiple aliasing operands can currently not bufferize
|
||||
// out-of-place.
|
||||
assert(
|
||||
|
@ -624,8 +616,8 @@ Value mlir::linalg::comprehensive_bufferize::BufferizationState::lookupBuffer(
|
|||
}
|
||||
|
||||
bool mlir::linalg::comprehensive_bufferize::BufferizationState::isInPlace(
|
||||
OpResult opResult) const {
|
||||
return aliasInfo.isInPlace(opResult);
|
||||
OpOperand &opOperand) const {
|
||||
return aliasInfo.isInPlace(opOperand);
|
||||
}
|
||||
|
||||
MemRefType mlir::linalg::comprehensive_bufferize::getContiguousMemRefType(
|
||||
|
|
|
@ -48,6 +48,19 @@ struct ToMemrefOpInterface
|
|||
return true;
|
||||
}
|
||||
|
||||
bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
|
||||
const BufferizationState &state) const {
|
||||
// It is unknown whether the resulting MemRef will be written or not.
|
||||
return true;
|
||||
}
|
||||
|
||||
bool mustBufferizeInPlace(Operation *op, OpOperand &opOperand,
|
||||
const BufferizationState &state) const {
|
||||
// ToMemrefOps always bufferize inplace.
|
||||
// TODO: Remove ToMemrefOps from the analysis.
|
||||
return true;
|
||||
}
|
||||
|
||||
OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand,
|
||||
const BufferizationState &state) const {
|
||||
return OpResult();
|
||||
|
|
|
@ -74,24 +74,25 @@ static bool isaTensor(Type t) { return t.isa<TensorType>(); }
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Attribute marker to specify op results that can be bufferized inPlace.
|
||||
constexpr StringLiteral kInPlaceResultsAttrName = "__inplace_results_attr__";
|
||||
constexpr StringLiteral kInPlaceResultsAttrName = "__inplace_operands_attr__";
|
||||
|
||||
/// Mark whether OpResult can actually be bufferized inplace.
|
||||
/// If `inPlace` is `true`, the use-def chain analysis has guaranteed that no
|
||||
/// subsequent write would occur to the bufferized tensor value (i.e. the result
|
||||
/// can be bufferized inplace).
|
||||
static void setInPlaceOpResult(OpResult opResult, bool inPlace) {
|
||||
if (!opResult)
|
||||
return;
|
||||
|
||||
Operation *op = opResult.getOwner();
|
||||
/// Mark whether OpOperand will be bufferized inplace.
|
||||
static void setInPlaceOpOperand(OpOperand &opOperand, bool inPlace) {
|
||||
Operation *op = opOperand.getOwner();
|
||||
auto attr =
|
||||
op->getAttr(kInPlaceResultsAttrName).dyn_cast_or_null<ArrayAttr>();
|
||||
SmallVector<StringRef> inPlaceVector =
|
||||
attr ? SmallVector<StringRef>(
|
||||
llvm::to_vector<4>(attr.getAsValueRange<StringAttr>()))
|
||||
: SmallVector<StringRef>(op->getNumResults(), "false");
|
||||
inPlaceVector[opResult.getResultNumber()] = inPlace ? "true" : "false";
|
||||
SmallVector<StringRef> inPlaceVector;
|
||||
if (attr) {
|
||||
inPlaceVector = SmallVector<StringRef>(
|
||||
llvm::to_vector<4>(attr.getAsValueRange<StringAttr>()));
|
||||
} else {
|
||||
inPlaceVector = SmallVector<StringRef>(op->getNumOperands(), "none");
|
||||
for (OpOperand &opOperand : op->getOpOperands())
|
||||
if (opOperand.get().getType().isa<TensorType>())
|
||||
inPlaceVector[opOperand.getOperandNumber()] = "false";
|
||||
}
|
||||
|
||||
inPlaceVector[opOperand.getOperandNumber()] = inPlace ? "true" : "false";
|
||||
op->setAttr(kInPlaceResultsAttrName,
|
||||
OpBuilder(op).getStrArrayAttr(inPlaceVector));
|
||||
}
|
||||
|
@ -104,21 +105,11 @@ static void setInPlaceOpResult(OpResult opResult, bool inPlace) {
|
|||
static bool isInplaceMemoryWrite(OpOperand &opOperand,
|
||||
const BufferizationAliasInfo &aliasInfo,
|
||||
BufferizationState &state) {
|
||||
// The analysis does not know what happens to the result of a ToMemrefOp, so
|
||||
// we assume that it is written to.
|
||||
// TODO: This is a conservative implementation. This rule will have to be
|
||||
// relaxed for partial bufferization.
|
||||
if (isa<bufferization::ToMemrefOp>(opOperand.getOwner()))
|
||||
return true;
|
||||
// OpOperands without an aliasing OpResult do not write.
|
||||
OpResult opResult = state.getAliasingOpResult(opOperand);
|
||||
if (!opResult)
|
||||
return false;
|
||||
// OpOperands that do not bufferize to a memory write do not write in-place.
|
||||
if (!state.bufferizesToMemoryWrite(opOperand))
|
||||
return false;
|
||||
// Check current bufferization decisions.
|
||||
return aliasInfo.isInPlace(opResult);
|
||||
return aliasInfo.isInPlace(opOperand);
|
||||
}
|
||||
|
||||
/// Return true if, under current bufferization decisions, the buffer of `value`
|
||||
|
@ -128,8 +119,8 @@ static bool aliasesNonWritableBuffer(Value value,
|
|||
BufferizationState &state) {
|
||||
bool foundNonWritableBuffer = false;
|
||||
aliasInfo.applyOnAliases(value, [&](Value v) {
|
||||
// Query BufferizableOpInterface to see if the OpResult is writable.
|
||||
// TODO: Out-of-place bufferized OpResult could be considered writable.
|
||||
// Query BufferizableOpInterface to see if the value is writable.
|
||||
// TODO: Out-of-place bufferized value could be considered writable.
|
||||
if (auto bufferizableOp = state.getOptions().dynCastBufferizableOp(v))
|
||||
if (bufferizableOp && bufferizableOp.isWritable(v, state))
|
||||
return;
|
||||
|
@ -309,8 +300,8 @@ static bool hasReadAfterWriteInterference(
|
|||
return false;
|
||||
}
|
||||
|
||||
/// Return true if bufferizing result inplace would create a conflict. A read R
|
||||
/// and a write W of the same alias set is a conflict if inplace bufferization
|
||||
/// Return true if bufferizing `operand` inplace would create a conflict. A read
|
||||
/// R and a write W of the same alias set is a conflict if inplace bufferization
|
||||
/// of W changes the value read by R to a value different from the one that
|
||||
/// would be expected by tracing back R's origin through SSA use-def chains.
|
||||
/// A conflict can only be introduced by a new alias and/or an inplace
|
||||
|
@ -338,21 +329,10 @@ static bool hasReadAfterWriteInterference(
|
|||
/// Note: If `checkConsistencyOnly`, this function may be called with a null
|
||||
/// OpResult. In that case, only the consistency of bufferization decisions
|
||||
/// involving aliases of the given OpOperand are checked.
|
||||
bool wouldCreateReadAfterWriteInterference(
|
||||
OpOperand &operand, OpResult result, const DominanceInfo &domInfo,
|
||||
BufferizationState &state, const BufferizationAliasInfo &aliasInfo,
|
||||
static bool wouldCreateReadAfterWriteInterference(
|
||||
OpOperand &operand, const DominanceInfo &domInfo, BufferizationState &state,
|
||||
const BufferizationAliasInfo &aliasInfo,
|
||||
bool checkConsistencyOnly = false) {
|
||||
#ifndef NDEBUG
|
||||
if (result) {
|
||||
SmallVector<OpOperand *> opOperands = state.getAliasingOpOperand(result);
|
||||
assert(llvm::find(opOperands, &operand) != opOperands.end() &&
|
||||
"operand and result do not match");
|
||||
} else {
|
||||
assert(checkConsistencyOnly &&
|
||||
"result not provided, can only check consistency");
|
||||
}
|
||||
#endif // NDEBUG
|
||||
|
||||
// Helper function to iterate on aliases of `root` and capture the reads.
|
||||
auto getAliasingReads = [&](DenseSet<OpOperand *> &res, Value root) {
|
||||
aliasInfo.applyOnAliases(root, [&](Value alias) {
|
||||
|
@ -376,11 +356,11 @@ bool wouldCreateReadAfterWriteInterference(
|
|||
// Collect reads and writes of all aliases of OpOperand and OpResult.
|
||||
DenseSet<OpOperand *> usesRead, usesWrite;
|
||||
getAliasingReads(usesRead, operand.get());
|
||||
if (result)
|
||||
getAliasingReads(usesRead, result);
|
||||
getAliasingInplaceWrites(usesWrite, operand.get());
|
||||
if (result)
|
||||
if (OpResult result = state.getAliasingOpResult(operand)) {
|
||||
getAliasingReads(usesRead, result);
|
||||
getAliasingInplaceWrites(usesWrite, result);
|
||||
}
|
||||
if (!checkConsistencyOnly && state.bufferizesToMemoryWrite(operand))
|
||||
usesWrite.insert(&operand);
|
||||
|
||||
|
@ -388,18 +368,12 @@ bool wouldCreateReadAfterWriteInterference(
|
|||
aliasInfo);
|
||||
}
|
||||
|
||||
/// Return true if bufferizing `opOperand` inplace with `opResult` would create
|
||||
/// a write to a non-writable buffer.
|
||||
/// Return true if bufferizing `opOperand` inplace would create a write to a
|
||||
/// non-writable buffer.
|
||||
static bool
|
||||
wouldCreateWriteToNonWritableBuffer(OpOperand &opOperand, OpResult opResult,
|
||||
wouldCreateWriteToNonWritableBuffer(OpOperand &opOperand,
|
||||
const BufferizationAliasInfo &aliasInfo,
|
||||
BufferizationState &state) {
|
||||
#ifndef NDEBUG
|
||||
SmallVector<OpOperand *> opOperands = state.getAliasingOpOperand(opResult);
|
||||
assert(llvm::find(opOperands, &opOperand) != opOperands.end() &&
|
||||
"operand and result do not match");
|
||||
#endif // NDEBUG
|
||||
|
||||
// Certain buffers are not writeable:
|
||||
// 1. A function bbArg that is not inplaceable or
|
||||
// 2. A constant op.
|
||||
|
@ -409,9 +383,12 @@ wouldCreateWriteToNonWritableBuffer(OpOperand &opOperand, OpResult opResult,
|
|||
return false;
|
||||
|
||||
// This is a problem only if the buffer is written to via some alias.
|
||||
bool hasWrite = aliasesInPlaceWrite(opResult, aliasInfo, state) ||
|
||||
aliasesInPlaceWrite(opOperand.get(), aliasInfo, state) ||
|
||||
bool hasWrite = aliasesInPlaceWrite(opOperand.get(), aliasInfo, state) ||
|
||||
state.bufferizesToMemoryWrite(opOperand);
|
||||
|
||||
if (OpResult opResult = state.getAliasingOpResult(opOperand))
|
||||
hasWrite |= aliasesInPlaceWrite(opResult, aliasInfo, state);
|
||||
|
||||
return hasWrite;
|
||||
}
|
||||
|
||||
|
@ -419,30 +396,23 @@ wouldCreateWriteToNonWritableBuffer(OpOperand &opOperand, OpResult opResult,
|
|||
// Bufferization analyses.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Determine if `operand` can be bufferized in-place with `result`.
|
||||
/// Determine if `operand` can be bufferized in-place.
|
||||
static LogicalResult bufferizableInPlaceAnalysisImpl(
|
||||
OpOperand &operand, OpResult result, BufferizationAliasInfo &aliasInfo,
|
||||
OpOperand &operand, BufferizationAliasInfo &aliasInfo,
|
||||
BufferizationState &state, const DominanceInfo &domInfo) {
|
||||
#ifndef NDEBUG
|
||||
SmallVector<OpOperand *> opOperands = state.getAliasingOpOperand(result);
|
||||
assert(llvm::find(opOperands, &operand) != opOperands.end() &&
|
||||
"operand and result do not match");
|
||||
#endif // NDEBUG
|
||||
|
||||
bool foundInterference =
|
||||
wouldCreateWriteToNonWritableBuffer(operand, result, aliasInfo, state) ||
|
||||
wouldCreateReadAfterWriteInterference(operand, result, domInfo, state,
|
||||
aliasInfo);
|
||||
wouldCreateWriteToNonWritableBuffer(operand, aliasInfo, state) ||
|
||||
wouldCreateReadAfterWriteInterference(operand, domInfo, state, aliasInfo);
|
||||
|
||||
if (foundInterference)
|
||||
aliasInfo.bufferizeOutOfPlace(result);
|
||||
aliasInfo.bufferizeOutOfPlace(operand);
|
||||
else
|
||||
aliasInfo.bufferizeInPlace(result, operand);
|
||||
aliasInfo.bufferizeInPlace(operand, state);
|
||||
|
||||
return success();
|
||||
}
|
||||
|
||||
/// Analyze the `ops` to determine which OpResults are inplaceable. Walk ops in
|
||||
/// Analyze the `ops` to determine which OpOperands are inplaceable. Walk ops in
|
||||
/// reverse and bufferize ops greedily. This is a good starter heuristic.
|
||||
///
|
||||
/// Even if an op does not read or write, it may still create an alias when
|
||||
|
@ -478,11 +448,9 @@ static LogicalResult inPlaceAnalysis(SmallVector<Operation *> &ops,
|
|||
for (OpOperand &opOperand : op->getOpOperands())
|
||||
if (opOperand.get().getType().isa<TensorType>())
|
||||
if (auto bufferizableOp = state.getOptions().dynCastBufferizableOp(op))
|
||||
if (OpResult opResult =
|
||||
bufferizableOp.getAliasingOpResult(opOperand, state))
|
||||
if (failed(bufferizableInPlaceAnalysisImpl(
|
||||
opOperand, opResult, aliasInfo, state, domInfo)))
|
||||
return failure();
|
||||
if (failed(bufferizableInPlaceAnalysisImpl(opOperand, aliasInfo,
|
||||
state, domInfo)))
|
||||
return failure();
|
||||
|
||||
return success();
|
||||
}
|
||||
|
@ -520,15 +488,12 @@ static void equivalenceAnalysis(SmallVector<Operation *> &ops,
|
|||
if (auto bufferizableOp = state.getOptions().dynCastBufferizableOp(op))
|
||||
for (OpResult opResult : op->getOpResults())
|
||||
if (opResult.getType().isa<TensorType>())
|
||||
if (aliasInfo.isInPlace(opResult)) {
|
||||
SmallVector<OpOperand *> opOperands =
|
||||
bufferizableOp.getAliasingOpOperand(opResult, state);
|
||||
if (!opOperands.empty())
|
||||
for (OpOperand *opOperand :
|
||||
bufferizableOp.getAliasingOpOperand(opResult, state))
|
||||
if (state.isInPlace(*opOperand))
|
||||
if (bufferizableOp.bufferRelation(opResult, aliasInfo, state) ==
|
||||
BufferRelation::Equivalent)
|
||||
for (OpOperand *opOperand : opOperands)
|
||||
aliasInfo.unionEquivalenceClasses(opResult, opOperand->get());
|
||||
}
|
||||
aliasInfo.unionEquivalenceClasses(opResult, opOperand->get());
|
||||
}
|
||||
|
||||
/// Analyze equivalence of tied OpResult/OpOperand pairs of all ops contained
|
||||
|
@ -559,15 +524,12 @@ checkAliasInfoConsistency(Operation *op, const DominanceInfo &domInfo,
|
|||
if (auto bufferizableOp = options.dynCastBufferizableOp(op))
|
||||
for (OpOperand &opOperand : op->getOpOperands())
|
||||
if (opOperand.get().getType().isa<TensorType>()) {
|
||||
OpResult opResult =
|
||||
bufferizableOp.getAliasingOpResult(opOperand, state);
|
||||
if (wouldCreateReadAfterWriteInterference(
|
||||
opOperand, opResult, domInfo, state, aliasInfo,
|
||||
opOperand, domInfo, state, aliasInfo,
|
||||
/*checkConsistencyOnly=*/true)) {
|
||||
// This error can happen for two reasons. Either the input IR
|
||||
// already has a read-after-write conflict. Or certain
|
||||
// "mustBufferizeInPlace" interface methods are implemented
|
||||
// incorrectly.
|
||||
// This error can happen if certain "mustBufferizeInPlace" interface
|
||||
// methods are implemented incorrectly, such that the IR already has
|
||||
// a RaW conflict before making any bufferization decisions.
|
||||
inconsistentOp = op;
|
||||
return WalkResult::interrupt();
|
||||
}
|
||||
|
@ -576,10 +538,6 @@ checkAliasInfoConsistency(Operation *op, const DominanceInfo &domInfo,
|
|||
});
|
||||
|
||||
if (walkResult.wasInterrupted())
|
||||
// This can currently happen in one situation: When a tensor is passed into
|
||||
// a ToMemrefOp and read by another op consecutively. ToMemrefOps are
|
||||
// currently handled conservatively. Once a tensor is passed into a
|
||||
// ToMemrefOp, it may no longer be read.
|
||||
return inconsistentOp->emitError("input IR has RaW conflict");
|
||||
return success();
|
||||
}
|
||||
|
@ -587,11 +545,13 @@ checkAliasInfoConsistency(Operation *op, const DominanceInfo &domInfo,
|
|||
/// Annotate the IR with the result of the analysis. For testing/debugging only.
|
||||
static void
|
||||
annotateOpsWithBufferizationMarkers(Operation *op,
|
||||
const BufferizationAliasInfo &aliasInfo) {
|
||||
const BufferizationAliasInfo &aliasInfo,
|
||||
BufferizationState &state) {
|
||||
op->walk([&](Operation *op) {
|
||||
for (OpResult opResult : op->getResults())
|
||||
if (opResult.getType().isa<TensorType>())
|
||||
setInPlaceOpResult(opResult, aliasInfo.isInPlace(opResult));
|
||||
if (auto bufferizableOp = state.getOptions().dynCastBufferizableOp(op))
|
||||
for (OpOperand &opOperand : op->getOpOperands())
|
||||
if (opOperand.get().getType().isa<TensorType>())
|
||||
setInPlaceOpOperand(opOperand, aliasInfo.isInPlace(opOperand));
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -688,7 +648,7 @@ LogicalResult mlir::linalg::comprehensive_bufferize::runComprehensiveBufferize(
|
|||
|
||||
// Annotate operations if we only want to report the analysis.
|
||||
if (options.testAnalysisOnly) {
|
||||
annotateOpsWithBufferizationMarkers(op, aliasInfo);
|
||||
annotateOpsWithBufferizationMarkers(op, aliasInfo, state);
|
||||
return success();
|
||||
}
|
||||
|
||||
|
|
|
@ -397,6 +397,14 @@ struct YieldOpInterface
|
|||
return OpResult();
|
||||
}
|
||||
|
||||
bool mustBufferizeInPlace(Operation *op, OpOperand &opOperand,
|
||||
const BufferizationState &state) const {
|
||||
// Yield operands always bufferize inplace. Otherwise, an alloc + copy
|
||||
// may be generated inside the block. We should not return/yield allocations
|
||||
// when possible.
|
||||
return true;
|
||||
}
|
||||
|
||||
LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
|
||||
const BufferizationState &state) const {
|
||||
auto yieldOp = cast<linalg::YieldOp>(op);
|
||||
|
@ -447,22 +455,26 @@ mlir::linalg::comprehensive_bufferize::linalg_ext::InitTensorEliminationStep::
|
|||
|
||||
WalkResult status = op->walk([&](Operation *op) {
|
||||
for (OpOperand &operand : op->getOpOperands()) {
|
||||
// Skip operands that do not bufferize inplace.
|
||||
if (!aliasInfo.isInPlace(operand))
|
||||
continue;
|
||||
// Is this a matching OpOperand?
|
||||
if (!anchorMatchFunc(operand))
|
||||
continue;
|
||||
|
||||
SetVector<Value> maybeInitTensor =
|
||||
state.findValueInReverseUseDefChain(operand.get(), [&](Value val) {
|
||||
// Continue traversal until this function returns true.
|
||||
OpResult opResult = val.dyn_cast<OpResult>();
|
||||
if (!opResult)
|
||||
return true;
|
||||
if (!aliasInfo.isInPlace(opResult))
|
||||
return true;
|
||||
// Only equivalent tensors are supported at the moment.
|
||||
// TODO: Support cases such as extract_slice(init_tensor).
|
||||
SmallVector<OpOperand *> opOperands =
|
||||
state.getAliasingOpOperand(opResult);
|
||||
if (!llvm::all_of(opOperands, [&](OpOperand *operand) {
|
||||
return aliasInfo.isInPlace(*operand);
|
||||
}))
|
||||
return true;
|
||||
// Only equivalent tensors are supported at the moment.
|
||||
// TODO: Support cases such as extract_slice(init_tensor)
|
||||
return !llvm::all_of(opOperands, [&](OpOperand *operand) {
|
||||
return aliasInfo.areEquivalentBufferizedValues(operand->get(),
|
||||
opResult);
|
||||
|
@ -542,7 +554,7 @@ LogicalResult mlir::linalg::comprehensive_bufferize::linalg_ext::
|
|||
if (!insertSliceOp)
|
||||
return false;
|
||||
// Only inplace bufferized InsertSliceOps are eligible.
|
||||
if (!aliasInfo.isInPlace(insertSliceOp->getOpResult(0)))
|
||||
if (!aliasInfo.isInPlace(insertSliceOp->getOpOperand(1) /*dest*/))
|
||||
return false;
|
||||
return &operand == &insertSliceOp->getOpOperand(0) /*source*/;
|
||||
},
|
||||
|
|
|
@ -590,6 +590,11 @@ struct CallOpInterface
|
|||
return true;
|
||||
}
|
||||
|
||||
bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
|
||||
const BufferizationState &state) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand,
|
||||
const BufferizationState &state) const {
|
||||
// CallOpInterface is special, it needs to wait for the callee to be
|
||||
|
|
|
@ -42,14 +42,6 @@ struct ExecuteRegionOpInterface
|
|||
return {&yieldOp->getOpOperand(resultNum)};
|
||||
}
|
||||
|
||||
bool mustBufferizeInPlace(Operation *op, OpResult opResult,
|
||||
const BufferizationState &state) const {
|
||||
// ExecuteRegionOp results always bufferize in-place. Since they have no
|
||||
// OpOperands, they are mostly ignored by the analysis once alias sets are
|
||||
// set up.
|
||||
return true;
|
||||
}
|
||||
|
||||
// TODO: For better bufferization results, this could return `true` only if
|
||||
// there is a memory write in the region.
|
||||
bool isMemoryWrite(Operation *op, OpResult opResult,
|
||||
|
@ -129,13 +121,6 @@ struct IfOpInterface
|
|||
return true;
|
||||
}
|
||||
|
||||
bool mustBufferizeInPlace(Operation *op, OpResult opResult,
|
||||
const BufferizationState &state) const {
|
||||
// IfOp results always bufferize in-place. Since they have no OpOperands,
|
||||
// they are mostly ignored by the analysis once alias sets are set up.
|
||||
return true;
|
||||
}
|
||||
|
||||
LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
|
||||
const BufferizationState &state) const {
|
||||
auto ifOp = cast<scf::IfOp>(op);
|
||||
|
@ -430,9 +415,21 @@ struct YieldOpInterface
|
|||
|
||||
OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand,
|
||||
const BufferizationState &state) const {
|
||||
if (isa<scf::IfOp>(op->getParentOp()))
|
||||
return op->getParentOp()->getResult(opOperand.getOperandNumber());
|
||||
if (isa<scf::ExecuteRegionOp>(op->getParentOp()))
|
||||
return op->getParentOp()->getResult(opOperand.getOperandNumber());
|
||||
return OpResult();
|
||||
}
|
||||
|
||||
bool mustBufferizeInPlace(Operation *op, OpOperand &opOperand,
|
||||
const BufferizationState &state) const {
|
||||
// Yield operands always bufferize inplace. Otherwise, an alloc + copy
|
||||
// may be generated inside the block. We should not return/yield allocations
|
||||
// when possible.
|
||||
return true;
|
||||
}
|
||||
|
||||
LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
|
||||
const BufferizationState &state) const {
|
||||
auto yieldOp = cast<scf::YieldOp>(op);
|
||||
|
|
|
@ -149,7 +149,7 @@ struct ExtractSliceOpInterface
|
|||
extractSliceOp.result().getType().cast<RankedTensorType>();
|
||||
|
||||
// If not inplaceable, alloc.
|
||||
bool inplace = state.isInPlace(extractSliceOp->getResult(0));
|
||||
bool inplace = state.isInPlace(extractSliceOp->getOpOperand(0));
|
||||
Value alloc;
|
||||
if (!inplace) {
|
||||
FailureOr<Value> allocOrFailure =
|
||||
|
|
|
@ -17,14 +17,15 @@ func @fill_extract_matmul_1234(
|
|||
%cst_0 = arith.constant 1.000000e+00 : f32
|
||||
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_results_attr__ = ["false"]}
|
||||
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
|
||||
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
|
||||
%5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
|
||||
return %5 : tensor<256x256xf32>
|
||||
}
|
||||
|
@ -43,14 +44,15 @@ func @fill_extract_matmul_1243(
|
|||
%cst_0 = arith.constant 1.000000e+00 : f32
|
||||
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_results_attr__ = ["false"]}
|
||||
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
|
||||
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
|
||||
%5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
|
||||
return %5 : tensor<256x256xf32>
|
||||
}
|
||||
|
@ -68,14 +70,15 @@ func @fill_extract_matmul_1324(%arg0: tensor<518x518xf32> {linalg.buffer_layout
|
|||
%cst_0 = arith.constant 1.000000e+00 : f32
|
||||
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_results_attr__ = ["false"]}
|
||||
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
|
||||
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
|
||||
%5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
|
||||
return %5 : tensor<256x256xf32>
|
||||
}
|
||||
|
@ -93,14 +96,15 @@ func @fill_extract_matmul_1342(%arg0: tensor<518x518xf32> {linalg.buffer_layout
|
|||
%cst_0 = arith.constant 1.000000e+00 : f32
|
||||
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_results_attr__ = ["false"]}
|
||||
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
|
||||
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
|
||||
%5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
|
||||
return %5 : tensor<256x256xf32>
|
||||
}
|
||||
|
@ -118,15 +122,15 @@ func @fill_extract_matmul_1423(%arg0: tensor<518x518xf32> {linalg.buffer_layout
|
|||
%cst_0 = arith.constant 1.000000e+00 : f32
|
||||
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_results_attr__ = ["false"]}
|
||||
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
|
||||
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
|
||||
%5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
|
||||
return %5 : tensor<256x256xf32>
|
||||
}
|
||||
|
@ -144,15 +148,15 @@ func @fill_extract_matmul_1432(%arg0: tensor<518x518xf32> {linalg.buffer_layout
|
|||
%cst_0 = arith.constant 1.000000e+00 : f32
|
||||
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_results_attr__ = ["false"]}
|
||||
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
|
||||
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
|
||||
%5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
|
||||
return %5 : tensor<256x256xf32>
|
||||
}
|
||||
|
@ -171,14 +175,15 @@ func @fill_extract_matmul_2134(
|
|||
%cst_0 = arith.constant 1.000000e+00 : f32
|
||||
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_results_attr__ = ["false"]}
|
||||
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
|
||||
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
|
||||
%5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
|
||||
return %5 : tensor<256x256xf32>
|
||||
}
|
||||
|
@ -197,14 +202,15 @@ func @fill_extract_matmul_2143(
|
|||
%cst_0 = arith.constant 1.000000e+00 : f32
|
||||
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_results_attr__ = ["false"]}
|
||||
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
|
||||
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
|
||||
%5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
|
||||
return %5 : tensor<256x256xf32>
|
||||
}
|
||||
|
@ -223,15 +229,15 @@ func @fill_extract_matmul_2314(
|
|||
%cst_0 = arith.constant 1.000000e+00 : f32
|
||||
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_results_attr__ = ["false"]}
|
||||
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
|
||||
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
|
||||
%5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
|
||||
return %5 : tensor<256x256xf32>
|
||||
}
|
||||
|
@ -250,14 +256,15 @@ func @fill_extract_matmul_2341(
|
|||
%cst_0 = arith.constant 1.000000e+00 : f32
|
||||
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_results_attr__ = ["false"]}
|
||||
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
|
||||
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
|
||||
%5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
|
||||
return %5 : tensor<256x256xf32>
|
||||
}
|
||||
|
@ -276,14 +283,15 @@ func @fill_extract_matmul_2413(
|
|||
%cst_0 = arith.constant 1.000000e+00 : f32
|
||||
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_results_attr__ = ["false"]}
|
||||
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
|
||||
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
|
||||
%5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
|
||||
return %5 : tensor<256x256xf32>
|
||||
}
|
||||
|
@ -302,14 +310,15 @@ func @fill_extract_matmul_2431(
|
|||
%cst_0 = arith.constant 1.000000e+00 : f32
|
||||
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_results_attr__ = ["false"]}
|
||||
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "false"]}
|
||||
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
|
||||
%5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
|
||||
return %5 : tensor<256x256xf32>
|
||||
}
|
||||
|
@ -328,14 +337,15 @@ func @fill_extract_matmul_3124(
|
|||
%cst_0 = arith.constant 1.000000e+00 : f32
|
||||
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_results_attr__ = ["false"]}
|
||||
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
|
||||
// CHECK: {__inplace_operands_attr__ = ["false"]}
|
||||
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
|
||||
%5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
|
||||
return %5 : tensor<256x256xf32>
|
||||
}
|
||||
|
@ -354,14 +364,15 @@ func @fill_extract_matmul_3142(
|
|||
%cst_0 = arith.constant 1.000000e+00 : f32
|
||||
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_results_attr__ = ["false"]}
|
||||
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
|
||||
// CHECK: {__inplace_operands_attr__ = ["false"]}
|
||||
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
|
||||
%5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
|
||||
return %5 : tensor<256x256xf32>
|
||||
}
|
||||
|
@ -380,14 +391,15 @@ func @fill_extract_matmul_3214(
|
|||
%cst_0 = arith.constant 1.000000e+00 : f32
|
||||
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_results_attr__ = ["false"]}
|
||||
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
|
||||
// CHECK: {__inplace_operands_attr__ = ["false"]}
|
||||
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
|
||||
%5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
|
||||
return %5 : tensor<256x256xf32>
|
||||
}
|
||||
|
@ -406,14 +418,15 @@ func @fill_extract_matmul_3241(
|
|||
%cst_0 = arith.constant 1.000000e+00 : f32
|
||||
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_results_attr__ = ["false"]}
|
||||
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
|
||||
// CHECK: {__inplace_operands_attr__ = ["false"]}
|
||||
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%2 = linalg.fill(%cst_0, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
|
||||
%5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
|
||||
return %5 : tensor<256x256xf32>
|
||||
}
|
||||
|
@ -432,14 +445,15 @@ func @fill_extract_matmul_3412(
|
|||
%cst_0 = arith.constant 1.000000e+00 : f32
|
||||
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_results_attr__ = ["false"]}
|
||||
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
|
||||
// CHECK: {__inplace_operands_attr__ = ["false"]}
|
||||
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
|
||||
%5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
|
||||
return %5 : tensor<256x256xf32>
|
||||
}
|
||||
|
@ -458,14 +472,15 @@ func @fill_extract_matmul_3421(
|
|||
%cst_0 = arith.constant 1.000000e+00 : f32
|
||||
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_results_attr__ = ["false"]}
|
||||
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
|
||||
// CHECK: {__inplace_operands_attr__ = ["false"]}
|
||||
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
|
||||
%5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
|
||||
return %5 : tensor<256x256xf32>
|
||||
}
|
||||
|
@ -484,15 +499,15 @@ func @fill_extract_matmul_4123(
|
|||
%cst_0 = arith.constant 1.000000e+00 : f32
|
||||
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_results_attr__ = ["false"]}
|
||||
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
|
||||
// CHECK: {__inplace_operands_attr__ = ["false"]}
|
||||
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
|
||||
%5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
|
||||
return %5 : tensor<256x256xf32>
|
||||
}
|
||||
|
@ -511,14 +526,15 @@ func @fill_extract_matmul_4132(
|
|||
%cst_0 = arith.constant 1.000000e+00 : f32
|
||||
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_results_attr__ = ["false"]}
|
||||
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
|
||||
// CHECK: {__inplace_operands_attr__ = ["false"]}
|
||||
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
|
||||
%5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
|
||||
return %5 : tensor<256x256xf32>
|
||||
}
|
||||
|
@ -537,15 +553,15 @@ func @fill_extract_matmul_4213(
|
|||
%cst_0 = arith.constant 1.000000e+00 : f32
|
||||
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_results_attr__ = ["false"]}
|
||||
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
|
||||
// CHECK: {__inplace_operands_attr__ = ["false"]}
|
||||
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%1 = linalg.fill(%cst, %0) : f32, tensor<256x256xf32> -> tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
|
||||
%5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
|
||||
return %5 : tensor<256x256xf32>
|
||||
}
|
||||
|
@ -564,15 +580,15 @@ func @fill_extract_matmul_4231(
|
|||
%cst_0 = arith.constant 1.000000e+00 : f32
|
||||
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_results_attr__ = ["false"]}
|
||||
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
|
||||
// CHECK: {__inplace_operands_attr__ = ["false"]}
|
||||
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
|
||||
%5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
|
||||
return %5 : tensor<256x256xf32>
|
||||
}
|
||||
|
@ -591,14 +607,15 @@ func @fill_extract_matmul_4312(
|
|||
%cst_0 = arith.constant 1.000000e+00 : f32
|
||||
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_results_attr__ = ["false"]}
|
||||
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
|
||||
// CHECK: {__inplace_operands_attr__ = ["false"]}
|
||||
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
|
||||
%5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
|
||||
return %5 : tensor<256x256xf32>
|
||||
}
|
||||
|
@ -617,14 +634,15 @@ func @fill_extract_matmul_4321(
|
|||
%cst_0 = arith.constant 1.000000e+00 : f32
|
||||
%0 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
|
||||
|
||||
// CHECK: {__inplace_results_attr__ = ["false"]}
|
||||
// CHECK-COUNT-4: {__inplace_results_attr__ = ["true"]}
|
||||
// CHECK: {__inplace_operands_attr__ = ["false"]}
|
||||
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["true"]}
|
||||
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%2 = linalg.fill(%cst_0, %4) : f32, tensor<16x256xf32> -> tensor<16x256xf32>
|
||||
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
|
||||
%1 = linalg.fill(%cst, %3) : f32, tensor<256x16xf32> -> tensor<256x16xf32>
|
||||
|
||||
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
|
||||
%5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
|
||||
return %5 : tensor<256x256xf32>
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue