forked from OSchip/llvm-project
[mlir][bufferize][NFC] Rename `comprehensive-function-bufferize` to `one-shot-bufferize`
The related functionality is moved over to the bufferization dialect. Test cases are cleaned up a bit.

Differential Revision: https://reviews.llvm.org/D120191

This commit is contained in:
parent 5acd9c49a8
commit d2dacde5d8
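For downstream users, the visible effect is a pass-flag rename: pipelines that previously ran `-test-comprehensive-function-bufferize=...` (or `-linalg-comprehensive-module-bufferize=...`) now run `-one-shot-bufferize=...`, as the updated RUN lines below show. A minimal C++ sketch of scheduling the renamed pass (the surrounding tool setup is assumed, not part of this patch):

#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
#include "mlir/Pass/PassManager.h"

// Sketch: schedule One-Shot Bufferize on a module-level pipeline. The pass
// reads its configuration from the options defined in Passes.td below.
void addOneShotBufferize(mlir::PassManager &pm) {
  pm.addPass(mlir::bufferization::createOneShotBufferizePass());
}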
@@ -164,9 +164,8 @@ private:
 LogicalResult analyzeOp(Operation *op, AnalysisBufferizationState &state);

 /// Run One-Shot Bufferize on the given op: Analysis + Bufferization
-LogicalResult
-runOneShotBufferize(Operation *op,
-                    std::unique_ptr<AnalysisBufferizationOptions> options);
+LogicalResult runOneShotBufferize(Operation *op,
+                                  const AnalysisBufferizationOptions &options);

 } // namespace bufferization
 } // namespace mlir
@@ -5,6 +5,7 @@

 namespace mlir {
 namespace bufferization {
+struct AnalysisBufferizationOptions;

 //===----------------------------------------------------------------------===//
 // Passes
@@ -29,6 +30,15 @@ std::unique_ptr<Pass> createBufferResultsToOutParamsPass();
 /// bufferization.to_tensor and bufferization.to_memref operations.
 std::unique_ptr<OperationPass<FuncOp>> createFinalizingBufferizePass();

+/// Create a pass that bufferizes all ops that implement BufferizableOpInterface
+/// with One-Shot Bufferize.
+std::unique_ptr<Pass> createOneShotBufferizePass();
+
+/// Create a pass that bufferizes all ops that implement BufferizableOpInterface
+/// with One-Shot Bufferize and the specified bufferization options.
+std::unique_ptr<Pass>
+createOneShotBufferizePass(const AnalysisBufferizationOptions &options);
+
 /// Creates a pass that promotes heap-based allocations to stack-based ones.
 /// Only buffers smaller than the provided size are promoted.
 /// Dynamic shaped buffers are promoted up to the given rank.
@@ -149,6 +149,88 @@ def FinalizingBufferize : Pass<"finalizing-bufferize", "FuncOp"> {
   let constructor = "mlir::bufferization::createFinalizingBufferizePass()";
 }

+def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> {
+  let summary = "One-Shot Bufferize";
+  let description = [{
+    This pass bufferizes all ops that implement `BufferizableOpInterface`. It
+    first performs an inplaceability analysis on SSA use-def chains of tensor
+    values to determine which OpOperands may bufferize in-place, i.e., without
+    inserting a buffer copy. It then rewrites the IR, inserting a buffer
+    allocation and copy for each OpOperand that was decided to bufferize
+    out-of-place.
+
+    One-Shot Bufferize (and `BufferizableOpInterface`) was designed for ops that
+    are in destination-passing style. When bufferizing such ops, it is possible
+    to reuse the buffer of a tensor OpOperand for a tensor OpResult. In essence,
+    a possible destination of an operation is already passed as an SSA value.
+
+    `tensor.insert` is an example of an op in destination-passing style. E.g.,
+    when bufferizing `%t0 = tensor.insert %f into %dest[%idx]`, `buffer(%t0)` is
+    identical to `buffer(%dest)` in the absence of RaW conflicts. As a
+    counterexample, `tensor.generate` is not in destination-passing style and
+    always results in a new buffer allocation.
+
+    One-Shot Bufferize deallocates all buffers that it allocates. Yielding newly
+    allocated buffers from a block is not supported yet and such IR will be
+    rejected. For testing purposes and compatibility with partial bufferization,
+    One-Shot Bufferize can be run with `allow-return-memref=1 create-deallocs=0`
+    to allow such IR.
+
+    One-Shot Bufferize will by default reject IR that contains non-bufferizable
+    ops, i.e., ops that do not implement BufferizableOpInterface. Such IR can
+    be allowed with `allow-unknown-ops=1`. In that case, to_memref and to_tensor
+    ops will be generated at the bufferization boundary. This is useful for
+    compatibility with existing partial bufferization passes: These can
+    bufferize the remaining IR after running One-Shot Bufferize.
+
+    Note: Running One-Shot Bufferize after a partial bufferization pass is
+    currently not supported. Running partial bufferization passes after running
+    One-Shot Bufferize is supported and the recommended way to gradually
+    migrate from partial bufferization to One-Shot Bufferize.
+
+    With `dialect-filter`, bufferization can be restricted to a set of dialects.
+    If no filter is specified, all ops that implement `BufferizableOpInterface`
+    are bufferized. Ops from the `std` dialect are an exception: These ops are
+    always ignored, even if no filter is specified. When a dialect filter is
+    specified and `allow-unknown-ops` is not turned on, bufferization fails
+    when encountering an op that is not included in the filter (even if it is
+    bufferizable).
+
+    For testing/debugging purposes, `test-analysis-only=1 print-conflicts=1`
+    prints analysis results and explains why an OpOperand was decided to
+    bufferize out-of-place. This is useful for understanding why One-Shot
+    Bufferize chose to insert a certain buffer copy.
+  }];
+  let options = [
+    Option<"allowReturnMemref", "allow-return-memref", "bool",
+           /*default=*/"false",
+           "Allows the return of memrefs (for testing purposes only)">,
+    Option<"allowUnknownOps", "allow-unknown-ops", "bool",
+           /*default=*/"false",
+           "Allows unknown (not bufferizable) ops in the input IR.">,
+    Option<"analysisFuzzerSeed", "analysis-fuzzer-seed", "unsigned",
+           /*default=*/"0",
+           "Test only: Analyze ops in random order with a given seed (fuzzer)">,
+    Option<"createDeallocs", "create-deallocs", "bool", /*default=*/"true",
+           "Specify if buffers should be deallocated. For compatibility with "
+           "core bufferization passes.">,
+    ListOption<"dialectFilter", "dialect-filter", "std::string",
+               "Restrict bufferization to ops from these dialects.",
+               "llvm::cl::MiscFlags::CommaSeparated">,
+    Option<"fullyDynamicLayoutMaps", "fully-dynamic-layout-maps", "bool",
+           /*default=*/"true",
+           "Generate MemRef types with dynamic offset+strides by default.">,
+    Option<"testAnalysisOnly", "test-analysis-only", "bool",
+           /*default=*/"false",
+           "Test only: Only run inplaceability analysis and annotate IR">,
+    Option<"printConflicts", "print-conflicts", "bool",
+           /*default=*/"false",
+           "Test only: Annotate IR with RaW conflicts. Requires "
+           "test-analysis-only.">,
+  ];
+  let constructor = "mlir::bufferization::createOneShotBufferizePass()";
+}
+
 def PromoteBuffersToStack : Pass<"promote-buffers-to-stack", "FuncOp"> {
   let summary = "Promotes heap-based allocations to automatically managed "
                 "stack-based allocations";
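The same options exposed as textual flags above can also be set programmatically and handed to the second `createOneShotBufferizePass` overload. A hedged sketch (field names follow the assignments in the pass implementation further down; the `PassManager` is assumed to exist in the caller):

#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
#include "mlir/Pass/PassManager.h"

// Sketch: programmatic equivalent of
//   -one-shot-bufferize="allow-return-memref allow-unknown-ops create-deallocs=0"
void addConfiguredOneShotBufferize(mlir::PassManager &pm) {
  mlir::bufferization::AnalysisBufferizationOptions options;
  options.allowReturnMemref = true; // testing only: permit returning memrefs
  options.allowUnknownOps = true;   // to_tensor/to_memref at the boundary
  options.createDeallocs = false;   // leave deallocation to a later pass
  pm.addPass(mlir::bufferization::createOneShotBufferizePass(options));
}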
@@ -17,6 +17,7 @@
 #include "mlir/Dialect/AMX/AMXDialect.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
+#include "mlir/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/ArmNeon/ArmNeonDialect.h"
 #include "mlir/Dialect/ArmSVE/ArmSVEDialect.h"
 #include "mlir/Dialect/Async/IR/Async.h"
@@ -30,6 +31,7 @@
 #include "mlir/Dialect/LLVMIR/NVVMDialect.h"
 #include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
+#include "mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/Math/IR/Math.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/OpenACC/OpenACC.h"
@@ -37,6 +39,7 @@
 #include "mlir/Dialect/PDL/IR/PDL.h"
 #include "mlir/Dialect/PDLInterp/IR/PDLInterp.h"
 #include "mlir/Dialect/Quant/QuantOps.h"
+#include "mlir/Dialect/SCF/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
 #include "mlir/Dialect/Shape/IR/Shape.h"
@@ -45,8 +48,10 @@
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.h"
 #include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"
+#include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/Tosa/IR/TosaOps.h"
 #include "mlir/Dialect/Vector/IR/VectorOps.h"
+#include "mlir/Dialect/Vector/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/X86Vector/X86VectorDialect.h"
 #include "mlir/IR/Dialect.h"

@@ -88,8 +93,13 @@ inline void registerAllDialects(DialectRegistry &registry) {
                   tosa::TosaDialect,
                   x86vector::X86VectorDialect>();
   // clang-format on
+  arith::registerBufferizableOpInterfaceExternalModels(registry);
+  linalg::registerBufferizableOpInterfaceExternalModels(registry);
+  scf::registerBufferizableOpInterfaceExternalModels(registry);
+  tensor::registerBufferizableOpInterfaceExternalModels(registry);
   tensor::registerInferTypeOpInterfaceExternalModels(registry);
   tensor::registerTilingOpInterfaceExternalModels(registry);
+  vector::registerBufferizableOpInterfaceExternalModels(registry);
 }

 /// Append all the MLIR dialects to the registry contained in the given context.
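For context: a tool that does not go through `registerAllDialects()` needs the same external-model registrations so that One-Shot Bufferize treats these dialects as bufferizable. A sketch using the calls added above (the helper name is hypothetical):

#include "mlir/InitAllDialects.h"

// Sketch: register only the bufferization external models, e.g. in a custom
// tool that assembles its own DialectRegistry.
void registerBufferizationExternalModels(mlir::DialectRegistry &registry) {
  mlir::arith::registerBufferizableOpInterfaceExternalModels(registry);
  mlir::linalg::registerBufferizableOpInterfaceExternalModels(registry);
  mlir::scf::registerBufferizableOpInterfaceExternalModels(registry);
  mlir::tensor::registerBufferizableOpInterfaceExternalModels(registry);
  mlir::vector::registerBufferizableOpInterfaceExternalModels(registry);
}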
@@ -11,9 +11,13 @@
 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
 #include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
+#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
 #include "mlir/Dialect/Bufferization/Transforms/Passes.h"
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/Operation.h"
+#include "mlir/Pass/PassManager.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "mlir/Transforms/Passes.h"

 using namespace mlir;
 using namespace mlir::bufferization;
@@ -144,8 +148,81 @@ struct FinalizingBufferizePass
     signalPassFailure();
   }
 };
+
+struct OneShotBufferizePass
+    : public OneShotBufferizeBase<OneShotBufferizePass> {
+  using OneShotBufferizeBase<OneShotBufferizePass>::OneShotBufferizeBase;
+
+  explicit OneShotBufferizePass(const AnalysisBufferizationOptions &options)
+      : options(options) {}
+
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registry.insert<bufferization::BufferizationDialect>();
+  }
+
+  void runOnOperation() override {
+    AnalysisBufferizationOptions opt;
+    if (!options) {
+      // Make new bufferization options if none were provided when creating the
+      // pass.
+      opt.allowReturnMemref = allowReturnMemref;
+      opt.allowUnknownOps = allowUnknownOps;
+      opt.analysisFuzzerSeed = analysisFuzzerSeed;
+      opt.createDeallocs = createDeallocs;
+      opt.fullyDynamicLayoutMaps = fullyDynamicLayoutMaps;
+      opt.printConflicts = printConflicts;
+      opt.testAnalysisOnly = testAnalysisOnly;
+
+      BufferizationOptions::OpFilterEntry::FilterFn filterFn =
+          [&](Operation *op) {
+            // Disallow std dialect ops, i.e., ops related to function calls.
+            if (op->getDialect()->getNamespace() ==
+                StandardOpsDialect::getDialectNamespace())
+              return false;
+            // Filter may be specified via options.
+            if (this->dialectFilter.hasValue())
+              return llvm::find(this->dialectFilter,
+                                op->getDialect()->getNamespace()) !=
+                     this->dialectFilter.end();
+            // No filter specified: All other ops are allowed.
+            return true;
+          };
+      opt.allowOperationInFilter(filterFn);
+    } else {
+      opt = *options;
+    }
+
+    ModuleOp moduleOp = getOperation();
+    if (failed(runOneShotBufferize(moduleOp, opt))) {
+      signalPassFailure();
+      return;
+    }
+
+    if (opt.testAnalysisOnly)
+      return;
+
+    OpPassManager cleanupPipeline("builtin.module");
+    cleanupPipeline.addPass(createCanonicalizerPass());
+    cleanupPipeline.addPass(createCSEPass());
+    cleanupPipeline.addPass(createLoopInvariantCodeMotionPass());
+    (void)runPipeline(cleanupPipeline, moduleOp);
+  }
+
+private:
+  llvm::Optional<AnalysisBufferizationOptions> options;
+};
 } // namespace

+std::unique_ptr<Pass> mlir::bufferization::createOneShotBufferizePass() {
+  return std::make_unique<OneShotBufferizePass>();
+}
+
+std::unique_ptr<Pass> mlir::bufferization::createOneShotBufferizePass(
+    const AnalysisBufferizationOptions &options) {
+  return std::make_unique<OneShotBufferizePass>(options);
+}
+
 std::unique_ptr<OperationPass<FuncOp>>
 mlir::bufferization::createFinalizingBufferizePass() {
   return std::make_unique<FinalizingBufferizePass>();
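The `dialect-filter` handling above builds a `FilterFn` and installs it with `allowOperationInFilter`. A hedged sketch of doing the same from client code (the helper name is hypothetical; the call itself is taken from the pass implementation above):

#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
#include "mlir/IR/Operation.h"

// Sketch: restrict bufferization to the tensor dialect with a custom filter.
void restrictToTensorDialect(
    mlir::bufferization::AnalysisBufferizationOptions &options) {
  options.allowOperationInFilter([](mlir::Operation *op) {
    // Allow only ops from the tensor dialect; everything else is skipped
    // (and wrapped in to_tensor/to_memref when allow-unknown-ops is set).
    return op->getDialect()->getNamespace() == "tensor";
  });
}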
@@ -799,11 +799,11 @@ LogicalResult bufferization::analyzeOp(Operation *op,
 }

 LogicalResult bufferization::runOneShotBufferize(
-    Operation *op, std::unique_ptr<AnalysisBufferizationOptions> options) {
-  AnalysisBufferizationState state(op, *options);
+    Operation *op, const AnalysisBufferizationOptions &options) {
+  AnalysisBufferizationState state(op, options);
   if (failed(analyzeOp(op, state)))
     return failure();
-  if (options->testAnalysisOnly)
+  if (options.testAnalysisOnly)
     return success();
   return bufferizeOp(op, state);
 }
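A sketch of calling the updated entry point directly, e.g. from a custom driver (the wrapper function is hypothetical): the options are now passed by const reference, so a stack-allocated struct can be reused instead of handing over a `std::unique_ptr`.

#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
#include "mlir/Support/LogicalResult.h"

// Sketch: run the analysis only and annotate the IR, without rewriting.
mlir::LogicalResult annotateWithAnalysis(mlir::Operation *op) {
  mlir::bufferization::AnalysisBufferizationOptions options;
  options.testAnalysisOnly = true; // annotate IR, do not rewrite
  return mlir::bufferization::runOneShotBufferize(op, options);
}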
@@ -1,10 +1,10 @@
 // RUN: mlir-opt %s \
-// RUN: -test-comprehensive-function-bufferize="allow-return-memref allow-unknown-ops create-deallocs=0" \
+// RUN: -one-shot-bufferize="allow-return-memref allow-unknown-ops create-deallocs=0" \
 // RUN: -split-input-file | \
 // RUN: FileCheck %s --check-prefix=CHECK-NODEALLOC

 // RUN: mlir-opt %s \
-// RUN: -test-comprehensive-function-bufferize="allow-return-memref allow-unknown-ops create-deallocs=0" \
+// RUN: -one-shot-bufferize="allow-return-memref allow-unknown-ops create-deallocs=0" \
 // RUN: -buffer-deallocation | \
 // RUN: FileCheck %s --check-prefix=CHECK-BUFFERDEALLOC

@@ -1,30 +1,28 @@
-// RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-module-bufferize="allow-return-memref allow-unknown-ops" -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-memref allow-unknown-ops" -split-input-file | FileCheck %s

 // Test bufferization using memref types that have no layout map.
-// RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-module-bufferize="allow-return-memref allow-unknown-ops fully-dynamic-layout-maps=0" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP
+// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-memref allow-unknown-ops fully-dynamic-layout-maps=0" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP

 // Run fuzzer with different seeds.
-// RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-module-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-module-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-module-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null

-// RUN: mlir-opt %s -allow-unregistered-dialect -test-comprehensive-function-bufferize="dialect-filter=tensor allow-unknown-ops allow-return-memref" -canonicalize -split-input-file | FileCheck %s --check-prefix=CHECK-TENSOR
-// RUN: mlir-opt %s -allow-unregistered-dialect -test-comprehensive-function-bufferize="dialect-filter=scf allow-unknown-ops allow-return-memref" -canonicalize -split-input-file | FileCheck %s --check-prefix=CHECK-SCF
+// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="dialect-filter=tensor allow-unknown-ops allow-return-memref" -canonicalize -split-input-file | FileCheck %s --check-prefix=CHECK-TENSOR
+// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="dialect-filter=scf allow-unknown-ops allow-return-memref" -canonicalize -split-input-file | FileCheck %s --check-prefix=CHECK-SCF

 // CHECK: #[[$MAP:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>

 // CHECK-LABEL: func @use_of_unknown_op_1(
-// CHECK-SAME: %[[m1:.*]]: memref<?xf32, #[[$MAP]]>
+// CHECK-SAME: %[[t1:.*]]: tensor<?xf32>
 // CHECK-NO-LAYOUT-MAP-LABEL: func @use_of_unknown_op_1(
-// CHECK-NO-LAYOUT-MAP-SAME: %[[m1:.*]]: memref<?xf32>)
-func @use_of_unknown_op_1(%t1: tensor<?xf32> {linalg.inplaceable = true})
+// CHECK-NO-LAYOUT-MAP-SAME: %[[t1:.*]]: tensor<?xf32>
+func @use_of_unknown_op_1(%t1: tensor<?xf32>)
     -> vector<5xf32> {
   // ToTensorOp is generated because the function is bufferized and has a
   // memref block argument.
-  // CHECK: %[[m1_tensor:.*]] = bufferization.to_tensor %[[m1]] : memref<?xf32, #[[$MAP]]>
-  // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[m1_tensor]])
-  // CHECK-NO-LAYOUT-MAP: %[[m1_tensor:.*]] = bufferization.to_tensor %[[m1]] : memref<?xf32>
-  // CHECK-NO-LAYOUT-MAP: %[[dummy:.*]] = "test.dummy_op"(%[[m1_tensor]])
+  // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[t1]])
+  // CHECK-NO-LAYOUT-MAP: %[[dummy:.*]] = "test.dummy_op"(%[[t1]])
   %0 = "test.dummy_op"(%t1) : (tensor<?xf32>) -> tensor<?xf32>

   %idx = arith.constant 0 : index
@@ -40,36 +38,34 @@ func @use_of_unknown_op_1(%t1: tensor<?xf32> {linalg.inplaceable = true})
 // -----

 // CHECK-LABEL: func @use_of_unknown_op_2(
-// CHECK-SAME: %[[m1:.*]]: memref<?xf32
-func @use_of_unknown_op_2(%t1: tensor<?xf32> {linalg.inplaceable = true})
-    -> tensor<?xf32> {
-  // CHECK: %[[m1_tensor:.*]] = bufferization.to_tensor %[[m1]]
-
-  // CHECK: %[[dummy1:.*]] = "test.dummy_op"(%[[m1_tensor]])
+// CHECK-SAME: %[[t1:.*]]: tensor<?xf32>
+func @use_of_unknown_op_2(%t1: tensor<?xf32>) -> tensor<?xf32> {
+  // CHECK: %[[dummy1:.*]] = "test.dummy_op"(%[[t1]])
   %0 = "test.dummy_op"(%t1) : (tensor<?xf32>) -> tensor<?xf32>
   // CHECK: %[[dummy2:.*]] = "test.another_dummy_op"(%[[dummy1]])
   %1 = "test.another_dummy_op"(%0) : (tensor<?xf32>) -> tensor<?xf32>

-  // CHECK: %[[dummy2_memref:.*]] = bufferization.to_memref %[[dummy2]]
-  // CHECK: return %[[dummy2_memref]]
+  // CHECK: return %[[dummy2]]
   return %1 : tensor<?xf32>
 }

 // -----

+// CHECK: #[[$MAP2:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
+
 // CHECK-LABEL: func @use_of_unknown_op_3(
-// CHECK-SAME: %[[m1:.*]]: memref<?xf32
-func @use_of_unknown_op_3(%t1: tensor<?xf32> {linalg.inplaceable = true})
+// CHECK-SAME: %[[t1:.*]]: tensor<?xf32>
+func @use_of_unknown_op_3(%t1: tensor<?xf32>)
     -> (vector<5xf32>, vector<5xf32>) {
   %idx = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
-  // CHECK: %[[m1_tensor:.*]] = bufferization.to_tensor %[[m1]]
+  // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]]
   // CHECK: %[[v1:.*]] = vector.transfer_read %[[m1]]
   %1 = vector.transfer_read %t1[%idx], %cst : tensor<?xf32>, vector<5xf32>

-  // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[m1_tensor]])
+  // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[t1]])
   %0 = "test.dummy_op"(%t1) : (tensor<?xf32>) -> tensor<?xf32>
-  // CHECK: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]]
+  // CHECK: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]] : memref<?xf32, #[[$MAP2]]>
   // CHECK: %[[v2:.*]] = vector.transfer_read %[[dummy_memref]]
   %2 = vector.transfer_read %0[%idx], %cst : tensor<?xf32>, vector<5xf32>

@@ -80,14 +76,13 @@ func @use_of_unknown_op_3(%t1: tensor<?xf32> {linalg.inplaceable = true})
 // -----

 // CHECK-LABEL: func @use_of_unknown_op_4(
-// CHECK-SAME: %[[m1:.*]]: memref<?xf32
-func @use_of_unknown_op_4(%t1: tensor<?xf32> {linalg.inplaceable = true})
+// CHECK-SAME: %[[t1:.*]]: tensor<?xf32>
+func @use_of_unknown_op_4(%t1: tensor<?xf32>)
     -> (vector<5xf32>, tensor<?xf32>) {
   %idx = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32

-  // CHECK: %[[m1_tensor:.*]] = bufferization.to_tensor %[[m1]]
-  // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[m1_tensor]])
+  // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[t1]])
   %0 = "test.dummy_op"(%t1) : (tensor<?xf32>) -> tensor<?xf32>

   // CHECK: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]]
@@ -97,40 +92,39 @@ func @use_of_unknown_op_4(%t1: tensor<?xf32> {linalg.inplaceable = true})
   // CHECK: %[[another_dummy:.*]] = "test.another_dummy_op"(%[[dummy]])
   %2 = "test.another_dummy_op"(%0) : (tensor<?xf32>) -> tensor<?xf32>

-  // CHECK: %[[another_dummy_memref:.*]] = bufferization.to_memref %[[another_dummy]]
-  // CHECK: return %[[v1]], %[[another_dummy_memref]]
+  // CHECK: return %[[v1]], %[[another_dummy]]
   return %1, %2 : vector<5xf32>, tensor<?xf32>
 }

 // -----

 // CHECK-LABEL: func @use_of_bufferizable_op_in_unbufferizable_op
-// CHECK-SAME: %[[m1:.*]]: memref<?xf32
+// CHECK-SAME: %[[t1:.*]]: tensor<?xf32>
 func @use_of_bufferizable_op_in_unbufferizable_op(
     %t1: tensor<?xf32>, %o: index, %s: index) -> (tensor<?xf32>, tensor<?xf32>) {
+  // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]]
   // CHECK: %[[subview:.*]] = memref.subview %[[m1]]
   %0 = tensor.extract_slice %t1[%o][%s][1] : tensor<?xf32> to tensor<?xf32>
   // CHECK: %[[subview_tensor:.*]] = bufferization.to_tensor %[[subview]]
   // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[subview_tensor]])
   %1 = "test.dummy_op"(%0) : (tensor<?xf32>) -> tensor<?xf32>
-  // CHECK: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]]
-  // CHECK: return %[[subview]], %[[dummy_memref]]
+  // CHECK: return %[[subview_tensor]], %[[dummy]]
   return %0, %1 : tensor<?xf32>, tensor<?xf32>
 }

 // -----

 // CHECK-LABEL: func @unused_unknown_op(
-// CHECK-SAME: %[[m1:.*]]: memref<?xf32
+// CHECK-SAME: %[[t1:.*]]: tensor<?xf32>
 func @unused_unknown_op(%t1 : tensor<?xf32>) -> vector<5xf32> {
   %idx = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
-  // ToTensorOp is inserted to pass in the result of the above bufferized op.
-  // CHECK: %[[m1_tensor:.*]] = bufferization.to_tensor %[[m1]]
+  // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]]
   // CHECK: vector.transfer_read %[[m1]]
   %1 = vector.transfer_read %t1[%idx], %cst : tensor<?xf32>, vector<5xf32>

-  // CHECK: "test.dummy_op"(%[[m1_tensor]])
+  // CHECK: "test.dummy_op"(%[[t1]])
   "test.dummy_op"(%t1) : (tensor<?xf32>) -> ()

   return %1 : vector<5xf32>
@@ -138,25 +132,60 @@ func @unused_unknown_op(%t1 : tensor<?xf32>) -> vector<5xf32> {

 // -----

+// CHECK: #[[$MAP3:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
+
+// CHECK-LABEL: func @unknown_op_may_read(
+func @unknown_op_may_read(%v: vector<5xf32>)
+    -> (tensor<10xf32>, tensor<10xf32>) {
+  %idx = arith.constant 0 : index
+  %cst = arith.constant 5.0 : f32
+
+  // One alloc for the init_tensor, another one because the transfer_write
+  // bufferizes out-of-place.
+  // CHECK: %[[m1:.*]] = memref.alloc() {{.*}} : memref<10xf32>
+  // CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10xf32>
+  // CHECK: %[[alloc_casted:.*]] = memref.cast %[[alloc]] : memref<10xf32> to memref<10xf32, #[[$MAP3]]>
+  // CHECK: %[[m1_casted:.*]] = memref.cast %[[m1]] : memref<10xf32> to memref<10xf32, #[[$MAP3]]>
+  %t1 = linalg.init_tensor [10] : tensor<10xf32>
+
+  // CHECK: linalg.fill(%{{.*}}, %[[m1]])
+  // CHECK: %[[filled_tensor:.*]] = bufferization.to_tensor %[[m1_casted]]
+  %filled = linalg.fill(%cst, %t1) : f32, tensor<10xf32> -> tensor<10xf32>
+
+  // The transfer_write is out-of-place because "dummy_op" may read.
+  // CHECK: memref.copy %[[m1]], %[[alloc]]
+  // CHECK: vector.transfer_write %{{.*}}, %[[alloc]]
+  // CHECK: %[[alloc_tensor:.*]] = bufferization.to_tensor %[[alloc_casted]]
+  %1 = vector.transfer_write %v, %filled[%idx] : vector<5xf32>, tensor<10xf32>
+
+  // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[filled_tensor]])
+  %2 = "test.dummy_op"(%filled) : (tensor<10xf32>) -> (tensor<10xf32>)
+
+  // CHECK: memref.dealloc %[[alloc]]
+  // CHECK: memref.dealloc %[[m1]]
+  // CHECK: return %[[alloc_tensor]], %[[dummy]]
+  return %1, %2 : tensor<10xf32>, tensor<10xf32>
+}
+
+// -----
+
 // CHECK-LABEL: func @unknown_op_not_writable
-// CHECK-SAME: %[[m1:.*]]: memref<?xf32
+// CHECK-SAME: %[[t1:.*]]: tensor<?xf32>
 func @unknown_op_not_writable(
     %t1 : tensor<?xf32>, %v : vector<5xf32>, %idx : index) -> tensor<?xf32> {
-  // CHECK: %[[m1_tensor:.*]] = bufferization.to_tensor %[[m1]]
-  // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[m1_tensor]])
+  // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[t1]])
   // CHECK: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]]
   %0 = "test.dummy_op"(%t1) : (tensor<?xf32>) -> (tensor<?xf32>)

   // The result of an unknown op is not writable. Always generate a copy.
-  // Note: This copy is essential for partial bufferization. Otherwise, we could
-  // introducing a RaW conflict.
   // CHECK: %[[dim:.*]] = tensor.dim %[[dummy]]
   // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]])
   // CHECK: memref.copy %[[dummy_memref]], %[[alloc]]
   // CHECK: vector.transfer_write %{{.*}}, %[[alloc]]
   %1 = vector.transfer_write %v, %0[%idx] : vector<5xf32>, tensor<?xf32>

-  // CHECK: return %[[alloc]]
+  // CHECK: %[[alloc_tensor:.*]] = bufferization.to_tensor %[[alloc]]
+  // CHECK: return %[[alloc_tensor]]
   return %1 : tensor<?xf32>
 }
@@ -1,9 +1,9 @@
-// RUN: mlir-opt %s -test-comprehensive-function-bufferize="allow-return-memref allow-unknown-ops" -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-memref allow-unknown-ops" -split-input-file | FileCheck %s

 // Run fuzzer with different seeds.
-// RUN: mlir-opt %s -test-comprehensive-function-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -test-comprehensive-function-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -test-comprehensive-function-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null

 // CHECK-LABEL: func @use_tensor_func_arg(
 // CHECK-SAME: %[[A:.*]]: tensor<?xf32>
@@ -68,31 +68,4 @@ func @empty_func() -> () {
   return
 }

-// -----
-
-// CHECK-LABEL: func @rank_reducing
-func @rank_reducing(
-    %i: index, %j: index,
-    %arg0: tensor<8x18x32xf32>)
-  -> tensor<?x1x6x8xf32> {
-  %c1 = arith.constant 1 : index
-  %c6 = arith.constant 6 : index
-  %c8 = arith.constant 8 : index
-  %c32 = arith.constant 32 : index
-  %c0 = arith.constant 0 : index
-  %0 = linalg.init_tensor [4, 1, 6, 8] : tensor<4x1x6x8xf32>
-  %1 = tensor.cast %0 : tensor<4x1x6x8xf32> to tensor<?x1x6x8xf32>
-  %2 = linalg.init_tensor [1, 6, 8] : tensor<1x6x8xf32>
-  %5 = scf.for %arg7 = %c0 to %c32 step %c8 iter_args(%arg8 = %1) -> (tensor<?x1x6x8xf32>) {
-    %7 = affine.apply affine_map<(d0) -> (d0 ceildiv 8)>(%arg7)
-    %8 = tensor.extract_slice %arg0[%i, %j, %arg7] [1, 6, 8] [1, 1, 1] : tensor<8x18x32xf32> to tensor<1x6x8xf32>
-    %9 = scf.for %arg9 = %c0 to %c6 step %c1 iter_args(%arg10 = %2) -> (tensor<1x6x8xf32>) {
-      %11 = tensor.extract_slice %8[0, %arg9, 0] [1, 1, 8] [1, 1, 1] : tensor<1x6x8xf32> to tensor<1x1x8xf32>
-      %12 = tensor.insert_slice %11 into %arg10[0, %arg9, 0] [1, 1, 8] [1, 1, 1] : tensor<1x1x8xf32> into tensor<1x6x8xf32>
-      scf.yield %12 : tensor<1x6x8xf32>
-    }
-    %10 = tensor.insert_slice %9 into %arg8[%7, 0, 0, 0] [1, 1, 6, 8] [1, 1, 1, 1] : tensor<1x6x8xf32> into tensor<?x1x6x8xf32>
-    scf.yield %10 : tensor<?x1x6x8xf32>
-  }
-  return %5: tensor<?x1x6x8xf32>
-}
@@ -1355,3 +1355,35 @@ func @write_after_select_read_one(
   // CHECK: return %[[f]], %[[select]]
   return %f, %w : f32, tensor<?xf32>
 }
+
+// -----
+
+// A regression test to make sure that we handle rank-reducing extract_slice
+// correctly.
+
+// CHECK-LABEL: func @rank_reducing
+func @rank_reducing(
+    %i: index, %j: index,
+    %arg0: tensor<8x18x32xf32>)
+  -> tensor<?x1x6x8xf32> {
+  %c1 = arith.constant 1 : index
+  %c6 = arith.constant 6 : index
+  %c8 = arith.constant 8 : index
+  %c32 = arith.constant 32 : index
+  %c0 = arith.constant 0 : index
+  %0 = linalg.init_tensor [4, 1, 6, 8] : tensor<4x1x6x8xf32>
+  %1 = tensor.cast %0 : tensor<4x1x6x8xf32> to tensor<?x1x6x8xf32>
+  %2 = linalg.init_tensor [1, 6, 8] : tensor<1x6x8xf32>
+  %5 = scf.for %arg7 = %c0 to %c32 step %c8 iter_args(%arg8 = %1) -> (tensor<?x1x6x8xf32>) {
+    %7 = affine.apply affine_map<(d0) -> (d0 ceildiv 8)>(%arg7)
+    %8 = tensor.extract_slice %arg0[%i, %j, %arg7] [1, 6, 8] [1, 1, 1] : tensor<8x18x32xf32> to tensor<1x6x8xf32>
+    %9 = scf.for %arg9 = %c0 to %c6 step %c1 iter_args(%arg10 = %2) -> (tensor<1x6x8xf32>) {
+      %11 = tensor.extract_slice %8[0, %arg9, 0] [1, 1, 8] [1, 1, 1] : tensor<1x6x8xf32> to tensor<1x1x8xf32>
+      %12 = tensor.insert_slice %11 into %arg10[0, %arg9, 0] [1, 1, 8] [1, 1, 1] : tensor<1x1x8xf32> into tensor<1x6x8xf32>
+      scf.yield %12 : tensor<1x6x8xf32>
+    }
+    %10 = tensor.insert_slice %9 into %arg8[%7, 0, 0, 0] [1, 1, 6, 8] [1, 1, 1, 1] : tensor<1x6x8xf32> into tensor<?x1x6x8xf32>
+    scf.yield %10 : tensor<?x1x6x8xf32>
+  }
+  return %5: tensor<?x1x6x8xf32>
+}
@@ -1,6 +1,5 @@
 # Exclude tests from libMLIR.so
 add_mlir_library(MLIRLinalgTestPasses
-  TestComprehensiveBufferize.cpp
   TestLinalgCodegenStrategy.cpp
   TestLinalgDistribution.cpp
   TestLinalgElementwiseFusion.cpp
@@ -1,138 +0,0 @@
-//===- TestComprehensiveBufferize.cpp - Test Comprehensive Bufferize ------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements logic for testing Comprehensive Bufferize.
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/Affine/IR/AffineOps.h"
-#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
-#include "mlir/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.h"
-#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
-#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
-#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
-#include "mlir/Dialect/Linalg/ComprehensiveBufferize/AffineInterfaceImpl.h"
-#include "mlir/Dialect/Linalg/IR/Linalg.h"
-#include "mlir/Dialect/Linalg/Passes.h"
-#include "mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h"
-#include "mlir/Dialect/SCF/BufferizableOpInterfaceImpl.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h"
-#include "mlir/Dialect/Vector/IR/VectorOps.h"
-#include "mlir/Dialect/Vector/Transforms/BufferizableOpInterfaceImpl.h"
-#include "mlir/Pass/PassManager.h"
-#include "mlir/Transforms/Passes.h"
-
-using namespace mlir;
-using namespace mlir::linalg;
-using namespace mlir::linalg::comprehensive_bufferize;
-using namespace mlir::bufferization;
-
-namespace {
-/// A helper struct for FunctionBufferize and ModuleBufferize. Both passes are
-/// mostly identical.
-struct TestComprehensiveFunctionBufferize
-    : public PassWrapper<TestComprehensiveFunctionBufferize,
-                         OperationPass<FuncOp>> {
-  StringRef getArgument() const final {
-    return "test-comprehensive-function-bufferize";
-  }
-
-  StringRef getDescription() const final {
-    return "Test Comprehensive Bufferize of FuncOps (body only).";
-  }
-
-  TestComprehensiveFunctionBufferize() = default;
-  TestComprehensiveFunctionBufferize(
-      const TestComprehensiveFunctionBufferize &pass)
-      : PassWrapper(pass) {}
-
-  void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<bufferization::BufferizationDialect, linalg::LinalgDialect,
-                    memref::MemRefDialect, tensor::TensorDialect,
-                    vector::VectorDialect, scf::SCFDialect, StandardOpsDialect,
-                    arith::ArithmeticDialect, AffineDialect>();
-    affine_ext::registerBufferizableOpInterfaceExternalModels(registry);
-    arith::registerBufferizableOpInterfaceExternalModels(registry);
-    linalg::registerBufferizableOpInterfaceExternalModels(registry);
-    scf::registerBufferizableOpInterfaceExternalModels(registry);
-    tensor::registerBufferizableOpInterfaceExternalModels(registry);
-    vector::registerBufferizableOpInterfaceExternalModels(registry);
-  }
-
-  void runOnOperation() override;
-
-  Option<bool> allowReturnMemref{
-      *this, "allow-return-memref",
-      llvm::cl::desc("Allow returning/yielding memrefs from functions/blocks"),
-      llvm::cl::init(false)};
-  Option<bool> allowUnknownOps{
-      *this, "allow-unknown-ops",
-      llvm::cl::desc(
-          "Allows the return of memrefs (for testing purposes only)"),
-      llvm::cl::init(false)};
-  Option<bool> testAnalysisOnly{
-      *this, "test-analysis-only",
-      llvm::cl::desc(
-          "Only runs inplaceability analysis (for testing purposes only)"),
-      llvm::cl::init(false)};
-  Option<unsigned> analysisFuzzerSeed{
-      *this, "analysis-fuzzer-seed",
-      llvm::cl::desc("Analyze ops in random order with a given seed (fuzzer)"),
-      llvm::cl::init(0)};
-  ListOption<std::string> dialectFilter{
-      *this, "dialect-filter",
-      llvm::cl::desc("Bufferize only ops from the specified dialects"),
-      llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated};
-  Option<bool> fullyDynamicLayoutMaps{
-      *this, "fully-dynamic-layout-maps",
-      llvm::cl::desc("Use fully dynamic layout maps on memref types"),
-      llvm::cl::init(true)};
-  Option<bool> createDeallocs{
-      *this, "create-deallocs",
-      llvm::cl::desc("Specify if buffers should be deallocated"),
-      llvm::cl::init(true)};
-};
-} // namespace
-
-void TestComprehensiveFunctionBufferize::runOnOperation() {
-  auto options = std::make_unique<AnalysisBufferizationOptions>();
-  options->allowReturnMemref = allowReturnMemref;
-  options->allowUnknownOps = allowUnknownOps;
-  options->testAnalysisOnly = testAnalysisOnly;
-  options->analysisFuzzerSeed = analysisFuzzerSeed;
-  options->fullyDynamicLayoutMaps = fullyDynamicLayoutMaps;
-  options->createDeallocs = createDeallocs;
-
-  if (dialectFilter.hasValue()) {
-    options->hasFilter = true;
-    for (const std::string &dialectNamespace : dialectFilter)
-      options->allowDialectInFilter(dialectNamespace);
-  }
-
-  Operation *op = getOperation();
-  if (failed(runOneShotBufferize(op, std::move(options))))
-    return;
-
-  if (testAnalysisOnly)
-    return;
-
-  OpPassManager cleanupPipeline("builtin.func");
-  cleanupPipeline.addPass(createCanonicalizerPass());
-  cleanupPipeline.addPass(createCSEPass());
-  cleanupPipeline.addPass(createLoopInvariantCodeMotionPass());
-  (void)this->runPipeline(cleanupPipeline, op);
-}
-
-namespace mlir {
-namespace test {
-void registerTestComprehensiveFunctionBufferize() {
-  PassRegistration<TestComprehensiveFunctionBufferize>();
-}
-} // namespace test
-} // namespace mlir
@@ -64,7 +64,6 @@ void registerTestAffineLoopParametricTilingPass();
 void registerTestAliasAnalysisPass();
 void registerTestBuiltinAttributeInterfaces();
 void registerTestCallGraphPass();
-void registerTestComprehensiveFunctionBufferize();
 void registerTestConstantFold();
 void registerTestGpuSerializeToCubinPass();
 void registerTestGpuSerializeToHsacoPass();
@@ -159,7 +158,6 @@ void registerTestPasses() {
 #if MLIR_ROCM_CONVERSIONS_ENABLED
   mlir::test::registerTestGpuSerializeToHsacoPass();
 #endif
-  mlir::test::registerTestComprehensiveFunctionBufferize();
  mlir::test::registerTestDecomposeCallGraphTypes();
  mlir::test::registerTestDataLayoutQuery();
  mlir::test::registerTestDominancePass();