forked from OSchip/llvm-project
[mlir][linalg] Run CSE after every CodegenStrategy transformation.
Add CSE after every transformation. Transformations such as tiling introduce redundant computation, for example, one AffineMinOp for every operand dimension pair. Follow up transformations such as Padding and Hoisting benefit from CSE since comparing slice sizes simplifies to comparing SSA values instead of analyzing affine expressions. Reviewed By: nicolasvasilache Differential Revision: https://reviews.llvm.org/D114585
This commit is contained in:
parent
74cbd71072
commit
914e72d400
|
@@ -25,9 +25,11 @@
|
|||
#include "mlir/Dialect/Vector/VectorTransforms.h"
|
||||
#include "mlir/IR/AffineExpr.h"
|
||||
#include "mlir/IR/AffineMap.h"
|
||||
#include "mlir/Pass/PassManager.h"
|
||||
#include "mlir/Support/LLVM.h"
|
||||
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
|
||||
#include "mlir/Transforms/LoopUtils.h"
|
||||
#include "mlir/Transforms/Passes.h"
|
||||
#include "mlir/Transforms/Utils.h"
|
||||
|
||||
using namespace mlir;
|
||||
|
@@ -335,6 +337,12 @@ struct LinalgStrategyEnablePass
|
|||
|
||||
if (options.hoistRedundantVectorTransfersOnTensor)
|
||||
hoistRedundantVectorTransfersOnTensor(funcOp);
|
||||
|
||||
// Run CSE to cleanup after canonicalization.
|
||||
OpPassManager dynamicPM("builtin.func");
|
||||
dynamicPM.addPass(createCSEPass());
|
||||
if (failed(runPipeline(dynamicPM, funcOp)))
|
||||
return signalPassFailure();
|
||||
}
|
||||
|
||||
LinalgEnablingOptions options;
|
||||
|
|
|
@@ -40,14 +40,19 @@ func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<72x72xf32>) -> tensor<72x72xf32>
|
|||
|
||||
// -----
|
||||
|
||||
// CHECK-PAD-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<(d0) -> (16, -d0 + 72)>
|
||||
|
||||
// CHECK-PAD: func @matmul(
|
||||
// Tile-pad-hoist test: a 72x72 matmul is tiled (16-element tiles per MAP0
// above); the CHECK-PAD lines verify that the nofold pad_tensor ops on the
// matmul operands sit above the tile loop nest, and that only one
// affine.min per map survives — i.e. CSE removed the duplicate min ops
// that tiling introduced for each operand/dimension pair.
// NOTE(review): FileCheck's count directive is spelled CHECK-PAD-COUNT-2:
// (hyphen, not '='). The CHECK-PAD-COUNT=2 / =3 lines below do not match
// FileCheck's directive syntax and are treated as plain comments, so those
// count checks never actually run — confirm and fix the spelling.
func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<72x72xf32>) -> tensor<72x72xf32> {
|
||||
|
||||
// Check the padding of the input operands has been hoisted out of the tile loop nest.
|
||||
// CHECK-PAD-COUNT=2: linalg.pad_tensor %{{.*}} nofold
|
||||
// CHECK-PAD-COUNT=3: scf.for
|
||||
// CHECK-PAD: scf.for
|
||||
// Check CSE eliminates the duplicate min operations introduced by tiling.
|
||||
// CHECK-PAD: affine.min #[[MAP0]]
|
||||
// CHECK-PAD-NOT: affine.min #[[MAP0]]
|
||||
// CHECK-PAD-COUNT=2: scf.for
|
||||
// CHECK-PAD: linalg.matmul
|
||||
%0 = linalg.matmul ins(%arg0, %arg1: tensor<72x72xf32>, tensor<72x72xf32>) outs(%arg2: tensor<72x72xf32>) -> tensor<72x72xf32>
|
||||
return %0 : tensor<72x72xf32>
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue