forked from OSchip/llvm-project
Revert "[mlir] Adds affine loop fusion transformation function to LoopFusionUtils."
This reverts commit 64871f778d
.
ASAN indicates a use-after-free in in mlir::canFuseLoops(mlir::AffineForOp, mlir::AffineForOp, unsigned int, mlir::ComputationSliceState*) lib/Transforms/Utils/LoopFusionUtils.cpp:202:41
This commit is contained in:
parent
8707c246bc
commit
2724ada8d2
|
@ -51,11 +51,6 @@ FusionResult canFuseLoops(AffineForOp srcForOp, AffineForOp dstForOp,
|
||||||
unsigned dstLoopDepth,
|
unsigned dstLoopDepth,
|
||||||
ComputationSliceState *srcSlice);
|
ComputationSliceState *srcSlice);
|
||||||
|
|
||||||
/// Fuses 'srcForOp' into 'dstForOp' with destination loop block insertion point
|
|
||||||
/// and source slice loop bounds specified in 'srcSlice'.
|
|
||||||
void fuseLoops(AffineForOp srcForOp, AffineForOp dstForOp,
|
|
||||||
ComputationSliceState *srcSlice);
|
|
||||||
|
|
||||||
/// LoopNestStats aggregates various per-loop statistics (eg. loop trip count
|
/// LoopNestStats aggregates various per-loop statistics (eg. loop trip count
|
||||||
/// and operation count) for a loop nest up until (and including) the innermost
|
/// and operation count) for a loop nest up until (and including) the innermost
|
||||||
/// loop body.
|
/// loop body.
|
||||||
|
|
|
@ -24,7 +24,6 @@
|
||||||
#include "mlir/IR/Builders.h"
|
#include "mlir/IR/Builders.h"
|
||||||
#include "mlir/IR/Function.h"
|
#include "mlir/IR/Function.h"
|
||||||
#include "mlir/IR/Operation.h"
|
#include "mlir/IR/Operation.h"
|
||||||
#include "mlir/Transforms/LoopUtils.h"
|
|
||||||
#include "llvm/ADT/DenseMap.h"
|
#include "llvm/ADT/DenseMap.h"
|
||||||
#include "llvm/ADT/SmallVector.h"
|
#include "llvm/ADT/SmallVector.h"
|
||||||
#include "llvm/Support/Debug.h"
|
#include "llvm/Support/Debug.h"
|
||||||
|
@ -247,34 +246,6 @@ FusionResult mlir::canFuseLoops(AffineForOp srcForOp, AffineForOp dstForOp,
|
||||||
return FusionResult::Success;
|
return FusionResult::Success;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Fuses 'srcForOp' into 'dstForOp' with destination loop block insertion point
|
|
||||||
/// and source slice loop bounds specified in 'srcSlice'.
|
|
||||||
void mlir::fuseLoops(AffineForOp srcForOp, AffineForOp dstForOp,
|
|
||||||
ComputationSliceState *srcSlice) {
|
|
||||||
// Clone 'srcForOp' into 'dstForOp' at 'srcSlice->insertPoint'.
|
|
||||||
OpBuilder b(srcSlice->insertPoint->getBlock(), srcSlice->insertPoint);
|
|
||||||
BlockAndValueMapping mapper;
|
|
||||||
b.clone(*srcForOp, mapper);
|
|
||||||
|
|
||||||
// Update 'sliceLoopNest' upper and lower bounds from computed 'srcSlice'.
|
|
||||||
SmallVector<AffineForOp, 4> sliceLoops;
|
|
||||||
for (unsigned i = 0, e = srcSlice->ivs.size(); i < e; ++i) {
|
|
||||||
auto loopIV = mapper.lookupOrNull(srcSlice->ivs[i]);
|
|
||||||
if (!loopIV)
|
|
||||||
continue;
|
|
||||||
auto forOp = getForInductionVarOwner(loopIV);
|
|
||||||
sliceLoops.push_back(forOp);
|
|
||||||
if (AffineMap lbMap = srcSlice->lbs[i])
|
|
||||||
forOp.setLowerBound(srcSlice->lbOperands[i], lbMap);
|
|
||||||
if (AffineMap ubMap = srcSlice->ubs[i])
|
|
||||||
forOp.setUpperBound(srcSlice->ubOperands[i], ubMap);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Promote any single iteration slice loops.
|
|
||||||
for (auto forOp : sliceLoops)
|
|
||||||
promoteIfSingleIteration(forOp);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Collect loop nest statistics (eg. loop trip count and operation count)
|
/// Collect loop nest statistics (eg. loop trip count and operation count)
|
||||||
/// in 'stats' for loop nest rooted at 'forOp'. Returns true on success,
|
/// in 'stats' for loop nest rooted at 'forOp'. Returns true on success,
|
||||||
/// returns false otherwise.
|
/// returns false otherwise.
|
||||||
|
|
|
@ -1,105 +0,0 @@
|
||||||
// RUN: mlir-opt %s -test-loop-fusion -test-loop-fusion-transformation -split-input-file -canonicalize | FileCheck %s
|
|
||||||
|
|
||||||
// CHECK-LABEL: func @slice_depth1_loop_nest() {
|
|
||||||
func @slice_depth1_loop_nest() {
|
|
||||||
%0 = alloc() : memref<100xf32>
|
|
||||||
%cst = constant 7.000000e+00 : f32
|
|
||||||
affine.for %i0 = 0 to 16 {
|
|
||||||
affine.store %cst, %0[%i0] : memref<100xf32>
|
|
||||||
}
|
|
||||||
affine.for %i1 = 0 to 5 {
|
|
||||||
%1 = affine.load %0[%i1] : memref<100xf32>
|
|
||||||
}
|
|
||||||
// CHECK: affine.for %[[IV0:.*]] = 0 to 5 {
|
|
||||||
// CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%[[IV0]]] : memref<100xf32>
|
|
||||||
// CHECK-NEXT: affine.load %{{.*}}[%[[IV0]]] : memref<100xf32>
|
|
||||||
// CHECK-NEXT: }
|
|
||||||
// CHECK-NEXT: return
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// -----
|
|
||||||
|
|
||||||
// CHECK-LABEL: func @should_fuse_reduction_to_pointwise() {
|
|
||||||
func @should_fuse_reduction_to_pointwise() {
|
|
||||||
%a = alloc() : memref<10x10xf32>
|
|
||||||
%b = alloc() : memref<10xf32>
|
|
||||||
%c = alloc() : memref<10xf32>
|
|
||||||
|
|
||||||
%cf7 = constant 7.0 : f32
|
|
||||||
|
|
||||||
affine.for %i0 = 0 to 10 {
|
|
||||||
affine.for %i1 = 0 to 10 {
|
|
||||||
%v0 = affine.load %b[%i0] : memref<10xf32>
|
|
||||||
%v1 = affine.load %a[%i0, %i1] : memref<10x10xf32>
|
|
||||||
%v3 = addf %v0, %v1 : f32
|
|
||||||
affine.store %v3, %b[%i0] : memref<10xf32>
|
|
||||||
}
|
|
||||||
}
|
|
||||||
affine.for %i2 = 0 to 10 {
|
|
||||||
%v4 = affine.load %b[%i2] : memref<10xf32>
|
|
||||||
affine.store %v4, %c[%i2] : memref<10xf32>
|
|
||||||
}
|
|
||||||
|
|
||||||
// Match on the fused loop nest.
|
|
||||||
// Should fuse in entire inner loop on %i1 from source loop nest, as %i1
|
|
||||||
// is not used in the access function of the store/load on %b.
|
|
||||||
// CHECK: affine.for %{{.*}} = 0 to 10 {
|
|
||||||
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
|
|
||||||
// CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
|
|
||||||
// CHECK-NEXT: affine.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
|
|
||||||
// CHECK-NEXT: addf %{{.*}}, %{{.*}} : f32
|
|
||||||
// CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
|
|
||||||
// CHECK-NEXT: }
|
|
||||||
// CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
|
|
||||||
// CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
|
|
||||||
// CHECK-NEXT: }
|
|
||||||
// CHECK-NEXT: return
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// -----
|
|
||||||
|
|
||||||
// CHECK-LABEL: func @should_fuse_avoiding_dependence_cycle() {
|
|
||||||
func @should_fuse_avoiding_dependence_cycle() {
|
|
||||||
%a = alloc() : memref<10xf32>
|
|
||||||
%b = alloc() : memref<10xf32>
|
|
||||||
%c = alloc() : memref<10xf32>
|
|
||||||
|
|
||||||
%cf7 = constant 7.0 : f32
|
|
||||||
|
|
||||||
// Set up the following dependences:
|
|
||||||
// 1) loop0 -> loop1 on memref '%{{.*}}'
|
|
||||||
// 2) loop0 -> loop2 on memref '%{{.*}}'
|
|
||||||
// 3) loop1 -> loop2 on memref '%{{.*}}'
|
|
||||||
affine.for %i0 = 0 to 10 {
|
|
||||||
%v0 = affine.load %a[%i0] : memref<10xf32>
|
|
||||||
affine.store %cf7, %b[%i0] : memref<10xf32>
|
|
||||||
}
|
|
||||||
affine.for %i1 = 0 to 10 {
|
|
||||||
affine.store %cf7, %a[%i1] : memref<10xf32>
|
|
||||||
%v1 = affine.load %c[%i1] : memref<10xf32>
|
|
||||||
}
|
|
||||||
affine.for %i2 = 0 to 10 {
|
|
||||||
%v2 = affine.load %b[%i2] : memref<10xf32>
|
|
||||||
affine.store %cf7, %c[%i2] : memref<10xf32>
|
|
||||||
}
|
|
||||||
// Fusing loop first loop into last would create a cycle:
|
|
||||||
// {1} <--> {0, 2}
|
|
||||||
// However, we can avoid the dependence cycle if we first fuse loop0 into
|
|
||||||
// loop1:
|
|
||||||
// {0, 1) --> {2}
|
|
||||||
// Then fuse this loop nest with loop2:
|
|
||||||
// {0, 1, 2}
|
|
||||||
//
|
|
||||||
// CHECK: affine.for %{{.*}} = 0 to 10 {
|
|
||||||
// CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
|
|
||||||
// CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
|
|
||||||
// CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
|
|
||||||
// CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
|
|
||||||
// CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
|
|
||||||
// CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
|
|
||||||
// CHECK-NEXT: }
|
|
||||||
// CHECK-NEXT: return
|
|
||||||
return
|
|
||||||
}
|
|
|
@ -41,11 +41,6 @@ static llvm::cl::opt<bool> clTestSliceComputation(
|
||||||
llvm::cl::desc("Enable testing of loop fusion slice computation"),
|
llvm::cl::desc("Enable testing of loop fusion slice computation"),
|
||||||
llvm::cl::cat(clOptionsCategory));
|
llvm::cl::cat(clOptionsCategory));
|
||||||
|
|
||||||
static llvm::cl::opt<bool> clTestLoopFusionTransformation(
|
|
||||||
"test-loop-fusion-transformation",
|
|
||||||
llvm::cl::desc("Enable testing of loop fusion transformation"),
|
|
||||||
llvm::cl::cat(clOptionsCategory));
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
struct TestLoopFusion : public FunctionPass<TestLoopFusion> {
|
struct TestLoopFusion : public FunctionPass<TestLoopFusion> {
|
||||||
|
@ -74,9 +69,11 @@ gatherLoops(Block *block, unsigned currLoopDepth,
|
||||||
// Run fusion dependence check on 'loops[i]' and 'loops[j]' at loop depths
|
// Run fusion dependence check on 'loops[i]' and 'loops[j]' at loop depths
|
||||||
// in range ['loopDepth' + 1, 'maxLoopDepth'].
|
// in range ['loopDepth' + 1, 'maxLoopDepth'].
|
||||||
// Emits a remark on 'loops[i]' if a fusion-preventing dependence exists.
|
// Emits a remark on 'loops[i]' if a fusion-preventing dependence exists.
|
||||||
static bool testDependenceCheck(AffineForOp srcForOp, AffineForOp dstForOp,
|
static void testDependenceCheck(SmallVector<AffineForOp, 2> &loops, unsigned i,
|
||||||
unsigned i, unsigned j, unsigned loopDepth,
|
unsigned j, unsigned loopDepth,
|
||||||
unsigned maxLoopDepth) {
|
unsigned maxLoopDepth) {
|
||||||
|
AffineForOp srcForOp = loops[i];
|
||||||
|
AffineForOp dstForOp = loops[j];
|
||||||
mlir::ComputationSliceState sliceUnion;
|
mlir::ComputationSliceState sliceUnion;
|
||||||
for (unsigned d = loopDepth + 1; d <= maxLoopDepth; ++d) {
|
for (unsigned d = loopDepth + 1; d <= maxLoopDepth; ++d) {
|
||||||
FusionResult result =
|
FusionResult result =
|
||||||
|
@ -87,7 +84,6 @@ static bool testDependenceCheck(AffineForOp srcForOp, AffineForOp dstForOp,
|
||||||
<< i << " into loop nest " << j << " at depth " << loopDepth;
|
<< i << " into loop nest " << j << " at depth " << loopDepth;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the index of 'op' in its block.
|
// Returns the index of 'op' in its block.
|
||||||
|
@ -125,9 +121,11 @@ static std::string getSliceStr(const mlir::ComputationSliceState &sliceUnion) {
|
||||||
// Computes fusion slice union on 'loops[i]' and 'loops[j]' at loop depths
|
// Computes fusion slice union on 'loops[i]' and 'loops[j]' at loop depths
|
||||||
// in range ['loopDepth' + 1, 'maxLoopDepth'].
|
// in range ['loopDepth' + 1, 'maxLoopDepth'].
|
||||||
// Emits a string representation of the slice union as a remark on 'loops[j]'.
|
// Emits a string representation of the slice union as a remark on 'loops[j]'.
|
||||||
static bool testSliceComputation(AffineForOp forOpA, AffineForOp forOpB,
|
static void testSliceComputation(SmallVector<AffineForOp, 2> &loops, unsigned i,
|
||||||
unsigned i, unsigned j, unsigned loopDepth,
|
unsigned j, unsigned loopDepth,
|
||||||
unsigned maxLoopDepth) {
|
unsigned maxLoopDepth) {
|
||||||
|
AffineForOp forOpA = loops[i];
|
||||||
|
AffineForOp forOpB = loops[j];
|
||||||
for (unsigned d = loopDepth + 1; d <= maxLoopDepth; ++d) {
|
for (unsigned d = loopDepth + 1; d <= maxLoopDepth; ++d) {
|
||||||
mlir::ComputationSliceState sliceUnion;
|
mlir::ComputationSliceState sliceUnion;
|
||||||
FusionResult result = mlir::canFuseLoops(forOpA, forOpB, d, &sliceUnion);
|
FusionResult result = mlir::canFuseLoops(forOpA, forOpB, d, &sliceUnion);
|
||||||
|
@ -137,77 +135,31 @@ static bool testSliceComputation(AffineForOp forOpA, AffineForOp forOpB,
|
||||||
<< " : " << getSliceStr(sliceUnion) << ")";
|
<< " : " << getSliceStr(sliceUnion) << ")";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool testLoopFusionTransformation(AffineForOp forOpA, AffineForOp forOpB,
|
void TestLoopFusion::runOnFunction() {
|
||||||
unsigned i, unsigned j,
|
// Gather all AffineForOps by loop depth.
|
||||||
unsigned loopDepth,
|
DenseMap<unsigned, SmallVector<AffineForOp, 2>> depthToLoops;
|
||||||
unsigned maxLoopDepth) {
|
for (auto &block : getFunction()) {
|
||||||
for (unsigned d = loopDepth + 1; d <= maxLoopDepth; ++d) {
|
gatherLoops(&block, /*currLoopDepth=*/0, depthToLoops);
|
||||||
mlir::ComputationSliceState sliceUnion;
|
|
||||||
FusionResult result = mlir::canFuseLoops(forOpA, forOpB, d, &sliceUnion);
|
|
||||||
if (result.value == FusionResult::Success) {
|
|
||||||
mlir::fuseLoops(forOpA, forOpB, &sliceUnion);
|
|
||||||
// Note: 'forOpA' is removed to simplify test output. A proper loop
|
|
||||||
// fusion pass should check the data dependence graph and run memref
|
|
||||||
// region analysis to ensure removing 'forOpA' is safe.
|
|
||||||
forOpA.erase();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
using LoopFunc = function_ref<bool(AffineForOp, AffineForOp, unsigned, unsigned,
|
// Run tests on all combinations of src/dst loop nests in 'depthToLoops'.
|
||||||
unsigned, unsigned)>;
|
|
||||||
|
|
||||||
// Run tests on all combinations of src/dst loop nests in 'depthToLoops'.
|
|
||||||
static bool
|
|
||||||
iterateLoops(DenseMap<unsigned, SmallVector<AffineForOp, 2>> &depthToLoops,
|
|
||||||
LoopFunc fn) {
|
|
||||||
bool changed = false;
|
|
||||||
for (auto &depthAndLoops : depthToLoops) {
|
for (auto &depthAndLoops : depthToLoops) {
|
||||||
unsigned loopDepth = depthAndLoops.first;
|
unsigned loopDepth = depthAndLoops.first;
|
||||||
auto &loops = depthAndLoops.second;
|
auto &loops = depthAndLoops.second;
|
||||||
unsigned numLoops = loops.size();
|
unsigned numLoops = loops.size();
|
||||||
for (unsigned j = 0; j < numLoops; ++j) {
|
for (unsigned j = 0; j < numLoops; ++j) {
|
||||||
for (unsigned k = 0; k < numLoops; ++k) {
|
for (unsigned k = 0; k < numLoops; ++k) {
|
||||||
if (j != k)
|
if (j == k)
|
||||||
changed |=
|
continue;
|
||||||
fn(loops[j], loops[k], j, k, loopDepth, depthToLoops.size());
|
if (clTestDependenceCheck)
|
||||||
|
testDependenceCheck(loops, j, k, loopDepth, depthToLoops.size());
|
||||||
|
if (clTestSliceComputation)
|
||||||
|
testSliceComputation(loops, j, k, loopDepth, depthToLoops.size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return changed;
|
|
||||||
}
|
|
||||||
|
|
||||||
void TestLoopFusion::runOnFunction() {
|
|
||||||
DenseMap<unsigned, SmallVector<AffineForOp, 2>> depthToLoops;
|
|
||||||
if (clTestLoopFusionTransformation) {
|
|
||||||
// Run loop fusion until a fixed point is reached.
|
|
||||||
bool changed = true;
|
|
||||||
while (changed) {
|
|
||||||
depthToLoops.clear();
|
|
||||||
// Gather all AffineForOps by loop depth.
|
|
||||||
for (auto &block : getFunction())
|
|
||||||
gatherLoops(&block, /*currLoopDepth=*/0, depthToLoops);
|
|
||||||
|
|
||||||
// Try to fuse all combinations of src/dst loop nests in 'depthToLoops'.
|
|
||||||
changed = iterateLoops(depthToLoops, testLoopFusionTransformation);
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Gather all AffineForOps by loop depth.
|
|
||||||
for (auto &block : getFunction()) {
|
|
||||||
gatherLoops(&block, /*currLoopDepth=*/0, depthToLoops);
|
|
||||||
}
|
|
||||||
// Run tests on all combinations of src/dst loop nests in 'depthToLoops'.
|
|
||||||
if (clTestDependenceCheck)
|
|
||||||
iterateLoops(depthToLoops, testDependenceCheck);
|
|
||||||
if (clTestSliceComputation)
|
|
||||||
iterateLoops(depthToLoops, testSliceComputation);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static PassRegistration<TestLoopFusion>
|
static PassRegistration<TestLoopFusion>
|
||||||
|
|
Loading…
Reference in New Issue