[mlir][Affine] Revisit fusion candidates after successful fusion

This patch changes the fusion algorithm so that, after two loop nests are
fused, previously visited nodes are revisited and considered again for
fusion in the context of the new fused loop nest.

Reviewed By: bondhugula

Differential Revision: https://reviews.llvm.org/D81609
This commit is contained in:
Diego Caballero 2020-06-11 14:39:44 -07:00
parent 1c03389c29
commit 2e7a084591
2 changed files with 54 additions and 4 deletions

View File

@ -1628,14 +1628,22 @@ public:
// Add new load ops to current Node load op list 'loads' to
// continue fusing based on new operands.
for (auto *loadOpInst : dstLoopCollector.loadOpInsts) {
auto loadMemRef =
cast<AffineReadOpInterface>(loadOpInst).getMemRef();
// NOTE: Change 'loads' to a hash set in case efficiency is an
// issue. We still use a vector since it's expected to be small.
if (visitedMemrefs.count(loadMemRef) == 0 &&
!llvm::is_contained(loads, loadOpInst))
if (!llvm::is_contained(loads, loadOpInst))
loads.push_back(loadOpInst);
}
// Clear visited memrefs after fusion so that previously visited src
// nodes are considered for fusion again in the context of the new
// fused node.
// TODO: This shouldn't be necessary if we visited candidates in the
// dependence graph in post-order or once we fully support
// multi-store producers. Currently, in a multi-store producer
// scenario such as A->B, A->C, B->C, we fail to fuse A+B due to the
// multiple outgoing edges. However, after fusing B+C, A has a
// single outgoing edge and can be fused if we revisit it in the
// context of the new fused B+C node.
visitedMemrefs.clear();
// Clear and add back loads and stores.
mdg->clearNodeLoadAndStores(dstNode->id);

View File

@ -2493,3 +2493,45 @@ func @vector_loop(%a : memref<10x20xf32>, %b : memref<10x20xf32>,
// CHECK-NEXT: affine.vector_load
// CHECK-NEXT: affine.vector_store
// CHECK-NOT: affine.for
// -----
// Regression test for revisiting fusion candidates after a successful fusion.
// Four consecutive loops over [0, 32) each load %in0[%d] and %in1[%d] and
// store their result back to %in0[%d], so every loop reads the values written
// by the loop before it and overwrites them for the loop after it.
// NOTE(review): presumably this gives the earlier loops several outgoing
// dependence edges (hence the test name) -- confirm against the fusion pass.
// The directives after the function expect the four arithmetic ops to end up
// under a single affine.for, in their original order.
// CHECK-LABEL: func @multi_outgoing_edges
func @multi_outgoing_edges(%in0 : memref<32xf32>,
%in1 : memref<32xf32>) {
// Loop 1: %in0[d] = %in0[d] + %in1[d].
affine.for %d = 0 to 32 {
%lhs = affine.load %in0[%d] : memref<32xf32>
%rhs = affine.load %in1[%d] : memref<32xf32>
%add = addf %lhs, %rhs : f32
affine.store %add, %in0[%d] : memref<32xf32>
}
// Loop 2: %in0[d] = %in0[d] - %in1[d]. The result keeps the SSA name %add
// even though the op is subf; the checks below match op names only.
affine.for %d = 0 to 32 {
%lhs = affine.load %in0[%d] : memref<32xf32>
%rhs = affine.load %in1[%d] : memref<32xf32>
%add = subf %lhs, %rhs : f32
affine.store %add, %in0[%d] : memref<32xf32>
}
// Loop 3: %in0[d] = %in0[d] * %in1[d] (result again named %add).
affine.for %d = 0 to 32 {
%lhs = affine.load %in0[%d] : memref<32xf32>
%rhs = affine.load %in1[%d] : memref<32xf32>
%add = mulf %lhs, %rhs : f32
affine.store %add, %in0[%d] : memref<32xf32>
}
// Loop 4: %in0[d] = %in0[d] / %in1[d] (result again named %add).
affine.for %d = 0 to 32 {
%lhs = affine.load %in0[%d] : memref<32xf32>
%rhs = affine.load %in1[%d] : memref<32xf32>
%add = divf %lhs, %rhs : f32
affine.store %add, %in0[%d] : memref<32xf32>
}
return
}
// Expected output: one affine.for, then addf, subf, mulf and divf in that
// order, with no further affine.for appearing between any of those matches.
// CHECK: affine.for
// CHECK-NOT: affine.for
// CHECK: addf
// CHECK-NOT: affine.for
// CHECK: subf
// CHECK-NOT: affine.for
// CHECK: mulf
// CHECK-NOT: affine.for
// CHECK: divf