From 39a1ddeb1cd36510639ab5e0948de9fd4edf9b63 Mon Sep 17 00:00:00 2001
From: MLIR Team <no-reply@google.com>
Date: Mon, 4 Mar 2019 15:14:12 -0800
Subject: [PATCH] Adds loop attribute as a temporary workaround to prevent
 slice fusion of loop nests containing instructions with side effects (the
 proper solution will be to use memref read/write regions in the future).

PiperOrigin-RevId: 236733739
---
 mlir/lib/Analysis/Utils.cpp           | 33 ++++++++++++++----------
 mlir/test/Transforms/loop-fusion.mlir | 37 +++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 14 deletions(-)
diff --git a/mlir/lib/Analysis/Utils.cpp b/mlir/lib/Analysis/Utils.cpp
index 8ba4157a8120..f41e2c4f27d9 100644
--- a/mlir/lib/Analysis/Utils.cpp
+++ b/mlir/lib/Analysis/Utils.cpp
@@ -435,6 +435,7 @@ static Instruction *getInstAtPosition(ArrayRef<unsigned> positions,
   return nullptr;
 }
 
+const char *const kSliceFusionBarrierAttrName = "slice_fusion_barrier";
 // Computes memref dependence between 'srcAccess' and 'dstAccess', projects
 // out any dst loop IVs at depth greater than 'dstLoopDepth', and computes slice
 // bounds in 'sliceState' which represent the src IVs in terms of the dst IVs,
@@ -491,24 +492,28 @@ bool mlir::getBackwardComputationSliceState(const MemRefAccess &srcAccess,
   sliceState->lbOperands.resize(numSrcLoopIVs, sliceBoundOperands);
   sliceState->ubOperands.resize(numSrcLoopIVs, sliceBoundOperands);
 
-  // For read-read access pairs, clear any slice bounds on sequential loops.
+  llvm::SmallDenseSet<Value *, 8> sequentialLoops;
   if (readReadAccesses) {
+    // For read-read access pairs, clear any slice bounds on sequential loops.
     // Get sequential loops in loop nest rooted at 'srcLoopIVs[0]'.
-    llvm::SmallDenseSet<Value *, 8> sequentialLoops;
     getSequentialLoops(srcLoopIVs[0], &sequentialLoops);
-
-    // Clear all sliced loop bounds beginning at the first sequential loop.
-    for (unsigned i = 0; i < numSrcLoopIVs; ++i) {
-      Value *iv = srcLoopIVs[i]->getInductionVar();
-      if (sequentialLoops.count(iv) == 0)
-        continue;
-      for (unsigned j = i; j < numSrcLoopIVs; ++j) {
-        sliceState->lbs[j] = AffineMap();
-        sliceState->ubs[j] = AffineMap();
-      }
-      break;
-    }
   }
+  // Clear all sliced loop bounds beginning at the first sequential loop, or
+  // first loop with a slice fusion barrier attribute.
+  // TODO(andydavis, bondhugula) Use MemRef read/write regions instead of
+  // using 'kSliceFusionBarrierAttrName'.
+  for (unsigned i = 0; i < numSrcLoopIVs; ++i) {
+    Value *iv = srcLoopIVs[i]->getInductionVar();
+    if (sequentialLoops.count(iv) == 0 &&
+        srcLoopIVs[i]->getAttr(kSliceFusionBarrierAttrName) == nullptr)
+      continue;
+    for (unsigned j = i; j < numSrcLoopIVs; ++j) {
+      sliceState->lbs[j] = AffineMap();
+      sliceState->ubs[j] = AffineMap();
+    }
+    break;
+  }
+
   return true;
 }
 
diff --git a/mlir/test/Transforms/loop-fusion.mlir b/mlir/test/Transforms/loop-fusion.mlir
index 68dd07abac04..0f8addca8c02 100644
--- a/mlir/test/Transforms/loop-fusion.mlir
+++ b/mlir/test/Transforms/loop-fusion.mlir
@@ -2062,3 +2062,40 @@ func @two_matrix_vector_products() {
 // CHECK-NEXT:   return
   return
 }
+
+// -----
+// CHECK-DAG: [[MAP3:#map[0-9]+]] = (d0, d1, d2) -> (-d0 + d1)
+// CHECK-DAG: [[MAP4:#map[0-9]+]] = (d0, d1, d2) -> (d2)
+
+func @should_not_slice_past_slice_barrier() {
+  %0 = alloc() : memref<100x16xf32>
+  for %i0 = 0 to 100 {
+    for %i1 = 0 to 16 {
+      %1 = "op1"() : () -> f32
+      store %1, %0[%i0, %i1] : memref<100x16xf32>
+    } {slice_fusion_barrier: true}
+  }
+  for %i2 = 0 to 100 {
+    for %i3 = 0 to 16 {
+      %2 = load %0[%i2, %i3] : memref<100x16xf32>
+      "op2"(%2) : (f32) -> ()
+    }
+  }
+  // The 'slice_fusion_barrier' attribute on '%i1' prevents slicing the
+  // iteration space of '%i1' and any loops nested within it.
+// CHECK:        for %i0 = 0 to 100 {
+// CHECK-NEXT:     for %i1 = 0 to 16 {
+// CHECK-NEXT:       %1 = "op1"() : () -> f32
+// CHECK-NEXT:       %2 = affine.apply [[MAP3]](%i0, %i0, %i1)
+// CHECK-NEXT:       %3 = affine.apply [[MAP4]](%i0, %i0, %i1)
+// CHECK-NEXT:       store %1, %0[%2, %3] : memref<1x16xf32>
+// CHECK-NEXT:     } {slice_fusion_barrier: true}
+// CHECK-NEXT:     for %i2 = 0 to 16 {
+// CHECK-NEXT:       %4 = affine.apply [[MAP3]](%i0, %i0, %i2)
+// CHECK-NEXT:       %5 = affine.apply [[MAP4]](%i0, %i0, %i2)
+// CHECK-NEXT:       %6 = load %0[%4, %5] : memref<1x16xf32>
+// CHECK-NEXT:       "op2"(%6) : (f32) -> ()
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+  return
+}