From 39a1ddeb1cd36510639ab5e0948de9fd4edf9b63 Mon Sep 17 00:00:00 2001 From: MLIR Team Date: Mon, 4 Mar 2019 15:14:12 -0800 Subject: [PATCH] Adds loop attribute as a temporary work around to prevent slice fusion of loop nests containing instructions with side effects (the proper solution will be do use memref read/write regions in the future). PiperOrigin-RevId: 236733739 --- mlir/lib/Analysis/Utils.cpp | 33 ++++++++++++++---------- mlir/test/Transforms/loop-fusion.mlir | 37 +++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 14 deletions(-) diff --git a/mlir/lib/Analysis/Utils.cpp b/mlir/lib/Analysis/Utils.cpp index 8ba4157a8120..f41e2c4f27d9 100644 --- a/mlir/lib/Analysis/Utils.cpp +++ b/mlir/lib/Analysis/Utils.cpp @@ -435,6 +435,7 @@ static Instruction *getInstAtPosition(ArrayRef positions, return nullptr; } +const char *const kSliceFusionBarrierAttrName = "slice_fusion_barrier"; // Computes memref dependence between 'srcAccess' and 'dstAccess', projects // out any dst loop IVs at depth greater than 'dstLoopDepth', and computes slice // bounds in 'sliceState' which represent the src IVs in terms of the dst IVs, @@ -491,24 +492,28 @@ bool mlir::getBackwardComputationSliceState(const MemRefAccess &srcAccess, sliceState->lbOperands.resize(numSrcLoopIVs, sliceBoundOperands); sliceState->ubOperands.resize(numSrcLoopIVs, sliceBoundOperands); - // For read-read access pairs, clear any slice bounds on sequential loops. + llvm::SmallDenseSet sequentialLoops; if (readReadAccesses) { + // For read-read access pairs, clear any slice bounds on sequential loops. // Get sequential loops in loop nest rooted at 'srcLoopIVs[0]'. - llvm::SmallDenseSet sequentialLoops; getSequentialLoops(srcLoopIVs[0], &sequentialLoops); - - // Clear all sliced loop bounds beginning at the first sequential loop. - for (unsigned i = 0; i < numSrcLoopIVs; ++i) { - Value *iv = srcLoopIVs[i]->getInductionVar(); - if (sequentialLoops.count(iv) == 0) - continue; - for (unsigned j = i; j < numSrcLoopIVs; ++j) { - sliceState->lbs[j] = AffineMap(); - sliceState->ubs[j] = AffineMap(); - } - break; - } } + // Clear all sliced loop bounds beginning at the first sequential loop, or + // first loop with a slice fusion barrier attribute.. + // TODO(andydavis, bondhugula) Use MemRef read/write regions instead of + // using 'kSliceFusionBarrierAttrName'. + for (unsigned i = 0; i < numSrcLoopIVs; ++i) { + Value *iv = srcLoopIVs[i]->getInductionVar(); + if (sequentialLoops.count(iv) == 0 && + srcLoopIVs[i]->getAttr(kSliceFusionBarrierAttrName) == nullptr) + continue; + for (unsigned j = i; j < numSrcLoopIVs; ++j) { + sliceState->lbs[j] = AffineMap(); + sliceState->ubs[j] = AffineMap(); + } + break; + } + return true; } diff --git a/mlir/test/Transforms/loop-fusion.mlir b/mlir/test/Transforms/loop-fusion.mlir index 68dd07abac04..0f8addca8c02 100644 --- a/mlir/test/Transforms/loop-fusion.mlir +++ b/mlir/test/Transforms/loop-fusion.mlir @@ -2062,3 +2062,40 @@ func @two_matrix_vector_products() { // CHECK-NEXT: return return } + +// ----- +// CHECK-DAG: [[MAP3:#map[0-9]+]] = (d0, d1, d2) -> (-d0 + d1) +// CHECK-DAG: [[MAP4:#map[0-9]+]] = (d0, d1, d2) -> (d2) + +func @should_not_slice_past_slice_barrier() { + %0 = alloc() : memref<100x16xf32> + for %i0 = 0 to 100 { + for %i1 = 0 to 16 { + %1 = "op1"() : () -> f32 + store %1, %0[%i0, %i1] : memref<100x16xf32> + } {slice_fusion_barrier: true} + } + for %i2 = 0 to 100 { + for %i3 = 0 to 16 { + %2 = load %0[%i2, %i3] : memref<100x16xf32> + "op2"(%2) : (f32) -> () + } + } + // The 'slice_fusion_barrier' attribute on '%i1' prevents slicing the + // iteration space of '%i1' and any enclosing loop nests. +// CHECK: for %i0 = 0 to 100 { +// CHECK-NEXT: for %i1 = 0 to 16 { +// CHECK-NEXT: %1 = "op1"() : () -> f32 +// CHECK-NEXT: %2 = affine.apply [[MAP3]](%i0, %i0, %i1) +// CHECK-NEXT: %3 = affine.apply [[MAP4]](%i0, %i0, %i1) +// CHECK-NEXT: store %1, %0[%2, %3] : memref<1x16xf32> +// CHECK-NEXT: } {slice_fusion_barrier: true} +// CHECK-NEXT: for %i2 = 0 to 16 { +// CHECK-NEXT: %4 = affine.apply [[MAP3]](%i0, %i0, %i2) +// CHECK-NEXT: %5 = affine.apply [[MAP4]](%i0, %i0, %i2) +// CHECK-NEXT: %6 = load %0[%4, %5] : memref<1x16xf32> +// CHECK-NEXT: "op2"(%6) : (f32) -> () +// CHECK-NEXT: } +// CHECK-NEXT: } + return +}