Fix b/122139732; update FlatAffineConstraints::isEmpty() to eliminate IDs in a better order.

- Update isEmpty() to eliminate IDs in a better order. This is a speed improvement for complex cases (e.g., high-dimensional reshapes involving mods/divs).
- Minor efficiency update to projectOut, which was earlier making an extra, albeit benign, call to gaussianEliminateIds (NFC).
- Move getBestIdToEliminate further up in the file (NFC).
- Add the failing test case.
- Add debug info to checkMemrefAccessDependence.

PiperOrigin-RevId: 227244634
2018-12-29 15:51:30 -08:00 · 2018-12-29 15:51:30 -08:00 · 6e3462d251
parent dffc589ad2
commit 6e3462d251
3 changed files with 139 additions and 59 deletions
--- a/mlir/lib/Analysis/AffineAnalysis.cpp
+++ b/mlir/lib/Analysis/AffineAnalysis.cpp
@ -30,7 +30,9 @@
 #include "mlir/Support/Functional.h"
 #include "mlir/Support/MathExtras.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "affine-analysis"

 using namespace mlir;

@ -1193,6 +1195,11 @@ bool mlir::checkMemrefAccessDependence(
    const MemRefAccess &srcAccess, const MemRefAccess &dstAccess,
    unsigned loopDepth, FlatAffineConstraints *dependenceConstraints,
    llvm::SmallVector<DependenceComponent, 2> *dependenceComponents) {
+  LLVM_DEBUG(llvm::dbgs() << "Checking for dependence at depth: "
+                          << Twine(loopDepth) << " between:\n";);
+  LLVM_DEBUG(srcAccess.opInst->dump(););
+  LLVM_DEBUG(dstAccess.opInst->dump(););
+
  // Return 'false' if these accesses do not acces the same memref.
  if (srcAccess.memref != dstAccess.memref)
    return false;
@ -1269,10 +1276,14 @@ bool mlir::checkMemrefAccessDependence(
  if (dependenceConstraints->isEmpty()) {
    return false;
  }
+
  // Compute dependence direction vector and return true.
  if (dependenceComponents != nullptr) {
    computeDirectionVector(srcDomain, dstDomain, loopDepth,
                           dependenceConstraints, dependenceComponents);
  }
+
+  LLVM_DEBUG(llvm::dbgs() << "Dependence polyhedron:\n");
+  LLVM_DEBUG(dependenceConstraints->dump());
  return true;
 }
--- a/mlir/lib/Analysis/AffineStructures.cpp
+++ b/mlir/lib/Analysis/AffineStructures.cpp
@ -961,6 +961,40 @@ void FlatAffineConstraints::removeIdRange(unsigned idStart, unsigned idLimit) {
  // No resize necessary. numReservedCols remains the same.
 }

+/// Returns the position of the identifier that has the minimum <number of lower
+/// bounds> times <number of upper bounds> from the specified range of
+/// identifiers [start, end); ties go to the lowest position. It is often best
+/// to eliminate in the increasing order of these counts when doing
+/// Fourier-Motzkin elimination since FM adds that many new constraints.
+static unsigned getBestIdToEliminate(const FlatAffineConstraints &cst,
+                                     unsigned start, unsigned end) {
+  assert(start < cst.getNumIds() && end < cst.getNumIds() + 1);
+  // For column 'pos': (#inequalities with coeff > 0) * (#with coeff < 0).
+  auto getProductOfNumLowerUpperBounds = [&](unsigned pos) {
+    unsigned numLb = 0;
+    unsigned numUb = 0;
+    for (unsigned r = 0, e = cst.getNumInequalities(); r < e; r++) {
+      if (cst.atIneq(r, pos) > 0) { // Counted as a lower bound.
+        ++numLb;
+      } else if (cst.atIneq(r, pos) < 0) { // Counted as an upper bound.
+        ++numUb;
+      }
+    } // Rows with a zero coefficient at 'pos' are counted as neither.
+    return numLb * numUb;
+  };
+  // Linear scan; the strict '<' below keeps the earliest position on ties.
+  unsigned minLoc = start;
+  unsigned min = getProductOfNumLowerUpperBounds(start);
+  for (unsigned c = start + 1; c < end; c++) {
+    unsigned numLbUbProduct = getProductOfNumLowerUpperBounds(c);
+    if (numLbUbProduct < min) {
+      min = numLbUbProduct;
+      minLoc = c;
+    }
+  }
+  return minLoc;
+}
+
 // Checks for emptiness of the set by eliminating identifiers successively and
 // using the GCD test (on all equality constraints) and checking for trivially
 // invalid constraints. Returns 'true' if the constraint system is found to be
@ -969,23 +1003,29 @@ bool FlatAffineConstraints::isEmpty() const {
  if (isEmptyByGCDTest() || hasInvalidConstraint())
    return true;

-  auto tmpCst = clone();
-  for (unsigned i = 0, e = tmpCst->getNumIds(); i < e; i++) {
+  // First, eliminate as many identifiers as possible using Gaussian
+  // elimination.
+  FlatAffineConstraints tmpCst(*this);
+  unsigned currentPos = 0;
+  while (currentPos < tmpCst.getNumIds()) {
+    tmpCst.gaussianEliminateIds(currentPos, tmpCst.getNumIds());
+    ++currentPos;
    // We check emptiness through trivial checks after eliminating each ID to
    // detect emptiness early. Since the checks isEmptyByGCDTest() and
    // hasInvalidConstraint() are linear time and single sweep on the constraint
    // buffer, this appears reasonable - but can optimize in the future.
-    if (tmpCst->gaussianEliminateId(0)) {
-      if (tmpCst->hasInvalidConstraint() || tmpCst->isEmptyByGCDTest())
-        return true;
-    } else {
-      tmpCst->FourierMotzkinEliminate(0);
-      // If the variable couldn't be eliminated by Gaussian, FM wouldn't have
-      // modified the equalities in any way. So no need to again run GCD test.
-      // Check for trivial invalid constraints.
-      if (tmpCst->hasInvalidConstraint())
-        return true;
-    }
+    if (tmpCst.hasInvalidConstraint() || tmpCst.isEmptyByGCDTest())
+      return true;
+  }
+
+  // Eliminate the remaining using FM.
+  for (unsigned i = 0, e = tmpCst.getNumIds(); i < e; i++) {
+    tmpCst.FourierMotzkinEliminate(
+        getBestIdToEliminate(tmpCst, 0, tmpCst.getNumIds()));
+    // FM wouldn't have modified the equalities in any way. So no need to again
+    // run GCD test. Check for trivial invalid constraints.
+    if (tmpCst.hasInvalidConstraint())
+      return true;
  }
  return false;
 }
@ -1049,7 +1089,7 @@ void FlatAffineConstraints::GCDTightenInequalities() {
 unsigned FlatAffineConstraints::gaussianEliminateIds(unsigned posStart,
                                                     unsigned posLimit) {
  // Return if identifier positions to eliminate are out of range.
-  assert(posStart >= 0 && posLimit <= numIds);
+  assert(posLimit <= numIds);
  assert(hasConsistentState());

  if (posStart >= posLimit)
@ -1869,45 +1909,11 @@ void FlatAffineConstraints::FourierMotzkinEliminate(
  LLVM_DEBUG(dump());
 }

-/// Returns the position of the identifier that has the minimum <number of lower
-/// bounds> times <number of upper bounds> from the specified range of
-/// identifiers [start, end). It is often best to eliminate in the increasing
-/// order of these counts when doing Fourier-Motzkin elimination since FM adds
-/// that many new constraints.
-static unsigned getBestIdToEliminate(const FlatAffineConstraints &cst,
-                                     unsigned start, unsigned end) {
-  assert(start < cst.getNumIds() && end < cst.getNumIds() + 1);
-
-  auto getProductOfNumLowerUpperBounds = [&](unsigned pos) {
-    unsigned numLb = 0;
-    unsigned numUb = 0;
-    for (unsigned r = 0, e = cst.getNumInequalities(); r < e; r++) {
-      if (cst.atIneq(r, pos) > 0) {
-        ++numLb;
-      } else if (cst.atIneq(r, pos) < 0) {
-        ++numUb;
-      }
-    }
-    return numLb * numUb;
-  };
-
-  unsigned minLoc = start;
-  unsigned min = getProductOfNumLowerUpperBounds(start);
-  for (unsigned c = start + 1; c < end; c++) {
-    unsigned numLbUbProduct = getProductOfNumLowerUpperBounds(c);
-    if (numLbUbProduct < min) {
-      min = numLbUbProduct;
-      minLoc = c;
-    }
-  }
-  return minLoc;
-}
-
 void FlatAffineConstraints::projectOut(unsigned pos, unsigned num) {
  if (num == 0)
    return;

-  // 'pos' can be at most getNumCols() - 2.
+  // 'pos' can be at most getNumCols() - 2 if num > 0.
  assert(pos <= getNumCols() - 2 && "invalid position");
  assert(pos + num < getNumCols() && "invalid range");

@ -1915,17 +1921,14 @@ void FlatAffineConstraints::projectOut(unsigned pos, unsigned num) {
  unsigned currentPos = pos;
  unsigned numToEliminate = num;
  unsigned numGaussianEliminated = 0;
-  do {
+
+  while (currentPos < getNumIds()) {
    unsigned curNumEliminated =
        gaussianEliminateIds(currentPos, currentPos + numToEliminate);
-    if (curNumEliminated == 0) {
-      ++currentPos;
-      --numToEliminate;
-    } else {
-      numToEliminate -= curNumEliminated;
-    }
+    ++currentPos;
+    numToEliminate -= curNumEliminated + 1;
    numGaussianEliminated += curNumEliminated;
-  } while (numToEliminate != 0);
+  }

  // Eliminate the remaining using Fourier-Motzkin.
  for (unsigned i = 0; i < num - numGaussianEliminated; i++) {
--- a/mlir/test/Transforms/memref-dependence-check.mlir
+++ b/mlir/test/Transforms/memref-dependence-check.mlir
@ -698,4 +698,70 @@ mlfunc @mod_div_3d() {
  return
 }

-// TODO(bondhugula): add more test cases exercising mod/div affine_apply's.
+// -----
+// This test case arises in the context of a 6-d to 2-d reshape.
+// CHECK-LABEL: mlfunc @delinearize_mod_floordiv
+mlfunc @delinearize_mod_floordiv() {
+  %c0 = constant 0 : index
+  %val = constant 0 : i32
+  %in = alloc() : memref<2x2x3x3x16x1xi32>
+  %out = alloc() : memref<64x9xi32>
+
+  for %i0 = 0 to 2 {
+    for %i1 = 0 to 2 {
+      for %i2 = 0 to 3 {
+        for %i3 = 0 to 3 {
+          for %i4 = 0 to 16 {
+            for %i5 = 0 to 1 {
+              store %val, %in[%i0, %i1, %i2, %i3, %i4, %i5] : memref<2x2x3x3x16x1xi32>
+// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
+// expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
+// expected-note@-3 {{dependence from 0 to 0 at depth 3 = false}}
+// expected-note@-4 {{dependence from 0 to 0 at depth 4 = false}}
+// expected-note@-5 {{dependence from 0 to 0 at depth 5 = false}}
+// expected-note@-6 {{dependence from 0 to 0 at depth 6 = false}}
+// expected-note@-7 {{dependence from 0 to 0 at depth 7 = false}}
+// expected-note@-8 {{dependence from 0 to 1 at depth 1 = true}}
+// expected-note@-9 {{dependence from 0 to 2 at depth 1 = false}}
+            }
+          }
+        }
+      }
+    }
+  }
+
+  for %ii = 0 to 64 {
+    for %jj = 0 to 9 {
+      %a0 = affine_apply (d0, d1) -> (d0 * (9 * 1024) + d1 * 128) (%ii, %jj)
+      %a1 = affine_apply (d0) ->
+        (d0 floordiv (2 * 3 * 3 * 128 * 128),
+        (d0 mod 294912) floordiv (3 * 3 * 128 * 128),
+        (((d0 mod 294912) mod 147456) floordiv 1152) floordiv 8,
+        (((d0 mod 294912) mod 147456) mod 1152) floordiv 384,
+        ((((d0 mod 294912) mod 147456) mod 1152) mod 384) floordiv 128,
+        (((((d0 mod 294912) mod 147456) mod 1152) mod 384) mod 128)
+          floordiv 128) (%a0)
+      %v0 = load %in[%a1#0, %a1#1, %a1#3, %a1#4, %a1#2, %a1#5] : memref<2x2x3x3x16x1xi32>
+// expected-note@-1 {{dependence from 1 to 0 at depth 1 = false}}
+// expected-note@-2 {{dependence from 1 to 1 at depth 1 = false}}
+// expected-note@-3 {{dependence from 1 to 1 at depth 2 = false}}
+// expected-note@-4 {{dependence from 1 to 1 at depth 3 = false}}
+// expected-note@-5 {{dependence from 1 to 2 at depth 1 = false}}
+// expected-note@-6 {{dependence from 1 to 2 at depth 2 = false}}
+// expected-note@-7 {{dependence from 1 to 2 at depth 3 = false}}
+// TODO(andydavis): the dep tester shouldn't be printing out these messages
+// below; they are redundant.
+      store %v0, %out[%ii, %jj] : memref<64x9xi32>
+// expected-note@-1 {{dependence from 2 to 0 at depth 1 = false}}
+// expected-note@-2 {{dependence from 2 to 1 at depth 1 = false}}
+// expected-note@-3 {{dependence from 2 to 1 at depth 2 = false}}
+// expected-note@-4 {{dependence from 2 to 1 at depth 3 = false}}
+// expected-note@-5 {{dependence from 2 to 2 at depth 1 = false}}
+// expected-note@-6 {{dependence from 2 to 2 at depth 2 = false}}
+// expected-note@-7 {{dependence from 2 to 2 at depth 3 = false}}
+    }
+  }
+  return
+}
+
+// TODO(bondhugula): add more test cases involving mod's/div's.