forked from OSchip/llvm-project
Fix b/122139732; update FlatAffineConstraints::isEmpty() to eliminate IDs in a
better order. - update isEmpty() to eliminate IDs in a better order. Speed improvement for complex cases (e.g., high-d reshapes involving mods/divs). - minor efficiency update to projectOut (it was earlier making an extra, albeit benign, call to gaussianEliminateIds) (NFC). - move getBestIdToEliminate further up in the file (NFC). - add the failing test case. - add debug info to checkMemrefAccessDependence. PiperOrigin-RevId: 227244634
This commit is contained in:
parent
dffc589ad2
commit
6e3462d251
|
@ -30,7 +30,9 @@
|
|||
#include "mlir/Support/Functional.h"
|
||||
#include "mlir/Support/MathExtras.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
|
||||
#define DEBUG_TYPE "affine-analysis"
|
||||
|
||||
using namespace mlir;
|
||||
|
||||
|
@ -1193,6 +1195,11 @@ bool mlir::checkMemrefAccessDependence(
|
|||
const MemRefAccess &srcAccess, const MemRefAccess &dstAccess,
|
||||
unsigned loopDepth, FlatAffineConstraints *dependenceConstraints,
|
||||
llvm::SmallVector<DependenceComponent, 2> *dependenceComponents) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "Checking for dependence at depth: "
|
||||
<< Twine(loopDepth) << " between:\n";);
|
||||
LLVM_DEBUG(srcAccess.opInst->dump(););
|
||||
LLVM_DEBUG(dstAccess.opInst->dump(););
|
||||
|
||||
// Return 'false' if these accesses do not access the same memref.
|
||||
if (srcAccess.memref != dstAccess.memref)
|
||||
return false;
|
||||
|
@ -1269,10 +1276,14 @@ bool mlir::checkMemrefAccessDependence(
|
|||
if (dependenceConstraints->isEmpty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Compute dependence direction vector and return true.
|
||||
if (dependenceComponents != nullptr) {
|
||||
computeDirectionVector(srcDomain, dstDomain, loopDepth,
|
||||
dependenceConstraints, dependenceComponents);
|
||||
}
|
||||
|
||||
LLVM_DEBUG(llvm::dbgs() << "Dependence polyhedron:\n");
|
||||
LLVM_DEBUG(dependenceConstraints->dump());
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -961,6 +961,40 @@ void FlatAffineConstraints::removeIdRange(unsigned idStart, unsigned idLimit) {
|
|||
// No resize necessary. numReservedCols remains the same.
|
||||
}
|
||||
|
||||
/// Returns the position of the identifier that has the minimum <number of lower
|
||||
/// bounds> times <number of upper bounds> from the specified range of
|
||||
/// identifiers [start, end). It is often best to eliminate in the increasing
|
||||
/// order of these counts when doing Fourier-Motzkin elimination since FM adds
|
||||
/// that many new constraints.
|
||||
static unsigned getBestIdToEliminate(const FlatAffineConstraints &cst,
|
||||
unsigned start, unsigned end) {
|
||||
assert(start < cst.getNumIds() && end < cst.getNumIds() + 1);
|
||||
|
||||
auto getProductOfNumLowerUpperBounds = [&](unsigned pos) {
|
||||
unsigned numLb = 0;
|
||||
unsigned numUb = 0;
|
||||
for (unsigned r = 0, e = cst.getNumInequalities(); r < e; r++) {
|
||||
if (cst.atIneq(r, pos) > 0) {
|
||||
++numLb;
|
||||
} else if (cst.atIneq(r, pos) < 0) {
|
||||
++numUb;
|
||||
}
|
||||
}
|
||||
return numLb * numUb;
|
||||
};
|
||||
|
||||
unsigned minLoc = start;
|
||||
unsigned min = getProductOfNumLowerUpperBounds(start);
|
||||
for (unsigned c = start + 1; c < end; c++) {
|
||||
unsigned numLbUbProduct = getProductOfNumLowerUpperBounds(c);
|
||||
if (numLbUbProduct < min) {
|
||||
min = numLbUbProduct;
|
||||
minLoc = c;
|
||||
}
|
||||
}
|
||||
return minLoc;
|
||||
}
|
||||
|
||||
// Checks for emptiness of the set by eliminating identifiers successively and
|
||||
// using the GCD test (on all equality constraints) and checking for trivially
|
||||
// invalid constraints. Returns 'true' if the constraint system is found to be
|
||||
|
@ -969,23 +1003,29 @@ bool FlatAffineConstraints::isEmpty() const {
|
|||
if (isEmptyByGCDTest() || hasInvalidConstraint())
|
||||
return true;
|
||||
|
||||
auto tmpCst = clone();
|
||||
for (unsigned i = 0, e = tmpCst->getNumIds(); i < e; i++) {
|
||||
// First, eliminate as many identifiers as possible using Gaussian
|
||||
// elimination.
|
||||
FlatAffineConstraints tmpCst(*this);
|
||||
unsigned currentPos = 0;
|
||||
while (currentPos < tmpCst.getNumIds()) {
|
||||
tmpCst.gaussianEliminateIds(currentPos, tmpCst.getNumIds());
|
||||
++currentPos;
|
||||
// We check emptiness through trivial checks after eliminating each ID to
|
||||
// detect emptiness early. Since the checks isEmptyByGCDTest() and
|
||||
// hasInvalidConstraint() are linear time and single sweep on the constraint
|
||||
// buffer, this appears reasonable - but can optimize in the future.
|
||||
if (tmpCst->gaussianEliminateId(0)) {
|
||||
if (tmpCst->hasInvalidConstraint() || tmpCst->isEmptyByGCDTest())
|
||||
return true;
|
||||
} else {
|
||||
tmpCst->FourierMotzkinEliminate(0);
|
||||
// If the variable couldn't be eliminated by Gaussian, FM wouldn't have
|
||||
// modified the equalities in any way. So no need to again run GCD test.
|
||||
// Check for trivial invalid constraints.
|
||||
if (tmpCst->hasInvalidConstraint())
|
||||
return true;
|
||||
}
|
||||
if (tmpCst.hasInvalidConstraint() || tmpCst.isEmptyByGCDTest())
|
||||
return true;
|
||||
}
|
||||
|
||||
// Eliminate the remaining using FM.
|
||||
for (unsigned i = 0, e = tmpCst.getNumIds(); i < e; i++) {
|
||||
tmpCst.FourierMotzkinEliminate(
|
||||
getBestIdToEliminate(tmpCst, 0, tmpCst.getNumIds()));
|
||||
// FM wouldn't have modified the equalities in any way. So no need to again
|
||||
// run GCD test. Check for trivial invalid constraints.
|
||||
if (tmpCst.hasInvalidConstraint())
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -1049,7 +1089,7 @@ void FlatAffineConstraints::GCDTightenInequalities() {
|
|||
unsigned FlatAffineConstraints::gaussianEliminateIds(unsigned posStart,
|
||||
unsigned posLimit) {
|
||||
// Return if identifier positions to eliminate are out of range.
|
||||
assert(posStart >= 0 && posLimit <= numIds);
|
||||
assert(posLimit <= numIds);
|
||||
assert(hasConsistentState());
|
||||
|
||||
if (posStart >= posLimit)
|
||||
|
@ -1869,45 +1909,11 @@ void FlatAffineConstraints::FourierMotzkinEliminate(
|
|||
LLVM_DEBUG(dump());
|
||||
}
|
||||
|
||||
/// Returns the position of the identifier that has the minimum <number of lower
|
||||
/// bounds> times <number of upper bounds> from the specified range of
|
||||
/// identifiers [start, end). It is often best to eliminate in the increasing
|
||||
/// order of these counts when doing Fourier-Motzkin elimination since FM adds
|
||||
/// that many new constraints.
|
||||
static unsigned getBestIdToEliminate(const FlatAffineConstraints &cst,
|
||||
unsigned start, unsigned end) {
|
||||
assert(start < cst.getNumIds() && end < cst.getNumIds() + 1);
|
||||
|
||||
auto getProductOfNumLowerUpperBounds = [&](unsigned pos) {
|
||||
unsigned numLb = 0;
|
||||
unsigned numUb = 0;
|
||||
for (unsigned r = 0, e = cst.getNumInequalities(); r < e; r++) {
|
||||
if (cst.atIneq(r, pos) > 0) {
|
||||
++numLb;
|
||||
} else if (cst.atIneq(r, pos) < 0) {
|
||||
++numUb;
|
||||
}
|
||||
}
|
||||
return numLb * numUb;
|
||||
};
|
||||
|
||||
unsigned minLoc = start;
|
||||
unsigned min = getProductOfNumLowerUpperBounds(start);
|
||||
for (unsigned c = start + 1; c < end; c++) {
|
||||
unsigned numLbUbProduct = getProductOfNumLowerUpperBounds(c);
|
||||
if (numLbUbProduct < min) {
|
||||
min = numLbUbProduct;
|
||||
minLoc = c;
|
||||
}
|
||||
}
|
||||
return minLoc;
|
||||
}
|
||||
|
||||
void FlatAffineConstraints::projectOut(unsigned pos, unsigned num) {
|
||||
if (num == 0)
|
||||
return;
|
||||
|
||||
// 'pos' can be at most getNumCols() - 2.
|
||||
// 'pos' can be at most getNumCols() - 2 if num > 0.
|
||||
assert(pos <= getNumCols() - 2 && "invalid position");
|
||||
assert(pos + num < getNumCols() && "invalid range");
|
||||
|
||||
|
@ -1915,17 +1921,14 @@ void FlatAffineConstraints::projectOut(unsigned pos, unsigned num) {
|
|||
unsigned currentPos = pos;
|
||||
unsigned numToEliminate = num;
|
||||
unsigned numGaussianEliminated = 0;
|
||||
do {
|
||||
|
||||
while (currentPos < getNumIds()) {
|
||||
unsigned curNumEliminated =
|
||||
gaussianEliminateIds(currentPos, currentPos + numToEliminate);
|
||||
if (curNumEliminated == 0) {
|
||||
++currentPos;
|
||||
--numToEliminate;
|
||||
} else {
|
||||
numToEliminate -= curNumEliminated;
|
||||
}
|
||||
++currentPos;
|
||||
numToEliminate -= curNumEliminated + 1;
|
||||
numGaussianEliminated += curNumEliminated;
|
||||
} while (numToEliminate != 0);
|
||||
}
|
||||
|
||||
// Eliminate the remaining using Fourier-Motzkin.
|
||||
for (unsigned i = 0; i < num - numGaussianEliminated; i++) {
|
||||
|
|
|
@ -698,4 +698,70 @@ mlfunc @mod_div_3d() {
|
|||
return
|
||||
}
|
||||
|
||||
// TODO(bondhugula): add more test cases exercising mod/div affine_apply's.
|
||||
// -----
|
||||
// This test case arises in the context of a 6-d to 2-d reshape.
//
// Structure of the function:
//   1. A 6-deep loop nest stores %val into every element of the 6-d memref
//      %in (access #0 in the diagnostic notes).
//   2. A 2-deep loop nest over (%ii, %jj) linearizes the 2-d index into %a0,
//      splits %a0 back into six indices (%a1#0 .. %a1#5) through a chain of
//      mod / floordiv expressions, loads from %in using those indices in the
//      order #0, #1, #3, #4, #2, #5 (access #1), and stores the loaded value
//      to the 2-d memref %out (access #2).
//
// The diagnostic notes attached to each access pin the dependence result for
// every (source, destination, depth) combination. Each note is anchored to its
// access by a relative line offset, so no lines may be inserted between an
// access and its notes.
// NOTE(review): presumably the mod/floordiv chain is what makes the dependence
// constraint system expensive to test for emptiness - confirm against the
// change description.
|
||||
// CHECK-LABEL: mlfunc @delinearize_mod_floordiv
|
||||
mlfunc @delinearize_mod_floordiv() {
|
||||
%c0 = constant 0 : index
|
||||
%val = constant 0 : i32
|
||||
%in = alloc() : memref<2x2x3x3x16x1xi32>
|
||||
%out = alloc() : memref<64x9xi32>
|
||||
|
||||
for %i0 = 0 to 2 {
|
||||
for %i1 = 0 to 2 {
|
||||
for %i2 = 0 to 3 {
|
||||
for %i3 = 0 to 3 {
|
||||
for %i4 = 0 to 16 {
|
||||
for %i5 = 0 to 1 {
|
||||
store %val, %in[%i0, %i1, %i2, %i3, %i4, %i5] : memref<2x2x3x3x16x1xi32>
|
||||
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
|
||||
// expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
|
||||
// expected-note@-3 {{dependence from 0 to 0 at depth 3 = false}}
|
||||
// expected-note@-4 {{dependence from 0 to 0 at depth 4 = false}}
|
||||
// expected-note@-5 {{dependence from 0 to 0 at depth 5 = false}}
|
||||
// expected-note@-6 {{dependence from 0 to 0 at depth 6 = false}}
|
||||
// expected-note@-7 {{dependence from 0 to 0 at depth 7 = false}}
|
||||
// expected-note@-8 {{dependence from 0 to 1 at depth 1 = true}}
|
||||
// expected-note@-9 {{dependence from 0 to 2 at depth 1 = false}}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for %ii = 0 to 64 {
|
||||
for %jj = 0 to 9 {
|
||||
%a0 = affine_apply (d0, d1) -> (d0 * (9 * 1024) + d1 * 128) (%ii, %jj)
|
||||
%a1 = affine_apply (d0) ->
|
||||
(d0 floordiv (2 * 3 * 3 * 128 * 128),
|
||||
(d0 mod 294912) floordiv (3 * 3 * 128 * 128),
|
||||
(((d0 mod 294912) mod 147456) floordiv 1152) floordiv 8,
|
||||
(((d0 mod 294912) mod 147456) mod 1152) floordiv 384,
|
||||
((((d0 mod 294912) mod 147456) mod 1152) mod 384) floordiv 128,
|
||||
(((((d0 mod 294912) mod 147456) mod 1152) mod 384) mod 128)
|
||||
floordiv 128) (%a0)
|
||||
%v0 = load %in[%a1#0, %a1#1, %a1#3, %a1#4, %a1#2, %a1#5] : memref<2x2x3x3x16x1xi32>
|
||||
// expected-note@-1 {{dependence from 1 to 0 at depth 1 = false}}
|
||||
// expected-note@-2 {{dependence from 1 to 1 at depth 1 = false}}
|
||||
// expected-note@-3 {{dependence from 1 to 1 at depth 2 = false}}
|
||||
// expected-note@-4 {{dependence from 1 to 1 at depth 3 = false}}
|
||||
// expected-note@-5 {{dependence from 1 to 2 at depth 1 = false}}
|
||||
// expected-note@-6 {{dependence from 1 to 2 at depth 2 = false}}
|
||||
// expected-note@-7 {{dependence from 1 to 2 at depth 3 = false}}
|
||||
// TODO(andydavis): the dep tester shouldn't be printing out these messages
|
||||
// below; they are redundant.
|
||||
store %v0, %out[%ii, %jj] : memref<64x9xi32>
|
||||
// expected-note@-1 {{dependence from 2 to 0 at depth 1 = false}}
|
||||
// expected-note@-2 {{dependence from 2 to 1 at depth 1 = false}}
|
||||
// expected-note@-3 {{dependence from 2 to 1 at depth 2 = false}}
|
||||
// expected-note@-4 {{dependence from 2 to 1 at depth 3 = false}}
|
||||
// expected-note@-5 {{dependence from 2 to 2 at depth 1 = false}}
|
||||
// expected-note@-6 {{dependence from 2 to 2 at depth 2 = false}}
|
||||
// expected-note@-7 {{dependence from 2 to 2 at depth 3 = false}}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// TODO(bondhugula): add more test cases involving mod's/div's.
|
||||
|
|
Loading…
Reference in New Issue