Refactor the affine analysis by moving some functionality to IR and some to AffineOps. This is important for allowing the affine dialect to define canonicalizations directly on the operations instead of relying on transformation passes, e.g. ComposeAffineMaps. A summary of the refactoring:

* AffineStructures has moved to IR.
* simplifyAffineExpr/simplifyAffineMap/getFlattenedAffineExpr have moved to IR.
* makeComposedAffineApply/fullyComposeAffineMapAndOperands have moved to AffineOps.
* ComposeAffineMaps is replaced by AffineApplyOp::canonicalize and deleted.

PiperOrigin-RevId: 232586468
parent 6f7470a56a
commit 10237de8eb
@@ -29,6 +29,9 @@

namespace mlir {
class AffineBound;
class AffineValueMap;
class FlatAffineConstraints;
class FuncBuilder;

class AffineOpsDialect : public Dialect {
public:
@@ -61,6 +64,9 @@ public:
    return getAttrOfType<AffineMapAttr>("map").getValue();
  }

  /// Returns an AffineValueMap representing this affine apply.
  AffineValueMap getAsAffineValueMap();

  /// Returns true if the result of this operation can be used as a dimension id.
  bool isValidDim() const;
@@ -247,6 +253,19 @@ ConstOpPointer<AffineForOp> getForInductionVarOwner(const Value *val);
void extractForInductionVars(ArrayRef<OpPointer<AffineForOp>> forInsts,
                             SmallVectorImpl<Value *> *ivs);

/// Adds constraints (lower and upper bounds) for the specified 'for'
/// instruction's Value using IR information stored in its bound maps. The
/// right identifier is first looked up using forOp's Value. Returns
/// false for the yet unimplemented/unsupported cases, and true if the
/// information is successfully added. Asserts if the Value corresponding to
/// the 'for' instruction isn't found in the constraint system. Any new
/// identifiers that are found in the bound operands of the 'for' instruction
/// are added as trailing identifiers (either dimensional or symbolic
/// depending on whether the operand is a valid ML Function symbol).
// TODO(bondhugula): add support for non-unit strides.
bool addAffineForOpDomain(ConstOpPointer<AffineForOp> forOp,
                          FlatAffineConstraints *constraints);

/// AffineBound represents a lower or upper bound in the for instruction.
/// This class does not own the underlying operands. Instead, it refers
/// to the operands stored in the AffineForOp. Its life span should not exceed
@@ -256,6 +275,9 @@ public:
  ConstOpPointer<AffineForOp> getAffineForOp() const { return inst; }
  AffineMap getMap() const { return map; }

  /// Returns an AffineValueMap representing this bound.
  AffineValueMap getAsAffineValueMap();

  unsigned getNumOperands() const { return opEnd - opStart; }
  const Value *getOperand(unsigned idx) const {
    return inst->getInstruction()->getOperand(opStart + idx);
@@ -354,6 +376,23 @@ bool isValidSymbol(const Value *value);
void canonicalizeMapAndOperands(AffineMap *map,
                                llvm::SmallVectorImpl<Value *> *operands);

/// Returns a composed AffineApplyOp by composing `map` and `operands` with
/// other AffineApplyOps supplying those operands. The operands of the resulting
/// AffineApplyOp do not change the length of AffineApplyOp chains.
OpPointer<AffineApplyOp>
makeComposedAffineApply(FuncBuilder *b, Location loc, AffineMap map,
                        llvm::ArrayRef<Value *> operands);

/// Given an affine map `map` and its input `operands`, this method composes
/// into `map`, maps of AffineApplyOps whose results are the values in
/// `operands`, iteratively until no more of `operands` are the result of an
/// AffineApplyOp. When this function returns, `map` becomes the composed affine
/// map, and each Value in `operands` is guaranteed to be either a loop IV or a
/// terminal symbol, i.e., a symbol defined at the top level or a block/function
/// argument.
void fullyComposeAffineMapAndOperands(AffineMap *map,
                                      llvm::SmallVectorImpl<Value *> *operands);

} // end namespace mlir

#endif
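
// Usage sketch (illustrative, not part of this commit), using only the
// signatures declared above; `b` (a FuncBuilder *), `loc`, `map`, and
// `operands` are assumed to be supplied by the surrounding pass:
//
//   llvm::SmallVector<Value *, 8> ops(operands.begin(), operands.end());
//   // Afterwards no value in `ops` is defined by an AffineApplyOp.
//   fullyComposeAffineMapAndOperands(&map, &ops);
//   // Or create a single composed affine_apply in one step:
//   OpPointer<AffineApplyOp> apply = makeComposedAffineApply(b, loc, map, ops);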
@@ -31,47 +31,13 @@
namespace mlir {

class AffineApplyOp;
class AffineExpr;
class AffineForOp;
class AffineMap;
class AffineValueMap;
class FlatAffineConstraints;
class FuncBuilder;
class Instruction;
class IntegerSet;
class Location;
class MLIRContext;
template <typename OpType> class OpPointer;
class Value;

/// Simplify an affine expression by flattening and some amount of
/// simple analysis. This has complexity linear in the number of nodes in
/// 'expr'. Returns the simplified expression, which is the same as the input
/// expression if it can't be simplified.
AffineExpr simplifyAffineExpr(AffineExpr expr, unsigned numDims,
                              unsigned numSymbols);

/// Simplify an affine map by simplifying its underlying AffineExpr results and
/// sizes.
AffineMap simplifyAffineMap(AffineMap map);

/// Returns a composed AffineApplyOp by composing `map` and `operands` with
/// other AffineApplyOps supplying those operands. The operands of the resulting
/// AffineApplyOp do not change the length of AffineApplyOp chains.
OpPointer<AffineApplyOp>
makeComposedAffineApply(FuncBuilder *b, Location loc, AffineMap map,
                        llvm::ArrayRef<Value *> operands);

/// Given an affine map `map` and its input `operands`, this method composes
/// into `map`, maps of AffineApplyOps whose results are the values in
/// `operands`, iteratively until no more of `operands` are the result of an
/// AffineApplyOp. When this function returns, `map` becomes the composed affine
/// map, and each Value in `operands` is guaranteed to be either a loop IV or a
/// terminal symbol, i.e., a symbol defined at the top level or a block/function
/// argument.
void fullyComposeAffineMapAndOperands(AffineMap *map,
                                      llvm::SmallVectorImpl<Value *> *operands);

/// Returns in `affineApplyOps`, the sequence of those AffineApplyOp
/// Instructions that are reachable via a search starting from `operands` and
/// ending at those operands that are not the result of an AffineApplyOp.
@@ -79,33 +45,6 @@ void getReachableAffineApplyOps(
    llvm::ArrayRef<Value *> operands,
    llvm::SmallVectorImpl<Instruction *> &affineApplyOps);

/// Flattens 'expr' into 'flattenedExpr'. Returns true on success or false
/// if 'expr' could not be flattened (i.e., semi-affine is not yet handled).
/// 'cst' contains constraints that connect newly introduced local identifiers
/// to existing dimensional and symbolic identifiers. See documentation for
/// AffineExprFlattener on how mod's and div's are flattened.
bool getFlattenedAffineExpr(AffineExpr expr, unsigned numDims,
                            unsigned numSymbols,
                            llvm::SmallVectorImpl<int64_t> *flattenedExpr,
                            FlatAffineConstraints *cst = nullptr);

/// Flattens the result expressions of the map to their corresponding flattened
/// forms and sets them in 'flattenedExprs'. Returns true on success or false
/// if any expression in the map could not be flattened (i.e., semi-affine is
/// not yet handled). 'cst' contains constraints that connect newly introduced
/// local identifiers to existing dimensional and symbolic identifiers. See
/// documentation for AffineExprFlattener on how mod's and div's are flattened.
/// For all affine expressions that share the same operands (like those of an
/// affine map), this method should be used instead of repeatedly calling
/// getFlattenedAffineExpr since local variables added to deal with div's and
/// mod's will be reused across expressions.
bool getFlattenedAffineExprs(
    AffineMap map, std::vector<llvm::SmallVector<int64_t, 8>> *flattenedExprs,
    FlatAffineConstraints *cst = nullptr);
bool getFlattenedAffineExprs(
    IntegerSet set, std::vector<llvm::SmallVector<int64_t, 8>> *flattenedExprs,
    FlatAffineConstraints *cst = nullptr);

/// Builds a system of constraints with dimensional identifiers corresponding to
/// the loop IVs of the forOps appearing in that order. Bounds of the loop are
/// used to add appropriate inequalities. Any symbols found in the bound
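
// Worked example (sketch, not part of this commit): the flattened form uses
// the column layout [dims, symbols, locals, constant]. For an `expr` equal to
// d0 + 2*s0 + 3 over one dim and one symbol, no locals are introduced:
//
//   llvm::SmallVector<int64_t, 4> flat;
//   getFlattenedAffineExpr(expr, /*numDims=*/1, /*numSymbols=*/1, &flat);
//   // flat == {1, 2, 3}: coefficient 1 for d0, 2 for s0, constant term 3.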
@@ -25,8 +25,8 @@
#ifndef MLIR_ANALYSIS_UTILS_H
#define MLIR_ANALYSIS_UTILS_H

#include "mlir/Analysis/AffineStructures.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/AffineStructures.h"
#include "mlir/IR/Location.h"
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/SmallVector.h"
@@ -32,6 +32,8 @@ namespace mlir {

class MLIRContext;
class AffineMap;
class IntegerSet;
class FlatAffineConstraints;

namespace detail {
@@ -247,6 +249,40 @@ template <typename U> U AffineExpr::cast() const {
  return U(expr);
}

/// Simplify an affine expression by flattening and some amount of
/// simple analysis. This has complexity linear in the number of nodes in
/// 'expr'. Returns the simplified expression, which is the same as the input
/// expression if it can't be simplified.
AffineExpr simplifyAffineExpr(AffineExpr expr, unsigned numDims,
                              unsigned numSymbols);

/// Flattens 'expr' into 'flattenedExpr'. Returns true on success or false
/// if 'expr' could not be flattened (i.e., semi-affine is not yet handled).
/// 'cst' contains constraints that connect newly introduced local identifiers
/// to existing dimensional and symbolic identifiers. See documentation for
/// AffineExprFlattener on how mod's and div's are flattened.
bool getFlattenedAffineExpr(AffineExpr expr, unsigned numDims,
                            unsigned numSymbols,
                            llvm::SmallVectorImpl<int64_t> *flattenedExpr,
                            FlatAffineConstraints *cst = nullptr);

/// Flattens the result expressions of the map to their corresponding flattened
/// forms and sets them in 'flattenedExprs'. Returns true on success or false
/// if any expression in the map could not be flattened (i.e., semi-affine is
/// not yet handled). 'cst' contains constraints that connect newly introduced
/// local identifiers to existing dimensional and symbolic identifiers. See
/// documentation for AffineExprFlattener on how mod's and div's are flattened.
/// For all affine expressions that share the same operands (like those of an
/// affine map), this method should be used instead of repeatedly calling
/// getFlattenedAffineExpr since local variables added to deal with div's and
/// mod's will be reused across expressions.
bool getFlattenedAffineExprs(
    AffineMap map, std::vector<llvm::SmallVector<int64_t, 8>> *flattenedExprs,
    FlatAffineConstraints *cst = nullptr);
bool getFlattenedAffineExprs(
    IntegerSet set, std::vector<llvm::SmallVector<int64_t, 8>> *flattenedExprs,
    FlatAffineConstraints *cst = nullptr);

} // namespace mlir

namespace llvm {
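
// Usage sketch (not part of this commit): simplifyAffineExpr folds flattened
// coefficient contributions, so (d0 + d0) - 2*d0 reduces to the constant 0.
// `ctx` is an assumed MLIRContext *:
//
//   AffineExpr d0 = getAffineDimExpr(0, ctx);
//   AffineExpr e = simplifyAffineExpr(d0 + d0 - d0 * 2, /*numDims=*/1,
//                                     /*numSymbols=*/0);
//   assert(e == getAffineConstantExpr(0, ctx));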
@@ -150,6 +150,10 @@ inline ::llvm::hash_code hash_value(AffineMap arg) {
  return ::llvm::hash_value(arg.map);
}

/// Simplify an affine map by simplifying its underlying AffineExpr results and
/// sizes.
AffineMap simplifyAffineMap(AffineMap map);

} // end namespace mlir

namespace llvm {
@@ -19,19 +19,15 @@
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_ANALYSIS_AFFINE_STRUCTURES_H
#define MLIR_ANALYSIS_AFFINE_STRUCTURES_H
#ifndef MLIR_IR_AFFINE_STRUCTURES_H
#define MLIR_IR_AFFINE_STRUCTURES_H

#include "mlir/IR/AffineExpr.h"

namespace mlir {

class AffineApplyOp;
class AffineBound;
class AffineForOp;
class AffineCondition;
class AffineMap;
template <typename T> class ConstOpPointer;
class IntegerSet;
class MLIRContext;
class Value;
@@ -126,15 +122,16 @@ public:
  // Creates an empty AffineValueMap (users should call 'reset' to reset map
  // and operands).
  AffineValueMap() {}
  AffineValueMap(const AffineApplyOp &op);
  AffineValueMap(const AffineBound &bound);
  AffineValueMap(AffineMap map);
  AffineValueMap(AffineMap map, ArrayRef<Value *> operands);
  AffineValueMap(AffineMap map, ArrayRef<Value *> operands,
                 ArrayRef<Value *> results = llvm::None);

  ~AffineValueMap();

  // Resets this AffineValueMap with 'map' and 'operands'.
  void reset(AffineMap map, ArrayRef<Value *> operands);
  // Resets this AffineValueMap with 'map', 'operands', and 'results'.
  void reset(AffineMap map, ArrayRef<Value *> operands,
             ArrayRef<Value *> results = llvm::None);

  /// Return true if the idx^th result can be proved to be a multiple of
  /// 'factor', false otherwise.
  inline bool isMultipleOf(unsigned idx, int64_t factor) const;
@@ -398,18 +395,6 @@ public:
  /// q = dividend floordiv c <=> c*q <= dividend <= c*q + c - 1.
  void addLocalFloorDiv(ArrayRef<int64_t> dividend, int64_t divisor);

  /// Adds constraints (lower and upper bounds) for the specified 'for'
  /// instruction's Value using IR information stored in its bound maps. The
  /// right identifier is first looked up using forOp's Value. Returns
  /// false for the yet unimplemented/unsupported cases, and true if the
  /// information is successfully added. Asserts if the Value corresponding to
  /// the 'for' instruction isn't found in the constraint system. Any new
  /// identifiers that are found in the bound operands of the 'for' instruction
  /// are added as trailing identifiers (either dimensional or symbolic
  /// depending on whether the operand is a valid ML Function symbol).
  // TODO(bondhugula): add support for non-unit strides.
  bool addAffineForOpDomain(ConstOpPointer<AffineForOp> forOp);

  /// Adds a constant lower bound constraint for the specified expression.
  void addConstantLowerBound(ArrayRef<int64_t> expr, int64_t lb);
  /// Adds a constant upper bound constraint for the specified expression.
@@ -694,4 +679,4 @@ private:

} // end namespace mlir.

#endif // MLIR_ANALYSIS_AFFINE_STRUCTURES_H
#endif // MLIR_IR_AFFINE_STRUCTURES_H
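
// Worked example (sketch, not part of this commit): with two dimensional
// identifiers (d0, d1) and columns (d0, d1, const), calling
//
//   int64_t dividend[] = {1, 1, 0}; // d0 + d1
//   cst.addLocalFloorDiv(dividend, /*divisor=*/4);
//
// introduces a local q = (d0 + d1) floordiv 4 constrained, per the
// equivalence documented above, by d0 + d1 - 4*q >= 0 and
// -(d0 + d1) + 4*q + 3 >= 0. `cst` is an assumed FlatAffineConstraints.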
@@ -79,9 +79,6 @@ FunctionPass *createLoopFusionPass();
/// memory hierarchy.
FunctionPass *createPipelineDataTransferPass();

/// Creates a pass which composes all affine maps applied to loads and stores.
FunctionPass *createComposeAffineMapsPass();

/// Lowers affine control flow instructions (ForStmt, IfStmt and AffineApplyOp)
/// to equivalent lower-level constructs (flow of basic blocks and arithmetic
/// primitives).
@@ -16,6 +16,7 @@
// =============================================================================

#include "mlir/AffineOps/AffineOps.h"
#include "mlir/IR/AffineStructures.h"
#include "mlir/IR/Block.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
@@ -23,7 +24,11 @@
#include "mlir/IR/OpImplementation.h"
#include "mlir/IR/PatternMatch.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Support/Debug.h"
using namespace mlir;
using llvm::dbgs;

#define DEBUG_TYPE "affine-analysis"

//===----------------------------------------------------------------------===//
// AffineOpsDialect
@@ -130,6 +135,12 @@ bool AffineApplyOp::verify() const {
  return false;
}

/// Returns an AffineValueMap representing this affine apply.
AffineValueMap AffineApplyOp::getAsAffineValueMap() {
  SmallVector<Value *, 8> operands(getOperands());
  return AffineValueMap(getAffineMap(), operands, getResult());
}

// The result of the affine apply operation can be used as a dimension id if it
// is a CFG value or if it is a Value, and all the operands are valid
// dimension ids.
@@ -168,6 +179,77 @@ struct SimplifyAffineApply : public RewritePattern {
} // end anonymous namespace.

namespace {
/// An `AffineApplyNormalizer` is a helper class that is not visible to the user
/// and supports renumbering operands of AffineApplyOp. This acts as a
/// reindexing map of Value* to positional dims or symbols and allows
/// simplifications such as:
///
/// ```mlir
///    %1 = affine_apply (d0, d1) -> (d0 - d1) (%0, %0)
/// ```
///
/// into:
///
/// ```mlir
///    %1 = affine_apply () -> (0)
/// ```
struct AffineApplyNormalizer {
  AffineApplyNormalizer(AffineMap map, ArrayRef<Value *> operands);

  /// Returns the AffineMap resulting from normalization.
  AffineMap getAffineMap() { return affineMap; }

  SmallVector<Value *, 8> getOperands() {
    SmallVector<Value *, 8> res(reorderedDims);
    res.append(concatenatedSymbols.begin(), concatenatedSymbols.end());
    return res;
  }

private:
  /// Helper function to insert `v` into the coordinate system of the current
  /// AffineApplyNormalizer. Returns the AffineDimExpr with the corresponding
  /// renumbered position.
  AffineDimExpr applyOneDim(Value *v);

  /// Given an `other` normalizer, this rewrites `other.affineMap` in the
  /// coordinate system of the current AffineApplyNormalizer.
  /// Returns the rewritten AffineMap and updates the dims and symbols of
  /// `this`.
  AffineMap renumber(const AffineApplyNormalizer &other);

  /// Given an `app`, rewrites `app.getAffineMap()` in the coordinate system of
  /// the current AffineApplyNormalizer.
  /// Returns the rewritten AffineMap and updates the dims and symbols of
  /// `this`.
  AffineMap renumber(const AffineApplyOp &app);

  /// Maps of Value* to position in `affineMap`.
  DenseMap<Value *, unsigned> dimValueToPosition;

  /// Ordered dims and symbols matching positional dims and symbols in
  /// `affineMap`.
  SmallVector<Value *, 8> reorderedDims;
  SmallVector<Value *, 8> concatenatedSymbols;

  AffineMap affineMap;

  /// Used with RAII to control the depth at which AffineApply are composed
  /// recursively. Only accepts depth 1 for now.
  /// Note that if one wishes to compose all AffineApply in the program and
  /// follows program order, maxdepth 1 is sufficient. This is as much as this
  /// abstraction is willing to support for now.
  static unsigned &affineApplyDepth() {
    static thread_local unsigned depth = 0;
    return depth;
  }
  static constexpr unsigned kMaxAffineApplyDepth = 1;

  AffineApplyNormalizer() { affineApplyDepth()++; }

public:
  ~AffineApplyNormalizer() { affineApplyDepth()--; }
};

/// FIXME: this is massive overkill for simple obviously always matching
/// canonicalizations. Fix the pattern rewriter to make this easy.
struct SimplifyAffineApplyState : public PatternState {
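
// Note (not part of this commit): getOperands() above returns reorderedDims
// followed by concatenatedSymbols, i.e. exactly the (dims..., symbols...)
// operand order that the normalized affineMap expects.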
@@ -181,6 +263,136 @@ struct SimplifyAffineApplyState : public PatternState {

} // end anonymous namespace.

AffineDimExpr AffineApplyNormalizer::applyOneDim(Value *v) {
  DenseMap<Value *, unsigned>::iterator iterPos;
  bool inserted = false;
  std::tie(iterPos, inserted) =
      dimValueToPosition.insert(std::make_pair(v, dimValueToPosition.size()));
  if (inserted) {
    reorderedDims.push_back(v);
  }
  return getAffineDimExpr(iterPos->second, v->getFunction()->getContext())
      .cast<AffineDimExpr>();
}

AffineMap AffineApplyNormalizer::renumber(const AffineApplyNormalizer &other) {
  SmallVector<AffineExpr, 8> dimRemapping;
  for (auto *v : other.reorderedDims) {
    auto kvp = other.dimValueToPosition.find(v);
    if (dimRemapping.size() <= kvp->second)
      dimRemapping.resize(kvp->second + 1);
    dimRemapping[kvp->second] = applyOneDim(kvp->first);
  }
  unsigned numSymbols = concatenatedSymbols.size();
  unsigned numOtherSymbols = other.concatenatedSymbols.size();
  SmallVector<AffineExpr, 8> symRemapping(numOtherSymbols);
  for (unsigned idx = 0; idx < numOtherSymbols; ++idx) {
    symRemapping[idx] =
        getAffineSymbolExpr(idx + numSymbols, other.affineMap.getContext());
  }
  concatenatedSymbols.insert(concatenatedSymbols.end(),
                             other.concatenatedSymbols.begin(),
                             other.concatenatedSymbols.end());
  auto map = other.affineMap;
  return map.replaceDimsAndSymbols(dimRemapping, symRemapping,
                                   dimRemapping.size(), symRemapping.size());
}

AffineMap AffineApplyNormalizer::renumber(const AffineApplyOp &app) {
  assert(app.getAffineMap().getRangeSizes().empty() && "Non-empty range sizes");

  // Create the AffineApplyNormalizer for the operands of this
  // AffineApplyOp and combine it with the current AffineApplyNormalizer.
  SmallVector<Value *, 8> operands(
      const_cast<AffineApplyOp &>(app).getOperands().begin(),
      const_cast<AffineApplyOp &>(app).getOperands().end());
  AffineApplyNormalizer normalizer(app.getAffineMap(), operands);
  return renumber(normalizer);
}

AffineApplyNormalizer::AffineApplyNormalizer(AffineMap map,
                                             ArrayRef<Value *> operands)
    : AffineApplyNormalizer() {
  assert(map.getRangeSizes().empty() && "Unbounded map expected");
  assert(map.getNumInputs() == operands.size() &&
         "number of operands does not match the number of map inputs");

  SmallVector<AffineExpr, 8> exprs;
  for (auto en : llvm::enumerate(operands)) {
    auto *t = en.value();
    assert(t->getType().isIndex());
    bool operandNotFromAffineApply =
        !t->getDefiningInst() || !t->getDefiningInst()->isa<AffineApplyOp>();
    if (operandNotFromAffineApply ||
        affineApplyDepth() > kMaxAffineApplyDepth) {
      if (en.index() < map.getNumDims()) {
        exprs.push_back(applyOneDim(t));
      } else {
        // Composition of mathematical symbols must occur by concatenation.
        // A subsequent canonicalization will drop duplicates. Duplicates are
        // not dropped here because it would just amount to code duplication.
        concatenatedSymbols.push_back(t);
      }
    } else {
      auto *inst = t->getDefiningInst();
      auto app = inst->dyn_cast<AffineApplyOp>();
      auto tmpMap = renumber(*app);
      exprs.push_back(tmpMap.getResult(0));
    }
  }

  // Map is already composed.
  if (exprs.empty()) {
    affineMap = map;
    return;
  }

  auto numDims = dimValueToPosition.size();
  auto numSymbols = concatenatedSymbols.size() - map.getNumSymbols();
  auto exprsMap = AffineMap::get(numDims, numSymbols, exprs, {});
  LLVM_DEBUG(map.print(dbgs() << "\nCompose map: "));
  LLVM_DEBUG(exprsMap.print(dbgs() << "\nWith map: "));
  LLVM_DEBUG(map.compose(exprsMap).print(dbgs() << "\nResult: "));

  affineMap = simplifyAffineMap(map.compose(exprsMap));
  LLVM_DEBUG(affineMap.print(dbgs() << "\nSimplified result: "));
  LLVM_DEBUG(dbgs() << "\n");
}

/// Implements `map` and `operands` composition and simplification to support
/// `makeComposedAffineApply`. This can be called to achieve the same effects
/// on `map` and `operands` without creating an AffineApplyOp that needs to be
/// immediately deleted.
static void composeAffineMapAndOperands(AffineMap *map,
                                        SmallVectorImpl<Value *> *operands) {
  AffineApplyNormalizer normalizer(*map, *operands);
  auto normalizedMap = normalizer.getAffineMap();
  auto normalizedOperands = normalizer.getOperands();
  canonicalizeMapAndOperands(&normalizedMap, &normalizedOperands);
  *map = normalizedMap;
  *operands = normalizedOperands;
  assert(*map);
}

void mlir::fullyComposeAffineMapAndOperands(
    AffineMap *map, SmallVectorImpl<Value *> *operands) {
  while (llvm::any_of(*operands, [](Value *v) {
    return v->getDefiningInst() && v->getDefiningInst()->isa<AffineApplyOp>();
  })) {
    composeAffineMapAndOperands(map, operands);
  }
}

OpPointer<AffineApplyOp>
mlir::makeComposedAffineApply(FuncBuilder *b, Location loc, AffineMap map,
                              ArrayRef<Value *> operands) {
  AffineMap normalizedMap = map;
  SmallVector<Value *, 8> normalizedOperands(operands.begin(), operands.end());
  composeAffineMapAndOperands(&normalizedMap, &normalizedOperands);
  assert(normalizedMap);
  return b->create<AffineApplyOp>(loc, normalizedMap, normalizedOperands);
}

void mlir::canonicalizeMapAndOperands(
    AffineMap *map, llvm::SmallVectorImpl<Value *> *operands) {
  if (!map || operands->empty())
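
// Trace (sketch, not part of this commit) of one fixpoint iteration of
// fullyComposeAffineMapAndOperands. Given
//   %0 = affine_apply (d0) -> (d0 + 1) (%i)
// and map = (d0) -> (2 * d0) with operands = {%0}, a single call to
// composeAffineMapAndOperands rewrites this to
//   map = (d0) -> (2 * d0 + 2), operands = {%i}
// after which no operand is defined by an AffineApplyOp and the loop exits.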
@@ -245,9 +457,8 @@ PatternMatchResult SimplifyAffineApply::match(Instruction *op) const {
  auto map = apply->getAffineMap();

  AffineMap oldMap = map;
  SmallVector<Value *, 8> resultOperands(apply->getOperands().begin(),
                                         apply->getOperands().end());
  canonicalizeMapAndOperands(&map, &resultOperands);
  SmallVector<Value *, 8> resultOperands(apply->getOperands());
  composeAffineMapAndOperands(&map, &resultOperands);
  if (map != oldMap)
    return matchSuccess(
        std::make_unique<SimplifyAffineApplyState>(map, resultOperands));
@@ -678,6 +889,106 @@ void mlir::extractForInductionVars(ArrayRef<OpPointer<AffineForOp>> forInsts,
    ivs->push_back(forInst->getInductionVar());
}

bool mlir::addAffineForOpDomain(ConstOpPointer<AffineForOp> forOp,
                                FlatAffineConstraints *constraints) {
  unsigned pos;
  // Pre-condition for this method.
  if (!constraints->findId(*forOp->getInductionVar(), &pos)) {
    assert(0 && "Value not found");
    return false;
  }

  if (forOp->getStep() != 1)
    LLVM_DEBUG(llvm::dbgs()
               << "Domain conservative: non-unit stride not handled\n");

  // Adds a lower or upper bound when the bounds aren't constant.
  auto addLowerOrUpperBound = [&](bool lower) -> bool {
    auto operands =
        lower ? forOp->getLowerBoundOperands() : forOp->getUpperBoundOperands();
    for (const auto &operand : operands) {
      unsigned loc;
      if (!constraints->findId(*operand, &loc)) {
        if (isValidSymbol(operand)) {
          constraints->addSymbolId(constraints->getNumSymbolIds(),
                                   const_cast<Value *>(operand));
          loc =
              constraints->getNumDimIds() + constraints->getNumSymbolIds() - 1;
          // Check if the symbol is a constant.
          if (auto *opInst = operand->getDefiningInst()) {
            if (auto constOp = opInst->dyn_cast<ConstantIndexOp>()) {
              constraints->setIdToConstant(*operand, constOp->getValue());
            }
          }
        } else {
          constraints->addDimId(constraints->getNumDimIds(),
                                const_cast<Value *>(operand));
          loc = constraints->getNumDimIds() - 1;
        }
      }
    }
    // Record positions of the operands in the constraint system.
    SmallVector<unsigned, 8> positions;
    for (const auto &operand : operands) {
      unsigned loc;
      if (!constraints->findId(*operand, &loc))
        assert(0 && "expected to be found");
      positions.push_back(loc);
    }

    auto boundMap =
        lower ? forOp->getLowerBoundMap() : forOp->getUpperBoundMap();

    FlatAffineConstraints localVarCst;
    std::vector<SmallVector<int64_t, 8>> flatExprs;
    if (!getFlattenedAffineExprs(boundMap, &flatExprs, &localVarCst)) {
      LLVM_DEBUG(llvm::dbgs() << "semi-affine expressions not yet supported\n");
      return false;
    }
    if (localVarCst.getNumLocalIds() > 0) {
      LLVM_DEBUG(llvm::dbgs()
                 << "loop bounds with mod/floordiv expr's not yet supported\n");
      return false;
    }

    for (const auto &flatExpr : flatExprs) {
      SmallVector<int64_t, 4> ineq(constraints->getNumCols(), 0);
      ineq[pos] = lower ? 1 : -1;
      for (unsigned j = 0, e = boundMap.getNumInputs(); j < e; j++) {
        ineq[positions[j]] = lower ? -flatExpr[j] : flatExpr[j];
      }
      // Constant term.
      ineq[constraints->getNumCols() - 1] =
          lower ? -flatExpr[flatExpr.size() - 1]
                // Upper bound in flattenedExpr is an exclusive one.
                : flatExpr[flatExpr.size() - 1] - 1;
      constraints->addInequality(ineq);
    }
    return true;
  };

  if (forOp->hasConstantLowerBound()) {
    constraints->addConstantLowerBound(pos, forOp->getConstantLowerBound());
  } else {
    // Non-constant lower bound case.
    if (!addLowerOrUpperBound(/*lower=*/true))
      return false;
  }

  if (forOp->hasConstantUpperBound()) {
    constraints->addConstantUpperBound(pos, forOp->getConstantUpperBound() - 1);
    return true;
  }
  // Non-constant upper bound case.
  return addLowerOrUpperBound(/*lower=*/false);
}

/// Returns an AffineValueMap representing this bound.
AffineValueMap AffineBound::getAsAffineValueMap() {
  SmallVector<Value *, 8> operands(getOperands());
  return AffineValueMap(getMap(), operands);
}

//===----------------------------------------------------------------------===//
// AffineIfOp
//===----------------------------------------------------------------------===//
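
// Worked example (sketch, not part of this commit): for a loop
//   for %i = 0 to %N
// with constraint columns (%i, %N, const), addAffineForOpDomain adds
//   %i >= 0            ->  ( 1, 0,  0)
//   -%i + %N - 1 >= 0  ->  (-1, 1, -1)
// The -1 constant term reflects that the flattened upper bound is exclusive.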
@@ -22,9 +22,9 @@

#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/AffineOps/AffineOps.h"
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/Analysis/Utils.h"
#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/AffineStructures.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Instruction.h"
@@ -42,462 +42,6 @@ using namespace mlir;

using llvm::dbgs;

/// Constructs an affine expression from a flat ArrayRef. If there are local
/// identifiers (neither dimensional nor symbolic) that appear in the sum of
/// products expression, 'localExprs' is expected to have the AffineExpr
/// for it, and is substituted into. The ArrayRef 'eq' is expected to be in the
/// format [dims, symbols, locals, constant term].
// TODO(bondhugula): refactor getAddMulPureAffineExpr to reuse it from here.
static AffineExpr toAffineExpr(ArrayRef<int64_t> eq, unsigned numDims,
                               unsigned numSymbols,
                               ArrayRef<AffineExpr> localExprs,
                               MLIRContext *context) {
  // Assert expected numLocals = eq.size() - numDims - numSymbols - 1.
  assert(eq.size() - numDims - numSymbols - 1 == localExprs.size() &&
         "unexpected number of local expressions");

  auto expr = getAffineConstantExpr(0, context);
  // Dimensions and symbols.
  for (unsigned j = 0; j < numDims + numSymbols; j++) {
    if (eq[j] == 0) {
      continue;
    }
    auto id = j < numDims ? getAffineDimExpr(j, context)
                          : getAffineSymbolExpr(j - numDims, context);
    expr = expr + id * eq[j];
  }

  // Local identifiers.
  for (unsigned j = numDims + numSymbols, e = eq.size() - 1; j < e; j++) {
    if (eq[j] == 0) {
      continue;
    }
    auto term = localExprs[j - numDims - numSymbols] * eq[j];
    expr = expr + term;
  }

  // Constant term.
  int64_t constTerm = eq[eq.size() - 1];
  if (constTerm != 0)
    expr = expr + constTerm;
  return expr;
}
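
// Worked example (sketch, not part of this commit): with one dim, one symbol,
// no locals, and `ctx` an assumed MLIRContext *, the loops above reconstruct
// 2*d0 + 3*s0 + 5 from its flat form:
//
//   auto e = toAffineExpr({2, 3, 5}, /*numDims=*/1, /*numSymbols=*/1,
//                         /*localExprs=*/{}, ctx);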

AffineMap mlir::simplifyAffineMap(AffineMap map) {
  SmallVector<AffineExpr, 8> exprs, sizes;
  for (auto e : map.getResults()) {
    exprs.push_back(
        simplifyAffineExpr(e, map.getNumDims(), map.getNumSymbols()));
  }
  for (auto e : map.getRangeSizes()) {
    sizes.push_back(
        simplifyAffineExpr(e, map.getNumDims(), map.getNumSymbols()));
  }
  return AffineMap::get(map.getNumDims(), map.getNumSymbols(), exprs, sizes);
}

namespace {

// This class is used to flatten a pure affine expression (AffineExpr,
// which is in a tree form) into a sum of products (w.r.t constants) when
// possible, and in that process simplifying the expression. For a modulo,
// floordiv, or a ceildiv expression, an additional identifier, called a local
// identifier, is introduced to rewrite the expression as a sum of product
// affine expression. Each local identifier is always and by construction a
// floordiv of a pure add/mul affine function of dimensional, symbolic, and
// other local identifiers, in a non-mutually recursive way. Hence, every local
// identifier can ultimately always be recovered as an affine function of
// dimensional and symbolic identifiers (involving floordiv's); note however
// that by AffineExpr construction, some floordiv combinations are converted to
// mod's. The result of the flattening is a flattened expression and a set of
// constraints involving just the local variables.
//
// d2 + (d0 + d1) floordiv 4 is flattened to d2 + q where 'q' is the local
// variable introduced, with localVarCst containing 4*q <= d0 + d1 <= 4*q + 3.
//
// The simplification performed includes the accumulation of contributions for
// each dimensional and symbolic identifier together, the simplification of
// floordiv/ceildiv/mod expressions and other simplifications that in turn
// happen as a result. A simplification that this flattening naturally performs
// is of simplifying the numerator and denominator of floordiv/ceildiv, and
// folding a modulo expression to a zero, if possible. Three examples are below:
//
// (((d0 + 3 * d1) + d0) - 2 * d1) - d0       simplified to  d0 + d1
// (d0 - d0 mod 4 + 4) mod 4                  simplified to  0
// (3*d0 + 2*d1 + d0) floordiv 2 + d1         simplified to  2*d0 + 2*d1
//
// The way the flattening works for the second example is as follows: d0 % 4 is
// replaced by d0 - 4*q with q being introduced: the expression then simplifies
// to: (d0 - (d0 - 4q) + 4) = 4q + 4, modulo of which w.r.t 4 simplifies to
// zero. Note that an affine expression may not always be expressible purely as
// a sum of products involving just the original dimensional and symbolic
// identifiers due to the presence of modulo/floordiv/ceildiv expressions that
// may not be eliminated after simplification; in such cases, the final
// expression can be reconstructed by replacing the local identifiers with their
// corresponding explicit form stored in 'localExprs' (note that each of the
// explicit forms itself would have been simplified).
//
// The expression walk method here performs a linear time post order walk that
// performs the above simplifications through visit methods, with partial
// results being stored in 'operandExprStack'. When a parent expr is visited,
// the flattened expressions corresponding to its two operands would already be
// on the stack - the parent expression looks at the two flattened expressions
// and combines the two. It pops off the operand expressions and pushes the
// combined result (although this is done in-place on its LHS operand expr).
// When the walk is completed, the flattened form of the top-level expression
// would be left on the stack.
//
// A flattener can be repeatedly used for multiple affine expressions that bind
// to the same operands, for example, for all result expressions of an
// AffineMap or AffineValueMap. In such cases, using it for multiple expressions
// is more efficient than creating a new flattener for each expression since
// common identical div and mod expressions appearing across different
// expressions are mapped to the same local identifier (same column position in
// 'localVarCst').
struct AffineExprFlattener : public AffineExprVisitor<AffineExprFlattener> {
public:
  // Flattened expression layout: [dims, symbols, locals, constant].
  // Stack that holds the LHS and RHS operands while visiting a binary op expr.
  // In future, consider adding a prepass to determine how big the SmallVector's
  // will be, and linearize this to std::vector<int64_t> to prevent
  // SmallVector moves on re-allocation.
  std::vector<SmallVector<int64_t, 8>> operandExprStack;
  // Constraints connecting newly introduced local variables (for mod's and
  // div's) to existing (dimensional and symbolic) ones. These are always
  // inequalities.
  FlatAffineConstraints localVarCst;

  unsigned numDims;
  unsigned numSymbols;
  // Number of newly introduced identifiers to flatten mod/floordiv/ceildiv
  // expressions that could not be simplified.
  unsigned numLocals;
  // AffineExpr's corresponding to the floordiv/ceildiv/mod expressions for
  // which new identifiers were introduced; if the latter do not get canceled
  // out, these expressions can be readily used to reconstruct the AffineExpr
  // (tree) form. Note that these expressions themselves would have been
  // simplified (recursively) by this pass. Eg. d0 + (d0 + 2*d1 + d0) ceildiv 4
  // will be simplified to d0 + q, where q = (d0 + d1) ceildiv 2. (d0 + d1)
  // ceildiv 2 would be the local expression stored for q.
  SmallVector<AffineExpr, 4> localExprs;
  MLIRContext *context;

  AffineExprFlattener(unsigned numDims, unsigned numSymbols,
                      MLIRContext *context)
      : numDims(numDims), numSymbols(numSymbols), numLocals(0),
        context(context) {
    operandExprStack.reserve(8);
    localVarCst.reset(numDims, numSymbols, numLocals);
  }

  void visitMulExpr(AffineBinaryOpExpr expr) {
    assert(operandExprStack.size() >= 2);
    // This is a pure affine expr; the RHS will be a constant.
    assert(expr.getRHS().isa<AffineConstantExpr>());
    // Get the RHS constant.
    auto rhsConst = operandExprStack.back()[getConstantIndex()];
    operandExprStack.pop_back();
    // Update the LHS in place instead of pop and push.
    auto &lhs = operandExprStack.back();
    for (unsigned i = 0, e = lhs.size(); i < e; i++) {
      lhs[i] *= rhsConst;
    }
  }

  void visitAddExpr(AffineBinaryOpExpr expr) {
    assert(operandExprStack.size() >= 2);
    const auto &rhs = operandExprStack.back();
    auto &lhs = operandExprStack[operandExprStack.size() - 2];
    assert(lhs.size() == rhs.size());
    // Update the LHS in place.
    for (unsigned i = 0, e = rhs.size(); i < e; i++) {
      lhs[i] += rhs[i];
    }
    // Pop off the RHS.
    operandExprStack.pop_back();
  }
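
  // Trace (sketch, not part of this commit): flattening d0 + 2*s0 over one
  // dim and one symbol, with columns [d0, s0, const]. The post-order walk
  // pushes {1, 0, 0} for d0, {0, 1, 0} for s0, and {0, 0, 2} for the
  // constant; visitMulExpr pops the constant and scales {0, 1, 0} to
  // {0, 2, 0}, and visitAddExpr folds the two remaining rows into {1, 2, 0}.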

  //
  // t = expr mod c   <=>   t = expr - c*q and c*q <= expr <= c*q + c - 1
  //
  // A mod expression "expr mod c" is thus flattened by introducing a new local
  // variable q (= expr floordiv c), such that expr mod c is replaced with
  // 'expr - c * q' and c * q <= expr <= c * q + c - 1 are added to localVarCst.
  void visitModExpr(AffineBinaryOpExpr expr) {
    assert(operandExprStack.size() >= 2);
    // This is a pure affine expr; the RHS will be a constant.
    assert(expr.getRHS().isa<AffineConstantExpr>());
    auto rhsConst = operandExprStack.back()[getConstantIndex()];
    operandExprStack.pop_back();
    auto &lhs = operandExprStack.back();
    // TODO(bondhugula): handle modulo by zero case when this issue is fixed
    // at the other places in the IR.
    assert(rhsConst > 0 && "RHS constant has to be positive");

    // Check if the LHS expression is a multiple of modulo factor.
    unsigned i, e;
    for (i = 0, e = lhs.size(); i < e; i++)
      if (lhs[i] % rhsConst != 0)
        break;
    // If yes, modulo expression here simplifies to zero.
    if (i == lhs.size()) {
      std::fill(lhs.begin(), lhs.end(), 0);
      return;
    }

    // Add a local variable for the quotient, i.e., expr % c is replaced by
    // (expr - q * c) where q = expr floordiv c. Do this while canceling out
    // the GCD of expr and c.
    SmallVector<int64_t, 8> floorDividend(lhs);
    uint64_t gcd = rhsConst;
    for (unsigned i = 0, e = lhs.size(); i < e; i++)
      gcd = llvm::GreatestCommonDivisor64(gcd, std::abs(lhs[i]));
    // Simplify the numerator and the denominator.
    if (gcd != 1) {
      for (unsigned i = 0, e = floorDividend.size(); i < e; i++)
        floorDividend[i] = floorDividend[i] / static_cast<int64_t>(gcd);
    }
    int64_t floorDivisor = rhsConst / static_cast<int64_t>(gcd);

    // Construct the AffineExpr form of the floordiv to store in localExprs.
    auto dividendExpr =
        toAffineExpr(floorDividend, numDims, numSymbols, localExprs, context);
    auto divisorExpr = getAffineConstantExpr(floorDivisor, context);
    auto floorDivExpr = dividendExpr.floorDiv(divisorExpr);
    int loc;
    if ((loc = findLocalId(floorDivExpr)) == -1) {
      addLocalFloorDivId(floorDividend, floorDivisor, floorDivExpr);
      // Set result at top of stack to "lhs - rhsConst * q".
      lhs[getLocalVarStartIndex() + numLocals - 1] = -rhsConst;
    } else {
      // Reuse the existing local id.
      lhs[getLocalVarStartIndex() + loc] = -rhsConst;
    }
  }
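
  // Worked example (sketch, not part of this commit): flattening (2*d0) mod 4
  // with columns [d0, q, const]. lhs = {2, 0} is not a multiple of 4 and
  // gcd(4, 2) = 2, so the stored local is q = d0 floordiv 2 and the result
  // row becomes {2, -4, 0}, i.e. 2*d0 - 4*q.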

  void visitCeilDivExpr(AffineBinaryOpExpr expr) {
    visitDivExpr(expr, /*isCeil=*/true);
  }
  void visitFloorDivExpr(AffineBinaryOpExpr expr) {
    visitDivExpr(expr, /*isCeil=*/false);
  }

  void visitDimExpr(AffineDimExpr expr) {
    operandExprStack.emplace_back(SmallVector<int64_t, 32>(getNumCols(), 0));
    auto &eq = operandExprStack.back();
    assert(expr.getPosition() < numDims && "Inconsistent number of dims");
    eq[getDimStartIndex() + expr.getPosition()] = 1;
  }

  void visitSymbolExpr(AffineSymbolExpr expr) {
    operandExprStack.emplace_back(SmallVector<int64_t, 32>(getNumCols(), 0));
    auto &eq = operandExprStack.back();
    assert(expr.getPosition() < numSymbols && "inconsistent number of symbols");
    eq[getSymbolStartIndex() + expr.getPosition()] = 1;
  }

  void visitConstantExpr(AffineConstantExpr expr) {
    operandExprStack.emplace_back(SmallVector<int64_t, 32>(getNumCols(), 0));
    auto &eq = operandExprStack.back();
    eq[getConstantIndex()] = expr.getValue();
  }

private:
  // t = expr floordiv c   <=>   t = q, c * q <= expr <= c * q + c - 1
  // A floordiv is thus flattened by introducing a new local variable q, and
  // replacing that expression with 'q' while adding the constraints
  // c * q <= expr <= c * q + c - 1 to localVarCst (done by
  // FlatAffineConstraints::addLocalFloorDiv).
  //
  // A ceildiv is similarly flattened:
  // t = expr ceildiv c   <=>   t = (expr + c - 1) floordiv c
  void visitDivExpr(AffineBinaryOpExpr expr, bool isCeil) {
    assert(operandExprStack.size() >= 2);
    assert(expr.getRHS().isa<AffineConstantExpr>());

    // This is a pure affine expr; the RHS is a positive constant.
    int64_t rhsConst = operandExprStack.back()[getConstantIndex()];
    // TODO(bondhugula): handle division by zero at the same time the issue is
    // fixed at other places.
    assert(rhsConst > 0 && "RHS constant has to be positive");
    operandExprStack.pop_back();
    auto &lhs = operandExprStack.back();

    // Simplify the floordiv, ceildiv if possible by canceling out the greatest
    // common divisors of the numerator and denominator.
    uint64_t gcd = std::abs(rhsConst);
    for (unsigned i = 0, e = lhs.size(); i < e; i++)
      gcd = llvm::GreatestCommonDivisor64(gcd, std::abs(lhs[i]));
    // Simplify the numerator and the denominator.
    if (gcd != 1) {
      for (unsigned i = 0, e = lhs.size(); i < e; i++)
        lhs[i] = lhs[i] / static_cast<int64_t>(gcd);
    }
    int64_t divisor = rhsConst / static_cast<int64_t>(gcd);
    // If the divisor becomes 1, the updated LHS is the result. (The
    // divisor can't be negative since rhsConst is positive).
    if (divisor == 1)
      return;

    // If the divisor cannot be simplified to one, we will have to retain
    // the ceil/floor expr (simplified up until here). Add an existential
    // quantifier to express its result, i.e., expr1 div expr2 is replaced
    // by a new identifier, q.
    auto a = toAffineExpr(lhs, numDims, numSymbols, localExprs, context);
    auto b = getAffineConstantExpr(divisor, context);

    int loc;
    auto divExpr = isCeil ? a.ceilDiv(b) : a.floorDiv(b);
    if ((loc = findLocalId(divExpr)) == -1) {
      if (!isCeil) {
        SmallVector<int64_t, 8> dividend(lhs);
        addLocalFloorDivId(dividend, divisor, divExpr);
      } else {
        // lhs ceildiv c <=> (lhs + c - 1) floordiv c
        SmallVector<int64_t, 8> dividend(lhs);
        dividend.back() += divisor - 1;
        addLocalFloorDivId(dividend, divisor, divExpr);
      }
    }
    // Set the expression on stack to the local var introduced to capture the
    // result of the division (floor or ceil).
    std::fill(lhs.begin(), lhs.end(), 0);
    if (loc == -1)
      lhs[getLocalVarStartIndex() + numLocals - 1] = 1;
    else
      lhs[getLocalVarStartIndex() + loc] = 1;
  }

  // Add a local identifier (needed to flatten a mod, floordiv, ceildiv expr).
  // The local identifier added is always a floordiv of a pure add/mul affine
  // function of other identifiers, coefficients of which are specified in
  // dividend and with respect to a positive constant divisor. localExpr is the
  // simplified tree expression (AffineExpr) corresponding to the quantifier.
  void addLocalFloorDivId(ArrayRef<int64_t> dividend, int64_t divisor,
                          AffineExpr localExpr) {
    assert(divisor > 0 && "positive constant divisor expected");
    for (auto &subExpr : operandExprStack)
      subExpr.insert(subExpr.begin() + getLocalVarStartIndex() + numLocals, 0);
    localExprs.push_back(localExpr);
    numLocals++;
    // Update localVarCst.
    localVarCst.addLocalFloorDiv(dividend, divisor);
  }

  int findLocalId(AffineExpr localExpr) {
    SmallVectorImpl<AffineExpr>::iterator it;
    if ((it = std::find(localExprs.begin(), localExprs.end(), localExpr)) ==
        localExprs.end())
      return -1;
    return it - localExprs.begin();
  }

  inline unsigned getNumCols() const {
    return numDims + numSymbols + numLocals + 1;
  }
  inline unsigned getConstantIndex() const { return getNumCols() - 1; }
  inline unsigned getLocalVarStartIndex() const { return numDims + numSymbols; }
  inline unsigned getSymbolStartIndex() const { return numDims; }
  inline unsigned getDimStartIndex() const { return 0; }
};

} // end anonymous namespace

/// Simplify the affine expression by flattening it and reconstructing it.
AffineExpr mlir::simplifyAffineExpr(AffineExpr expr, unsigned numDims,
                                    unsigned numSymbols) {
  // TODO(bondhugula): only pure affine for now. The simplification here can
  // be extended to semi-affine maps in the future.
  if (!expr.isPureAffine())
    return expr;

  AffineExprFlattener flattener(numDims, numSymbols, expr.getContext());
  flattener.walkPostOrder(expr);
  ArrayRef<int64_t> flattenedExpr = flattener.operandExprStack.back();
  auto simplifiedExpr = toAffineExpr(flattenedExpr, numDims, numSymbols,
                                     flattener.localExprs, expr.getContext());
  flattener.operandExprStack.pop_back();
  assert(flattener.operandExprStack.empty());

  return simplifiedExpr;
}

// Flattens the expressions in map. Returns true on success or false
// if 'expr' was unable to be flattened (i.e., semi-affine expressions not
// handled yet).
static bool getFlattenedAffineExprs(
    ArrayRef<AffineExpr> exprs, unsigned numDims, unsigned numSymbols,
    std::vector<llvm::SmallVector<int64_t, 8>> *flattenedExprs,
    FlatAffineConstraints *localVarCst) {
  if (exprs.empty()) {
    localVarCst->reset(numDims, numSymbols);
    return true;
  }

  flattenedExprs->clear();
  flattenedExprs->reserve(exprs.size());

  AffineExprFlattener flattener(numDims, numSymbols, exprs[0].getContext());
  // Use the same flattener to simplify each expression successively. This way
  // local identifiers / expressions are shared.
  for (auto expr : exprs) {
    if (!expr.isPureAffine())
      return false;

    flattener.walkPostOrder(expr);
  }

  assert(flattener.operandExprStack.size() == exprs.size());
  flattenedExprs->insert(flattenedExprs->end(),
                         flattener.operandExprStack.begin(),
                         flattener.operandExprStack.end());
  if (localVarCst)
    localVarCst->clearAndCopyFrom(flattener.localVarCst);

  return true;
}

// Flattens 'expr' into 'flattenedExpr'. Returns true on success or false
// if 'expr' was unable to be flattened (semi-affine expressions not handled
// yet).
bool mlir::getFlattenedAffineExpr(AffineExpr expr, unsigned numDims,
                                  unsigned numSymbols,
                                  llvm::SmallVectorImpl<int64_t> *flattenedExpr,
                                  FlatAffineConstraints *localVarCst) {
  std::vector<SmallVector<int64_t, 8>> flattenedExprs;
  bool ret = ::getFlattenedAffineExprs({expr}, numDims, numSymbols,
                                       &flattenedExprs, localVarCst);
  *flattenedExpr = flattenedExprs[0];
  return ret;
}

/// Flattens the expressions in map. Returns true on success or false
/// if 'expr' was unable to be flattened (i.e., semi-affine expressions not
/// handled yet).
bool mlir::getFlattenedAffineExprs(
    AffineMap map, std::vector<llvm::SmallVector<int64_t, 8>> *flattenedExprs,
    FlatAffineConstraints *localVarCst) {
  if (map.getNumResults() == 0) {
    localVarCst->reset(map.getNumDims(), map.getNumSymbols());
    return true;
  }
  return ::getFlattenedAffineExprs(map.getResults(), map.getNumDims(),
                                   map.getNumSymbols(), flattenedExprs,
                                   localVarCst);
}

bool mlir::getFlattenedAffineExprs(
    IntegerSet set, std::vector<llvm::SmallVector<int64_t, 8>> *flattenedExprs,
    FlatAffineConstraints *localVarCst) {
  if (set.getNumConstraints() == 0) {
    localVarCst->reset(set.getNumDims(), set.getNumSymbols());
    return true;
  }
  return ::getFlattenedAffineExprs(set.getConstraints(), set.getNumDims(),
                                   set.getNumSymbols(), flattenedExprs,
                                   localVarCst);
}

/// Returns the sequence of AffineApplyOp Instructions in
/// 'affineApplyOps', which are reachable via a search starting from 'operands',
/// and ending at operands which are not defined by AffineApplyOps.
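
// Worked example (sketch, not part of this commit): flattening both results
// of (d0) -> (d0 floordiv 2, d0 mod 2) through one getFlattenedAffineExprs
// call shares the single local q = d0 floordiv 2 across rows, with columns
// [d0, q, const]:
//   d0 floordiv 2  ->  {0,  1, 0}
//   d0 mod 2       ->  {1, -2, 0}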
@@ -563,7 +107,7 @@ bool mlir::getIndexSet(MutableArrayRef<OpPointer<AffineForOp>> forOps,
  domain->reset(forOps.size(), /*numSymbols=*/0, /*numLocals=*/0, indices);
  for (auto forOp : forOps) {
    // Add constraints from forOp's bounds.
    if (!domain->addAffineForOpDomain(forOp))
    if (!addAffineForOpDomain(forOp, domain))
      return false;
  }
  return true;
@@ -1355,208 +899,3 @@ bool mlir::checkMemrefAccessDependence(
   LLVM_DEBUG(dependenceConstraints->dump());
   return true;
 }
-
-namespace {
-
-/// An `AffineApplyNormalizer` is a helper class that is not visible to the user
-/// and supports renumbering operands of AffineApplyOp. This acts as a
-/// reindexing map of Value* to positional dims or symbols and allows
-/// simplifications such as:
-///
-/// ```mlir
-/// %1 = affine_apply (d0, d1) -> (d0 - d1) (%0, %0)
-/// ```
-///
-/// into:
-///
-/// ```mlir
-/// %1 = affine_apply () -> (0)
-/// ```
-struct AffineApplyNormalizer {
-  AffineApplyNormalizer(AffineMap map, ArrayRef<Value *> operands);
-
-  /// Returns the AffineMap resulting from normalization.
-  AffineMap getAffineMap() { return affineMap; }
-
-  SmallVector<Value *, 8> getOperands() {
-    SmallVector<Value *, 8> res(reorderedDims);
-    res.append(concatenatedSymbols.begin(), concatenatedSymbols.end());
-    return res;
-  }
-
-private:
-  /// Helper function to insert `v` into the coordinate system of the current
-  /// AffineApplyNormalizer. Returns the AffineDimExpr with the corresponding
-  /// renumbered position.
-  AffineDimExpr applyOneDim(Value *v);
-
-  /// Given an `other` normalizer, this rewrites `other.affineMap` in the
-  /// coordinate system of the current AffineApplyNormalizer.
-  /// Returns the rewritten AffineMap and updates the dims and symbols of
-  /// `this`.
-  AffineMap renumber(const AffineApplyNormalizer &other);
-
-  /// Given an `app`, rewrites `app.getAffineMap()` in the coordinate system of
-  /// the current AffineApplyNormalizer.
-  /// Returns the rewritten AffineMap and updates the dims and symbols of
-  /// `this`.
-  AffineMap renumber(const AffineApplyOp &app);
-
-  /// Maps of Value* to position in `affineMap`.
-  DenseMap<Value *, unsigned> dimValueToPosition;
-
-  /// Ordered dims and symbols matching positional dims and symbols in
-  /// `affineMap`.
-  SmallVector<Value *, 8> reorderedDims;
-  SmallVector<Value *, 8> concatenatedSymbols;
-
-  AffineMap affineMap;
-
-  /// Used with RAII to control the depth at which AffineApply are composed
-  /// recursively. Only accepts depth 1 for now.
-  /// Note that if one wishes to compose all AffineApply in the program and
-  /// follows program order, maxdepth 1 is sufficient. This is as much as this
-  /// abstraction is willing to support for now.
-  static unsigned &affineApplyDepth() {
-    static thread_local unsigned depth = 0;
-    return depth;
-  }
-  static constexpr unsigned kMaxAffineApplyDepth = 1;
-
-  AffineApplyNormalizer() { affineApplyDepth()++; }
-
-public:
-  ~AffineApplyNormalizer() { affineApplyDepth()--; }
-};
-
-} // namespace
-
-AffineDimExpr AffineApplyNormalizer::applyOneDim(Value *v) {
-  DenseMap<Value *, unsigned>::iterator iterPos;
-  bool inserted = false;
-  std::tie(iterPos, inserted) =
-      dimValueToPosition.insert(std::make_pair(v, dimValueToPosition.size()));
-  if (inserted) {
-    reorderedDims.push_back(v);
-  }
-  return getAffineDimExpr(iterPos->second, v->getFunction()->getContext())
-      .cast<AffineDimExpr>();
-}
-
-AffineMap AffineApplyNormalizer::renumber(const AffineApplyNormalizer &other) {
-  SmallVector<AffineExpr, 8> dimRemapping;
-  for (auto *v : other.reorderedDims) {
-    auto kvp = other.dimValueToPosition.find(v);
-    if (dimRemapping.size() <= kvp->second)
-      dimRemapping.resize(kvp->second + 1);
-    dimRemapping[kvp->second] = applyOneDim(kvp->first);
-  }
-  unsigned numSymbols = concatenatedSymbols.size();
-  unsigned numOtherSymbols = other.concatenatedSymbols.size();
-  SmallVector<AffineExpr, 8> symRemapping(numOtherSymbols);
-  for (unsigned idx = 0; idx < numOtherSymbols; ++idx) {
-    symRemapping[idx] =
-        getAffineSymbolExpr(idx + numSymbols, other.affineMap.getContext());
-  }
-  concatenatedSymbols.insert(concatenatedSymbols.end(),
-                             other.concatenatedSymbols.begin(),
-                             other.concatenatedSymbols.end());
-  auto map = other.affineMap;
-  return map.replaceDimsAndSymbols(dimRemapping, symRemapping,
-                                   dimRemapping.size(), symRemapping.size());
-}
-
-AffineMap AffineApplyNormalizer::renumber(const AffineApplyOp &app) {
-  assert(app.getAffineMap().getRangeSizes().empty() && "Non-empty range sizes");
-
-  // Create the AffineApplyNormalizer for the operands of this
-  // AffineApplyOp and combine it with the current AffineApplyNormalizer.
-  SmallVector<Value *, 8> operands(
-      const_cast<AffineApplyOp &>(app).getOperands().begin(),
-      const_cast<AffineApplyOp &>(app).getOperands().end());
-  AffineApplyNormalizer normalizer(app.getAffineMap(), operands);
-  return renumber(normalizer);
-}
-
-AffineApplyNormalizer::AffineApplyNormalizer(AffineMap map,
-                                             ArrayRef<Value *> operands)
-    : AffineApplyNormalizer() {
-  assert(map.getRangeSizes().empty() && "Unbounded map expected");
-  assert(map.getNumInputs() == operands.size() &&
-         "number of operands does not match the number of map inputs");
-
-  SmallVector<AffineExpr, 8> exprs;
-  for (auto en : llvm::enumerate(operands)) {
-    auto *t = en.value();
-    assert(t->getType().isIndex());
-    bool operandNotFromAffineApply =
-        !t->getDefiningInst() || !t->getDefiningInst()->isa<AffineApplyOp>();
-    if (operandNotFromAffineApply ||
-        affineApplyDepth() > kMaxAffineApplyDepth) {
-      if (en.index() < map.getNumDims()) {
-        exprs.push_back(applyOneDim(t));
-      } else {
-        // Composition of mathematical symbols must occur by concatenation.
-        // A subsequent canonicalization will drop duplicates. Duplicates are
-        // not dropped here because it would just amount to code duplication.
-        concatenatedSymbols.push_back(t);
-      }
-    } else {
-      auto *inst = t->getDefiningInst();
-      auto app = inst->dyn_cast<AffineApplyOp>();
-      auto tmpMap = renumber(*app);
-      exprs.push_back(tmpMap.getResult(0));
-    }
-  }
-
-  // Map is already composed.
-  if (exprs.empty()) {
-    affineMap = map;
-    return;
-  }
-
-  auto numDims = dimValueToPosition.size();
-  auto numSymbols = concatenatedSymbols.size() - map.getNumSymbols();
-  auto exprsMap = AffineMap::get(numDims, numSymbols, exprs, {});
-  LLVM_DEBUG(map.print(dbgs() << "\nCompose map: "));
-  LLVM_DEBUG(exprsMap.print(dbgs() << "\nWith map: "));
-  LLVM_DEBUG(map.compose(exprsMap).print(dbgs() << "\nResult: "));
-
-  affineMap = simplifyAffineMap(map.compose(exprsMap));
-  LLVM_DEBUG(affineMap.print(dbgs() << "\nSimplified result: "));
-  LLVM_DEBUG(dbgs() << "\n");
-}
-
-/// Implements `map` and `operands` composition and simplification to support
-/// `makeComposedAffineApply`. This can be called to achieve the same effects
-/// on `map` and `operands` without creating an AffineApplyOp that needs to be
-/// immediately deleted.
-static void composeAffineMapAndOperands(AffineMap *map,
-                                        SmallVectorImpl<Value *> *operands) {
-  AffineApplyNormalizer normalizer(*map, *operands);
-  auto normalizedMap = normalizer.getAffineMap();
-  auto normalizedOperands = normalizer.getOperands();
-  canonicalizeMapAndOperands(&normalizedMap, &normalizedOperands);
-  *map = normalizedMap;
-  *operands = normalizedOperands;
-  assert(*map);
-}
-
-void mlir::fullyComposeAffineMapAndOperands(
-    AffineMap *map, SmallVectorImpl<Value *> *operands) {
-  while (llvm::any_of(*operands, [](Value *v) {
-    return v->getDefiningInst() && v->getDefiningInst()->isa<AffineApplyOp>();
-  })) {
-    composeAffineMapAndOperands(map, operands);
-  }
-}
-
-OpPointer<AffineApplyOp>
-mlir::makeComposedAffineApply(FuncBuilder *b, Location loc, AffineMap map,
-                              ArrayRef<Value *> operands) {
-  AffineMap normalizedMap = map;
-  SmallVector<Value *, 8> normalizedOperands(operands.begin(), operands.end());
-  composeAffineMapAndOperands(&normalizedMap, &normalizedOperands);
-  assert(normalizedMap);
-  return b->create<AffineApplyOp>(loc, normalizedMap, normalizedOperands);
-}
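
The normalizer above is file-local; the public entry points in this hunk are `makeComposedAffineApply` and `fullyComposeAffineMapAndOperands`. A hedged usage sketch, assuming `builder`, `loc`, `map`, and `operands` come from the surrounding pass:

```c++
// Sketch: emit one affine_apply whose operands are never themselves
// results of other affine_apply ops.
auto composed = makeComposedAffineApply(&builder, loc, map, operands);

// Sketch: compose in place, without materializing an op, then canonicalize
// away duplicate or unused operands.
AffineMap m = map;
SmallVector<Value *, 8> ops(operands.begin(), operands.end());
fullyComposeAffineMapAndOperands(&m, &ops);
canonicalizeMapAndOperands(&m, &ops);
```
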

@@ -23,9 +23,9 @@
 
 #include "mlir/AffineOps/AffineOps.h"
 #include "mlir/Analysis/AffineAnalysis.h"
-#include "mlir/Analysis/AffineStructures.h"
 #include "mlir/Analysis/NestedMatcher.h"
 #include "mlir/Analysis/VectorAnalysis.h"
+#include "mlir/IR/AffineStructures.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/Instruction.h"

@@ -147,7 +147,8 @@ bool mlir::isAccessInvariant(const Value &iv, const Value &index) {
   auto composeOp = affineApplyOps[0]->cast<AffineApplyOp>();
   // We need yet another level of indirection because the `dim` index of the
   // access may not correspond to the `dim` index of composeOp.
-  return !AffineValueMap(*composeOp).isFunctionOf(0, const_cast<Value *>(&iv));
+  return !composeOp->getAsAffineValueMap().isFunctionOf(
+      0, const_cast<Value *>(&iv));
 }
 
 llvm::DenseSet<const Value *>

@@ -21,9 +21,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Analysis/AffineAnalysis.h"
-#include "mlir/Analysis/AffineStructures.h"
 #include "mlir/Analysis/Passes.h"
 #include "mlir/Analysis/Utils.h"
+#include "mlir/IR/AffineStructures.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/Pass.h"

@@ -20,9 +20,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Analysis/AffineAnalysis.h"
-#include "mlir/Analysis/AffineStructures.h"
 #include "mlir/Analysis/Passes.h"
 #include "mlir/Analysis/Utils.h"
+#include "mlir/IR/AffineStructures.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/Pass.h"

@@ -24,7 +24,7 @@
 
 #include "mlir/AffineOps/AffineOps.h"
 #include "mlir/Analysis/AffineAnalysis.h"
-#include "mlir/Analysis/AffineStructures.h"
+#include "mlir/IR/AffineStructures.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/StandardOps/StandardOps.h"

@@ -163,7 +163,7 @@ bool MemRefRegion::compute(Instruction *inst, unsigned loopDepth) {
      // bounds expressions involve outer loops or other symbols.
      // TODO(bondhugula): rewrite this to use getInstIndexSet; this way
      // conditionals will be handled when the latter supports it.
-      if (!cst.addAffineForOpDomain(loop))
+      if (!addAffineForOpDomain(loop, &cst))
        return false;
    } else {
      // Has to be a valid symbol.

@@ -19,6 +19,8 @@
 #include "AffineExprDetail.h"
 #include "mlir/IR/AffineExprVisitor.h"
 #include "mlir/IR/AffineMap.h"
+#include "mlir/IR/AffineStructures.h"
 #include "mlir/IR/IntegerSet.h"
 #include "mlir/Support/STLExtras.h"
 #include "llvm/ADT/STLExtras.h"
 

@@ -293,3 +295,446 @@ raw_ostream &operator<<(raw_ostream &os, AffineExpr &expr) {
   expr.print(os);
   return os;
 }
+
+/// Constructs an affine expression from a flat ArrayRef. If there are local
+/// identifiers (neither dimensional nor symbolic) that appear in the sum of
+/// products expression, 'localExprs' is expected to have the AffineExpr
+/// for it, and is substituted into. The ArrayRef 'eq' is expected to be in the
+/// format [dims, symbols, locals, constant term].
+// TODO(bondhugula): refactor getAddMulPureAffineExpr to reuse it from here.
+static AffineExpr toAffineExpr(ArrayRef<int64_t> eq, unsigned numDims,
+                               unsigned numSymbols,
+                               ArrayRef<AffineExpr> localExprs,
+                               MLIRContext *context) {
+  // Assert expected numLocals = eq.size() - numDims - numSymbols - 1
+  assert(eq.size() - numDims - numSymbols - 1 == localExprs.size() &&
+         "unexpected number of local expressions");
+
+  auto expr = getAffineConstantExpr(0, context);
+  // Dimensions and symbols.
+  for (unsigned j = 0; j < numDims + numSymbols; j++) {
+    if (eq[j] == 0) {
+      continue;
+    }
+    auto id = j < numDims ? getAffineDimExpr(j, context)
+                          : getAffineSymbolExpr(j - numDims, context);
+    expr = expr + id * eq[j];
+  }
+
+  // Local identifiers.
+  for (unsigned j = numDims + numSymbols, e = eq.size() - 1; j < e; j++) {
+    if (eq[j] == 0) {
+      continue;
+    }
+    auto term = localExprs[j - numDims - numSymbols] * eq[j];
+    expr = expr + term;
+  }
+
+  // Constant term.
+  int64_t constTerm = eq[eq.size() - 1];
+  if (constTerm != 0)
+    expr = expr + constTerm;
+  return expr;
+}
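
A worked instance of this reconstruction (the numbers are illustrative, not from the patch): with numDims = 1, numSymbols = 1, and one local identifier whose stored expression is d0 floordiv 4, the row eq = [2, 3, 1, 5] in the [dims, symbols, locals, constant] layout rebuilds as

$$2\,d_0 + 3\,s_0 + \left\lfloor d_0 / 4 \right\rfloor + 5 .$$
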
+
+namespace {
+
+// This class is used to flatten a pure affine expression (AffineExpr,
+// which is in a tree form) into a sum of products (w.r.t constants) when
+// possible, and in that process simplifying the expression. For a modulo,
+// floordiv, or a ceildiv expression, an additional identifier, called a local
+// identifier, is introduced to rewrite the expression as a sum of product
+// affine expression. Each local identifier is always and by construction a
+// floordiv of a pure add/mul affine function of dimensional, symbolic, and
+// other local identifiers, in a non-mutually recursive way. Hence, every local
+// identifier can ultimately always be recovered as an affine function of
+// dimensional and symbolic identifiers (involving floordiv's); note however
+// that by AffineExpr construction, some floordiv combinations are converted to
+// mod's. The result of the flattening is a flattened expression and a set of
+// constraints involving just the local variables.
+//
+// d2 + (d0 + d1) floordiv 4 is flattened to d2 + q where 'q' is the local
+// variable introduced, with localVarCst containing 4*q <= d0 + d1 <= 4*q + 3.
+//
+// The simplification performed includes the accumulation of contributions for
+// each dimensional and symbolic identifier together, the simplification of
+// floordiv/ceildiv/mod expressions and other simplifications that in turn
+// happen as a result. A simplification that this flattening naturally performs
+// is simplifying the numerator and denominator of floordiv/ceildiv, and
+// folding a modulo expression to zero, if possible. Three examples are below:
+//
+// (((d0 + 3 * d1) + d0) - 2 * d1) - d0    simplified to    d0 + d1
+// (d0 - d0 mod 4 + 4) mod 4               simplified to    0
+// (3*d0 + 2*d1 + d0) floordiv 2 + d1      simplified to    2*d0 + 2*d1
+//
+// The way the flattening works for the second example is as follows: d0 % 4 is
+// replaced by d0 - 4*q with q being introduced: the expression then simplifies
+// to: (d0 - (d0 - 4q) + 4) = 4q + 4, modulo of which w.r.t 4 simplifies to
+// zero. Note that an affine expression may not always be expressible purely as
+// a sum of products involving just the original dimensional and symbolic
+// identifiers due to the presence of modulo/floordiv/ceildiv expressions that
+// may not be eliminated after simplification; in such cases, the final
+// expression can be reconstructed by replacing the local identifiers with their
+// corresponding explicit form stored in 'localExprs' (note that each of the
+// explicit forms itself would have been simplified).
+//
+// The expression walk method here performs a linear time post order walk that
+// performs the above simplifications through visit methods, with partial
+// results being stored in 'operandExprStack'. When a parent expr is visited,
+// the flattened expressions corresponding to its two operands would already be
+// on the stack - the parent expression looks at the two flattened expressions
+// and combines the two. It pops off the operand expressions and pushes the
+// combined result (although this is done in-place on its LHS operand expr).
+// When the walk is completed, the flattened form of the top-level expression
+// would be left on the stack.
+//
+// A flattener can be repeatedly used for multiple affine expressions that bind
+// to the same operands, for example, for all result expressions of an
+// AffineMap or AffineValueMap. In such cases, using it for multiple expressions
+// is more efficient than creating a new flattener for each expression since
+// identical div and mod expressions appearing across different expressions are
+// mapped to the same local identifier (same column position in 'localVarCst').
+struct AffineExprFlattener : public AffineExprVisitor<AffineExprFlattener> {
+public:
+  // Flattened expression layout: [dims, symbols, locals, constant]
+  // Stack that holds the LHS and RHS operands while visiting a binary op expr.
+  // In future, consider adding a prepass to determine how big the SmallVector's
+  // will be, and linearize this to std::vector<int64_t> to prevent
+  // SmallVector moves on re-allocation.
+  std::vector<SmallVector<int64_t, 8>> operandExprStack;
+  // Constraints connecting newly introduced local variables (for mod's and
+  // div's) to existing (dimensional and symbolic) ones. These are always
+  // inequalities.
+  FlatAffineConstraints localVarCst;
+
+  unsigned numDims;
+  unsigned numSymbols;
+  // Number of newly introduced identifiers to flatten mod/floordiv/ceildiv
+  // expressions that could not be simplified.
+  unsigned numLocals;
+  // AffineExpr's corresponding to the floordiv/ceildiv/mod expressions for
+  // which new identifiers were introduced; if the latter do not get canceled
+  // out, these expressions can be readily used to reconstruct the AffineExpr
+  // (tree) form. Note that these expressions themselves would have been
+  // simplified (recursively) by this pass. Eg. d0 + (d0 + 2*d1 + d0) ceildiv 4
+  // will be simplified to d0 + q, where q = (d0 + d1) ceildiv 2. (d0 + d1)
+  // ceildiv 2 would be the local expression stored for q.
+  SmallVector<AffineExpr, 4> localExprs;
+  MLIRContext *context;
+
+  AffineExprFlattener(unsigned numDims, unsigned numSymbols,
+                      MLIRContext *context)
+      : numDims(numDims), numSymbols(numSymbols), numLocals(0),
+        context(context) {
+    operandExprStack.reserve(8);
+    localVarCst.reset(numDims, numSymbols, numLocals);
+  }
+
+  void visitMulExpr(AffineBinaryOpExpr expr) {
+    assert(operandExprStack.size() >= 2);
+    // This is a pure affine expr; the RHS will be a constant.
+    assert(expr.getRHS().isa<AffineConstantExpr>());
+    // Get the RHS constant.
+    auto rhsConst = operandExprStack.back()[getConstantIndex()];
+    operandExprStack.pop_back();
+    // Update the LHS in place instead of pop and push.
+    auto &lhs = operandExprStack.back();
+    for (unsigned i = 0, e = lhs.size(); i < e; i++) {
+      lhs[i] *= rhsConst;
+    }
+  }
+
+  void visitAddExpr(AffineBinaryOpExpr expr) {
+    assert(operandExprStack.size() >= 2);
+    const auto &rhs = operandExprStack.back();
+    auto &lhs = operandExprStack[operandExprStack.size() - 2];
+    assert(lhs.size() == rhs.size());
+    // Update the LHS in place.
+    for (unsigned i = 0, e = rhs.size(); i < e; i++) {
+      lhs[i] += rhs[i];
+    }
+    // Pop off the RHS.
+    operandExprStack.pop_back();
+  }
+
+  //
+  // t = expr mod c   <=>   t = expr - c*q and c*q <= expr <= c*q + c - 1
+  //
+  // A mod expression "expr mod c" is thus flattened by introducing a new local
+  // variable q (= expr floordiv c), such that expr mod c is replaced with
+  // 'expr - c * q' and c * q <= expr <= c * q + c - 1 are added to localVarCst.
+  void visitModExpr(AffineBinaryOpExpr expr) {
+    assert(operandExprStack.size() >= 2);
+    // This is a pure affine expr; the RHS will be a constant.
+    assert(expr.getRHS().isa<AffineConstantExpr>());
+    auto rhsConst = operandExprStack.back()[getConstantIndex()];
+    operandExprStack.pop_back();
+    auto &lhs = operandExprStack.back();
+    // TODO(bondhugula): handle modulo by zero case when this issue is fixed
+    // at the other places in the IR.
+    assert(rhsConst > 0 && "RHS constant has to be positive");
+
+    // Check if the LHS expression is a multiple of modulo factor.
+    unsigned i, e;
+    for (i = 0, e = lhs.size(); i < e; i++)
+      if (lhs[i] % rhsConst != 0)
+        break;
+    // If yes, modulo expression here simplifies to zero.
+    if (i == lhs.size()) {
+      std::fill(lhs.begin(), lhs.end(), 0);
+      return;
+    }
+
+    // Add a local variable for the quotient, i.e., expr % c is replaced by
+    // (expr - q * c) where q = expr floordiv c. Do this while canceling out
+    // the GCD of expr and c.
+    SmallVector<int64_t, 8> floorDividend(lhs);
+    uint64_t gcd = rhsConst;
+    for (unsigned i = 0, e = lhs.size(); i < e; i++)
+      gcd = llvm::GreatestCommonDivisor64(gcd, std::abs(lhs[i]));
+    // Simplify the numerator and the denominator.
+    if (gcd != 1) {
+      for (unsigned i = 0, e = floorDividend.size(); i < e; i++)
+        floorDividend[i] = floorDividend[i] / static_cast<int64_t>(gcd);
+    }
+    int64_t floorDivisor = rhsConst / static_cast<int64_t>(gcd);
+
+    // Construct the AffineExpr form of the floordiv to store in localExprs.
+    auto dividendExpr =
+        toAffineExpr(floorDividend, numDims, numSymbols, localExprs, context);
+    auto divisorExpr = getAffineConstantExpr(floorDivisor, context);
+    auto floorDivExpr = dividendExpr.floorDiv(divisorExpr);
+    int loc;
+    if ((loc = findLocalId(floorDivExpr)) == -1) {
+      addLocalFloorDivId(floorDividend, floorDivisor, floorDivExpr);
+      // Set result at top of stack to "lhs - rhsConst * q".
+      lhs[getLocalVarStartIndex() + numLocals - 1] = -rhsConst;
+    } else {
+      // Reuse the existing local id.
+      lhs[getLocalVarStartIndex() + loc] = -rhsConst;
+    }
+  }
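
Concretely, for an illustrative input not taken from the patch, flattening $d_0 \bmod 4$ introduces $q = \lfloor d_0 / 4 \rfloor$ as a local identifier, rewrites the result on the stack, and records the bracketing inequalities in localVarCst:

$$d_0 \bmod 4 \;\longrightarrow\; d_0 - 4q, \qquad 4q \le d_0 \le 4q + 3 .$$
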
+
+  void visitCeilDivExpr(AffineBinaryOpExpr expr) {
+    visitDivExpr(expr, /*isCeil=*/true);
+  }
+  void visitFloorDivExpr(AffineBinaryOpExpr expr) {
+    visitDivExpr(expr, /*isCeil=*/false);
+  }
+
+  void visitDimExpr(AffineDimExpr expr) {
+    operandExprStack.emplace_back(SmallVector<int64_t, 32>(getNumCols(), 0));
+    auto &eq = operandExprStack.back();
+    assert(expr.getPosition() < numDims && "Inconsistent number of dims");
+    eq[getDimStartIndex() + expr.getPosition()] = 1;
+  }
+
+  void visitSymbolExpr(AffineSymbolExpr expr) {
+    operandExprStack.emplace_back(SmallVector<int64_t, 32>(getNumCols(), 0));
+    auto &eq = operandExprStack.back();
+    assert(expr.getPosition() < numSymbols && "inconsistent number of symbols");
+    eq[getSymbolStartIndex() + expr.getPosition()] = 1;
+  }
+
+  void visitConstantExpr(AffineConstantExpr expr) {
+    operandExprStack.emplace_back(SmallVector<int64_t, 32>(getNumCols(), 0));
+    auto &eq = operandExprStack.back();
+    eq[getConstantIndex()] = expr.getValue();
+  }
+
+private:
+  // t = expr floordiv c   <=>   t = q, c * q <= expr <= c * q + c - 1
+  // A floordiv is thus flattened by introducing a new local variable q, and
+  // replacing that expression with 'q' while adding the constraints
+  // c * q <= expr <= c * q + c - 1 to localVarCst (done by
+  // FlatAffineConstraints::addLocalFloorDiv).
+  //
+  // A ceildiv is similarly flattened:
+  // t = expr ceildiv c   <=>   t = (expr + c - 1) floordiv c
+  void visitDivExpr(AffineBinaryOpExpr expr, bool isCeil) {
+    assert(operandExprStack.size() >= 2);
+    assert(expr.getRHS().isa<AffineConstantExpr>());
+
+    // This is a pure affine expr; the RHS is a positive constant.
+    int64_t rhsConst = operandExprStack.back()[getConstantIndex()];
+    // TODO(bondhugula): handle division by zero at the same time the issue is
+    // fixed at other places.
+    assert(rhsConst > 0 && "RHS constant has to be positive");
+    operandExprStack.pop_back();
+    auto &lhs = operandExprStack.back();
+
+    // Simplify the floordiv, ceildiv if possible by canceling out the greatest
+    // common divisors of the numerator and denominator.
+    uint64_t gcd = std::abs(rhsConst);
+    for (unsigned i = 0, e = lhs.size(); i < e; i++)
+      gcd = llvm::GreatestCommonDivisor64(gcd, std::abs(lhs[i]));
+    // Simplify the numerator and the denominator.
+    if (gcd != 1) {
+      for (unsigned i = 0, e = lhs.size(); i < e; i++)
+        lhs[i] = lhs[i] / static_cast<int64_t>(gcd);
+    }
+    int64_t divisor = rhsConst / static_cast<int64_t>(gcd);
+    // If the divisor becomes 1, the updated LHS is the result. (The
+    // divisor can't be negative since rhsConst is positive).
+    if (divisor == 1)
+      return;
+
+    // If the divisor cannot be simplified to one, we will have to retain
+    // the ceil/floor expr (simplified up until here). Add an existential
+    // quantifier to express its result, i.e., expr1 div expr2 is replaced
+    // by a new identifier, q.
+    auto a = toAffineExpr(lhs, numDims, numSymbols, localExprs, context);
+    auto b = getAffineConstantExpr(divisor, context);
+
+    int loc;
+    auto divExpr = isCeil ? a.ceilDiv(b) : a.floorDiv(b);
+    if ((loc = findLocalId(divExpr)) == -1) {
+      if (!isCeil) {
+        SmallVector<int64_t, 8> dividend(lhs);
+        addLocalFloorDivId(dividend, divisor, divExpr);
+      } else {
+        // lhs ceildiv c  <=>  (lhs + c - 1) floordiv c
+        SmallVector<int64_t, 8> dividend(lhs);
+        dividend.back() += divisor - 1;
+        addLocalFloorDivId(dividend, divisor, divExpr);
+      }
+    }
+    // Set the expression on stack to the local var introduced to capture the
+    // result of the division (floor or ceil).
+    std::fill(lhs.begin(), lhs.end(), 0);
+    if (loc == -1)
+      lhs[getLocalVarStartIndex() + numLocals - 1] = 1;
+    else
+      lhs[getLocalVarStartIndex() + loc] = 1;
+  }
+
+  // Add a local identifier (needed to flatten a mod, floordiv, ceildiv expr).
+  // The local identifier added is always a floordiv of a pure add/mul affine
+  // function of other identifiers, coefficients of which are specified in
+  // dividend and with respect to a positive constant divisor. localExpr is the
+  // simplified tree expression (AffineExpr) corresponding to the quantifier.
+  void addLocalFloorDivId(ArrayRef<int64_t> dividend, int64_t divisor,
+                          AffineExpr localExpr) {
+    assert(divisor > 0 && "positive constant divisor expected");
+    for (auto &subExpr : operandExprStack)
+      subExpr.insert(subExpr.begin() + getLocalVarStartIndex() + numLocals, 0);
+    localExprs.push_back(localExpr);
+    numLocals++;
+    // Update localVarCst.
+    localVarCst.addLocalFloorDiv(dividend, divisor);
+  }
+
+  int findLocalId(AffineExpr localExpr) {
+    SmallVectorImpl<AffineExpr>::iterator it;
+    if ((it = std::find(localExprs.begin(), localExprs.end(), localExpr)) ==
+        localExprs.end())
+      return -1;
+    return it - localExprs.begin();
+  }
+
+  inline unsigned getNumCols() const {
+    return numDims + numSymbols + numLocals + 1;
+  }
+  inline unsigned getConstantIndex() const { return getNumCols() - 1; }
+  inline unsigned getLocalVarStartIndex() const { return numDims + numSymbols; }
+  inline unsigned getSymbolStartIndex() const { return numDims; }
+  inline unsigned getDimStartIndex() const { return 0; }
+};
+
+} // end anonymous namespace
+
+/// Simplify the affine expression by flattening it and reconstructing it.
+AffineExpr mlir::simplifyAffineExpr(AffineExpr expr, unsigned numDims,
+                                    unsigned numSymbols) {
+  // TODO(bondhugula): only pure affine for now. The simplification here can
+  // be extended to semi-affine maps in the future.
+  if (!expr.isPureAffine())
+    return expr;
+
+  AffineExprFlattener flattener(numDims, numSymbols, expr.getContext());
+  flattener.walkPostOrder(expr);
+  ArrayRef<int64_t> flattenedExpr = flattener.operandExprStack.back();
+  auto simplifiedExpr = toAffineExpr(flattenedExpr, numDims, numSymbols,
+                                     flattener.localExprs, expr.getContext());
+  flattener.operandExprStack.pop_back();
+  assert(flattener.operandExprStack.empty());
+
+  return simplifiedExpr;
+}
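
A minimal sketch of driving this simplification, assuming an `MLIRContext *ctx` is at hand (the expression is illustrative):

```c++
// Sketch: (d0 + d0) floordiv 2 flattens to the row [2, 0]; the GCD of the
// dividend and the divisor cancels, so the result simplifies back to d0.
auto d0 = getAffineDimExpr(0, ctx);
auto two = getAffineConstantExpr(2, ctx);
AffineExpr e = (d0 + d0).floorDiv(two);
AffineExpr s = mlir::simplifyAffineExpr(e, /*numDims=*/1, /*numSymbols=*/0);
```
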
+
+// Flattens the expressions in map. Returns true on success or false
+// if 'expr' was unable to be flattened (i.e., semi-affine expressions not
+// handled yet).
+static bool getFlattenedAffineExprs(
+    ArrayRef<AffineExpr> exprs, unsigned numDims, unsigned numSymbols,
+    std::vector<llvm::SmallVector<int64_t, 8>> *flattenedExprs,
+    FlatAffineConstraints *localVarCst) {
+  if (exprs.empty()) {
+    localVarCst->reset(numDims, numSymbols);
+    return true;
+  }
+
+  flattenedExprs->clear();
+  flattenedExprs->reserve(exprs.size());
+
+  AffineExprFlattener flattener(numDims, numSymbols, exprs[0].getContext());
+  // Use the same flattener to simplify each expression successively. This way
+  // local identifiers / expressions are shared.
+  for (auto expr : exprs) {
+    if (!expr.isPureAffine())
+      return false;
+
+    flattener.walkPostOrder(expr);
+  }
+
+  assert(flattener.operandExprStack.size() == exprs.size());
+  flattenedExprs->insert(flattenedExprs->end(),
+                         flattener.operandExprStack.begin(),
+                         flattener.operandExprStack.end());
+  if (localVarCst)
+    localVarCst->clearAndCopyFrom(flattener.localVarCst);
+
+  return true;
+}
+
+// Flattens 'expr' into 'flattenedExpr'. Returns true on success or false
+// if 'expr' was unable to be flattened (semi-affine expressions not handled
+// yet).
+bool mlir::getFlattenedAffineExpr(AffineExpr expr, unsigned numDims,
+                                  unsigned numSymbols,
+                                  llvm::SmallVectorImpl<int64_t> *flattenedExpr,
+                                  FlatAffineConstraints *localVarCst) {
+  std::vector<SmallVector<int64_t, 8>> flattenedExprs;
+  bool ret = ::getFlattenedAffineExprs({expr}, numDims, numSymbols,
+                                       &flattenedExprs, localVarCst);
+  *flattenedExpr = flattenedExprs[0];
+  return ret;
+}
+
+/// Flattens the expressions in map. Returns true on success or false
+/// if 'expr' was unable to be flattened (i.e., semi-affine expressions not
+/// handled yet).
+bool mlir::getFlattenedAffineExprs(
+    AffineMap map, std::vector<llvm::SmallVector<int64_t, 8>> *flattenedExprs,
+    FlatAffineConstraints *localVarCst) {
+  if (map.getNumResults() == 0) {
+    localVarCst->reset(map.getNumDims(), map.getNumSymbols());
+    return true;
+  }
+  return ::getFlattenedAffineExprs(map.getResults(), map.getNumDims(),
+                                   map.getNumSymbols(), flattenedExprs,
+                                   localVarCst);
+}
+
+bool mlir::getFlattenedAffineExprs(
+    IntegerSet set, std::vector<llvm::SmallVector<int64_t, 8>> *flattenedExprs,
+    FlatAffineConstraints *localVarCst) {
+  if (set.getNumConstraints() == 0) {
+    localVarCst->reset(set.getNumDims(), set.getNumSymbols());
+    return true;
+  }
+  return ::getFlattenedAffineExprs(set.getConstraints(), set.getNumDims(),
+                                   set.getNumSymbols(), flattenedExprs,
+                                   localVarCst);
+}

@@ -246,3 +246,16 @@ AffineMap AffineMap::compose(AffineMap map) {
     exprs.push_back(expr.compose(newMap));
   return AffineMap::get(numDims, numSymbols, exprs, {});
 }
+
+AffineMap mlir::simplifyAffineMap(AffineMap map) {
+  SmallVector<AffineExpr, 8> exprs, sizes;
+  for (auto e : map.getResults()) {
+    exprs.push_back(
+        simplifyAffineExpr(e, map.getNumDims(), map.getNumSymbols()));
+  }
+  for (auto e : map.getRangeSizes()) {
+    sizes.push_back(
+        simplifyAffineExpr(e, map.getNumDims(), map.getNumSymbols()));
+  }
+  return AffineMap::get(map.getNumDims(), map.getNumSymbols(), exprs, sizes);
+}
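
Composition and simplification pair naturally. A hedged sketch, assuming `f` and `g` are compatible maps supplied by the caller (g.getNumInputs() == f.getNumResults()):

```c++
// Sketch: h(x) = g(f(x)); each result of h is then flattened and rebuilt.
AffineMap h = g.compose(f);
AffineMap simplified = mlir::simplifyAffineMap(h);
```
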

@@ -19,9 +19,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "mlir/Analysis/AffineStructures.h"
-#include "mlir/AffineOps/AffineOps.h"
-#include "mlir/Analysis/AffineAnalysis.h"
+#include "mlir/IR/AffineStructures.h"
 #include "mlir/IR/AffineExprVisitor.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/BuiltinOps.h"

@@ -102,27 +100,16 @@ MutableIntegerSet::MutableIntegerSet(unsigned numDims, unsigned numSymbols,
 // AffineValueMap.
 //===----------------------------------------------------------------------===//
 
-AffineValueMap::AffineValueMap(const AffineApplyOp &op)
-    : map(op.getAffineMap()) {
-  for (auto *operand : op.getOperands())
-    operands.push_back(const_cast<Value *>(operand));
-  results.push_back(const_cast<Value *>(op.getResult()));
-}
+AffineValueMap::AffineValueMap(AffineMap map, ArrayRef<Value *> operands,
+                               ArrayRef<Value *> results)
+    : map(map), operands(operands.begin(), operands.end()),
+      results(results.begin(), results.end()) {}
 
-AffineValueMap::AffineValueMap(AffineMap map, ArrayRef<Value *> operands)
-    : map(map) {
-  for (Value *operand : operands) {
-    this->operands.push_back(operand);
-  }
-}
-
-void AffineValueMap::reset(AffineMap map, ArrayRef<Value *> operands) {
-  this->operands.clear();
-  this->results.clear();
+void AffineValueMap::reset(AffineMap map, ArrayRef<Value *> operands,
+                           ArrayRef<Value *> results) {
   this->map.reset(map);
-  for (Value *operand : operands) {
-    this->operands.push_back(operand);
-  }
+  this->operands.assign(operands.begin(), operands.end());
+  this->results.assign(results.begin(), results.end());
 }
 
 // Returns true and sets 'indexOfMatch' if 'valueToMatch' is found in
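
With the AffineApplyOp-taking constructor gone, clients go through the op itself, as the isAccessInvariant change earlier in this diff does. A minimal sketch, assuming `apply` and `iv` come from the surrounding code:

```c++
// Sketch: an AffineApplyOp now hands out its own value map.
AffineValueMap avm = apply->getAsAffineValueMap();
// Does result 0 of the apply depend on the value `iv`?
bool dependsOnIv = avm.isFunctionOf(0, iv);
```
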
@@ -1248,97 +1235,6 @@ void FlatAffineConstraints::setDimSymbolSeparation(unsigned newSymbolCount) {
   numSymbols = newSymbolCount;
 }
 
-bool FlatAffineConstraints::addAffineForOpDomain(
-    ConstOpPointer<AffineForOp> forOp) {
-  unsigned pos;
-  // Pre-condition for this method.
-  if (!findId(*forOp->getInductionVar(), &pos)) {
-    assert(0 && "Value not found");
-    return false;
-  }
-
-  if (forOp->getStep() != 1)
-    LLVM_DEBUG(llvm::dbgs()
-               << "Domain conservative: non-unit stride not handled\n");
-
-  // Adds a lower or upper bound when the bounds aren't constant.
-  auto addLowerOrUpperBound = [&](bool lower) -> bool {
-    auto operands =
-        lower ? forOp->getLowerBoundOperands() : forOp->getUpperBoundOperands();
-    for (const auto &operand : operands) {
-      unsigned loc;
-      if (!findId(*operand, &loc)) {
-        if (isValidSymbol(operand)) {
-          addSymbolId(getNumSymbolIds(), const_cast<Value *>(operand));
-          loc = getNumDimIds() + getNumSymbolIds() - 1;
-          // Check if the symbol is a constant.
-          if (auto *opInst = operand->getDefiningInst()) {
-            if (auto constOp = opInst->dyn_cast<ConstantIndexOp>()) {
-              setIdToConstant(*operand, constOp->getValue());
-            }
-          }
-        } else {
-          addDimId(getNumDimIds(), const_cast<Value *>(operand));
-          loc = getNumDimIds() - 1;
-        }
-      }
-    }
-    // Record positions of the operands in the constraint system.
-    SmallVector<unsigned, 8> positions;
-    for (const auto &operand : operands) {
-      unsigned loc;
-      if (!findId(*operand, &loc))
-        assert(0 && "expected to be found");
-      positions.push_back(loc);
-    }
-
-    auto boundMap =
-        lower ? forOp->getLowerBoundMap() : forOp->getUpperBoundMap();
-
-    FlatAffineConstraints localVarCst;
-    std::vector<SmallVector<int64_t, 8>> flatExprs;
-    if (!getFlattenedAffineExprs(boundMap, &flatExprs, &localVarCst)) {
-      LLVM_DEBUG(llvm::dbgs() << "semi-affine expressions not yet supported\n");
-      return false;
-    }
-    if (localVarCst.getNumLocalIds() > 0) {
-      LLVM_DEBUG(llvm::dbgs()
-                 << "loop bounds with mod/floordiv expr's not yet supported\n");
-      return false;
-    }
-
-    for (const auto &flatExpr : flatExprs) {
-      SmallVector<int64_t, 4> ineq(getNumCols(), 0);
-      ineq[pos] = lower ? 1 : -1;
-      for (unsigned j = 0, e = boundMap.getNumInputs(); j < e; j++) {
-        ineq[positions[j]] = lower ? -flatExpr[j] : flatExpr[j];
-      }
-      // Constant term.
-      ineq[getNumCols() - 1] =
-          lower ? -flatExpr[flatExpr.size() - 1]
-                // Upper bound in flattenedExpr is an exclusive one.
-                : flatExpr[flatExpr.size() - 1] - 1;
-      addInequality(ineq);
-    }
-    return true;
-  };
-
-  if (forOp->hasConstantLowerBound()) {
-    addConstantLowerBound(pos, forOp->getConstantLowerBound());
-  } else {
-    // Non-constant lower bound case.
-    if (!addLowerOrUpperBound(/*lower=*/true))
-      return false;
-  }
-
-  if (forOp->hasConstantUpperBound()) {
-    addConstantUpperBound(pos, forOp->getConstantUpperBound() - 1);
-    return true;
-  }
-  // Non-constant upper bound case.
-  return addLowerOrUpperBound(/*lower=*/false);
-}
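
Reading the deleted body above: for a unit-stride loop with constant lower bound 0 and a symbolic, exclusive upper bound s0, the inequalities added for the induction variable i are (an illustrative instance, coefficient columns [i, s0, 1]):

$$i \ge 0 \;\;\sim\;\; (1, 0, 0), \qquad s_0 - 1 - i \ge 0 \;\;\sim\;\; (-1, 1, -1) .$$
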
 
 /// Sets the specified identifier to a constant value.
 void FlatAffineConstraints::setIdToConstant(unsigned pos, int64_t val) {
   unsigned offset = equalities.size();
@@ -1,96 +0,0 @@
-//===- ComposeAffineMaps.cpp - MLIR Affine Transform Class-----*- C++ -*-===//
-//
-// Copyright 2019 The MLIR Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-// =============================================================================
-//
-// This file implements a testing pass which composes affine maps from
-// AffineApplyOps in a Function, by forward subtituting results from an
-// AffineApplyOp into any of its users which are also AffineApplyOps.
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/AffineOps/AffineOps.h"
-#include "mlir/Analysis/AffineAnalysis.h"
-#include "mlir/IR/AffineMap.h"
-#include "mlir/IR/Attributes.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/BuiltinOps.h"
-#include "mlir/Pass.h"
-#include "mlir/StandardOps/StandardOps.h"
-#include "mlir/Transforms/Passes.h"
-#include "mlir/Transforms/Utils.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace mlir;
-
-namespace {
-
-// ComposeAffineMaps walks all affine apply op's in a function, and for each
-// such op, composes into it the results of any other AffineApplyOps - so
-// that all operands of the composed AffineApplyOp are guaranteed to be either
-// loop IVs or terminal symbols, (i.e., Values that are themselves not the
-// result of any AffineApplyOp). After this composition, AffineApplyOps with no
-// remaining uses are erased.
-// TODO(andydavis) Remove this when Chris adds instruction combiner pass.
-struct ComposeAffineMaps : public FunctionPass {
-  explicit ComposeAffineMaps() : FunctionPass(&ComposeAffineMaps::passID) {}
-  PassResult runOnFunction(Function *f) override;
-
-  SmallVector<OpPointer<AffineApplyOp>, 8> affineApplyOps;
-
-  static char passID;
-};
-
-} // end anonymous namespace
-
-char ComposeAffineMaps::passID = 0;
-
-FunctionPass *mlir::createComposeAffineMapsPass() {
-  return new ComposeAffineMaps();
-}
-
-static bool affineApplyOp(const Instruction &inst) {
-  return inst.isa<AffineApplyOp>();
-}
-
-PassResult ComposeAffineMaps::runOnFunction(Function *f) {
-  // If needed for future efficiency, reserve space based on a pre-walk.
-  affineApplyOps.clear();
-  f->walk<AffineApplyOp>(
-      [&](OpPointer<AffineApplyOp> afOp) { affineApplyOps.push_back(afOp); });
-  for (auto afOp : affineApplyOps) {
-    SmallVector<Value *, 8> operands(afOp->getOperands());
-    FuncBuilder b(afOp->getInstruction());
-    auto newAfOp = makeComposedAffineApply(&b, afOp->getLoc(),
-                                           afOp->getAffineMap(), operands);
-    afOp->replaceAllUsesWith(newAfOp);
-  }
-
-  // Erase dead affine apply ops.
-  affineApplyOps.clear();
-  f->walk<AffineApplyOp>(
-      [&](OpPointer<AffineApplyOp> afOp) { affineApplyOps.push_back(afOp); });
-  for (auto it = affineApplyOps.rbegin(); it != affineApplyOps.rend(); ++it) {
-    if ((*it)->use_empty()) {
-      (*it)->erase();
-    }
-  }
-
-  return success();
-}
-
-static PassRegistration<ComposeAffineMaps> pass("compose-affine-maps",
-                                                "Compose affine maps");
@@ -22,8 +22,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/AffineOps/AffineOps.h"
-#include "mlir/Analysis/AffineStructures.h"
 #include "mlir/Analysis/Utils.h"
+#include "mlir/IR/AffineStructures.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/Pass.h"

@@ -21,11 +21,11 @@
 
 #include "mlir/AffineOps/AffineOps.h"
 #include "mlir/Analysis/AffineAnalysis.h"
-#include "mlir/Analysis/AffineStructures.h"
 #include "mlir/Analysis/LoopAnalysis.h"
 #include "mlir/Analysis/Utils.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
+#include "mlir/IR/AffineStructures.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/Pass.h"

@@ -21,8 +21,8 @@
 
 #include "mlir/AffineOps/AffineOps.h"
 #include "mlir/Analysis/AffineAnalysis.h"
-#include "mlir/Analysis/AffineStructures.h"
 #include "mlir/Analysis/LoopAnalysis.h"
+#include "mlir/IR/AffineStructures.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/Pass.h"
 #include "mlir/Transforms/LoopUtils.h"

@@ -19,7 +19,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "mlir/Analysis/AffineStructures.h"
+#include "mlir/IR/AffineStructures.h"
 #include "mlir/IR/Function.h"
 #include "mlir/IR/Instruction.h"
 #include "mlir/IR/IntegerSet.h"

@@ -24,9 +24,9 @@
 
 #include "mlir/AffineOps/AffineOps.h"
 #include "mlir/Analysis/AffineAnalysis.h"
-#include "mlir/Analysis/AffineStructures.h"
 #include "mlir/Analysis/Dominance.h"
 #include "mlir/Analysis/Utils.h"
+#include "mlir/IR/AffineStructures.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/Module.h"
 #include "mlir/StandardOps/StandardOps.h"
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -compose-affine-maps | FileCheck %s
+// RUN: mlir-opt %s -canonicalize | FileCheck %s
 
 // Affine maps for test case: compose_affine_maps_1dto2d_no_symbols
 // CHECK-DAG: [[MAP0:#map[0-9]+]] = (d0) -> (d0 - 1)

@@ -15,8 +15,8 @@
 // Affine maps for test case: compose_affine_maps_dependent_loads
 // CHECK-DAG: [[MAP9:#map[0-9]+]] = (d0)[s0] -> (d0 + s0)
 // CHECK-DAG: [[MAP10:#map[0-9]+]] = (d0)[s0] -> (d0 * s0)
-// CHECK-DAG: [[MAP12A:#map[0-9]+]] = (d0)[s0, s1] -> ((d0 - s1) * s0)
-// CHECK-DAG: [[MAP12B:#map[0-9]+]] = (d0)[s0, s1] -> ((d0 + s1) ceildiv s0)
+// CHECK-DAG: [[MAP11:#map[0-9]+]] = (d0)[s0, s1] -> ((d0 + s1) ceildiv s0)
+// CHECK-DAG: [[MAP12:#map[0-9]+]] = (d0)[s0] -> ((d0 - s0) * s0)
 
 // Affine maps for test case: compose_affine_maps_diamond_dependency
 // CHECK-DAG: [[MAP13A:#map[0-9]+]] = (d0) -> ((d0 + 6) ceildiv 8)

@@ -25,11 +25,8 @@
 // Affine maps for test case: arg_used_as_dim_and_symbol
 // CHECK-DAG: [[MAP14:#map[0-9]+]] = (d0, d1, d2)[s0, s1] -> (-d0 - d1 + d2 + s0 + s1)
 
-// Affine maps for test case: zero_map
-// CHECK-DAG: [[MAP15:#map[0-9]+]] = ()[s0] -> (s0)
-
-// Affine maps for test case: zero_map
-// CHECK-DAG: [[MAP16:#map[0-9]+]] = () -> (0)
+// Affine maps for test case: partial_fold_map
+// CHECK-DAG: [[MAP15:#map[0-9]+]] = (d0, d1) -> (d0 - d1)
 
 // CHECK-LABEL: func @compose_affine_maps_1dto2d_no_symbols() {
 func @compose_affine_maps_1dto2d_no_symbols() {

@@ -86,8 +83,7 @@ func @compose_affine_maps_1dto2d_with_symbols() {
   %c4 = constant 4 : index
   %x0 = affine_apply (d0)[s0] -> (d0 - s0) (%i0)[%c4]
 
-  // CHECK: constant 4
-  // CHECK-NEXT: [[I0:%[0-9]+]] = affine_apply [[MAP4]](%i0)[%c4]
+  // CHECK: [[I0:%[0-9]+]] = affine_apply [[MAP4]](%i0)[%c4]
   // CHECK-NEXT: load %{{[0-9]+}}{{\[}}[[I0]], [[I0]]{{\]}}
   %v0 = load %0[%x0, %x0] : memref<4x4xf32>
 

@@ -187,8 +183,8 @@ func @compose_affine_maps_dependent_loads() {
   %x11 = affine_apply (d0, d1)[s0, s1] -> (d1 ceildiv s0)
           (%x01, %x00)[%c7, %c3]
 
-  // CHECK-NEXT: [[I2A:%[0-9]+]] = affine_apply [[MAP12A]](%i1)[%c3, %c7]
-  // CHECK-NEXT: [[I2B:%[0-9]+]] = affine_apply [[MAP12B]](%i0)[%c7, %c3]
+  // CHECK-NEXT: [[I2A:%[0-9]+]] = affine_apply [[MAP12]](%i1)[%c7]
+  // CHECK-NEXT: [[I2B:%[0-9]+]] = affine_apply [[MAP11]](%i0)[%c3, %c7]
   // CHECK-NEXT: load %{{[0-9]+}}{{\[}}[[I2A]], [[I2B]]{{\]}}
   %v3 = load %0[%x10, %x11] : memref<16x32xf32>
 }

@@ -216,7 +212,7 @@ func @compose_affine_maps_diamond_dependency() {
   return
 }
 
-// CHECK-LABEL: func @arg_used_as_dim_and_symbol(%arg0: memref<100x100xf32>, %arg1: index) {
+// CHECK-LABEL: func @arg_used_as_dim_and_symbol
 func @arg_used_as_dim_and_symbol(%arg0: memref<100x100xf32>, %arg1: index) {
   %c9 = constant 9 : index
   %1 = alloc() : memref<100x100xf32, 1>

@@ -237,19 +233,31 @@ func @arg_used_as_dim_and_symbol(%arg0: memref<100x100xf32>, %arg1: index) {
 
 // CHECK-LABEL: func @trivial_maps
 func @trivial_maps() {
+  // CHECK-NOT: affine_apply
 
   %0 = alloc() : memref<10xf32>
   %c0 = constant 0 : index
   %cst = constant 0.000000e+00 : f32
   for %i1 = 0 to 10 {
     %1 = affine_apply ()[s0] -> (s0)()[%c0]
-    // CHECK: {{.*}} = affine_apply [[MAP15]]()[%c0]
    store %cst, %0[%1] : memref<10xf32>
    %2 = load %0[%c0] : memref<10xf32>

    %3 = affine_apply ()[] -> (0)()[]
-    // CHECK: {{.*}} = affine_apply [[MAP16]]()
    store %cst, %0[%3] : memref<10xf32>
    %4 = load %0[%c0] : memref<10xf32>
   }
   return
 }
+
+// CHECK-LABEL: func @partial_fold_map
+func @partial_fold_map(%arg0: memref<index>, %arg1: index, %arg2: index) {
+  // TODO: Constant fold one index into affine_apply
+  %c42 = constant 42 : index
+  %2 = affine_apply (d0, d1) -> (d0 - d1) (%arg1, %c42)
+  store %2, %arg0[] : memref<index>
+  // CHECK: [[X:%[0-9]+]] = affine_apply [[MAP15]](%arg1, %c42)
+  // CHECK-NEXT: store [[X]], %arg0
+
+  return
+}
@@ -1,11 +1,5 @@
 // RUN: mlir-opt %s -canonicalize | FileCheck %s
 
-// CHECK-DAG: [[D0M1:#map.*]] = (d0) -> (d0 - 1)
-// CHECK-DAG: [[D0MD1:#map.*]] = (d0, d1) -> (d0 - d1)
-// CHECK-DAG: [[D0PD0:#map.*]] = (d0) -> (d0 + d0)
-// CHECK-DAG: [[D0P2:#map.*]] = (d0) -> (d0 + 2)
-// CHECK-DAG: [[DEDUPMAP:#map.*]] = (d0)[s0, s1] -> (d0 - d0 + s0 + s1 + s0 + s1 - 1)
-
 // CHECK-LABEL: func @test_subi_zero
 func @test_subi_zero(%arg0: i32) -> i32 {
   // CHECK-NEXT: %c0_i32 = constant 0 : i32

@@ -267,45 +261,6 @@ func @const_fold_propagate() -> memref<?x?xf32> {
   // CHECK: = alloc() : memref<64x32xf32>
   %Av = alloc(%VT_i_s, %VT_k_l) : memref<?x?xf32>
   return %Av : memref<?x?xf32>
 }
 
-
-// CHECK-LABEL: func @simplify_affine_apply
-func @simplify_affine_apply(%arg0: memref<index>, %arg1: index, %arg2: index) {
-  // Only uses d1, not d0.
-  %0 = affine_apply (d0, d1) -> (d1 - 1) (%arg1, %arg2)
-  store %0, %arg0[] : memref<index>
-  // CHECK: [[X:%[0-9]+]] = affine_apply [[D0M1]](%arg2)
-  // CHECK-NEXT: store [[X]], %arg0
-
-  // TODO: Constant fold one index into affine_apply
-  %c42 = constant 42 : index
-  %2 = affine_apply (d0, d1) -> (d0 - d1) (%arg1, %c42)
-  store %2, %arg0[] : memref<index>
-  // CHECK: [[X:%[0-9]+]] = affine_apply [[D0MD1]](%arg1, %c42)
-  // CHECK-NEXT: store [[X]], %arg0
-
-  %3 = affine_apply (d0, d1) -> (d0 + d1) (%arg1, %arg1)
-  store %3, %arg0[] : memref<index>
-  // CHECK: [[X:%[0-9]+]] = affine_apply [[D0PD0]](%arg1)
-  // CHECK-NEXT: store [[X]], %arg0
-
-  // TODO: Compose affine maps.
-  %x0 = affine_apply (d0) -> (d0 - 1) (%arg1)
-  %x1 = affine_apply (d0) -> (d0+2) (%x0)
-  store %x1, %arg0[] : memref<index>
-
-  // CHECK: [[X:%[0-9]+]] = affine_apply [[D0M1]](%arg1)
-  // CHECK-NEXT: [[Y:%[0-9]+]] = affine_apply [[D0P2]]([[X]])
-  // CHECK-NEXT: store [[Y]], %arg0
-
-  // Drop redundant exprs and symbols.
-  %dedup = affine_apply (d0, d1) [s0, s1, s2, s3] -> (d0 - d1 - 1 + s0 + s1 + s2 + s3) (%arg1, %arg1)[%arg2, %arg1, %arg2, %arg1]
-  store %dedup, %arg0[] : memref<index>
-  // CHECK: [[DEDUP:%.+]] = affine_apply [[DEDUPMAP]](%arg1)[%arg2, %arg1]
-  // CHECK-NEXT: store [[DEDUP]], %arg0
-
-  return
-}
-
 // CHECK-LABEL: func @cond_br_folding