[mlir][SCF] NFC - Drop SCF EDSC usage

Drop the SCF dialect EDSC subdirectory and update all uses.

Differential Revision: https://reviews.llvm.org/D102780
commit 84a880e1e2
parent 9383e9c1e6
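The mechanical pattern applied throughout the diff below is the same everywhere: drop the ScopedContext-based EDSC helpers (loopNestBuilder, TemplatedIndexedValue and friends) and call the explicit OpBuilder/Location entry points instead. A minimal before/after sketch of that migration, modeled on the GPU memory-promotion change in this diff; the wrapper function and the src/dst/bounds names are illustrative, not part of the patch:

    #include "mlir/Dialect/MemRef/IR/MemRef.h"
    #include "mlir/Dialect/SCF/SCF.h"

    using namespace mlir;

    // Before (EDSC): builder and location were implicit in a ScopedContext.
    //   ScopedContext scope(builder, loc);
    //   loopNestBuilder(lbs, ubs, steps, [&](ValueRange ivs) {
    //     MemRefIndexedValue from(src), to(dst);
    //     to(ivs) = from(ivs);  // indexed-value sugar for load + store
    //   });

    // After: the builder and location are threaded explicitly through the
    // body callback, and the load/store ops are created directly.
    static void buildCopyLoops(OpBuilder &b, Location loc, Value src, Value dst,
                               ValueRange lbs, ValueRange ubs, ValueRange steps) {
      scf::buildLoopNest(b, loc, lbs, ubs, steps,
                         [&](OpBuilder &b, Location loc, ValueRange ivs) {
                           Value v = b.create<memref::LoadOp>(loc, src, ivs);
                           b.create<memref::StoreOp>(loc, v, dst, ivs);
                         });
    }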
@@ -89,132 +89,6 @@ Value uge(Value lhs, Value rhs);
} // namespace op

/// Arithmetic operator overloadings.
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::operator+(Value e) {
  using op::operator+;
  return static_cast<Value>(*this) + e;
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::operator-(Value e) {
  using op::operator-;
  return static_cast<Value>(*this) - e;
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::operator*(Value e) {
  using op::operator*;
  return static_cast<Value>(*this) * e;
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::operator/(Value e) {
  using op::operator/;
  return static_cast<Value>(*this) / e;
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::operator%(Value e) {
  using op::operator%;
  return static_cast<Value>(*this) % e;
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::operator^(Value e) {
  using op::operator^;
  return static_cast<Value>(*this) ^ e;
}

/// Assignment-arithmetic operator overloadings.
template <typename Load, typename Store>
Store TemplatedIndexedValue<Load, Store>::operator+=(Value e) {
  using op::operator+;
  return Store(*this + e, getBase(), indices);
}
template <typename Load, typename Store>
Store TemplatedIndexedValue<Load, Store>::operator-=(Value e) {
  using op::operator-;
  return Store(*this - e, getBase(), indices);
}
template <typename Load, typename Store>
Store TemplatedIndexedValue<Load, Store>::operator*=(Value e) {
  using op::operator*;
  return Store(*this * e, getBase(), indices);
}
template <typename Load, typename Store>
Store TemplatedIndexedValue<Load, Store>::operator/=(Value e) {
  using op::operator/;
  return Store(*this / e, getBase(), indices);
}
template <typename Load, typename Store>
Store TemplatedIndexedValue<Load, Store>::operator%=(Value e) {
  using op::operator%;
  return Store(*this % e, getBase(), indices);
}
template <typename Load, typename Store>
Store TemplatedIndexedValue<Load, Store>::operator^=(Value e) {
  using op::operator^;
  return Store(*this ^ e, getBase(), indices);
}

/// Logical operator overloadings.
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::operator&&(Value e) {
  using op::operator&&;
  return static_cast<Value>(*this) && e;
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::operator||(Value e) {
  using op::operator||;
  return static_cast<Value>(*this) || e;
}

/// Comparison operator overloadings.
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::eq(Value e) {
  return eq(value, e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::ne(Value e) {
  return ne(value, e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::slt(Value e) {
  using op::slt;
  return slt(static_cast<Value>(*this), e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::sle(Value e) {
  using op::sle;
  return sle(static_cast<Value>(*this), e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::sgt(Value e) {
  using op::sgt;
  return sgt(static_cast<Value>(*this), e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::sge(Value e) {
  using op::sge;
  return sge(static_cast<Value>(*this), e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::ult(Value e) {
  using op::ult;
  return ult(static_cast<Value>(*this), e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::ule(Value e) {
  using op::ule;
  return ule(static_cast<Value>(*this), e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::ugt(Value e) {
  using op::ugt;
  return ugt(static_cast<Value>(*this), e);
}
template <typename Load, typename Store>
Value TemplatedIndexedValue<Load, Store>::uge(Value e) {
  using op::uge;
  return uge(static_cast<Value>(*this), e);
}

} // namespace edsc
} // namespace mlir
@@ -21,9 +21,6 @@ using affine_min = ValueBuilder<AffineMinOp>;
using affine_max = ValueBuilder<AffineMaxOp>;
using affine_store = OperationBuilder<AffineStoreOp>;

/// Provide an index notation around affine_load and affine_store.
using AffineIndexedValue = TemplatedIndexedValue<affine_load, affine_store>;

} // namespace intrinsics
} // namespace edsc
} // namespace mlir
@@ -244,19 +244,15 @@ struct RegionMatcher {
/// Utility class used to generate nested loops with ranges described by
/// `loopRanges` and loop type described by the `iteratorTypes`. `bodyBuilderFn`
/// is used to generate the body of the innermost loop. It is passed a range
/// of loop induction variables.
/// of loop induction variables and a range of iterArgs.
template <typename LoopTy>
struct GenerateLoopNest {
  using IndexedValueTy =
      typename std::conditional<std::is_same<LoopTy, AffineForOp>::value,
                                edsc::intrinsics::AffineIndexedValue,
                                edsc::intrinsics::MemRefIndexedValue>::type;

  static void
  doit(ArrayRef<Range> loopRanges, LinalgOp linalgOp,
       ArrayRef<Attribute> iteratorTypes,
       function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
       Optional<LinalgLoopDistributionOptions> = None);
  static void doit(OpBuilder &b, Location loc, ArrayRef<Range> loopRanges,
                   LinalgOp linalgOp, ArrayRef<Attribute> iteratorTypes,
                   function_ref<scf::ValueVector(OpBuilder &, Location,
                                                 ValueRange, ValueRange)>
                       bodyBuilderFn,
                   Optional<LinalgLoopDistributionOptions> = None);
};

} // namespace linalg
@@ -31,9 +31,6 @@ using memref_tensor_load = ValueBuilder<memref::TensorLoadOp>;
using memref_tensor_store = OperationBuilder<memref::TensorStoreOp>;
using memref_view = ValueBuilder<memref::ViewOp>;

/// Provide an index notation around memref_load and memref_store.
using MemRefIndexedValue =
    TemplatedIndexedValue<intrinsics::memref_load, intrinsics::memref_store>;
} // namespace intrinsics
} // namespace edsc
} // namespace mlir
@@ -1,56 +0,0 @@
//===- Builders.h - MLIR Declarative Builder Classes ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Provides intuitive composable interfaces for building structured MLIR
// snippets in a declarative fashion.
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_SCF_EDSC_BUILDERS_H_
#define MLIR_DIALECT_SCF_EDSC_BUILDERS_H_

#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/EDSC/Builders.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Types.h"

namespace mlir {
namespace edsc {

/// Adapters for building loop nests using the builder and the location stored
/// in ScopedContext. Actual builders are in scf::buildLoopNest.
scf::LoopNest loopNestBuilder(ValueRange lbs, ValueRange ubs,
                              ValueRange steps,
                              function_ref<void(ValueRange)> fun = nullptr);
scf::LoopNest loopNestBuilder(Value lb, Value ub, Value step,
                              function_ref<void(Value)> fun = nullptr);
scf::LoopNest loopNestBuilder(
    Value lb, Value ub, Value step, ValueRange iterArgInitValues,
    function_ref<scf::ValueVector(Value, ValueRange)> fun = nullptr);
scf::LoopNest loopNestBuilder(
    ValueRange lbs, ValueRange ubs, ValueRange steps,
    ValueRange iterArgInitValues,
    function_ref<scf::ValueVector(ValueRange, ValueRange)> fun = nullptr);

/// Adapters for building if conditions using the builder and the location
/// stored in ScopedContext. 'thenBody' is mandatory, 'elseBody' can be omitted
/// if the condition should not have an 'else' part.
/// When `ifOp` is specified, the scf::IfOp is captured. This is particularly
/// convenient for 0-result conditions.
ValueRange conditionBuilder(TypeRange results, Value condition,
                            function_ref<scf::ValueVector()> thenBody,
                            function_ref<scf::ValueVector()> elseBody = nullptr,
                            scf::IfOp *ifOp = nullptr);
ValueRange conditionBuilder(Value condition, function_ref<void()> thenBody,
                            function_ref<void()> elseBody = nullptr,
                            scf::IfOp *ifOp = nullptr);

} // namespace edsc
} // namespace mlir

#endif // MLIR_DIALECT_SCF_EDSC_BUILDERS_H_
@@ -1,24 +0,0 @@
//===- Intrinsics.h - MLIR EDSC Intrinsics for SCF --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM
// Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_DIALECT_SCF_EDSC_INTRINSICS_H_
#define MLIR_DIALECT_SCF_EDSC_INTRINSICS_H_

#include "mlir/Dialect/SCF/EDSC/Builders.h"

namespace mlir {
namespace edsc {
namespace intrinsics {

using loop_yield = OperationBuilder<scf::YieldOp>;

} // namespace intrinsics
} // namespace edsc
} // namespace mlir

#endif // MLIR_DIALECT_SCF_EDSC_INTRINSICS_H_
@@ -169,172 +169,6 @@ private:
  SmallVector<AffineExpr, 4> exprs;
};

/// A TemplatedIndexedValue brings an index notation over the template Load and
/// Store parameters. Assigning to an IndexedValue emits an actual `Store`
/// operation, while converting an IndexedValue to a Value emits an actual
/// `Load` operation.
template <typename Load, typename Store>
class TemplatedIndexedValue {
public:
  explicit TemplatedIndexedValue(Value v) : value(v) {}

  TemplatedIndexedValue(const TemplatedIndexedValue &rhs) = default;

  TemplatedIndexedValue operator()() { return *this; }
  /// Returns a new `TemplatedIndexedValue`.
  TemplatedIndexedValue operator()(Value index) {
    TemplatedIndexedValue res(value);
    res.indices.push_back(index);
    return res;
  }
  template <typename... Args>
  TemplatedIndexedValue operator()(Value index, Args... indices) {
    return TemplatedIndexedValue(value, index).append(indices...);
  }
  TemplatedIndexedValue operator()(ValueRange indices) {
    return TemplatedIndexedValue(value, indices);
  }

  /// Emits a `store`.
  Store operator=(const TemplatedIndexedValue &rhs) {
    return Store(rhs, value, indices);
  }
  Store operator=(Value rhs) { return Store(rhs, value, indices); }

  /// Emits a `load` when converting to a Value.
  operator Value() const { return Load(value, indices); }

  /// Returns the base memref.
  Value getBase() const { return value; }

  /// Returns the underlying memref.
  MemRefType getMemRefType() const {
    return value.getType().template cast<MemRefType>();
  }

  /// Returns the underlying MemRef elemental type cast as `T`.
  template <typename T>
  T getElementalTypeAs() const {
    return value.getType()
        .template cast<MemRefType>()
        .getElementType()
        .template cast<T>();
  }

  /// Arithmetic operator overloadings.
  Value operator+(Value e);
  Value operator-(Value e);
  Value operator*(Value e);
  Value operator/(Value e);
  Value operator%(Value e);
  Value operator^(Value e);
  Value operator+(TemplatedIndexedValue e) {
    return *this + static_cast<Value>(e);
  }
  Value operator-(TemplatedIndexedValue e) {
    return *this - static_cast<Value>(e);
  }
  Value operator*(TemplatedIndexedValue e) {
    return *this * static_cast<Value>(e);
  }
  Value operator/(TemplatedIndexedValue e) {
    return *this / static_cast<Value>(e);
  }
  Value operator%(TemplatedIndexedValue e) {
    return *this % static_cast<Value>(e);
  }
  Value operator^(TemplatedIndexedValue e) {
    return *this ^ static_cast<Value>(e);
  }

  /// Assignment-arithmetic operator overloadings.
  Store operator+=(Value e);
  Store operator-=(Value e);
  Store operator*=(Value e);
  Store operator/=(Value e);
  Store operator%=(Value e);
  Store operator^=(Value e);
  Store operator+=(TemplatedIndexedValue e) {
    return this->operator+=(static_cast<Value>(e));
  }
  Store operator-=(TemplatedIndexedValue e) {
    return this->operator-=(static_cast<Value>(e));
  }
  Store operator*=(TemplatedIndexedValue e) {
    return this->operator*=(static_cast<Value>(e));
  }
  Store operator/=(TemplatedIndexedValue e) {
    return this->operator/=(static_cast<Value>(e));
  }
  Store operator%=(TemplatedIndexedValue e) {
    return this->operator%=(static_cast<Value>(e));
  }
  Store operator^=(TemplatedIndexedValue e) {
    return this->operator^=(static_cast<Value>(e));
  }

  /// Logical operator overloadings.
  Value operator&&(Value e);
  Value operator||(Value e);
  Value operator&&(TemplatedIndexedValue e) {
    return *this && static_cast<Value>(e);
  }
  Value operator||(TemplatedIndexedValue e) {
    return *this || static_cast<Value>(e);
  }

  /// Comparison operator overloadings.
  Value eq(Value e);
  Value ne(Value e);
  Value slt(Value e);
  Value sle(Value e);
  Value sgt(Value e);
  Value sge(Value e);
  Value ult(Value e);
  Value ule(Value e);
  Value ugt(Value e);
  Value uge(Value e);
  Value slt(TemplatedIndexedValue e) {
    return slt(*this, static_cast<Value>(e));
  }
  Value sle(TemplatedIndexedValue e) {
    return sle(*this, static_cast<Value>(e));
  }
  Value sgt(TemplatedIndexedValue e) {
    return sgt(*this, static_cast<Value>(e));
  }
  Value sge(TemplatedIndexedValue e) {
    return sge(*this, static_cast<Value>(e));
  }
  Value ult(TemplatedIndexedValue e) {
    return ult(*this, static_cast<Value>(e));
  }
  Value ule(TemplatedIndexedValue e) {
    return ule(*this, static_cast<Value>(e));
  }
  Value ugt(TemplatedIndexedValue e) {
    return ugt(*this, static_cast<Value>(e));
  }
  Value uge(TemplatedIndexedValue e) {
    return uge(*this, static_cast<Value>(e));
  }

private:
  TemplatedIndexedValue(Value value, ValueRange indices)
      : value(value), indices(indices.begin(), indices.end()) {}

  TemplatedIndexedValue &append() { return *this; }

  template <typename T, typename... Args>
  TemplatedIndexedValue &append(T index, Args... indices) {
    this->indices.push_back(static_cast<Value>(index));
    append(indices...);
    return *this;
  }
  Value value;
  SmallVector<Value, 8> indices;
};

} // namespace edsc
} // namespace mlir
@@ -14,7 +14,7 @@
#include "mlir/Dialect/GPU/MemoryPromotion.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
#include "mlir/Dialect/SCF/EDSC/Builders.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/LoopUtils.h"
@@ -41,7 +41,7 @@ static StringRef getDimName(unsigned dim) {
/// GPUDialect::getNumWorkgroupDimensions() loops, completing the nest with
/// single-iteration loops. Maps the innermost loops to thread dimensions, in
/// reverse order to enable access coalescing in the innermost loop.
static void insertCopyLoops(OpBuilder &builder, Location loc,
static void insertCopyLoops(OpBuilder &b, Location loc,
                            MemRefBoundsCapture &bounds, Value from, Value to) {
  // Create EDSC handles for bounds.
  unsigned rank = bounds.rank();
@@ -68,24 +68,24 @@ static void insertCopyLoops(OpBuilder &builder, Location loc,
      [](int64_t step) { return std_constant_index(step); });

  // Obtain thread identifiers and block sizes, necessary to map to them.
  auto indexType = builder.getIndexType();
  auto indexType = b.getIndexType();
  SmallVector<Value, 3> threadIds, blockDims;
  for (unsigned i = 0; i < 3; ++i) {
    auto dimName = builder.getStringAttr(getDimName(i));
    threadIds.push_back(
        builder.create<gpu::ThreadIdOp>(loc, indexType, dimName));
    blockDims.push_back(
        builder.create<gpu::BlockDimOp>(loc, indexType, dimName));
    auto dimName = b.getStringAttr(getDimName(i));
    threadIds.push_back(b.create<gpu::ThreadIdOp>(loc, indexType, dimName));
    blockDims.push_back(b.create<gpu::BlockDimOp>(loc, indexType, dimName));
  }

  // Produce the loop nest with copies.
  SmallVector<Value, 8> ivs(lbs.size());
  loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) {
    ivs.assign(loopIvs.begin(), loopIvs.end());
    auto activeIvs = llvm::makeArrayRef(ivs).take_back(rank);
    MemRefIndexedValue fromHandle(from), toHandle(to);
    toHandle(activeIvs) = fromHandle(activeIvs);
  });
  mlir::scf::buildLoopNest(
      b, loc, lbs, ubs, steps,
      [&](OpBuilder &b, Location loc, ValueRange loopIvs) {
        ivs.assign(loopIvs.begin(), loopIvs.end());
        auto activeIvs = llvm::makeArrayRef(ivs).take_back(rank);
        Value loaded = b.create<memref::LoadOp>(loc, from, activeIvs);
        b.create<memref::StoreOp>(loc, loaded, to, activeIvs);
      });

  // Map the innermost loops to threads in reverse order.
  for (auto en :
@@ -142,17 +142,17 @@ static void insertCopies(Region &region, Location loc, Value from, Value to) {
  assert(llvm::hasSingleElement(region) &&
         "unstructured control flow not supported");

  OpBuilder builder(region.getContext());
  builder.setInsertionPointToStart(&region.front());
  OpBuilder b(region.getContext());
  b.setInsertionPointToStart(&region.front());

  ScopedContext edscContext(builder, loc);
  ScopedContext edscContext(b, loc);
  MemRefBoundsCapture fromBoundsCapture(from);
  insertCopyLoops(builder, loc, fromBoundsCapture, from, to);
  builder.create<gpu::BarrierOp>(loc);
  insertCopyLoops(b, loc, fromBoundsCapture, from, to);
  b.create<gpu::BarrierOp>(loc);

  builder.setInsertionPoint(&region.front().back());
  builder.create<gpu::BarrierOp>(loc);
  insertCopyLoops(builder, loc, fromBoundsCapture, to, from);
  b.setInsertionPoint(&region.front().back());
  b.create<gpu::BarrierOp>(loc);
  insertCopyLoops(b, loc, fromBoundsCapture, to, from);
}

/// Promotes a function argument to workgroup memory in the given function. The
@@ -11,7 +11,6 @@
#include "mlir/Dialect/Linalg/EDSC/Builders.h"
#include "mlir/Dialect/Linalg/EDSC/Intrinsics.h"
#include "mlir/Dialect/Math/EDSC/Intrinsics.h"
#include "mlir/Dialect/SCF/EDSC/Builders.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/IR/AffineExpr.h"
@@ -7,16 +7,11 @@
//===----------------------------------------------------------------------===//

#include "PassDetail.h"
#include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
#include "mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
#include "mlir/Dialect/SCF/EDSC/Builders.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/BlockAndValueMapping.h"
@@ -27,38 +22,67 @@
#include "llvm/ADT/TypeSwitch.h"

using namespace mlir;
using namespace mlir::edsc;
using namespace mlir::edsc::intrinsics;
using namespace mlir::linalg;

using edsc::op::operator+;
namespace {
/// Helper struct to build simple arithmetic quantities with minimal type
/// inference support.
struct ArithBuilder {
  ArithBuilder(OpBuilder &b, Location loc) : b(b), loc(loc) {}

  static SmallVector<Value, 8> makeCanonicalAffineApplies(OpBuilder &b,
                                                          Location loc,
                                                          AffineMap map,
                                                          ArrayRef<Value> vals) {
  Value select(Value cmp, Value lhs, Value rhs) {
    return b.create<SelectOp>(loc, cmp, lhs, rhs);
  }
  Value slt(Value lhs, Value rhs) {
    if (lhs.getType().isa<IntegerType>())
      return b.create<CmpIOp>(loc, CmpIPredicate::slt, lhs, rhs);
    return b.create<CmpFOp>(loc, CmpFPredicate::OLT, lhs, rhs);
  }
  Value sgt(Value lhs, Value rhs) {
    if (lhs.getType().isa<IntegerType>())
      return b.create<CmpIOp>(loc, CmpIPredicate::sgt, lhs, rhs);
    return b.create<CmpFOp>(loc, CmpFPredicate::OGT, lhs, rhs);
  }
  Value add(Value lhs, Value rhs) {
    if (lhs.getType().isa<IntegerType>())
      return b.create<AddIOp>(loc, lhs, rhs);
    return b.create<AddFOp>(loc, lhs, rhs);
  }
  Value mul(Value lhs, Value rhs) {
    if (lhs.getType().isa<IntegerType>())
      return b.create<MulIOp>(loc, lhs, rhs);
    return b.create<MulFOp>(loc, lhs, rhs);
  }

  OpBuilder &b;
  Location loc;
};
} // namespace

static SmallVector<Value> makeCanonicalAffineApplies(OpBuilder &b, Location loc,
                                                     AffineMap map,
                                                     ArrayRef<Value> vals) {
  if (map.isEmpty())
    return {};

  assert(map.getNumInputs() == vals.size());
  SmallVector<Value, 8> res;
  SmallVector<Value> res;
  res.reserve(map.getNumResults());
  auto dims = map.getNumDims();
  for (auto e : map.getResults()) {
    auto exprMap = AffineMap::get(dims, map.getNumSymbols(), e);
    SmallVector<Value, 4> operands(vals.begin(), vals.end());
    SmallVector<Value> operands(vals.begin(), vals.end());
    canonicalizeMapAndOperands(&exprMap, &operands);
    res.push_back(affine_apply(exprMap, operands));
    res.push_back(b.create<AffineApplyOp>(loc, exprMap, operands));
  }
  return res;
}

template <typename IndexedValueType, typename OpType>
static void inlineRegionAndEmitStore(OpType op, ArrayRef<Value> indexedValues,
                                     ArrayRef<SmallVector<Value, 8>> indexing,
template <typename LoadOpTy, typename StoreOpTy, typename OpType>
static void inlineRegionAndEmitStore(OpBuilder &b, Location loc, OpType op,
                                     ArrayRef<Value> indexedValues,
                                     ArrayRef<SmallVector<Value>> indexing,
                                     ArrayRef<Value> outputBuffers) {
  assert(op->getNumRegions() == 1 && "Expected single region op");
  auto &b = ScopedContext::getBuilderRef();
  auto &block = op->getRegion(0).front();
  BlockAndValueMapping map;
  map.map(block.getArguments(), indexedValues);
@@ -67,26 +91,24 @@ static void inlineRegionAndEmitStore(OpType op, ArrayRef<Value> indexedValues,
    map.map(op.getResults(), newOp->getResults());
  }

  Operation &terminator = block.back();
  assert(isa<linalg::YieldOp>(terminator) &&
         "expected a yield op in the end of the region");
  for (unsigned i = 0, e = terminator.getNumOperands(); i < e; ++i) {
    IndexedValueType O(outputBuffers[i]);
    O(indexing[i]) = map.lookupOrDefault(terminator.getOperand(i));
  Operation *terminator = block.getTerminator();
  for (OpOperand &operand : terminator->getOpOperands()) {
    Value toStore = map.lookupOrDefault(operand.get());
    b.create<StoreOpTy>(loc, toStore, outputBuffers[operand.getOperandNumber()],
                        indexing[operand.getOperandNumber()]);
  }
}

// Returns a pair that contains input indices and output indices of a
// SingleInputPoolingOp `op`.
struct InputAndOutputIndices {
  SmallVector<Value, 8> inputs;
  SmallVector<Value, 8> outputs;
  SmallVector<Value> inputs;
  SmallVector<Value> outputs;
};
template <typename SingleInputPoolingOp>
static InputAndOutputIndices getInputAndOutputIndices(ArrayRef<Value> allIvs,
                                                      SingleInputPoolingOp op) {
  auto &b = ScopedContext::getBuilderRef();
  auto loc = ScopedContext::getLocation();
static InputAndOutputIndices
getInputAndOutputIndices(OpBuilder &b, Location loc, ArrayRef<Value> allIvs,
                         SingleInputPoolingOp op) {
  auto mapsRange = op.indexing_maps().template getAsRange<AffineMapAttr>();
  auto maps = llvm::to_vector<8>(
      llvm::map_range(mapsRange, [](AffineMapAttr a) { return a.getValue(); }));
@@ -125,19 +147,18 @@ static InputAndOutputIndices getInputAndOutputIndices(ArrayRef<Value> allIvs,
/// }
/// }
/// ```
template <typename IndexedValueType>
static void emitScalarImplementation(ArrayRef<Value> allIvs,
template <typename LoadOpTy, typename StoreOpTy>
static void emitScalarImplementation(OpBuilder &b, Location loc,
                                     ArrayRef<Value> allIvs,
                                     LinalgOp linalgOp) {
  assert(linalgOp.hasBufferSemantics() &&
         "expected linalg op with buffer semantics");
  auto &b = ScopedContext::getBuilderRef();
  auto loc = ScopedContext::getLocation();
  unsigned nInputs = linalgOp.getNumInputs();
  unsigned nOutputs = linalgOp.getNumOutputs();
  SmallVector<Value, 4> indexedValues;
  SmallVector<Value> indexedValues;
  indexedValues.reserve(nInputs + nOutputs);

  auto allIvsPlusDims = SmallVector<Value, 4>(allIvs.begin(), allIvs.end());
  auto allIvsPlusDims = SmallVector<Value>(allIvs.begin(), allIvs.end());

  // TODO: Avoid the loads if the corresponding argument of the
  // region has no uses.
@@ -145,46 +166,40 @@ static void emitScalarImplementation(ArrayRef<Value> allIvs,
  for (unsigned i = 0; i < nInputs; ++i) {
    auto indexing = makeCanonicalAffineApplies(
        b, loc, linalgOp.getInputIndexingMap(i), allIvsPlusDims);
    // Passing through IndexedValueType emits the proper load operation.
    indexedValues.push_back(IndexedValueType(linalgOp.getInput(i))(indexing));
    indexedValues.push_back(
        b.create<LoadOpTy>(loc, linalgOp.getInput(i), indexing));
  }
  // 1.b. Emit load from output views.
  for (unsigned i = 0; i < nOutputs; ++i) {
    auto indexing = makeCanonicalAffineApplies(
        b, loc, linalgOp.getOutputIndexingMap(i), allIvsPlusDims);
    // Passing through IndexedValueType emits the proper load operation.
    indexedValues.push_back(
        IndexedValueType(linalgOp.getOutputBuffer(i))(indexing));
        b.create<LoadOpTy>(loc, linalgOp.getOutputBuffer(i), indexing));
  }

  // TODO: When a region inliner exists, use it.
  // 2. Inline region, currently only works for a single basic block.
  // 3. Emit store.
  SmallVector<SmallVector<Value, 8>, 8> indexing;
  SmallVector<Value, 8> outputBuffers;
  SmallVector<SmallVector<Value>, 8> indexing;
  SmallVector<Value> outputBuffers;
  for (unsigned i = 0; i < nOutputs; ++i) {
    indexing.push_back(makeCanonicalAffineApplies(
        b, loc, linalgOp.getOutputIndexingMap(i), allIvsPlusDims));
    outputBuffers.push_back(linalgOp.getOutputBuffer(i));
  }
  inlineRegionAndEmitStore<IndexedValueType>(linalgOp, indexedValues, indexing,
                                             outputBuffers);
  inlineRegionAndEmitStore<LoadOpTy, StoreOpTy>(b, loc, linalgOp, indexedValues,
                                                indexing, outputBuffers);
}

// Create a padded view into the given `input` tensor using the 'indices'
// to access the tensor. `skipPadding` lists the dimensions for which no padding
// is needed e.g. the non-spatial dimensions for convolutions.
template <typename IndexedValueType>
Value getPaddedInput(Value input, ArrayRef<Value> indices,
                     ArrayRef<int> skipPadding, Value padValue) {
  // TODO: add a level of indirection to linalg.generic.

  IndexedValueType indexedInput(input);

  auto *context = ScopedContext::getContext();
  Value zeroIndex = std_constant_index(0);
  SmallVector<Value, 8> conds;
  SmallVector<Value, 8> clampedImIdx;
Value getPaddedInput(OpBuilder &b, Location loc, Value input,
                     ArrayRef<Value> indices, ArrayRef<int> skipPadding,
                     Value padValue) {
  Value zeroIndex = b.create<ConstantIndexOp>(loc, 0);
  SmallVector<Value> conds;
  SmallVector<Value> clampedImIdx;
  for (auto iter : llvm::enumerate(indices)) {
    int idx = iter.index();
    auto dim = iter.value();
@@ -193,29 +208,33 @@ Value getPaddedInput(Value input, ArrayRef<Value> indices,
      continue;
    }

    using edsc::op::sge;
    using edsc::op::slt;
    using edsc::op::operator||;
    Value leftOutOfBound = slt(dim, zeroIndex);
    Value leftOutOfBound =
        b.create<CmpIOp>(loc, CmpIPredicate::slt, dim, zeroIndex);
    if (conds.empty())
      conds.push_back(leftOutOfBound);
    else
      conds.push_back(conds.back() || leftOutOfBound);
    Value rightBound = memref_dim(input, idx);
    conds.push_back(conds.back() || (sge(dim, rightBound)));
      conds.push_back(b.create<OrOp>(loc, conds.back(), leftOutOfBound));
    Value rightBound = b.create<memref::DimOp>(loc, input, idx);
    Value rightOutOfBound =
        b.create<CmpIOp>(loc, CmpIPredicate::sge, dim, rightBound);
    conds.push_back(b.create<OrOp>(loc, conds.back(), rightOutOfBound));

    // When padding is involved, the indices will only be shifted to negative,
    // so having a max op is enough.
    auto maxMap = AffineMap::get(/*dimCount=*/1, 0,
                                 {getAffineDimExpr(/*position=*/0, context),
                                  getAffineConstantExpr(0, context)},
                                 context);
    clampedImIdx.push_back(affine_max(dim.getType(), maxMap, ValueRange{dim}));
    MLIRContext *ctx = input.getContext();
    AffineExpr m = getAffineDimExpr(/*position=*/0, ctx),
               zero = getAffineConstantExpr(0, ctx);
    AffineMap maxMap =
        AffineMap::inferFromExprList(ArrayRef<ArrayRef<AffineExpr>>{{m, zero}})
            .front();
    clampedImIdx.push_back(b.create<AffineMaxOp>(loc, maxMap, ValueRange{dim}));
  }

  Value readInput = indexedInput(clampedImIdx);
  return conds.empty() ? readInput
                       : (Value)std_select(conds.back(), padValue, readInput);
  Value readInput = b.create<memref::LoadOp>(loc, input, clampedImIdx);
  if (conds.empty())
    return readInput;

  return b.create<SelectOp>(loc, conds.back(), padValue, readInput);
}

namespace {
@@ -229,48 +248,47 @@ template <typename OpType> Attribute getPadValueAttr(Type type) {
}

template <> Attribute getPadValueAttr<PoolingMaxOp>(Type type) {
  auto &b = ScopedContext::getBuilderRef();
  if (auto floatType = type.dyn_cast<FloatType>()) {
    return b.getFloatAttr(
        floatType,
        APFloat::getInf(floatType.getFloatSemantics(), /*Negative*/ true));
    return OpBuilder(type.getContext())
        .getFloatAttr(floatType, APFloat::getInf(floatType.getFloatSemantics(),
                                                 /*Negative*/ true));
  }
  if (auto intType = type.dyn_cast<IntegerType>()) {
    unsigned width = intType.getWidth();
    // The select instruction used to lower the PoolingMin uses a signed
    // comparison, use a signed constant irrespective of the signedness of the
    // integer type.
    return b.getIntegerAttr(intType, APInt::getSignedMinValue(width));
    return OpBuilder(type.getContext())
        .getIntegerAttr(intType, APInt::getSignedMinValue(width));
  }
  llvm_unreachable("Unsupported data type for PoolingMaxOp");
  return {};
}

template <> Attribute getPadValueAttr<PoolingMinOp>(Type type) {
  auto &b = ScopedContext::getBuilderRef();
  if (auto floatType = type.dyn_cast<FloatType>()) {
    return b.getFloatAttr(floatType,
                          APFloat::getInf(floatType.getFloatSemantics()));
    return OpBuilder(type.getContext())
        .getFloatAttr(floatType,
                      APFloat::getInf(floatType.getFloatSemantics()));
  }
  if (auto intType = type.dyn_cast<IntegerType>()) {
    unsigned width = intType.getWidth();
    // The select instruction used to lower the PoolingMin uses a signed
    // comparison, use a signed constant irrespective of the signedness of the
    // integer type.
    return b.getIntegerAttr(intType, APInt::getSignedMaxValue(width));
    return OpBuilder(type.getContext())
        .getIntegerAttr(intType, APInt::getSignedMaxValue(width));
  }
  llvm_unreachable("Unsupported data type for PoolingMinOp");
  return {};
}

template <> Attribute getPadValueAttr<PoolingSumOp>(Type type) {
  auto &b = ScopedContext::getBuilderRef();
  return b.getZeroAttr(type);
  return OpBuilder(type.getContext()).getZeroAttr(type);
}

template <> Attribute getPadValueAttr<ConvOp>(Type type) {
  auto &b = ScopedContext::getBuilderRef();
  return b.getZeroAttr(type);
  return OpBuilder(type.getContext()).getZeroAttr(type);
}

} // namespace
@@ -284,38 +302,43 @@ static bool hasPadding(ConvOp convOp) {
  return false;
}

template <typename IndexedValueType>
static void emitScalarImplementation(ArrayRef<Value> allIvs, ConvOp convOp) {
template <typename LoadOpTy, typename StoreOpTy>
static void emitScalarImplementation(OpBuilder &b, Location loc,
                                     ArrayRef<Value> allIvs, ConvOp convOp) {
  assert(convOp.hasBufferSemantics() &&
         "expected linalg op with buffer semantics");
  auto &b = ScopedContext::getBuilderRef();
  auto loc = ScopedContext::getLocation();
  auto mapsRange = convOp.indexing_maps().getAsRange<AffineMapAttr>();
  auto maps = llvm::to_vector<8>(
      llvm::map_range(mapsRange, [](AffineMapAttr a) { return a.getValue(); }));
  SmallVector<Value, 8> fIdx(
      makeCanonicalAffineApplies(b, loc, maps[0], allIvs));
  SmallVector<Value, 8> imIdx(
      makeCanonicalAffineApplies(b, loc, maps[1], allIvs));
  SmallVector<Value, 8> oIdx(
      makeCanonicalAffineApplies(b, loc, maps[2], allIvs));
  SmallVector<Value> fIdx(makeCanonicalAffineApplies(b, loc, maps[0], allIvs));
  SmallVector<Value> imIdx(makeCanonicalAffineApplies(b, loc, maps[1], allIvs));
  SmallVector<Value> oIdx(makeCanonicalAffineApplies(b, loc, maps[2], allIvs));

  IndexedValueType F(convOp.filter()), O(convOp.output());
  Value filter = convOp.filter(), output = convOp.output();

  // Emit scalar form. Padded conv involves an affine.max in the memory access
  // which is not allowed by affine.load. Override to use an MemRefIndexedValue
  // when there is non-zero padding.
  if (hasPadding(convOp)) {
    Type type = convOp.input().getType().cast<MemRefType>().getElementType();
    Value padValue = std_constant(type, getPadValueAttr<ConvOp>(type));
    Value paddedInput = getPaddedInput<MemRefIndexedValue>(
        convOp.input(), imIdx,
        /* Only need to pad the window dimensions */
        {0, static_cast<int>(imIdx.size()) - 1}, padValue);
    O(oIdx) += F(fIdx) * paddedInput;
    Value padValue =
        b.create<ConstantOp>(loc, type, getPadValueAttr<ConvOp>(type));
    Value paddedInput =
        getPaddedInput(b, loc, convOp.input(), imIdx,
                       /* Only need to pad the window dimensions */
                       {0, static_cast<int>(imIdx.size()) - 1}, padValue);
    Value filterVal = b.create<LoadOpTy>(loc, filter, fIdx);
    Value mulVal = ArithBuilder(b, loc).mul(filterVal, paddedInput);
    Value outputVal = b.create<LoadOpTy>(loc, output, oIdx);
    Value addVal = ArithBuilder(b, loc).add(mulVal, outputVal);
    b.create<StoreOpTy>(loc, addVal, output, oIdx);
  } else {
    IndexedValueType I(convOp.input());
    O(oIdx) += F(fIdx) * I(imIdx);
    Value inputVal = b.create<LoadOpTy>(loc, convOp.input(), imIdx);
    Value filterVal = b.create<LoadOpTy>(loc, filter, fIdx);
    Value mulVal = ArithBuilder(b, loc).mul(filterVal, inputVal);
    Value outputVal = b.create<LoadOpTy>(loc, output, oIdx);
    Value addVal = ArithBuilder(b, loc).add(mulVal, outputVal);
    b.create<StoreOpTy>(loc, addVal, output, oIdx);
  }
}
@@ -327,55 +350,62 @@ template <typename PoolingOp> static bool hasPadding(PoolingOp poolingOp) {
  return false;
}

template <typename IndexedValueType, typename PoolingOp>
static Value getPoolingInput(PoolingOp op, ArrayRef<Value> inputIndices) {
template <typename LoadOpTy, typename StoreOpTy, typename PoolingOp>
static Value getPoolingInput(OpBuilder &b, Location loc, PoolingOp op,
                             ArrayRef<Value> inputIndices) {
  if (hasPadding(op)) {
    Type type =
        op.input().getType().template cast<MemRefType>().getElementType();
    Value padValue = std_constant(type, getPadValueAttr<PoolingOp>(type));
    return getPaddedInput<MemRefIndexedValue>(op.input(), inputIndices,
                                              /*Pad every dimension*/ {},
                                              padValue);
    Value padValue =
        b.create<ConstantOp>(loc, type, getPadValueAttr<PoolingOp>(type));
    return getPaddedInput(b, loc, op.input(), inputIndices,
                          /*Pad every dimension*/ {}, padValue);
  }
  IndexedValueType input(op.input());
  return input(inputIndices);
  return b.create<LoadOpTy>(loc, op.input(), inputIndices);
}

template <typename IndexedValueType, typename OpType>
void emitPoolingMinMaxScalarImplementation(ArrayRef<Value> allIvs, OpType op) {
  InputAndOutputIndices indices = getInputAndOutputIndices(allIvs, op);
  // Emit scalar form.
  IndexedValueType output(op.output());
  Value lhs = output(indices.outputs);
  Value rhs = getPoolingInput<IndexedValueType>(op, indices.inputs);
  using edsc::op::sgt;
  using edsc::op::slt;
  Value value = std::is_same<OpType, PoolingMinOp>()
                    ? std_select(slt(lhs, rhs), lhs, rhs)
                    : std_select(sgt(lhs, rhs), lhs, rhs);
  output(indices.outputs) = value;
template <typename LoadOpTy, typename StoreOpTy, typename OpType>
void emitPoolingMinMaxScalarImplementation(OpBuilder &b, Location loc,
                                           ArrayRef<Value> allIvs, OpType op) {
  InputAndOutputIndices indices = getInputAndOutputIndices(b, loc, allIvs, op);
  Value lhs = b.create<LoadOpTy>(loc, op.output(), indices.outputs);
  Value rhs = getPoolingInput<LoadOpTy, StoreOpTy>(b, loc, op, indices.inputs);
  Value value = llvm::TypeSwitch<Operation *, Value>(op)
                    .Case([&](PoolingMinOp poolingOp) {
                      return ArithBuilder(b, loc).select(
                          ArithBuilder(b, loc).slt(lhs, rhs), lhs, rhs);
                    })
                    .Case([&](PoolingMaxOp poolingOp) {
                      return ArithBuilder(b, loc).select(
                          ArithBuilder(b, loc).sgt(lhs, rhs), lhs, rhs);
                    })
                    .Default([&](auto) { return Value(); });
  b.create<StoreOpTy>(loc, value, op.output(), indices.outputs);
}

template <typename IndexedValueType>
static void emitScalarImplementation(ArrayRef<Value> allIvs, PoolingMaxOp op) {
  emitPoolingMinMaxScalarImplementation<IndexedValueType, PoolingMaxOp>(allIvs,
                                                                        op);
template <typename LoadOpTy, typename StoreOpTy>
static void emitScalarImplementation(OpBuilder &b, Location loc,
                                     ArrayRef<Value> allIvs, PoolingMaxOp op) {
  emitPoolingMinMaxScalarImplementation<LoadOpTy, StoreOpTy, PoolingMaxOp>(
      b, loc, allIvs, op);
}

template <typename IndexedValueType>
static void emitScalarImplementation(ArrayRef<Value> allIvs, PoolingMinOp op) {
  emitPoolingMinMaxScalarImplementation<IndexedValueType, PoolingMinOp>(allIvs,
                                                                        op);
template <typename LoadOpTy, typename StoreOpTy>
static void emitScalarImplementation(OpBuilder &b, Location loc,
                                     ArrayRef<Value> allIvs, PoolingMinOp op) {
  emitPoolingMinMaxScalarImplementation<LoadOpTy, StoreOpTy, PoolingMinOp>(
      b, loc, allIvs, op);
}

template <typename IndexedValueType>
static void emitScalarImplementation(ArrayRef<Value> allIvs, PoolingSumOp op) {
  auto indices = getInputAndOutputIndices(allIvs, op);
  IndexedValueType output(op.output());

  // Emit scalar form.
  output(indices.outputs) +=
      getPoolingInput<IndexedValueType>(op, indices.inputs);
template <typename LoadOpTy, typename StoreOpTy>
static void emitScalarImplementation(OpBuilder &b, Location loc,
                                     ArrayRef<Value> allIvs, PoolingSumOp op) {
  auto indices = getInputAndOutputIndices(b, loc, allIvs, op);
  Value inputVal =
      getPoolingInput<LoadOpTy, StoreOpTy>(b, loc, op, indices.inputs);
  Value outputVal = b.create<LoadOpTy>(loc, op.output(), indices.outputs);
  Value added = ArithBuilder(b, loc).add(outputVal, inputVal);
  b.create<StoreOpTy>(loc, added, op.output(), indices.outputs);
}

/// Replace the index operations in the body of the loop nest by the matching
@@ -413,8 +443,12 @@ static void replaceIndexOpsByInductionVariables(LinalgOp linalgOp,
template <typename LoopTy>
static Optional<LinalgLoops> linalgOpToLoopsImpl(PatternRewriter &rewriter,
                                                 LinalgOp linalgOp) {
  using IndexedValueTy = typename GenerateLoopNest<LoopTy>::IndexedValueTy;
  ScopedContext scope(rewriter, linalgOp.getLoc());
  using LoadOpTy =
      typename std::conditional<std::is_same<LoopTy, AffineForOp>::value,
                                AffineLoadOp, memref::LoadOp>::type;
  using StoreOpTy =
      typename std::conditional<std::is_same<LoopTy, AffineForOp>::value,
                                AffineStoreOp, memref::StoreOp>::type;

  // Canonicalize indexed_generic operations before lowering them to loops.
  if (isa<IndexedGenericOp>(linalgOp))
@@ -428,16 +462,18 @@ static Optional<LinalgLoops> linalgOpToLoopsImpl(PatternRewriter &rewriter,
  auto loopRanges = linalgOp.createLoopRanges(rewriter, linalgOp.getLoc());
  auto iteratorTypes = llvm::to_vector<4>(linalgOp.iterator_types().getValue());

  SmallVector<Value, 4> allIvs;
  SmallVector<Value> allIvs;
  GenerateLoopNest<LoopTy>::doit(
      loopRanges, linalgOp, iteratorTypes,
      [&](ValueRange ivs, ValueRange iterArgs) -> scf::ValueVector {
      rewriter, linalgOp.getLoc(), loopRanges, linalgOp, iteratorTypes,
      [&](OpBuilder &b, Location loc, ValueRange ivs,
          ValueRange iterArgs) -> scf::ValueVector {
        assert(iterArgs.empty() && "unexpected iterArgs");
        allIvs.append(ivs.begin(), ivs.end());
        llvm::TypeSwitch<Operation *>(linalgOp)
            .Case<ConvOp, PoolingMaxOp, PoolingMinOp, PoolingSumOp, LinalgOp>(
                [&](auto op) {
                  emitScalarImplementation<IndexedValueTy>(allIvs, op);
                  emitScalarImplementation<LoadOpTy, StoreOpTy>(b, loc, allIvs,
                                                                op);
                })
            .Default([&](Operation *op) { assert(false && "unexpected op"); });
        return scf::ValueVector{};
@@ -499,7 +535,7 @@ struct TiledLoopToSCFPattern : public OpRewritePattern<TiledLoopOp> {
        tiledLoop.upperBound(), tiledLoop.step(),
        [&](OpBuilder &builder, Location loc, ValueRange ivs) {
          // Move body without its terminator.
          SmallVector<Value, 16> newBlockArgs;
          SmallVector<Value> newBlockArgs;
          newBlockArgs.append(ivs.begin(), ivs.end());
          newBlockArgs.append(tiledLoop.inputs().begin(),
                              tiledLoop.inputs().end());
@@ -19,7 +19,6 @@
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/MemRef/EDSC/Intrinsics.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/EDSC/Builders.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/AffineExpr.h"
@@ -225,69 +224,67 @@ tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ValueRange tileSizes,
  // 2. Create the tiled loops.
  LinalgOp res = op;
  SmallVector<Value, 4> ivs, tensorResults;
  GenerateLoopNest<LoopTy>::doit(
      loopRanges, op, iteratorTypes,
      [&](ValueRange localIvs, ValueRange iterArgs) -> scf::ValueVector {
        auto &b = ScopedContext::getBuilderRef();
        auto loc = ScopedContext::getLocation();
        ivs.assign(localIvs.begin(), localIvs.end());
  auto tiledLoopBodyBuilder = [&](OpBuilder &b, Location loc,
                                  ValueRange localIvs,
                                  ValueRange iterArgs) -> scf::ValueVector {
    ivs.assign(localIvs.begin(), localIvs.end());

        // When an `interchangeVector` is present, it has been applied to the
        // loop ranges and the iterator types. Apply its inverse to the
        // resulting loop `ivs` to match the op definition.
        SmallVector<Value, 4> interchangedIvs;
        if (!options.interchangeVector.empty())
          interchangedIvs = applyMapToValues(b, loc, invPermutationMap, ivs);
        else
          interchangedIvs.assign(ivs.begin(), ivs.end());
    // When an `interchangeVector` is present, it has been applied to the
    // loop ranges and the iterator types. Apply its inverse to the
    // resulting loop `ivs` to match the op definition.
    SmallVector<Value, 4> interchangedIvs;
    if (!options.interchangeVector.empty())
      interchangedIvs = applyMapToValues(b, loc, invPermutationMap, ivs);
    else
      interchangedIvs.assign(ivs.begin(), ivs.end());

        assert(op.getNumOutputTensors() == iterArgs.size() &&
               "num output tensors must match number of loop iter arguments");
    assert(op.getNumOutputTensors() == iterArgs.size() &&
           "num output tensors must match number of loop iter arguments");

        auto operands = llvm::to_vector<4>(op.getInputs());
        SmallVector<Value, 4> outputBuffers = op.getOutputBuffers();
        // TODO: thanks to simplifying assumption we do not need to worry about
        // order of output buffers and tensors: there is only ever one kind.
        assert(outputBuffers.empty() || iterArgs.empty());
        operands.append(outputBuffers.begin(), outputBuffers.end());
        operands.append(iterArgs.begin(), iterArgs.end());
        auto sizeBounds =
            applyMapToValues(b, loc, shapeSizesToLoopsMap, allShapeSizes);
        SmallVector<Value, 4> tiledOperands = makeTiledShapes(
            b, loc, op, operands, interchangedIvs, tileSizes, sizeBounds);
        auto nonShapedOperands = op.getAssumedNonShapedOperands();
        tiledOperands.append(nonShapedOperands.begin(),
                             nonShapedOperands.end());
    auto operands = llvm::to_vector<4>(op.getInputs());
    SmallVector<Value, 4> outputBuffers = op.getOutputBuffers();
    // TODO: thanks to simplifying assumption we do not need to worry about
    // order of output buffers and tensors: there is only ever one kind.
    assert(outputBuffers.empty() || iterArgs.empty());
    operands.append(outputBuffers.begin(), outputBuffers.end());
    operands.append(iterArgs.begin(), iterArgs.end());
    auto sizeBounds =
        applyMapToValues(b, loc, shapeSizesToLoopsMap, allShapeSizes);
    SmallVector<Value, 4> tiledOperands = makeTiledShapes(
        b, loc, op, operands, interchangedIvs, tileSizes, sizeBounds);
    auto nonShapedOperands = op.getAssumedNonShapedOperands();
    tiledOperands.append(nonShapedOperands.begin(), nonShapedOperands.end());

        // TODO: use an interface/adaptor to avoid leaking position in
        // `tiledOperands`.
        SmallVector<Type, 4> resultTensorTypes;
        for (OpOperand *opOperand : op.getOutputTensorsOpOperands())
          resultTensorTypes.push_back(
              tiledOperands[opOperand->getOperandNumber()].getType());
    // TODO: use an interface/adaptor to avoid leaking position in
    // `tiledOperands`.
    SmallVector<Type, 4> resultTensorTypes;
    for (OpOperand *opOperand : op.getOutputTensorsOpOperands())
      resultTensorTypes.push_back(
          tiledOperands[opOperand->getOperandNumber()].getType());

        res = op.clone(b, loc, resultTensorTypes, tiledOperands);
    res = op.clone(b, loc, resultTensorTypes, tiledOperands);

        // Insert a subtensor_insert for each output tensor.
        unsigned resultIdx = 0;
        for (OpOperand *opOperand : op.getOutputTensorsOpOperands()) {
          // TODO: use an interface/adaptor to avoid leaking position in
          // `tiledOperands`.
          Value outputTensor = tiledOperands[opOperand->getOperandNumber()];
          if (auto subtensor = outputTensor.getDefiningOp<SubTensorOp>()) {
            tensorResults.push_back(b.create<SubTensorInsertOp>(
                loc, subtensor.source().getType(), res->getResult(resultIdx),
                subtensor.source(), subtensor.offsets(), subtensor.sizes(),
                subtensor.strides(), subtensor.static_offsets(),
                subtensor.static_sizes(), subtensor.static_strides()));
          } else {
            tensorResults.push_back(res->getResult(resultIdx));
          }
          ++resultIdx;
        }
        return scf::ValueVector(tensorResults.begin(), tensorResults.end());
      },
      options.distribution);
    // Insert a subtensor_insert for each output tensor.
    unsigned resultIdx = 0;
    for (OpOperand *opOperand : op.getOutputTensorsOpOperands()) {
      // TODO: use an interface/adaptor to avoid leaking position in
      // `tiledOperands`.
      Value outputTensor = tiledOperands[opOperand->getOperandNumber()];
      if (auto subtensor = outputTensor.getDefiningOp<SubTensorOp>()) {
        tensorResults.push_back(b.create<SubTensorInsertOp>(
            loc, subtensor.source().getType(), res->getResult(resultIdx),
            subtensor.source(), subtensor.offsets(), subtensor.sizes(),
            subtensor.strides(), subtensor.static_offsets(),
            subtensor.static_sizes(), subtensor.static_strides()));
      } else {
        tensorResults.push_back(res->getResult(resultIdx));
      }
      ++resultIdx;
    }
    return scf::ValueVector(tensorResults.begin(), tensorResults.end());
  };
  GenerateLoopNest<LoopTy>::doit(b, op.getLoc(), loopRanges, op, iteratorTypes,
                                 tiledLoopBodyBuilder, options.distribution);

  // 3. Transform IndexOp results w.r.t. the tiling.
  transformIndexOps(b, res, ivs, loopIndexToRangeIndex);
@@ -16,7 +16,6 @@
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
#include "mlir/Dialect/SCF/EDSC/Builders.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
@@ -197,15 +196,14 @@ IntegerAttr getSmallestBoundingIndex(Value size) {
/// Specialization to build an scf "for" nest.
template <>
void GenerateLoopNest<scf::ForOp>::doit(
    ArrayRef<Range> loopRanges, LinalgOp linalgOp,
    OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
    ArrayRef<Attribute> iteratorTypes,
    function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
    function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
                                  ValueRange)>
        bodyBuilderFn,
    Optional<LinalgLoopDistributionOptions> distributionOptions) {
  auto iterArgInitValues = linalgOp.getOutputTensors();
  // Create procInfo so it dominates loops, if appropriate.
  OpBuilder &builder = edsc::ScopedContext::getBuilderRef();
  Location loc = edsc::ScopedContext::getLocation();

  SmallVector<ProcInfo, 4> procInfo;
  SmallVector<DistributionMethod, 0> distributionMethod;
  if (distributionOptions.hasValue()) {
@@ -219,13 +217,13 @@ void GenerateLoopNest<scf::ForOp>::doit(
    distributionMethod = distributionOptions->distributionMethod;
    if (distributionMethod.size() < parallelLoopRanges.size())
      parallelLoopRanges.resize(distributionMethod.size());
    procInfo = distributionOptions->procInfo(builder, loc, parallelLoopRanges);
    procInfo = distributionOptions->procInfo(b, loc, parallelLoopRanges);
  }

  SmallVector<Value, 4> lbs, ubs, steps;
  unpackRanges(loopRanges, lbs, ubs, steps);
  LoopNest loopNest =
      edsc::loopNestBuilder(lbs, ubs, steps, iterArgInitValues, bodyBuilderFn);
  LoopNest loopNest = mlir::scf::buildLoopNest(
      b, loc, lbs, ubs, steps, iterArgInitValues, bodyBuilderFn);

  if (!distributionOptions || loopNest.loops.empty())
    return;
@@ -246,9 +244,11 @@ void GenerateLoopNest<scf::ForOp>::doit(
/// Specialization to build affine "for" nest.
template <>
void GenerateLoopNest<AffineForOp>::doit(
    ArrayRef<Range> loopRanges, LinalgOp linalgOp,
    OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
    ArrayRef<Attribute> iteratorTypes,
    function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
    function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
                                  ValueRange)>
        bodyBuilderFn,
    Optional<LinalgLoopDistributionOptions>) {
  auto iterArgInitValues = linalgOp.getOutputTensors();
  assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");
@@ -264,38 +264,36 @@ void GenerateLoopNest<AffineForOp>::doit(
     constantSteps.push_back(op.getValue());
   }

-  auto bodyBuilderWithoutIterArgsFn = [&](ValueRange ivs) {
-    bodyBuilderFn(ivs, {});
-  };
-  edsc::affineLoopNestBuilder(lbs, ubs, constantSteps,
-                              bodyBuilderWithoutIterArgsFn);
+  mlir::buildAffineLoopNest(b, loc, lbs, ubs, constantSteps,
+                            [&](OpBuilder &b, Location loc, ValueRange ivs) {
+                              bodyBuilderFn(b, loc, ivs, {});
+                            });
 }

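The affine specialization follows the same pattern: `edsc::affineLoopNestBuilder` is replaced by `mlir::buildAffineLoopNest`, whose body callback also receives the nested builder and location. A small sketch, assuming the `ArrayRef<int64_t>` step overload; names are illustrative:

// Sketch: emit an affine.for nest with constant steps; the callback builds
// the body with the nested builder. `emitAffineNest` is a hypothetical name.
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/IR/Builders.h"

using namespace mlir;

static void emitAffineNest(OpBuilder &b, Location loc, ValueRange lbs,
                           ValueRange ubs, ArrayRef<int64_t> constantSteps) {
  buildAffineLoopNest(b, loc, lbs, ubs, constantSteps,
                      [&](OpBuilder &nested, Location nestedLoc,
                          ValueRange ivs) {
                        // Body IR goes here, e.g. loads/stores indexed by ivs.
                      });
}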
 /// Specialization to build an linalg.tiled_loop
 template <>
 void GenerateLoopNest<TiledLoopOp>::doit(
-    ArrayRef<Range> loopRanges, LinalgOp linalgOp,
+    OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
     ArrayRef<Attribute> iteratorTypes,
-    function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
+    function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
+                                  ValueRange)>
+        bodyBuilderFn,
     Optional<LinalgLoopDistributionOptions>) {
-  OpBuilder &builder = edsc::ScopedContext::getBuilderRef();
-  Location loc = edsc::ScopedContext::getLocation();
   SmallVector<ProcInfo, 2> procInfo;

   SmallVector<Value, 4> lbs, ubs, steps;
   unpackRanges(loopRanges, lbs, ubs, steps);

   auto wrappedBuilderFn = [&](OpBuilder &nestedBuilder, Location nestedLoc,
                               ValueRange ivs, ValueRange inputs,
                               ValueRange outputs) {
-    ScopedContext context(nestedBuilder, nestedLoc);
-    scf::ValueVector results = bodyBuilderFn(ivs, linalgOp.getOutputTensors());
+    scf::ValueVector results = bodyBuilderFn(nestedBuilder, nestedLoc, ivs,
+                                             linalgOp.getOutputTensors());
     nestedBuilder.create<linalg::YieldOp>(nestedLoc, results);
   };

-  auto tiledLoop = builder.create<TiledLoopOp>(
+  auto tiledLoop = b.create<TiledLoopOp>(
       loc, lbs, ubs, steps, linalgOp.getInputs(), linalgOp.getOutputs(),
-      builder.getArrayAttr(iteratorTypes), wrappedBuilderFn);
+      b.getArrayAttr(iteratorTypes), wrappedBuilderFn);

   // Replace inputs/outputs with the corresponding region args.
   auto isInsideTiledLoop = [&](OpOperand &operand) {
@@ -310,9 +308,9 @@ void GenerateLoopNest<TiledLoopOp>::doit(
 }

 /// Update the `lb`, `ub` and `step` to get per processor `lb`, `ub` and `step`.
-void updateBoundsForCyclicDistribution(OpBuilder &builder, Location loc,
-                                       Value procId, Value nprocs, Value &lb,
-                                       Value &ub, Value &step) {
+void updateBoundsForCyclicDistribution(OpBuilder &b, Location loc, Value procId,
+                                       Value nprocs, Value &lb, Value &ub,
+                                       Value &step) {
   using edsc::op::operator+;
   using edsc::op::operator*;
   lb = lb + (procId * step);
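`updateBoundsForCyclicDistribution` still computes the per-processor bounds with the Standard-dialect EDSC operator overloads (only the SCF EDSC layer is dropped here): `lb` becomes `lb + procId * step` and `step` becomes `nprocs * step`. For reference, a sketch of the same computation written with explicit builder calls; this is not what the patch does, only the EDSC-free equivalent under the Standard-dialect ops of this revision:

// Sketch (not in the patch): per-processor cyclic distribution with
// explicit ops instead of EDSC operator overloads:
//   lb   <- lb + procId * step
//   step <- nprocs * step
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/Builders.h"

using namespace mlir;

static void updateBoundsCyclic(OpBuilder &b, Location loc, Value procId,
                               Value nprocs, Value &lb, Value &step) {
  Value scaled = b.create<MulIOp>(loc, procId, step);
  lb = b.create<AddIOp>(loc, lb, scaled);
  step = b.create<MulIOp>(loc, nprocs, step);
}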
@@ -329,20 +327,22 @@ void updateBoundsForCyclicDistribution(OpBuilder &builder, Location loc,
 // TODO: this function can be made iterative instead. However, it
 // will have at most as many recursive calls as nested loops, which rarely
 // exceeds 10.
-static void
-generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
-                         ArrayRef<Attribute> iteratorTypes,
-                         function_ref<void(ValueRange)> bodyBuilderFn,
-                         SmallVectorImpl<Value> &ivStorage,
-                         ArrayRef<DistributionMethod> distributionMethod = {}) {
+static void generateParallelLoopNest(
+    OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs,
+    ValueRange steps, ArrayRef<Attribute> iteratorTypes,
+    function_ref<void(OpBuilder &, Location, ValueRange)> bodyBuilderFn,
+    SmallVectorImpl<Value> &ivStorage,
+    ArrayRef<DistributionMethod> distributionMethod = {}) {
   assert(lbs.size() == ubs.size());
   assert(lbs.size() == steps.size());
   assert(lbs.size() == iteratorTypes.size());

   // If there are no (more) loops to be generated, generate the body and be
   // done with it.
-  if (iteratorTypes.empty())
-    return bodyBuilderFn(ivStorage);
+  if (iteratorTypes.empty()) {
+    bodyBuilderFn(b, loc, ivStorage);
+    return;
+  }

   // Find the outermost parallel loops and drop their types from the list.
   unsigned nLoops = iteratorTypes.size();
@@ -353,27 +353,29 @@ generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
   // recurse. Note that we wouldn't have dropped anything from `iteratorTypes`
   // in this case.
   if (nOuterPar == 0) {
-    edsc::loopNestBuilder(lbs[0], ubs[0], steps[0], [&](Value iv) {
-      ivStorage.push_back(iv);
-      generateParallelLoopNest(lbs.drop_front(), ubs.drop_front(),
-                               steps.drop_front(), iteratorTypes.drop_front(),
-                               bodyBuilderFn, ivStorage, distributionMethod);
-    });
+    LoopNest singleLoop = buildLoopNest(
+        b, loc, lbs.take_front(), ubs.take_front(), steps.take_front(),
+        [&](OpBuilder &b, Location loc, ValueRange ivs) {
+          ivStorage.append(ivs.begin(), ivs.end());
+          generateParallelLoopNest(b, loc, lbs.drop_front(), ubs.drop_front(),
+                                   steps.drop_front(),
+                                   iteratorTypes.drop_front(), bodyBuilderFn,
+                                   ivStorage, distributionMethod);
+        });
     return;
   }
   if (distributionMethod.empty()) {
     // Generate a single parallel loop-nest operation for all outermost
     // parallel loops and recurse.
-    edsc::OperationBuilder<scf::ParallelOp>(
-        lbs.take_front(nOuterPar), ubs.take_front(nOuterPar),
+    b.create<scf::ParallelOp>(
+        loc, lbs.take_front(nOuterPar), ubs.take_front(nOuterPar),
         steps.take_front(nOuterPar),
         [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
-          edsc::ScopedContext context(nestedBuilder, nestedLoc);
           ivStorage.append(localIvs.begin(), localIvs.end());
           generateParallelLoopNest(
-              lbs.drop_front(nOuterPar), ubs.drop_front(nOuterPar),
-              steps.drop_front(nOuterPar), iteratorTypes.drop_front(nOuterPar),
-              bodyBuilderFn, ivStorage,
+              nestedBuilder, nestedLoc, lbs.drop_front(nOuterPar),
+              ubs.drop_front(nOuterPar), steps.drop_front(nOuterPar),
+              iteratorTypes.drop_front(nOuterPar), bodyBuilderFn, ivStorage,
               (distributionMethod.size() < nOuterPar)
                   ? ArrayRef<DistributionMethod>()
                   : distributionMethod.drop_front(nOuterPar));
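The outermost parallel loops are now created directly with `b.create<scf::ParallelOp>`; the body callback receives the nested builder and location, so no `edsc::ScopedContext` has to be installed before recursing. A reduced sketch of that pattern, with illustrative names:

// Sketch: create the outermost scf.parallel directly; inner loop generation
// would recurse from inside the callback, as the patch does.
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/IR/Builders.h"

using namespace mlir;

static void emitOuterParallel(OpBuilder &b, Location loc, ValueRange lbs,
                              ValueRange ubs, ValueRange steps) {
  b.create<scf::ParallelOp>(
      loc, lbs, ubs, steps,
      [&](OpBuilder &nested, Location nestedLoc, ValueRange ivs) {
        // Recurse into the remaining (non-parallel) dimensions here,
        // building with `nested` at `nestedLoc`.
      });
}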
@@ -394,15 +396,14 @@ generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
   case DistributionMethod::Cyclic: {
     // Generate a single parallel loop-nest operation for all outermost
     // parallel loops and recurse.
-    edsc::OperationBuilder<scf::ParallelOp>(
-        lbs.take_front(numProcessed), ubs.take_front(numProcessed),
+    b.create<scf::ParallelOp>(
+        loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
         steps.take_front(numProcessed),
         [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
-          edsc::ScopedContext context(nestedBuilder, nestedLoc);
           ivStorage.append(localIvs.begin(), localIvs.end());
           generateParallelLoopNest(
-              lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
-              steps.drop_front(numProcessed),
+              nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
+              ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
               iteratorTypes.drop_front(numProcessed), bodyBuilderFn, ivStorage,
               (distributionMethod.size() < numProcessed)
                   ? ArrayRef<DistributionMethod>()
@@ -418,12 +419,13 @@ generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
     for (unsigned i = 1; i < numProcessed; ++i)
       cond = cond && slt(lbs[i], ubs[i]);
     ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
-    edsc::conditionBuilder(cond, [&]() {
+    b.create<scf::IfOp>(loc, cond, [&](OpBuilder &b, Location loc) {
       generateParallelLoopNest(
-          lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
+          b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
           steps.drop_front(numProcessed),
           iteratorTypes.drop_front(numProcessed), bodyBuilderFn, ivStorage,
           distributionMethod.drop_front(numProcessed));
+      b.create<scf::YieldOp>(loc, ValueRange{});
     });
     return;
   }
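The guarded tail case swaps `edsc::conditionBuilder` for a directly built `scf.if`; note that the then-region callback emits its own `scf.yield`, matching the explicit yield added above. A reduced sketch, with an illustrative helper name:

// Sketch: zero-result scf.if built with an explicit builder; the callback
// creates the terminator itself, hence the explicit scf.yield.
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/IR/Builders.h"

using namespace mlir;

static void emitGuardedRegion(OpBuilder &b, Location loc, Value cond) {
  b.create<scf::IfOp>(loc, cond, [&](OpBuilder &then, Location thenLoc) {
    // ... guarded IR, e.g. the sequential remainder loops ...
    then.create<scf::YieldOp>(thenLoc, ValueRange{});
  });
}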
@@ -432,7 +434,7 @@ generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
   // with inner loop generation.
   ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
   generateParallelLoopNest(
-      lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
+      b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
       steps.drop_front(numProcessed), iteratorTypes.drop_front(numProcessed),
       bodyBuilderFn, ivStorage, distributionMethod.drop_front(numProcessed));
   return;
@@ -442,9 +444,11 @@ generateParallelLoopNest(ValueRange lbs, ValueRange ubs, ValueRange steps,
 /// Specialization for generating a mix of parallel and sequential scf loops.
 template <>
 void GenerateLoopNest<scf::ParallelOp>::doit(
-    ArrayRef<Range> loopRanges, LinalgOp linalgOp,
+    OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
     ArrayRef<Attribute> iteratorTypes,
-    function_ref<scf::ValueVector(ValueRange, ValueRange)> bodyBuilderFn,
+    function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
+                                  ValueRange)>
+        bodyBuilderFn,
     Optional<LinalgLoopDistributionOptions> distributionOptions) {
   auto iterArgInitValues = linalgOp.getOutputTensors();
   assert(iterArgInitValues.empty() && "unexpected ParallelOp init values");
@@ -466,7 +470,7 @@ void GenerateLoopNest<scf::ParallelOp>::doit(
   SmallVector<DistributionMethod, 0> distributionMethod;
   if (distributionOptions) {
     auto &options = distributionOptions.getValue();
-    OpBuilder &builder = edsc::ScopedContext::getBuilderRef();
+    OpBuilder &b = edsc::ScopedContext::getBuilderRef();
     Location loc = edsc::ScopedContext::getLocation();
     distributionMethod.assign(distributionOptions->distributionMethod.begin(),
                               distributionOptions->distributionMethod.end());
@@ -478,14 +482,14 @@ void GenerateLoopNest<scf::ParallelOp>::doit(
     if (distributionMethod.size() < parallelLoopRanges.size())
       parallelLoopRanges.resize(distributionMethod.size());
     SmallVector<ProcInfo, 2> procInfo =
-        options.procInfo(builder, loc, parallelLoopRanges);
+        options.procInfo(b, loc, parallelLoopRanges);
     unsigned index = 0;
     for (auto iteratorType : enumerate(iteratorTypes)) {
       if (index >= procInfo.size())
         break;
       if (isParallelIteratorType(iteratorType.value())) {
         unsigned i = iteratorType.index();
-        updateBoundsForCyclicDistribution(builder, loc, procInfo[index].procId,
+        updateBoundsForCyclicDistribution(b, loc, procInfo[index].procId,
                                           procInfo[index].nprocs, lbsStorage[i],
                                           ubsStorage[i], stepsStorage[i]);
         index++;
@@ -493,17 +497,17 @@ void GenerateLoopNest<scf::ParallelOp>::doit(
     }
   }
   ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage);
-  auto bodyBuilderWithoutIterArgsFn = [&](ValueRange ivs) {
-    bodyBuilderFn(ivs, {});
-  };
-  generateParallelLoopNest(lbs, ubs, steps, iteratorTypes,
-                           bodyBuilderWithoutIterArgsFn, ivs,
-                           distributionMethod);
+  generateParallelLoopNest(
+      b, loc, lbs, ubs, steps, iteratorTypes,
+      [&](OpBuilder &b, Location loc, ValueRange ivs) {
+        bodyBuilderFn(b, loc, ivs, {});
+      },
+      ivs, distributionMethod);

   assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
 }

-SmallVector<Value, 4> makeTiledShapes(OpBuilder &builder, Location loc,
+SmallVector<Value, 4> makeTiledShapes(OpBuilder &b, Location loc,
                                       LinalgOp linalgOp,
                                       ArrayRef<Value> tiledOperands,
                                       ValueRange ivs, ValueRange tileSizes,
@@ -529,7 +533,7 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &builder, Location loc,
     LLVM_DEBUG(llvm::dbgs() << "size: " << subShapeSizes.back() << "\n");
   }

-  MLIRContext *context = builder.getContext();
+  MLIRContext *context = b.getContext();
   SmallVector<Value, 4> tiledShapes;
   tiledShapes.reserve(tiledOperands.size());
   for (auto en : llvm::enumerate(tiledOperands)) {
@@ -555,10 +559,10 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &builder, Location loc,
     for (unsigned r = 0; r < rank; ++r) {
       LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for dim#" << r);
       if (!isTiled(map.getSubMap({r}), tileSizes)) {
-        offsets.push_back(builder.getIndexAttr(0));
+        offsets.push_back(b.getIndexAttr(0));
         Value dim = memref_dim(shapedOp, r).value;
         sizes.push_back(dim);
-        strides.push_back(builder.getIndexAttr(1));
+        strides.push_back(b.getIndexAttr(1));
         LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
         continue;
       }
@@ -568,10 +572,9 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &builder, Location loc,
       // (i.e. the op does not subsample, stepping occurs in the loop).
       auto m = map.getSubMap({r});
       LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: submap: " << map << "\n");
-      auto offset = applyMapToValues(builder, loc, m, lbs).front();
+      auto offset = applyMapToValues(b, loc, m, lbs).front();
       offsets.push_back(offset);
-      auto closedIntSize =
-          applyMapToValues(builder, loc, m, subShapeSizes).front();
+      auto closedIntSize = applyMapToValues(b, loc, m, subShapeSizes).front();
       // Resulting size needs to be made half open interval again.
       auto size = closedIntSize + std_constant_index(1);
       LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: raw size: " << size << "\n");
@@ -589,27 +592,29 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &builder, Location loc,
         AffineExpr dim0, dim1, dim2;
         bindDims(context, dim0, dim1, dim2);
         // Compute min(size, dim - offset) to avoid out-of-bounds accesses.
-        auto minMap = AffineMap::get(
-            /*dimCount=*/3, /*symbolCount=*/0, {dim0, dim1 - dim2}, context);
-        Value d = memref_dim(shapedOp, r);
+        AffineMap minMap =
+            AffineMap::inferFromExprList(
+                ArrayRef<ArrayRef<AffineExpr>>{{dim0, dim1 - dim2}})
+                .front();
+        Value d = b.create<memref::DimOp>(loc, shapedOp, r);
         SmallVector<Value, 4> operands{size, d, offset};
         fullyComposeAffineMapAndOperands(&minMap, &operands);
-        size = affine_min(builder.getIndexType(), minMap, operands);
+        size = b.create<AffineMinOp>(loc, b.getIndexType(), minMap, operands);
       }

       sizes.push_back(size);
       LLVM_DEBUG(llvm::dbgs()
                  << "makeTiledShapes: new offset: " << offset << "\n");
       LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: new size: " << size << "\n");
-      strides.push_back(builder.getIndexAttr(1));
+      strides.push_back(b.getIndexAttr(1));
     }

     if (shapedType.isa<MemRefType>())
-      tiledShapes.push_back(builder.create<memref::SubViewOp>(
-          loc, shapedOp, offsets, sizes, strides));
+      tiledShapes.push_back(
+          b.create<memref::SubViewOp>(loc, shapedOp, offsets, sizes, strides));
     else
       tiledShapes.push_back(
-          builder.create<SubTensorOp>(loc, shapedOp, offsets, sizes, strides));
+          b.create<SubTensorOp>(loc, shapedOp, offsets, sizes, strides));
   }

   return tiledShapes;
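The tile-size clamp `min(size, dim - offset)` is now materialized with an explicit `AffineMinOp` (and `memref::DimOp`) instead of the `affine_min`/`memref_dim` EDSC intrinsics. A sketch of the same construction, mirroring the added lines above; the helper name is illustrative:

// Sketch: clamp a tile size to min(size, dim(shapedOp, r) - offset) with an
// explicit builder, as the patch now does inside makeTiledShapes.
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
#include "llvm/ADT/SmallVector.h"

using namespace mlir;

static Value clampTileSize(OpBuilder &b, Location loc, Value size,
                           Value shapedOp, unsigned r, Value offset) {
  AffineExpr dim0, dim1, dim2;
  bindDims(b.getContext(), dim0, dim1, dim2);
  // The map {dim0, dim1 - dim2} is applied to (size, d, offset).
  AffineMap minMap = AffineMap::inferFromExprList(
                         ArrayRef<ArrayRef<AffineExpr>>{{dim0, dim1 - dim2}})
                         .front();
  Value d = b.create<memref::DimOp>(loc, shapedOp, r);
  SmallVector<Value, 4> operands{size, d, offset};
  fullyComposeAffineMapAndOperands(&minMap, &operands);
  return b.create<AffineMinOp>(loc, b.getIndexType(), minMap, operands);
}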
@@ -1,6 +1,5 @@
 add_mlir_dialect_library(MLIRSCF
   SCF.cpp
-  EDSC/Builders.cpp

   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/LoopOps
@@ -9,7 +8,6 @@ add_mlir_dialect_library(MLIRSCF
   MLIRSCFOpsIncGen

   LINK_LIBS PUBLIC
-  MLIREDSC
   MLIRIR
   MLIRLoopLikeInterface
   MLIRMemRef
@@ -1,135 +0,0 @@
-//===- Builders.cpp - MLIR Declarative Builder Classes --------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/SCF/EDSC/Builders.h"
-#include "mlir/Dialect/SCF/SCF.h"
-#include "mlir/IR/AffineExpr.h"
-#include "mlir/IR/AffineMap.h"
-
-using namespace mlir;
-using namespace mlir::edsc;
-
-mlir::scf::LoopNest
-mlir::edsc::loopNestBuilder(ValueRange lbs, ValueRange ubs, ValueRange steps,
-                            function_ref<void(ValueRange)> fun) {
-  // Delegates actual construction to scf::buildLoopNest by wrapping `fun` into
-  // the expected function interface.
-  assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
-  return mlir::scf::buildLoopNest(
-      ScopedContext::getBuilderRef(), ScopedContext::getLocation(), lbs, ubs,
-      steps, [&](OpBuilder &builder, Location loc, ValueRange ivs) {
-        ScopedContext context(builder, loc);
-        if (fun)
-          fun(ivs);
-      });
-}
-
-mlir::scf::LoopNest
-mlir::edsc::loopNestBuilder(Value lb, Value ub, Value step,
-                            function_ref<void(Value)> fun) {
-  // Delegates to the ValueRange-based version by wrapping the lambda.
-  auto wrapper = [&](ValueRange ivs) {
-    assert(ivs.size() == 1);
-    if (fun)
-      fun(ivs[0]);
-  };
-  return loopNestBuilder(ValueRange(lb), ValueRange(ub), ValueRange(step),
-                         wrapper);
-}
-
-mlir::scf::LoopNest mlir::edsc::loopNestBuilder(
-    Value lb, Value ub, Value step, ValueRange iterArgInitValues,
-    function_ref<scf::ValueVector(Value, ValueRange)> fun) {
-  // Delegates actual construction to scf::buildLoopNest by wrapping `fun` into
-  // the expected function interface.
-  assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
-  return mlir::scf::buildLoopNest(
-      ScopedContext::getBuilderRef(), ScopedContext::getLocation(), lb, ub,
-      step, iterArgInitValues,
-      [&](OpBuilder &builder, Location loc, ValueRange ivs, ValueRange args) {
-        assert(ivs.size() == 1 && "expected one induction variable");
-        ScopedContext context(builder, loc);
-        if (fun)
-          return fun(ivs[0], args);
-        return scf::ValueVector(iterArgInitValues.begin(),
-                                iterArgInitValues.end());
-      });
-}
-
-mlir::scf::LoopNest mlir::edsc::loopNestBuilder(
-    ValueRange lbs, ValueRange ubs, ValueRange steps,
-    ValueRange iterArgInitValues,
-    function_ref<scf::ValueVector(ValueRange, ValueRange)> fun) {
-  // Delegates actual construction to scf::buildLoopNest by wrapping `fun` into
-  // the expected function interface.
-  assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
-  return mlir::scf::buildLoopNest(
-      ScopedContext::getBuilderRef(), ScopedContext::getLocation(), lbs, ubs,
-      steps, iterArgInitValues,
-      [&](OpBuilder &builder, Location loc, ValueRange ivs, ValueRange args) {
-        ScopedContext context(builder, loc);
-        if (fun)
-          return fun(ivs, args);
-        return scf::ValueVector(iterArgInitValues.begin(),
-                                iterArgInitValues.end());
-      });
-}
-
-static std::function<void(OpBuilder &, Location)>
-wrapIfBody(function_ref<scf::ValueVector()> body, TypeRange expectedTypes) {
-  (void)expectedTypes;
-  return [=](OpBuilder &builder, Location loc) {
-    ScopedContext context(builder, loc);
-    scf::ValueVector returned = body();
-    assert(ValueRange(returned).getTypes() == expectedTypes &&
-           "'if' body builder returned values of unexpected type");
-    builder.create<scf::YieldOp>(loc, returned);
-  };
-}
-
-ValueRange
-mlir::edsc::conditionBuilder(TypeRange results, Value condition,
-                             function_ref<scf::ValueVector()> thenBody,
-                             function_ref<scf::ValueVector()> elseBody,
-                             scf::IfOp *ifOp) {
-  assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
-  assert(thenBody && "thenBody is mandatory");
-
-  auto newOp = ScopedContext::getBuilderRef().create<scf::IfOp>(
-      ScopedContext::getLocation(), results, condition,
-      wrapIfBody(thenBody, results), wrapIfBody(elseBody, results));
-  if (ifOp)
-    *ifOp = newOp;
-  return newOp.getResults();
-}
-
-static std::function<void(OpBuilder &, Location)>
-wrapZeroResultIfBody(function_ref<void()> body) {
-  return [=](OpBuilder &builder, Location loc) {
-    ScopedContext context(builder, loc);
-    body();
-    builder.create<scf::YieldOp>(loc);
-  };
-}
-
-ValueRange mlir::edsc::conditionBuilder(Value condition,
-                                        function_ref<void()> thenBody,
-                                        function_ref<void()> elseBody,
-                                        scf::IfOp *ifOp) {
-  assert(ScopedContext::getContext() && "EDSC ScopedContext not set up");
-  assert(thenBody && "thenBody is mandatory");
-
-  auto newOp = ScopedContext::getBuilderRef().create<scf::IfOp>(
-      ScopedContext::getLocation(), condition, wrapZeroResultIfBody(thenBody),
-      elseBody ? llvm::function_ref<void(OpBuilder &, Location)>(
-                     wrapZeroResultIfBody(elseBody))
-               : llvm::function_ref<void(OpBuilder &, Location)>(nullptr));
-  if (ifOp)
-    *ifOp = newOp;
-  return {};
-}
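The deleted helpers were thin adapters over `scf::buildLoopNest` and `scf::IfOp`; call sites now build these ops directly. As one example, a hedged sketch of the result-carrying `scf.if` pattern that stands in for the removed `conditionBuilder` overload with results (names are illustrative, not part of the patch):

// Sketch: an scf.if returning `resultTypes`, with both branches yielding
// values explicitly; the direct-builder equivalent of the removed
// result-carrying edsc::conditionBuilder.
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/IR/Builders.h"

using namespace mlir;

static ValueRange emitSelectLike(OpBuilder &b, Location loc,
                                 TypeRange resultTypes, Value cond,
                                 ValueRange thenVals, ValueRange elseVals) {
  auto ifOp = b.create<scf::IfOp>(
      loc, resultTypes, cond,
      [&](OpBuilder &nested, Location nestedLoc) {
        nested.create<scf::YieldOp>(nestedLoc, thenVals);
      },
      [&](OpBuilder &nested, Location nestedLoc) {
        nested.create<scf::YieldOp>(nestedLoc, elseVals);
      });
  return ifOp.getResults();
}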
@@ -24,18 +24,18 @@ func @matmul(%arg0: memref<?xi8>, %M: index, %N: index, %K: index) {
 // CHECK-SAME: [[M:arg[0-9]+]]: index
 // CHECK-SAME: [[N:arg[0-9]+]]: index
 // CHECK-SAME: [[K:arg[0-9]+]]: index
-// CHECK: %[[A:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
-// CHECK: %[[B:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
-// CHECK: %[[C:.*]] = memref.view %{{.*}}[{{.*}}] : memref<?xi8> to memref<?x?xf32>
-// CHECK: affine.for %{{.*}} = 0 to %{{.*}} {
-// CHECK: affine.for %{{.*}} = 0 to %{{.*}} {
-// CHECK: affine.for %{{.*}} = 0 to %{{.*}} {
-// CHECK-DAG: %[[a:.*]] = affine.load %[[A]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
-// CHECK-DAG: %[[b:.*]] = affine.load %[[B]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECK: %[[A:.*]] = memref.view %{{.*}} : memref<?xi8> to memref<?x?xf32>
+// CHECK: %[[B:.*]] = memref.view %{{.*}} : memref<?xi8> to memref<?x?xf32>
+// CHECK: %[[C:.*]] = memref.view %{{.*}} : memref<?xi8> to memref<?x?xf32>
+// CHECK: affine.for
+// CHECK: affine.for
+// CHECK: affine.for
+// CHECK-DAG: %[[a:.*]] = affine.load %[[A]]{{.*}} : memref<?x?xf32>
+// CHECK-DAG: %[[b:.*]] = affine.load %[[B]]{{.*}} : memref<?x?xf32>
 // CHECK-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32
-// CHECK-DAG: %[[c:.*]] = affine.load %[[C]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECK-DAG: %[[c:.*]] = affine.load %[[C]]{{.*}} : memref<?x?xf32>
 // CHECK-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32
-// CHECK: affine.store %[[res]], %[[C]][%{{.*}}, %{{.*}}] : memref<?x?xf32>
+// CHECK: affine.store %[[res]], %[[C]]{{.*}} : memref<?x?xf32>

 func @conv_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg2: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>) {
   linalg.conv(%arg0, %arg1, %arg2) {strides = [2]}: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>

@@ -49,12 +49,12 @@ func @conv_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1:
 // CHECK: %[[K:.*]] = memref.dim %arg0, %c2 : memref<?x?x?xf32, #[[$strided3D]]>
 // CHECK: %[[B:.*]] = memref.dim %arg1, %c0 : memref<?x?x?xf32, #[[$strided3D]]>
 // CHECK: %[[X0:.*]] = memref.dim %arg2, %c1 : memref<?x?x?xf32, #[[$strided3D]]>
-// CHECK: affine.for %{{.*}} = 0 to %[[B]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[X0]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[K]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[Q]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[Z0]] {
-// CHECK: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]](%{{.*}}, %{{.*}})
+// CHECK: affine.for {{.*}}0 to %[[B]] {
+// CHECK: affine.for {{.*}}0 to %[[X0]] {
+// CHECK: affine.for {{.*}}0 to %[[K]] {
+// CHECK: affine.for {{.*}}0 to %[[Q]] {
+// CHECK: affine.for {{.*}}0 to %[[Z0]] {
+// CHECK: %[[SUM:.*]] = affine.apply #[[$stride2Dilation1]]{{.*}}
 // No padding needed here; only affine loads.
 // CHECK-NEXT: affine.load
 // CHECK-NEXT: affine.load

@@ -78,26 +78,26 @@ func @conv_padding(%arg0: memref<?x?x?x?xf32>,
 // CHECK: %[[B:.*]] = memref.dim %arg1, %c0 : memref<?x?x?x?xf32>
 // CHECK: %[[X0:.*]] = memref.dim %arg2, %c1 : memref<?x?x?x?xf32>
 // CHECK: %[[X1:.*]] = memref.dim %arg2, %c2 : memref<?x?x?x?xf32>
-// CHECK: affine.for %{{.*}} = 0 to %[[B]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[X0]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[X1]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[K]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[Q]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[Z0]] {
-// CHECK: affine.for %{{.*}} = 0 to %[[Z1]] {
-// CHECK: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
-// CHECK: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
+// CHECK: affine.for {{.*}}0 to %[[B]] {
+// CHECK: affine.for {{.*}}0 to %[[X0]] {
+// CHECK: affine.for {{.*}}0 to %[[X1]] {
+// CHECK: affine.for {{.*}}0 to %[[K]] {
+// CHECK: affine.for {{.*}}0 to %[[Q]] {
+// CHECK: affine.for {{.*}}0 to %[[Z0]] {
+// CHECK: affine.for {{.*}}0 to %[[Z1]] {
+// CHECK: %[[SUM0:.*]] = affine.apply #{{.*}}
+// CHECK: %[[SUM1:.*]] = affine.apply #{{.*}}
 // CHECK: %[[IDX:.*]] = affine.max #[[$clampMinMap]](%[[SUM0]])
 // CHECK: %[[IDY:.*]] = affine.max #[[$clampMinMap]](%[[SUM1]])
 // Padded conv involves an affine.max in the memory access and this is not
 // allowed by affine.load. Use memref.load in such cases.
-// CHECK: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref<?x?x?x?xf32>
-// CHECK: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32
-// CHECK: %{{.*}} = affine.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
-// CHECK: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32
-// CHECK: %{{.*}} = affine.load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
-// CHECK: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
-// CHECK: affine.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
+// CHECK: memref.load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref<?x?x?x?xf32>
+// CHECK: select {{.*}} : f32
+// CHECK: affine.load
+// CHECK: mulf {{.*}} : f32
+// CHECK: affine.load
+// CHECK: addf {{.*}} : f32
+// CHECK: affine.store

 //----------------------------------------------------------------------------//
 // Named ops to loops.

@@ -115,10 +115,10 @@ func @named_batch_matmul(%A: memref<?x?x?xf32>, %B: memref<?x?x?xf32>, %C: memre
 // CHECK: %[[M:.*]] = memref.dim %[[mA]], %c1 : memref<?x?x?xf32>
 // CHECK: %[[K:.*]] = memref.dim %[[mA]], %c2 : memref<?x?x?xf32>
 // CHECK: %[[N:.*]] = memref.dim %[[mB]], %c2 : memref<?x?x?xf32>
-// CHECK: affine.for %[[b:.*]] = 0 to %[[B]] {
-// CHECK: affine.for %[[m:.*]] = 0 to %[[M]] {
-// CHECK: affine.for %[[n:.*]] = 0 to %[[N]] {
-// CHECK: affine.for %[[k:.*]] = 0 to %[[K]] {
+// CHECK: affine.for %[[b:.*]] = {{.*}}0 to %[[B]] {
+// CHECK: affine.for %[[m:.*]] = {{.*}}0 to %[[M]] {
+// CHECK: affine.for %[[n:.*]] = {{.*}}0 to %[[N]] {
+// CHECK: affine.for %[[k:.*]] = {{.*}}0 to %[[K]] {
 // CHECK: %[[va:.*]] = affine.load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref<?x?x?xf32>
 // CHECK: %[[vb:.*]] = affine.load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref<?x?x?xf32>
 // CHECK: %[[vc:.*]] = affine.load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref<?x?x?xf32>
File diff suppressed because it is too large