forked from OSchip/llvm-project
Remove OpPointer, cleaning up a ton of code. This also moves Ops to using inherited constructors, which is cleaner and means you can now use DimOp() to get a null op, instead of having to use Instruction::getNull<DimOp>(). This removes another 200 lines of code.

PiperOrigin-RevId: 240068113
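To make the effect concrete, here is a minimal usage sketch (not part of the commit; DimOp, dyn_cast and the Value* conversion are the MLIR names this revision uses, while `inst` is assumed to be some Instruction*):

    DimOp dim = inst->dyn_cast<DimOp>();   // a failed cast now yields a null DimOp
    DimOp null;                            // DimOp() is the null op; no getNull<DimOp>() needed
    if (dim != null) {                     // ops are pointer-like: bool tests and ==/!= work
      Value *result = dim;                 // single-result ops still convert to Value*
      (void)result;
    }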
parent 7ab37aaf02
commit d9b5bc8f55
@@ -59,6 +59,8 @@ public:
class AffineApplyOp : public Op<AffineApplyOp, OpTrait::VariadicOperands,
OpTrait::OneResult, OpTrait::HasNoSideEffect> {
public:
using Op::Op;

/// Builds an affine apply op with the specified map and operands.
static void build(Builder *builder, OperationState *result, AffineMap map,
ArrayRef<Value *> operands);
@@ -84,10 +86,6 @@ public:

static void getCanonicalizationPatterns(OwningRewritePatternList &results,
MLIRContext *context);

private:
friend class Instruction;
explicit AffineApplyOp(Instruction *state) : Op(state) {}
};

/// The "for" instruction represents an affine loop nest, defining an SSA value
@@ -117,6 +115,8 @@ private:
class AffineForOp
: public Op<AffineForOp, OpTrait::VariadicOperands, OpTrait::ZeroResult> {
public:
using Op::Op;

// Hooks to customize behavior of this op.
static void build(Builder *builder, OperationState *result,
ArrayRef<Value *> lbOperands, AffineMap lbMap,
@@ -225,10 +225,6 @@ public:
/// Returns true if both the lower and upper bound have the same operand lists
/// (same operands in the same order).
bool matchingBoundOperandList();

private:
friend class Instruction;
explicit AffineForOp(Instruction *state) : Op(state) {}
};

/// Returns if the provided value is the induction variable of a AffineForOp.
@@ -236,21 +232,20 @@ bool isForInductionVar(Value *val);

/// Returns the loop parent of an induction variable. If the provided value is
/// not an induction variable, then return nullptr.
OpPointer<AffineForOp> getForInductionVarOwner(Value *val);
AffineForOp getForInductionVarOwner(Value *val);

/// Extracts the induction variables from a list of AffineForOps and places them
/// in the output argument `ivs`.
void extractForInductionVars(ArrayRef<OpPointer<AffineForOp>> forInsts,
void extractForInductionVars(ArrayRef<AffineForOp> forInsts,
SmallVectorImpl<Value *> *ivs);


/// AffineBound represents a lower or upper bound in the for instruction.
/// This class does not own the underlying operands. Instead, it refers
/// to the operands stored in the AffineForOp. Its life span should not exceed
/// that of the for instruction it refers to.
class AffineBound {
public:
OpPointer<AffineForOp> getAffineForOp() { return inst; }
AffineForOp getAffineForOp() { return inst; }
AffineMap getMap() { return map; }

/// Returns an AffineValueMap representing this bound.
@@ -274,15 +269,14 @@ public:

private:
// 'for' instruction that contains this bound.
OpPointer<AffineForOp> inst;
AffineForOp inst;
// Start and end positions of this affine bound operands in the list of
// the containing 'for' instruction operands.
unsigned opStart, opEnd;
// Affine map for this bound.
AffineMap map;

AffineBound(OpPointer<AffineForOp> inst, unsigned opStart, unsigned opEnd,
AffineMap map)
AffineBound(AffineForOp inst, unsigned opStart, unsigned opEnd, AffineMap map)
: inst(inst), opStart(opStart), opEnd(opEnd), map(map) {}

friend class AffineForOp;
@@ -309,6 +303,8 @@ private:
class AffineIfOp
: public Op<AffineIfOp, OpTrait::VariadicOperands, OpTrait::ZeroResult> {
public:
using Op::Op;

// Hooks to customize behavior of this op.
static void build(Builder *builder, OperationState *result,
IntegerSet condition, ArrayRef<Value *> conditionOperands);
@@ -328,10 +324,6 @@ public:
bool verify();
static bool parse(OpAsmParser *parser, OperationState *result);
void print(OpAsmPrinter *p);

private:
friend class Instruction;
explicit AffineIfOp(Instruction *state) : Op(state) {}
};

/// Returns true if the given Value can be used as a dimension id.
@@ -349,9 +341,9 @@ void canonicalizeMapAndOperands(AffineMap *map,
/// Returns a composed AffineApplyOp by composing `map` and `operands` with
/// other AffineApplyOps supplying those operands. The operands of the resulting
/// AffineApplyOp do not change the length of AffineApplyOp chains.
OpPointer<AffineApplyOp>
makeComposedAffineApply(FuncBuilder *b, Location loc, AffineMap map,
llvm::ArrayRef<Value *> operands);
AffineApplyOp makeComposedAffineApply(FuncBuilder *b, Location loc,
AffineMap map,
llvm::ArrayRef<Value *> operands);

/// Given an affine map `map` and its input `operands`, this method composes
/// into `map`, maps of AffineApplyOps whose results are the values in

@@ -36,7 +36,6 @@ class AffineForOp;
class AffineValueMap;
class FlatAffineConstraints;
class Instruction;
template <typename OpType> class OpPointer;
class Value;

/// Returns in `affineApplyOps`, the sequence of those AffineApplyOp
@@ -52,7 +51,7 @@ void getReachableAffineApplyOps(
/// operands are added as symbols in the system. Returns failure for the yet
/// unimplemented cases.
// TODO(bondhugula): handle non-unit strides.
LogicalResult getIndexSet(llvm::MutableArrayRef<OpPointer<AffineForOp>> forOps,
LogicalResult getIndexSet(llvm::MutableArrayRef<AffineForOp> forOps,
FlatAffineConstraints *domain);

/// Encapsulates a memref load or store access information.

@@ -131,7 +131,7 @@ public:
AffineValueMap(AffineMap map, ArrayRef<Value *> operands,
ArrayRef<Value *> results = llvm::None);

explicit AffineValueMap(OpPointer<AffineApplyOp> applyOp);
explicit AffineValueMap(AffineApplyOp applyOp);
explicit AffineValueMap(AffineBound bound);

~AffineValueMap();
@@ -385,7 +385,7 @@ public:
/// instruction are added as trailing identifiers (either dimensional or
/// symbolic depending on whether the operand is a valid ML Function symbol).
// TODO(bondhugula): add support for non-unit strides.
LogicalResult addAffineForOpDomain(OpPointer<AffineForOp> forOp);
LogicalResult addAffineForOpDomain(AffineForOp forOp);

/// Adds a lower or an upper bound for the identifier at the specified
/// position with constraints being drawn from the specified bound map and

@@ -31,7 +31,6 @@ namespace mlir {
class AffineExpr;
class AffineForOp;
class AffineMap;
template <typename T> class OpPointer;
class Instruction;
class MemRefType;
class Value;
@@ -44,18 +43,18 @@ class Value;
/// bounds before computing the trip count expressions
// TODO(mlir-team): this should be moved into 'Transforms/' and be replaced by a
// pure analysis method relying on FlatAffineConstraints
void buildTripCountMapAndOperands(OpPointer<AffineForOp> forOp, AffineMap *map,
void buildTripCountMapAndOperands(AffineForOp forOp, AffineMap *map,
SmallVectorImpl<Value *> *operands);

/// Returns the trip count of the loop if it's a constant, None otherwise. This
/// uses affine expression analysis and is able to determine constant trip count
/// in non-trivial cases.
llvm::Optional<uint64_t> getConstantTripCount(OpPointer<AffineForOp> forOp);
llvm::Optional<uint64_t> getConstantTripCount(AffineForOp forOp);
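As a caller-side sketch of the new by-value signatures (illustrative only; `forOp` is assumed to be an AffineForOp obtained elsewhere, and promoteIfSingleIteration is declared further down in this diff):

    if (llvm::Optional<uint64_t> tripCount = getConstantTripCount(forOp)) {
      // The loop handle is passed by value; a known trip count of 1 makes the
      // loop a candidate for promotion into its parent block.
      if (*tripCount == 1)
        (void)promoteIfSingleIteration(forOp);
    }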

/// Returns the greatest known integral divisor of the trip count. Affine
/// expression analysis is used (indirectly through getTripCount), and
/// this method is thus able to determine non-trivial divisors.
uint64_t getLargestDivisorOfTripCount(OpPointer<AffineForOp> forOp);
uint64_t getLargestDivisorOfTripCount(AffineForOp forOp);

/// Given an induction variable `iv` of type AffineForOp and an `index` of type
/// IndexType, returns `true` if `index` is independent of `iv` and false
@@ -92,13 +91,13 @@ getInvariantAccesses(Value &iv, llvm::ArrayRef<Value *> indices);
/// 3. all nested load/stores are to scalar MemRefs.
/// TODO(ntv): implement dependence semantics
/// TODO(ntv): relax the no-conditionals restriction
bool isVectorizableLoop(OpPointer<AffineForOp> loop);
bool isVectorizableLoop(AffineForOp loop);

/// Checks whether the loop is structurally vectorizable and that all the LoadOp
/// and StoreOp matched have access indexing functions that are are either:
/// 1. invariant along the loop induction variable created by 'loop';
/// 2. varying along the 'fastestVaryingDim' memory dimension.
bool isVectorizableLoopAlongFastestVaryingMemRefDim(OpPointer<AffineForOp> loop,
bool isVectorizableLoopAlongFastestVaryingMemRefDim(AffineForOp loop,
unsigned fastestVaryingDim);

/// Checks where SSA dominance would be violated if a for inst's body
@@ -106,8 +105,7 @@ bool isVectorizableLoopAlongFastestVaryingMemRefDim(OpPointer<AffineForOp> loop,
/// 'def' and all its uses have the same shift factor.
// TODO(mlir-team): extend this to check for memory-based dependence
// violation when we have the support.
bool isInstwiseShiftValid(OpPointer<AffineForOp> forOp,
llvm::ArrayRef<uint64_t> shifts);
bool isInstwiseShiftValid(AffineForOp forOp, llvm::ArrayRef<uint64_t> shifts);
} // end namespace mlir

#endif // MLIR_ANALYSIS_LOOP_ANALYSIS_H

@@ -41,15 +41,13 @@ class FlatAffineConstraints;
class Instruction;
class Location;
class MemRefAccess;
template <typename T> class OpPointer;
class Instruction;
class Value;

/// Populates 'loops' with IVs of the loops surrounding 'inst' ordered from
/// the outermost 'for' instruction to the innermost one.
// TODO(bondhugula): handle 'if' inst's.
void getLoopIVs(Instruction &inst,
SmallVectorImpl<OpPointer<AffineForOp>> *loops);
void getLoopIVs(Instruction &inst, SmallVectorImpl<AffineForOp> *loops);

/// Returns the nesting depth of this instruction, i.e., the number of loops
/// surrounding this instruction.
@@ -57,7 +55,7 @@ unsigned getNestingDepth(Instruction &inst);

/// Returns in 'sequentialLoops' all sequential loops in loop nest rooted
/// at 'forOp'.
void getSequentialLoops(OpPointer<AffineForOp> forOp,
void getSequentialLoops(AffineForOp forOp,
llvm::SmallDenseSet<Value *, 8> *sequentialLoops);

/// ComputationSliceState aggregates loop IVs, loop bound AffineMaps and their
@@ -105,10 +103,10 @@ LogicalResult getBackwardComputationSliceState(
// materialize the results of the backward slice - presenting a trade-off b/w
// storage and redundant computation in several cases.
// TODO(andydavis) Support computation slices with common surrounding loops.
OpPointer<AffineForOp>
insertBackwardComputationSlice(Instruction *srcOpInst, Instruction *dstOpInst,
unsigned dstLoopDepth,
ComputationSliceState *sliceState);
AffineForOp insertBackwardComputationSlice(Instruction *srcOpInst,
Instruction *dstOpInst,
unsigned dstLoopDepth,
ComputationSliceState *sliceState);

/// A region of a memref's data space; this is typically constructed by
/// analyzing load/store op's on this memref and the index space of loops
@@ -235,11 +233,11 @@ unsigned getNumCommonSurroundingLoops(Instruction &A, Instruction &B);

/// Gets the memory footprint of all data touched in the specified memory space
/// in bytes; if the memory space is unspecified, considers all memory spaces.
Optional<int64_t> getMemoryFootprintBytes(OpPointer<AffineForOp> forOp,
Optional<int64_t> getMemoryFootprintBytes(AffineForOp forOp,
int memorySpace = -1);

/// Returns true if `forOp' is a parallel loop.
bool isLoopParallel(OpPointer<AffineForOp> forOp);
bool isLoopParallel(AffineForOp forOp);

} // end namespace mlir

@@ -165,7 +165,7 @@ struct MLIREmitter {
}
return res;
}
OpPointer<AffineForOp> getAffineForOp(Expr e);
AffineForOp getAffineForOp(Expr e);

private:
/// Emits the MLIR for `expr` and inserts at the `builder`'s insertion point.

@@ -254,7 +254,7 @@ public:

/// Create operation of specific op type at the current insertion point.
template <typename OpTy, typename... Args>
OpPointer<OpTy> create(Location location, Args... args) {
OpTy create(Location location, Args... args) {
OperationState state(getContext(), location, OpTy::getOperationName());
OpTy::build(this, &state, args...);
auto *inst = createOperation(state);
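A usage sketch for the new return type (assumed, not taken from the diff; `b` is a FuncBuilder and `loc` a Location):

    // create<OpTy>() now hands back the typed op itself rather than OpPointer<OpTy>.
    auto fortyTwo = b.create<ConstantIndexOp>(loc, /*value=*/42);
    Value *v = fortyTwo;   // single-result ops still convert implicitly to Value*
    (void)v;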

@@ -37,7 +37,6 @@ class FunctionType;
class MLIRContext;
class Module;
class ArgumentIterator;
template <typename T> class OpPointer;

/// This is the base class for all of the MLIR function types.
class Function : public llvm::ilist_node_with_parent<Function, Module> {
@@ -110,8 +109,7 @@ public:
void walk(const std::function<void(Instruction *)> &callback);

/// Specialization of walk to only visit operations of 'OpTy'.
template <typename OpTy>
void walk(std::function<void(OpPointer<OpTy>)> callback) {
template <typename OpTy> void walk(std::function<void(OpTy)> callback) {
walk([&](Instruction *inst) {
if (auto op = inst->dyn_cast<OpTy>())
callback(op);
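With the new signature, a typed walk might look like this (a sketch under the assumption that `f` is a Function*; the callback now receives the op by value):

    f->walk<AffineForOp>([](AffineForOp forOp) {
      // forOp arrives by value but still behaves like a pointer to the op.
      (void)forOp->getInductionVar();
    });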
@@ -124,7 +122,7 @@ public:

/// Specialization of walkPostOrder to only visit operations of 'OpTy'.
template <typename OpTy>
void walkPostOrder(std::function<void(OpPointer<OpTy>)> callback) {
void walkPostOrder(std::function<void(OpTy)> callback) {
walkPostOrder([&](Instruction *inst) {
if (auto op = inst->dyn_cast<OpTy>())
callback(op);

@@ -33,7 +33,6 @@ namespace mlir {
class BlockAndValueMapping;
class Location;
class MLIRContext;
template <typename OpType> class OpPointer;
class OperandIterator;
class ResultIterator;
class ResultTypeIterator;
@@ -363,27 +362,20 @@ public:
// Conversions to declared operations like DimOp
//===--------------------------------------------------------------------===//

// Return a null OpPointer for the specified type.
template <typename OpClass> static OpPointer<OpClass> getNull() {
return OpPointer<OpClass>(OpClass(nullptr));
}

/// The dyn_cast methods perform a dynamic cast from an Instruction to a typed
/// Op like DimOp. This returns a null OpPointer on failure.
template <typename OpClass> OpPointer<OpClass> dyn_cast() {
if (isa<OpClass>()) {
/// Op like DimOp. This returns a null Op on failure.
template <typename OpClass> OpClass dyn_cast() {
if (isa<OpClass>())
return cast<OpClass>();
} else {
return OpPointer<OpClass>(OpClass(nullptr));
}
return OpClass();
}

/// The cast methods perform a cast from an Instruction to a typed Op like
/// DimOp. This aborts if the parameter to the template isn't an instance of
/// the template type argument.
template <typename OpClass> OpPointer<OpClass> cast() {
template <typename OpClass> OpClass cast() {
assert(isa<OpClass>() && "cast<Ty>() argument of incompatible type!");
return OpPointer<OpClass>(OpClass(this));
return OpClass(this);
}

/// The is methods return true if the operation is a typed op (like DimOp) of
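The calling convention that falls out of this hunk, sketched (not from the diff; `inst` is assumed to be an Instruction*, and DeallocOp::getMemRef appears later in this commit):

    if (auto dealloc = inst->dyn_cast<DeallocOp>()) {
      // A failed cast produces DeallocOp(), which tests false, so no
      // OpPointer wrapper is needed around the result any more.
      (void)dealloc->getMemRef();
    }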
@@ -399,8 +391,7 @@ public:
void walk(const std::function<void(Instruction *)> &callback);

/// Specialization of walk to only visit operations of 'OpTy'.
template <typename OpTy>
void walk(std::function<void(OpPointer<OpTy>)> callback) {
template <typename OpTy> void walk(std::function<void(OpTy)> callback) {
walk([&](Instruction *inst) {
if (auto op = inst->dyn_cast<OpTy>())
callback(op);
@@ -413,7 +404,7 @@ public:

/// Specialization of walkPostOrder to only visit operations of 'OpTy'.
template <typename OpTy>
void walkPostOrder(std::function<void(OpPointer<OpTy>)> callback) {
void walkPostOrder(std::function<void(OpTy)> callback) {
walkPostOrder([&](Instruction *inst) {
if (auto op = inst->dyn_cast<OpTy>())
callback(op);

@@ -54,48 +54,6 @@ template <typename OpType> struct IsSingleResult {
OpType *, OpTrait::OneResult<typename OpType::ConcreteOpType> *>::value;
};

/// This pointer represents a notional "Instruction*" but where the actual
/// storage of the pointer is maintained in the templated "OpType" class.
template <typename OpType>
class OpPointer {
public:
explicit OpPointer() : value(Instruction::getNull<OpType>().value) {}
explicit OpPointer(OpType value) : value(value) {}

OpType &operator*() { return value; }

OpType *operator->() { return &value; }

explicit operator bool() { return value.getInstruction(); }

bool operator==(OpPointer rhs) {
return value.getInstruction() == rhs.value.getInstruction();
}
bool operator!=(OpPointer rhs) { return !(*this == rhs); }

/// OpPointer can be implicitly converted to OpType*.
/// Return `nullptr` if there is no associated Instruction*.
operator OpType *() {
if (!value.getInstruction())
return nullptr;
return &value;
}

operator OpType() { return value; }

/// If the OpType operation includes the OneResult trait, then OpPointer can
/// be implicitly converted to an Value*. This yields the value of the
/// only result.
template <typename SFINAE = OpType>
operator typename std::enable_if<IsSingleResult<SFINAE>::value,
Value *>::type() {
return value.getResult();
}

private:
OpType value;
};

/// This is the concrete base class that holds the operation pointer and has
/// non-generic methods that only depend on State (to avoid having them
/// instantiated on template types that don't affect them.
@@ -104,6 +62,12 @@ private:
/// they aren't customized.
class OpState {
public:
/// Ops are pointer-like, so we allow implicit conversion to bool.
operator bool() { return getInstruction() != nullptr; }

/// This implicitly converts to Instruction*.
operator Instruction *() const { return state; }

/// Return the operation that this refers to.
Instruction *getInstruction() { return state; }

@@ -186,6 +150,14 @@ private:
Instruction *state;
};

// Allow comparing operators.
inline bool operator==(OpState lhs, OpState rhs) {
return lhs.getInstruction() == rhs.getInstruction();
}
inline bool operator!=(OpState lhs, OpState rhs) {
return lhs.getInstruction() != rhs.getInstruction();
}
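These free functions are what let ops be compared directly; a sketch of the resulting idiom (illustrative only; `iv` is assumed to be a Value*):

    AffineForOp owner = getForInductionVarOwner(iv);   // may be the null op
    bool isLoopIV = owner != AffineForOp();            // equality goes through OpState
    (void)isLoopIV;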

/// This template defines the constantFoldHook and foldHook as used by
/// AbstractOperation.
///
@@ -257,6 +229,12 @@ template <typename ConcreteType, bool isSingleResult>
class FoldingHook<ConcreteType, isSingleResult,
typename std::enable_if<isSingleResult>::type> {
public:
/// If the operation returns a single value, then the Op can be implicitly
/// converted to an Value*. This yields the value of the only result.
operator Value *() {
return static_cast<ConcreteType *>(this)->getInstruction()->getResult(0);
}

/// This is an implementation detail of the constant folder hook for
/// AbstractOperation.
static LogicalResult constantFoldHook(Instruction *op,
@@ -801,8 +779,14 @@ public:
/// to introspect traits on this operation.
using ConcreteOpType = ConcreteType;

/// This is a public constructor. Any op can be initialized to null.
explicit Op() : OpState(nullptr) {}

protected:
/// This is a private constructor only accessible through the
/// Instruction::cast family of methods.
explicit Op(Instruction *state) : OpState(state) {}
friend class Instruction;

private:
template <typename... Types> struct BaseVerifier;
@@ -866,6 +850,9 @@ template <typename ConcreteType, template <typename T> class... Traits>
class CastOp : public Op<ConcreteType, OpTrait::OneOperand, OpTrait::OneResult,
OpTrait::HasNoSideEffect, Traits...> {
public:
using Op<ConcreteType, OpTrait::OneOperand, OpTrait::OneResult,
OpTrait::HasNoSideEffect, Traits...>::Op;

static void build(Builder *builder, OperationState *result, Value *source,
Type destType) {
impl::buildCastOp(builder, result, source, destType);
@@ -876,11 +863,6 @@ public:
void print(OpAsmPrinter *p) {
return impl::printCastOp(this->getInstruction(), p);
}

protected:
explicit CastOp(Instruction *state)
: Op<ConcreteType, OpTrait::OneOperand, OpTrait::OneResult,
OpTrait::HasNoSideEffect, Traits...>(state) {}
};

} // end namespace mlir

@@ -185,7 +185,7 @@ public:
/// Create operation of specific op type at the current insertion point
/// without verifying to see if it is valid.
template <typename OpTy, typename... Args>
OpPointer<OpTy> create(Location location, Args... args) {
OpTy create(Location location, Args... args) {
OperationState state(getContext(), location, OpTy::getOperationName());
OpTy::build(this, &state, args...);
auto *op = createOperation(state);
@@ -198,7 +198,7 @@ public:
/// If the result is an invalid op (the verifier hook fails), emit an error
/// and return null.
template <typename OpTy, typename... Args>
OpPointer<OpTy> createChecked(Location location, Args... args) {
OpTy createChecked(Location location, Args... args) {
OperationState state(getContext(), location, OpTy::getOperationName());
OpTy::build(this, &state, args...);
auto *op = createOperation(state);
@@ -213,7 +213,7 @@ public:
// Otherwise, the error message got emitted. Just remove the instruction
// we made.
op->erase();
return OpPointer<OpTy>();
return OpTy();
}

/// This method performs the final replacement for a pattern, where the

@@ -68,6 +68,8 @@ public:
class AllocOp
: public Op<AllocOp, OpTrait::VariadicOperands, OpTrait::OneResult> {
public:
using Op::Op;

/// The result of an alloc is always a MemRefType.
MemRefType getType() { return getResult()->getType().cast<MemRefType>(); }

@@ -81,10 +83,6 @@ public:
void print(OpAsmPrinter *p);
static void getCanonicalizationPatterns(OwningRewritePatternList &results,
MLIRContext *context);

private:
friend class Instruction;
explicit AllocOp(Instruction *state) : Op(state) {}
};

/// The "br" operation represents a branch instruction in a function.
@@ -100,6 +98,8 @@ private:
class BranchOp : public Op<BranchOp, OpTrait::VariadicOperands,
OpTrait::ZeroResult, OpTrait::IsTerminator> {
public:
using Op::Op;

static StringRef getOperationName() { return "std.br"; }

static void build(Builder *builder, OperationState *result, Block *dest,
@@ -115,10 +115,6 @@ public:

/// Erase the operand at 'index' from the operand list.
void eraseOperand(unsigned index);

private:
friend class Instruction;
explicit BranchOp(Instruction *state) : Op(state) {}
};

/// The "call" operation represents a direct call to a function. The operands
@@ -130,6 +126,8 @@ private:
class CallOp
: public Op<CallOp, OpTrait::VariadicOperands, OpTrait::VariadicResults> {
public:
using Op::Op;

static StringRef getOperationName() { return "std.call"; }

static void build(Builder *builder, OperationState *result, Function *callee,
@@ -151,10 +149,6 @@ public:
static bool parse(OpAsmParser *parser, OperationState *result);
void print(OpAsmPrinter *p);
bool verify();

protected:
friend class Instruction;
explicit CallOp(Instruction *state) : Op(state) {}
};

/// The "call_indirect" operation represents an indirect call to a value of
@@ -168,6 +162,7 @@ protected:
class CallIndirectOp : public Op<CallIndirectOp, OpTrait::VariadicOperands,
OpTrait::VariadicResults> {
public:
using Op::Op;
static StringRef getOperationName() { return "std.call_indirect"; }

static void build(Builder *builder, OperationState *result, Value *callee,
@@ -189,10 +184,6 @@ public:
bool verify();
static void getCanonicalizationPatterns(OwningRewritePatternList &results,
MLIRContext *context);

protected:
friend class Instruction;
explicit CallIndirectOp(Instruction *state) : Op(state) {}
};

/// The predicate indicates the type of the comparison to perform:
@@ -240,6 +231,8 @@ class CmpIOp
OpTrait::OneResult, OpTrait::ResultsAreBoolLike,
OpTrait::SameOperandsAndResultShape, OpTrait::HasNoSideEffect> {
public:
using Op::Op;

CmpIPredicate getPredicate() {
return (CmpIPredicate)getAttrOfType<IntegerAttr>(getPredicateAttrName())
.getInt();
@@ -255,10 +248,6 @@ public:
void print(OpAsmPrinter *p);
bool verify();
Attribute constantFold(ArrayRef<Attribute> operands, MLIRContext *context);

private:
friend class Instruction;
explicit CmpIOp(Instruction *state) : Op(state) {}
};

/// The "cond_br" operation represents a conditional branch instruction in a
@@ -283,6 +272,8 @@ class CondBranchOp : public Op<CondBranchOp, OpTrait::AtLeastNOperands<1>::Impl,
/// follows:
/// { condition, [true_operands], [false_operands] }
public:
using Op::Op;

static StringRef getOperationName() { return "std.cond_br"; }

static void build(Builder *builder, OperationState *result, Value *condition,
@@ -363,9 +354,6 @@ private:
unsigned getFalseDestOperandIndex() {
return getTrueDestOperandIndex() + getNumTrueOperands();
}

friend class Instruction;
explicit CondBranchOp(Instruction *state) : Op(state) {}
};

/// The "constant" operation requires a single attribute named "value".
@@ -377,6 +365,8 @@ private:
class ConstantOp : public Op<ConstantOp, OpTrait::ZeroOperands,
OpTrait::OneResult, OpTrait::HasNoSideEffect> {
public:
using Op::Op;

/// Builds a constant op with the specified attribute value and result type.
static void build(Builder *builder, OperationState *result, Type type,
Attribute value);
@@ -394,10 +384,6 @@ public:
void print(OpAsmPrinter *p);
bool verify();
Attribute constantFold(ArrayRef<Attribute> operands, MLIRContext *context);

protected:
friend class Instruction;
explicit ConstantOp(Instruction *state) : Op(state) {}
};

/// This is a refinement of the "constant" op for the case where it is
@@ -407,6 +393,8 @@ protected:
///
class ConstantFloatOp : public ConstantOp {
public:
using ConstantOp::ConstantOp;

/// Builds a constant float op producing a float of the specified type.
static void build(Builder *builder, OperationState *result,
const APFloat &value, FloatType type);
@@ -414,10 +402,6 @@ public:
APFloat getValue() { return getAttrOfType<FloatAttr>("value").getValue(); }

static bool isClassFor(Instruction *op);

private:
friend class Instruction;
explicit ConstantFloatOp(Instruction *state) : ConstantOp(state) {}
};

/// This is a refinement of the "constant" op for the case where it is
@@ -427,6 +411,7 @@ private:
///
class ConstantIntOp : public ConstantOp {
public:
using ConstantOp::ConstantOp;
/// Build a constant int op producing an integer of the specified width.
static void build(Builder *builder, OperationState *result, int64_t value,
unsigned width);
@@ -439,10 +424,6 @@ public:
int64_t getValue() { return getAttrOfType<IntegerAttr>("value").getInt(); }

static bool isClassFor(Instruction *op);

private:
friend class Instruction;
explicit ConstantIntOp(Instruction *state) : ConstantOp(state) {}
};

/// This is a refinement of the "constant" op for the case where it is
@@ -452,16 +433,14 @@ private:
///
class ConstantIndexOp : public ConstantOp {
public:
using ConstantOp::ConstantOp;

/// Build a constant int op producing an index.
static void build(Builder *builder, OperationState *result, int64_t value);

int64_t getValue() { return getAttrOfType<IntegerAttr>("value").getInt(); }

static bool isClassFor(Instruction *op);

private:
friend class Instruction;
explicit ConstantIndexOp(Instruction *state) : ConstantOp(state) {}
};

/// The "dealloc" operation frees the region of memory referenced by a memref
@@ -477,6 +456,8 @@ private:
class DeallocOp
: public Op<DeallocOp, OpTrait::OneOperand, OpTrait::ZeroResult> {
public:
using Op::Op;

Value *getMemRef() { return getOperand(); }
void setMemRef(Value *value) { setOperand(value); }

@@ -489,10 +470,6 @@ public:
void print(OpAsmPrinter *p);
static void getCanonicalizationPatterns(OwningRewritePatternList &results,
MLIRContext *context);

private:
friend class Instruction;
explicit DeallocOp(Instruction *state) : Op(state) {}
};

/// The "dim" operation takes a memref or tensor operand and returns an
@@ -504,6 +481,8 @@ private:
class DimOp : public Op<DimOp, OpTrait::OneOperand, OpTrait::OneResult,
OpTrait::HasNoSideEffect> {
public:
using Op::Op;

static void build(Builder *builder, OperationState *result,
Value *memrefOrTensor, unsigned index);

@@ -520,10 +499,6 @@ public:
bool verify();
static bool parse(OpAsmParser *parser, OperationState *result);
void print(OpAsmPrinter *p);

private:
friend class Instruction;
explicit DimOp(Instruction *state) : Op(state) {}
};

// DmaStartOp starts a non-blocking DMA operation that transfers data from a
@@ -566,6 +541,8 @@ private:
class DmaStartOp
: public Op<DmaStartOp, OpTrait::VariadicOperands, OpTrait::ZeroResult> {
public:
using Op::Op;

static void build(Builder *builder, OperationState *result, Value *srcMemRef,
ArrayRef<Value *> srcIndices, Value *destMemRef,
ArrayRef<Value *> destIndices, Value *numElements,
@@ -671,10 +648,6 @@ public:
return nullptr;
return getOperand(getNumOperands() - 1);
}

protected:
friend class Instruction;
explicit DmaStartOp(Instruction *state) : Op(state) {}
};

// DmaWaitOp blocks until the completion of a DMA operation associated with the
@@ -693,6 +666,8 @@ protected:
class DmaWaitOp
: public Op<DmaWaitOp, OpTrait::VariadicOperands, OpTrait::ZeroResult> {
public:
using Op::Op;

static void build(Builder *builder, OperationState *result, Value *tagMemRef,
ArrayRef<Value *> tagIndices, Value *numElements);

@@ -719,10 +694,6 @@ public:
void print(OpAsmPrinter *p);
static void getCanonicalizationPatterns(OwningRewritePatternList &results,
MLIRContext *context);

protected:
friend class Instruction;
explicit DmaWaitOp(Instruction *state) : Op(state) {}
};

/// The "extract_element" op reads a tensor or vector and returns one element
@@ -740,6 +711,8 @@ class ExtractElementOp
: public Op<ExtractElementOp, OpTrait::VariadicOperands, OpTrait::OneResult,
OpTrait::HasNoSideEffect> {
public:
using Op::Op;

static void build(Builder *builder, OperationState *result, Value *aggregate,
ArrayRef<Value *> indices = {});

@@ -757,10 +730,6 @@ public:
static bool parse(OpAsmParser *parser, OperationState *result);
void print(OpAsmPrinter *p);
Attribute constantFold(ArrayRef<Attribute> operands, MLIRContext *context);

private:
friend class Instruction;
explicit ExtractElementOp(Instruction *state) : Op(state) {}
};

/// The "load" op reads an element from a memref specified by an index list. The
@@ -774,6 +743,8 @@ private:
class LoadOp
: public Op<LoadOp, OpTrait::VariadicOperands, OpTrait::OneResult> {
public:
using Op::Op;

// Hooks to customize behavior of this op.
static void build(Builder *builder, OperationState *result, Value *memref,
ArrayRef<Value *> indices = {});
@@ -796,10 +767,6 @@ public:
void print(OpAsmPrinter *p);
static void getCanonicalizationPatterns(OwningRewritePatternList &results,
MLIRContext *context);

private:
friend class Instruction;
explicit LoadOp(Instruction *state) : Op(state) {}
};

/// The "memref_cast" operation converts a memref from one type to an equivalent
@@ -819,6 +786,7 @@ private:
///
class MemRefCastOp : public CastOp<MemRefCastOp> {
public:
using CastOp::CastOp;
static StringRef getOperationName() { return "std.memref_cast"; }

/// The result of a memref_cast is always a memref.
@@ -827,10 +795,6 @@ public:
void print(OpAsmPrinter *p);

bool verify();

private:
friend class Instruction;
explicit MemRefCastOp(Instruction *state) : CastOp(state) {}
};

/// The "return" operation represents a return instruction within a function.
@@ -845,6 +809,8 @@ private:
class ReturnOp : public Op<ReturnOp, OpTrait::VariadicOperands,
OpTrait::ZeroResult, OpTrait::IsTerminator> {
public:
using Op::Op;

static StringRef getOperationName() { return "std.return"; }

static void build(Builder *builder, OperationState *result,
@@ -854,10 +820,6 @@ public:
static bool parse(OpAsmParser *parser, OperationState *result);
void print(OpAsmPrinter *p);
bool verify();

private:
friend class Instruction;
explicit ReturnOp(Instruction *state) : Op(state) {}
};

/// The "select" operation chooses one value based on a binary condition
@@ -874,6 +836,8 @@ private:
class SelectOp : public Op<SelectOp, OpTrait::NOperands<3>::Impl,
OpTrait::OneResult, OpTrait::HasNoSideEffect> {
public:
using Op::Op;

static StringRef getOperationName() { return "std.select"; }
static void build(Builder *builder, OperationState *result, Value *condition,
Value *trueValue, Value *falseValue);
@@ -886,10 +850,6 @@ public:
Value *getFalseValue() { return getOperand(2); }

Value *fold();

private:
friend class Instruction;
explicit SelectOp(Instruction *state) : Op(state) {}
};

/// The "store" op writes an element to a memref specified by an index list.
@@ -905,6 +865,8 @@ private:
class StoreOp
: public Op<StoreOp, OpTrait::VariadicOperands, OpTrait::ZeroResult> {
public:
using Op::Op;

// Hooks to customize behavior of this op.
static void build(Builder *builder, OperationState *result,
Value *valueToStore, Value *memref,
@@ -931,10 +893,6 @@ public:

static void getCanonicalizationPatterns(OwningRewritePatternList &results,
MLIRContext *context);

private:
friend class Instruction;
explicit StoreOp(Instruction *state) : Op(state) {}
};

/// The "tensor_cast" operation converts a tensor from one type to an equivalent
@@ -949,6 +907,8 @@ private:
///
class TensorCastOp : public CastOp<TensorCastOp> {
public:
using CastOp::CastOp;

static StringRef getOperationName() { return "std.tensor_cast"; }

/// The result of a tensor_cast is always a tensor.
@@ -957,10 +917,6 @@ public:
void print(OpAsmPrinter *p);

bool verify();

private:
friend class Instruction;
explicit TensorCastOp(Instruction *state) : CastOp(state) {}
};

/// Prints dimension and symbol list.

@@ -96,6 +96,8 @@ class VectorTransferReadOp
enum Offsets : unsigned { MemRefOffset = 0, FirstIndexOffset = 1 };

public:
using Op::Op;

static StringRef getOperationName() { return "vector_transfer_read"; }
static StringRef getPermutationMapAttrName() { return "permutation_map"; }
static void build(Builder *builder, OperationState *result,
@@ -118,10 +120,6 @@ public:
static bool parse(OpAsmParser *parser, OperationState *result);
void print(OpAsmPrinter *p);
bool verify();

private:
friend class Instruction;
explicit VectorTransferReadOp(Instruction *state) : Op(state) {}
};

/// VectorTransferWriteOp performs a blocking write from a super-vector to
@@ -162,6 +160,8 @@ class VectorTransferWriteOp
};

public:
using Op::Op;

static StringRef getOperationName() { return "vector_transfer_write"; }
static StringRef getPermutationMapAttrName() { return "permutation_map"; }
static void build(Builder *builder, OperationState *result, Value *srcVector,
@@ -181,10 +181,6 @@ public:
static bool parse(OpAsmParser *parser, OperationState *result);
void print(OpAsmPrinter *p);
bool verify();

private:
friend class Instruction;
explicit VectorTransferWriteOp(Instruction *state) : Op(state) {}
};

/// VectorTypeCastOp performs a conversion from a memref with scalar element to
@@ -199,16 +195,14 @@ private:
class VectorTypeCastOp
: public Op<VectorTypeCastOp, OpTrait::OneOperand, OpTrait::OneResult> {
public:
using Op::Op;

static StringRef getOperationName() { return "vector_type_cast"; }
static void build(Builder *builder, OperationState *result, Value *srcVector,
Type dstType);
static bool parse(OpAsmParser *parser, OperationState *result);
void print(OpAsmPrinter *p);
bool verify();

private:
friend class Instruction;
explicit VectorTypeCastOp(Instruction *state) : Op(state) {}
};

} // end namespace mlir

@@ -32,35 +32,32 @@ class AffineMap;
class AffineForOp;
class Function;
class FuncBuilder;
template <typename T> class OpPointer;
class Value;

/// Unrolls this for instruction completely if the trip count is known to be
/// constant. Returns failure otherwise.
LogicalResult loopUnrollFull(OpPointer<AffineForOp> forOp);
LogicalResult loopUnrollFull(AffineForOp forOp);
/// Unrolls this for instruction by the specified unroll factor. Returns failure
/// if the loop cannot be unrolled either due to restrictions or due to invalid
/// unroll factors.
LogicalResult loopUnrollByFactor(OpPointer<AffineForOp> forOp,
uint64_t unrollFactor);
LogicalResult loopUnrollByFactor(AffineForOp forOp, uint64_t unrollFactor);
/// Unrolls this loop by the specified unroll factor or its trip count,
/// whichever is lower.
LogicalResult loopUnrollUpToFactor(OpPointer<AffineForOp> forOp,
uint64_t unrollFactor);
LogicalResult loopUnrollUpToFactor(AffineForOp forOp, uint64_t unrollFactor);

/// Unrolls and jams this loop by the specified factor. Returns success if the
/// loop is successfully unroll-jammed.
LogicalResult loopUnrollJamByFactor(OpPointer<AffineForOp> forOp,
LogicalResult loopUnrollJamByFactor(AffineForOp forOp,
uint64_t unrollJamFactor);

/// Unrolls and jams this loop by the specified factor or by the trip count (if
/// constant), whichever is lower.
LogicalResult loopUnrollJamUpToFactor(OpPointer<AffineForOp> forOp,
LogicalResult loopUnrollJamUpToFactor(AffineForOp forOp,
uint64_t unrollJamFactor);

/// Promotes the loop body of a AffineForOp to its containing block if the
/// AffineForOp was known to have a single iteration.
LogicalResult promoteIfSingleIteration(OpPointer<AffineForOp> forOp);
LogicalResult promoteIfSingleIteration(AffineForOp forOp);

/// Promotes all single iteration AffineForOp's in the Function, i.e., moves
/// their body into the containing Block.
@@ -71,8 +68,8 @@ void promoteSingleIterationLoops(Function *f);
/// part of the unrolled loop. Computes the bound as an AffineMap with its
/// operands or a null map when the trip count can't be expressed as an affine
/// expression.
void getCleanupLoopLowerBound(OpPointer<AffineForOp> forOp,
unsigned unrollFactor, AffineMap *map,
void getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
AffineMap *map,
SmallVectorImpl<Value *> *operands,
FuncBuilder *builder);

@@ -80,42 +77,39 @@ void getCleanupLoopLowerBound(OpPointer<AffineForOp> forOp,
/// instruction-wise shifts. The shifts are with respect to the original
/// execution order, and are multiplied by the loop 'step' before being applied.
LLVM_NODISCARD
LogicalResult instBodySkew(OpPointer<AffineForOp> forOp,
ArrayRef<uint64_t> shifts,
LogicalResult instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
bool unrollPrologueEpilogue = false);

/// Tiles the specified band of perfectly nested loops creating tile-space loops
/// and intra-tile loops. A band is a contiguous set of loops.
LLVM_NODISCARD
LogicalResult tileCodeGen(MutableArrayRef<OpPointer<AffineForOp>> band,
LogicalResult tileCodeGen(MutableArrayRef<AffineForOp> band,
ArrayRef<unsigned> tileSizes);

/// Performs loop interchange on 'forOpA' and 'forOpB'. Requires that 'forOpA'
/// and 'forOpB' are part of a perfectly nested sequence of loops.
void interchangeLoops(OpPointer<AffineForOp> forOpA,
OpPointer<AffineForOp> forOpB);
void interchangeLoops(AffineForOp forOpA, AffineForOp forOpB);

/// Sinks 'forOp' by 'loopDepth' levels by performing a series of loop
/// interchanges. Requires that 'forOp' is part of a perfect nest with
/// 'loopDepth' AffineForOps consecutively nested under it.
void sinkLoop(OpPointer<AffineForOp> forOp, unsigned loopDepth);
void sinkLoop(AffineForOp forOp, unsigned loopDepth);

/// Performs tiling fo imperfectly nested loops (with interchange) by
/// strip-mining the `forOps` by `sizes` and sinking them, in their order of
/// occurrence in `forOps`, under each of the `targets`.
/// Returns the new AffineForOps, one per each of (`forOps`, `targets`) pair,
/// nested immediately under each of `targets`.
SmallVector<SmallVector<OpPointer<AffineForOp>, 8>, 8>
tile(ArrayRef<OpPointer<AffineForOp>> forOps, ArrayRef<uint64_t> sizes,
ArrayRef<OpPointer<AffineForOp>> targets);
SmallVector<SmallVector<AffineForOp, 8>, 8> tile(ArrayRef<AffineForOp> forOps,
ArrayRef<uint64_t> sizes,
ArrayRef<AffineForOp> targets);

/// Performs tiling (with interchange) by strip-mining the `forOps` by `sizes`
/// and sinking them, in their order of occurrence in `forOps`, under `target`.
/// Returns the new AffineForOps, one per `forOps`, nested immediately under
/// `target`.
SmallVector<OpPointer<AffineForOp>, 8>
tile(ArrayRef<OpPointer<AffineForOp>> forOps, ArrayRef<uint64_t> sizes,
OpPointer<AffineForOp> target);
SmallVector<AffineForOp, 8> tile(ArrayRef<AffineForOp> forOps,
ArrayRef<uint64_t> sizes, AffineForOp target);

} // end namespace mlir

@@ -30,7 +30,6 @@
namespace mlir {

class AffineForOp;
template <typename T> class OpPointer;
class FunctionPassBase;
class ModulePassBase;

@@ -62,8 +61,7 @@ FunctionPassBase *createMaterializeVectorsPass();
/// all) or the default unroll factor is used (LoopUnroll:kDefaultUnrollFactor).
FunctionPassBase *createLoopUnrollPass(
int unrollFactor = -1, int unrollFull = -1,
const std::function<unsigned(OpPointer<AffineForOp>)> &getUnrollFactor =
nullptr);
const std::function<unsigned(AffineForOp)> &getUnrollFactor = nullptr);

/// Creates a loop unroll jam pass to unroll jam by the specified factor. A
/// factor of -1 lets the pass use the default factor or the one on the command

@@ -116,8 +116,8 @@ Instruction *createComposedAffineApplyOp(FuncBuilder *builder, Location loc,
/// all the affine.apply op's supplying operands to this opInst did not have any
/// uses other than those in this opInst. The method otherwise returns the list
/// of affine.apply operations created in output argument `sliceOps`.
void createAffineComputationSlice(
Instruction *opInst, SmallVectorImpl<OpPointer<AffineApplyOp>> *sliceOps);
void createAffineComputationSlice(Instruction *opInst,
SmallVectorImpl<AffineApplyOp> *sliceOps);

/// Replaces (potentially nested) function attributes in the operation "op"
/// with those specified in "remappingTable".

@@ -485,7 +485,7 @@ AffineApplyNormalizer::AffineApplyNormalizer(AffineMap map,
auto *t = operands[i];
auto affineApply = t->getDefiningInst()
? t->getDefiningInst()->dyn_cast<AffineApplyOp>()
: OpPointer<AffineApplyOp>();
: AffineApplyOp();
if (affineApply) {
// a. Compose affine.apply instructions.
LLVM_DEBUG(affineApply->getInstruction()->print(
@@ -567,9 +567,9 @@ void mlir::fullyComposeAffineMapAndOperands(
}
}

OpPointer<AffineApplyOp>
mlir::makeComposedAffineApply(FuncBuilder *b, Location loc, AffineMap map,
ArrayRef<Value *> operands) {
AffineApplyOp mlir::makeComposedAffineApply(FuncBuilder *b, Location loc,
AffineMap map,
ArrayRef<Value *> operands) {
AffineMap normalizedMap = map;
SmallVector<Value *, 8> normalizedOperands(operands.begin(), operands.end());
composeAffineMapAndOperands(&normalizedMap, &normalizedOperands);
@@ -1070,15 +1070,14 @@ Block *AffineForOp::createBody() {

AffineBound AffineForOp::getLowerBound() {
auto lbMap = getLowerBoundMap();
return AffineBound(OpPointer<AffineForOp>(*this), 0, lbMap.getNumInputs(),
lbMap);
return AffineBound(AffineForOp(*this), 0, lbMap.getNumInputs(), lbMap);
}

AffineBound AffineForOp::getUpperBound() {
auto lbMap = getLowerBoundMap();
auto ubMap = getUpperBoundMap();
return AffineBound(OpPointer<AffineForOp>(*this), lbMap.getNumInputs(),
getNumOperands(), ubMap);
return AffineBound(AffineForOp(*this), lbMap.getNumInputs(), getNumOperands(),
ubMap);
}

void AffineForOp::setLowerBound(ArrayRef<Value *> lbOperands, AffineMap map) {
@@ -1178,24 +1177,24 @@ Value *AffineForOp::getInductionVar() { return getBody()->getArgument(0); }

/// Returns if the provided value is the induction variable of a AffineForOp.
bool mlir::isForInductionVar(Value *val) {
return getForInductionVarOwner(val) != nullptr;
return getForInductionVarOwner(val) != AffineForOp();
}

/// Returns the loop parent of an induction variable. If the provided value is
/// not an induction variable, then return nullptr.
OpPointer<AffineForOp> mlir::getForInductionVarOwner(Value *val) {
AffineForOp mlir::getForInductionVarOwner(Value *val) {
auto *ivArg = dyn_cast<BlockArgument>(val);
if (!ivArg || !ivArg->getOwner())
return OpPointer<AffineForOp>();
return AffineForOp();
auto *containingInst = ivArg->getOwner()->getParent()->getContainingInst();
if (!containingInst)
return OpPointer<AffineForOp>();
return AffineForOp();
return containingInst->dyn_cast<AffineForOp>();
}
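A caller-side sketch of the new behavior (assumed usage, not part of the diff; `someIv` is a Value* and `loops` a SmallVectorImpl<AffineForOp>*):

    // The result is itself null-testable, so callers no longer go through OpPointer.
    if (AffineForOp forOp = getForInductionVarOwner(someIv))
      loops->push_back(forOp);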

/// Extracts the induction variables from a list of AffineForOps and returns
/// them.
void mlir::extractForInductionVars(ArrayRef<OpPointer<AffineForOp>> forInsts,
void mlir::extractForInductionVars(ArrayRef<AffineForOp> forInsts,
SmallVectorImpl<Value *> *ivs) {
ivs->reserve(forInsts.size());
for (auto forInst : forInsts)

@@ -98,7 +98,7 @@ void mlir::getReachableAffineApplyOps(
// stride information in FlatAffineConstraints. (For eg., by using iv - lb %
// step = 0 and/or by introducing a method in FlatAffineConstraints
// setExprStride(ArrayRef<int64_t> expr, int64_t stride)
LogicalResult mlir::getIndexSet(MutableArrayRef<OpPointer<AffineForOp>> forOps,
LogicalResult mlir::getIndexSet(MutableArrayRef<AffineForOp> forOps,
FlatAffineConstraints *domain) {
SmallVector<Value *, 4> indices;
extractForInductionVars(forOps, &indices);
@@ -122,7 +122,7 @@ static LogicalResult getInstIndexSet(Instruction *inst,
FlatAffineConstraints *indexSet) {
// TODO(andydavis) Extend this to gather enclosing IfInsts and consider
// factoring it out into a utility function.
SmallVector<OpPointer<AffineForOp>, 4> loops;
SmallVector<AffineForOp, 4> loops;
getLoopIVs(*inst, &loops);
return getIndexSet(loops, indexSet);
}
@@ -461,7 +461,7 @@ addMemRefAccessConstraints(const AffineValueMap &srcAccessMap,
if (auto *opInst = symbol->getDefiningInst()) {
if (auto constOp = opInst->dyn_cast<ConstantIndexOp>()) {
dependenceDomain->setIdToConstant(valuePosMap.getSymPos(symbol),
constOp->getValue());
constOp.getValue());
}
}
}

@@ -217,7 +217,7 @@ AffineValueMap::AffineValueMap(AffineMap map, ArrayRef<Value *> operands,
: map(map), operands(operands.begin(), operands.end()),
results(results.begin(), results.end()) {}

AffineValueMap::AffineValueMap(OpPointer<AffineApplyOp> applyOp)
AffineValueMap::AffineValueMap(AffineApplyOp applyOp)
: map(applyOp->getAffineMap()),
operands(applyOp->operand_begin(), applyOp->operand_end()) {
results.push_back(applyOp->getResult());
@@ -729,13 +729,12 @@ void FlatAffineConstraints::addInductionVarOrTerminalSymbol(Value *id) {
// Check if the symbol is a constant.
if (auto *opInst = id->getDefiningInst()) {
if (auto constOp = opInst->dyn_cast<ConstantIndexOp>()) {
setIdToConstant(*id, constOp->getValue());
setIdToConstant(*id, constOp.getValue());
}
}
}

LogicalResult
FlatAffineConstraints::addAffineForOpDomain(OpPointer<AffineForOp> forOp) {
LogicalResult FlatAffineConstraints::addAffineForOpDomain(AffineForOp forOp) {
unsigned pos;
// Pre-condition for this method.
if (!findId(*forOp->getInductionVar(), &pos)) {
@@ -772,10 +771,8 @@ FlatAffineConstraints::addAffineForOpDomain(OpPointer<AffineForOp> forOp) {
addConstantLowerBound(pos, forOp->getConstantLowerBound());
} else {
// Non-constant lower bound case.
OpPointer<AffineForOp> ncForOp =
*reinterpret_cast<OpPointer<AffineForOp> *>(&forOp);
SmallVector<Value *, 4> lbOperands(ncForOp->getLowerBoundOperands().begin(),
ncForOp->getLowerBoundOperands().end());
SmallVector<Value *, 4> lbOperands(forOp->getLowerBoundOperands().begin(),
forOp->getLowerBoundOperands().end());
if (failed(addLowerOrUpperBound(pos, forOp->getLowerBoundMap(), lbOperands,
/*eq=*/false, /*lower=*/true)))
return failure();
@@ -786,10 +783,8 @@ FlatAffineConstraints::addAffineForOpDomain(OpPointer<AffineForOp> forOp) {
return success();
}
// Non-constant upper bound case.
OpPointer<AffineForOp> ncForOp =
*reinterpret_cast<OpPointer<AffineForOp> *>(&forOp);
SmallVector<Value *, 4> ubOperands(ncForOp->getUpperBoundOperands().begin(),
ncForOp->getUpperBoundOperands().end());
SmallVector<Value *, 4> ubOperands(forOp->getUpperBoundOperands().begin(),
forOp->getUpperBoundOperands().end());
return addLowerOrUpperBound(pos, forOp->getUpperBoundMap(), ubOperands,
/*eq=*/false, /*lower=*/false);
}
@ -49,16 +49,12 @@ using namespace mlir;
// pure analysis method relying on FlatAffineConstraints; the latter will also
// be more powerful (since both inequalities and equalities will be considered).
void mlir::buildTripCountMapAndOperands(
OpPointer<AffineForOp> forOp, AffineMap *map,
AffineForOp forOp, AffineMap *map,
SmallVectorImpl<Value *> *tripCountOperands) {
int64_t loopSpan;

int64_t step = forOp->getStep();

// We need to get operands; we aren't changing them here.
auto ncForOp = *reinterpret_cast<OpPointer<AffineForOp> *>(&forOp);

FuncBuilder b(ncForOp->getInstruction());
FuncBuilder b(forOp->getInstruction());

if (forOp->hasConstantBounds()) {
int64_t lb = forOp->getConstantLowerBound();
@ -76,8 +72,8 @@ void mlir::buildTripCountMapAndOperands(
*map = AffineMap();
return;
}
SmallVector<Value *, 4> lbOperands(ncForOp->getLowerBoundOperands());
SmallVector<Value *, 4> ubOperands(ncForOp->getUpperBoundOperands());
SmallVector<Value *, 4> lbOperands(forOp->getLowerBoundOperands());
SmallVector<Value *, 4> ubOperands(forOp->getUpperBoundOperands());
auto lb = b.create<AffineApplyOp>(forOp->getLoc(), lbMap, lbOperands);
SmallVector<Value *, 4> ubs;
ubs.reserve(ubMap.getNumResults());
@ -117,8 +113,7 @@ void mlir::buildTripCountMapAndOperands(
// being an analysis utility, it shouldn't. Replace with a version that just
// works with analysis structures (FlatAffineConstraints) and thus doesn't
// update the IR.
llvm::Optional<uint64_t>
mlir::getConstantTripCount(OpPointer<AffineForOp> forOp) {
llvm::Optional<uint64_t> mlir::getConstantTripCount(AffineForOp forOp) {
SmallVector<Value *, 4> operands;
AffineMap map;
buildTripCountMapAndOperands(forOp, &map, &operands);
@ -144,7 +139,7 @@ mlir::getConstantTripCount(OpPointer<AffineForOp> forOp) {
/// Returns the greatest known integral divisor of the trip count. Affine
/// expression analysis is used (indirectly through getTripCount), and
/// this method is thus able to determine non-trivial divisors.
uint64_t mlir::getLargestDivisorOfTripCount(OpPointer<AffineForOp> forOp) {
uint64_t mlir::getLargestDivisorOfTripCount(AffineForOp forOp) {
SmallVector<Value *, 4> operands;
AffineMap map;
buildTripCountMapAndOperands(forOp, &map, &operands);
@ -235,7 +230,7 @@ mlir::getInvariantAccesses(Value &iv, llvm::ArrayRef<Value *> indices) {
///
// TODO(ntv): check strides.
template <typename LoadOrStoreOp>
static bool isContiguousAccess(Value &iv, OpPointer<LoadOrStoreOp> memoryOp,
static bool isContiguousAccess(Value &iv, LoadOrStoreOp memoryOp,
unsigned fastestVaryingDim) {
static_assert(std::is_same<LoadOrStoreOp, LoadOp>::value ||
std::is_same<LoadOrStoreOp, StoreOp>::value,
@ -281,10 +276,9 @@ static bool isVectorTransferReadOrWrite(Instruction &inst) {
return inst.isa<VectorTransferReadOp>() || inst.isa<VectorTransferWriteOp>();
}

using VectorizableInstFun =
std::function<bool(OpPointer<AffineForOp>, Instruction &)>;
using VectorizableInstFun = std::function<bool(AffineForOp, Instruction &)>;

static bool isVectorizableLoopWithCond(OpPointer<AffineForOp> loop,
static bool isVectorizableLoopWithCond(AffineForOp loop,
VectorizableInstFun isVectorizableInst) {
auto *forInst = const_cast<Instruction *>(loop->getInstruction());
if (!matcher::isParallelLoop(*forInst) &&
@ -340,9 +334,9 @@ static bool isVectorizableLoopWithCond(OpPointer<AffineForOp> loop,
}

bool mlir::isVectorizableLoopAlongFastestVaryingMemRefDim(
OpPointer<AffineForOp> loop, unsigned fastestVaryingDim) {
AffineForOp loop, unsigned fastestVaryingDim) {
VectorizableInstFun fun(
[fastestVaryingDim](OpPointer<AffineForOp> loop, Instruction &op) {
[fastestVaryingDim](AffineForOp loop, Instruction &op) {
auto load = op.dyn_cast<LoadOp>();
auto store = op.dyn_cast<StoreOp>();
return load ? isContiguousAccess(*loop->getInductionVar(), load,
@ -353,10 +347,10 @@ bool mlir::isVectorizableLoopAlongFastestVaryingMemRefDim(
return isVectorizableLoopWithCond(loop, fun);
}

bool mlir::isVectorizableLoop(OpPointer<AffineForOp> loop) {
bool mlir::isVectorizableLoop(AffineForOp loop) {
VectorizableInstFun fun(
// TODO: implement me
[](OpPointer<AffineForOp> loop, Instruction &op) { return true; });
[](AffineForOp loop, Instruction &op) { return true; });
return isVectorizableLoopWithCond(loop, fun);
}

@ -365,8 +359,7 @@ bool mlir::isVectorizableLoop(OpPointer<AffineForOp> loop) {
/// 'def' and all its uses have the same shift factor.
// TODO(mlir-team): extend this to check for memory-based dependence
// violation when we have the support.
bool mlir::isInstwiseShiftValid(OpPointer<AffineForOp> forOp,
ArrayRef<uint64_t> shifts) {
bool mlir::isInstwiseShiftValid(AffineForOp forOp, ArrayRef<uint64_t> shifts) {
auto *forBody = forOp->getBody();
assert(shifts.size() == forBody->getInstructions().size());

@ -44,7 +44,7 @@ FunctionPassBase *mlir::createParallelismDetectionTestPass() {
void TestParallelismDetection::runOnFunction() {
Function *f = getFunction();
FuncBuilder b(f);
f->walk<AffineForOp>([&](OpPointer<AffineForOp> forOp) {
f->walk<AffineForOp>([&](AffineForOp forOp) {
if (isLoopParallel(forOp))
forOp->emitNote("parallel loop");
});

@ -39,10 +39,9 @@ using llvm::SmallDenseMap;

/// Populates 'loops' with IVs of the loops surrounding 'inst' ordered from
/// the outermost 'for' instruction to the innermost one.
void mlir::getLoopIVs(Instruction &inst,
SmallVectorImpl<OpPointer<AffineForOp>> *loops) {
void mlir::getLoopIVs(Instruction &inst, SmallVectorImpl<AffineForOp> *loops) {
auto *currInst = inst.getParentInst();
OpPointer<AffineForOp> currAffineForOp;
AffineForOp currAffineForOp;
// Traverse up the hierarchy collecing all 'for' instruction while
// skipping over 'if' instructions.
while (currInst && ((currAffineForOp = currInst->dyn_cast<AffineForOp>()) ||
@ -76,7 +75,7 @@ ComputationSliceState::getAsConstraints(FlatAffineConstraints *cst) {
// Check if the symbol is a constant.
if (auto *inst = value->getDefiningInst()) {
if (auto constOp = inst->dyn_cast<ConstantIndexOp>()) {
cst->setIdToConstant(*value, constOp->getValue());
cst->setIdToConstant(*value, constOp.getValue());
}
}
} else {
@ -189,7 +188,7 @@ LogicalResult MemRefRegion::compute(Instruction *inst, unsigned loopDepth,
<< "depth: " << loopDepth << "\n";);

if (rank == 0) {
SmallVector<OpPointer<AffineForOp>, 4> ivs;
SmallVector<AffineForOp, 4> ivs;
getLoopIVs(*inst, &ivs);
SmallVector<Value *, 8> regionSymbols;
extractForInductionVars(ivs, &regionSymbols);
@ -245,7 +244,7 @@ LogicalResult MemRefRegion::compute(Instruction *inst, unsigned loopDepth,
// Check if the symbol is a constant.
if (auto *inst = symbol->getDefiningInst()) {
if (auto constOp = inst->dyn_cast<ConstantIndexOp>()) {
cst.setIdToConstant(*symbol, constOp->getValue());
cst.setIdToConstant(*symbol, constOp.getValue());
}
}
}
@ -280,14 +279,14 @@ LogicalResult MemRefRegion::compute(Instruction *inst, unsigned loopDepth,

// Eliminate any loop IVs other than the outermost 'loopDepth' IVs, on which
// this memref region is symbolic.
SmallVector<OpPointer<AffineForOp>, 4> enclosingIVs;
SmallVector<AffineForOp, 4> enclosingIVs;
getLoopIVs(*inst, &enclosingIVs);
assert(loopDepth <= enclosingIVs.size() && "invalid loop depth");
enclosingIVs.resize(loopDepth);
SmallVector<Value *, 4> ids;
cst.getIdValues(cst.getNumDimIds(), cst.getNumDimAndSymbolIds(), &ids);
for (auto *id : ids) {
OpPointer<AffineForOp> iv;
AffineForOp iv;
if ((iv = getForInductionVarOwner(id)) &&
llvm::is_contained(enclosingIVs, iv) == false) {
cst.projectOut(id);
@ -371,10 +370,9 @@ Optional<uint64_t> mlir::getMemRefSizeInBytes(MemRefType memRefType) {
template <typename LoadOrStoreOpPointer>
LogicalResult mlir::boundCheckLoadOrStoreOp(LoadOrStoreOpPointer loadOrStoreOp,
bool emitError) {
static_assert(
std::is_same<LoadOrStoreOpPointer, OpPointer<LoadOp>>::value ||
std::is_same<LoadOrStoreOpPointer, OpPointer<StoreOp>>::value,
"argument should be either a LoadOp or a StoreOp");
static_assert(std::is_same<LoadOrStoreOpPointer, LoadOp>::value ||
std::is_same<LoadOrStoreOpPointer, StoreOp>::value,
"argument should be either a LoadOp or a StoreOp");

Instruction *opInst = loadOrStoreOp->getInstruction();

@ -424,9 +422,9 @@ LogicalResult mlir::boundCheckLoadOrStoreOp(LoadOrStoreOpPointer loadOrStoreOp,
}

// Explicitly instantiate the template so that the compiler knows we need them!
template LogicalResult mlir::boundCheckLoadOrStoreOp(OpPointer<LoadOp> loadOp,
template LogicalResult mlir::boundCheckLoadOrStoreOp(LoadOp loadOp,
bool emitError);
template LogicalResult mlir::boundCheckLoadOrStoreOp(OpPointer<StoreOp> storeOp,
template LogicalResult mlir::boundCheckLoadOrStoreOp(StoreOp storeOp,
bool emitError);

// Returns in 'positions' the Block positions of 'inst' in each ancestor
@ -490,12 +488,12 @@ LogicalResult mlir::getBackwardComputationSliceState(
return failure();
}
// Get loop nest surrounding src operation.
SmallVector<OpPointer<AffineForOp>, 4> srcLoopIVs;
SmallVector<AffineForOp, 4> srcLoopIVs;
getLoopIVs(*srcAccess.opInst, &srcLoopIVs);
unsigned numSrcLoopIVs = srcLoopIVs.size();

// Get loop nest surrounding dst operation.
SmallVector<OpPointer<AffineForOp>, 4> dstLoopIVs;
SmallVector<AffineForOp, 4> dstLoopIVs;
getLoopIVs(*dstAccess.opInst, &dstLoopIVs);
unsigned numDstLoopIVs = dstLoopIVs.size();
if (dstLoopDepth > numDstLoopIVs) {
@ -566,21 +564,21 @@ LogicalResult mlir::getBackwardComputationSliceState(
// entire destination index set. Subtract out the dependent destination
// iterations from destination index set and check for emptiness --- this is one
// solution.
OpPointer<AffineForOp> mlir::insertBackwardComputationSlice(
AffineForOp mlir::insertBackwardComputationSlice(
Instruction *srcOpInst, Instruction *dstOpInst, unsigned dstLoopDepth,
ComputationSliceState *sliceState) {
// Get loop nest surrounding src operation.
SmallVector<OpPointer<AffineForOp>, 4> srcLoopIVs;
SmallVector<AffineForOp, 4> srcLoopIVs;
getLoopIVs(*srcOpInst, &srcLoopIVs);
unsigned numSrcLoopIVs = srcLoopIVs.size();

// Get loop nest surrounding dst operation.
SmallVector<OpPointer<AffineForOp>, 4> dstLoopIVs;
SmallVector<AffineForOp, 4> dstLoopIVs;
getLoopIVs(*dstOpInst, &dstLoopIVs);
unsigned dstLoopIVsSize = dstLoopIVs.size();
if (dstLoopDepth > dstLoopIVsSize) {
dstOpInst->emitError("invalid destination loop depth");
return OpPointer<AffineForOp>();
return AffineForOp();
}

// Find the inst block positions of 'srcOpInst' within 'srcLoopIVs'.
@ -599,7 +597,7 @@ OpPointer<AffineForOp> mlir::insertBackwardComputationSlice(
Instruction *sliceInst =
getInstAtPosition(positions, /*level=*/0, sliceLoopNest->getBody());
// Get loop nest surrounding 'sliceInst'.
SmallVector<OpPointer<AffineForOp>, 4> sliceSurroundingLoops;
SmallVector<AffineForOp, 4> sliceSurroundingLoops;
getLoopIVs(*sliceInst, &sliceSurroundingLoops);

// Sanity check.
@ -666,7 +664,7 @@ unsigned mlir::getNestingDepth(Instruction &inst) {
/// Returns the number of surrounding loops common to 'loopsA' and 'loopsB',
/// where each lists loops from outer-most to inner-most in loop nest.
unsigned mlir::getNumCommonSurroundingLoops(Instruction &A, Instruction &B) {
SmallVector<OpPointer<AffineForOp>, 4> loopsA, loopsB;
SmallVector<AffineForOp, 4> loopsA, loopsB;
getLoopIVs(A, &loopsA);
getLoopIVs(B, &loopsB);

@ -728,7 +726,7 @@ static Optional<int64_t> getMemoryFootprintBytes(Block &block,
return totalSizeInBytes;
}

Optional<int64_t> mlir::getMemoryFootprintBytes(OpPointer<AffineForOp> forOp,
Optional<int64_t> mlir::getMemoryFootprintBytes(AffineForOp forOp,
int memorySpace) {
auto *forInst = forOp->getInstruction();
return ::getMemoryFootprintBytes(
@ -739,8 +737,7 @@ Optional<int64_t> mlir::getMemoryFootprintBytes(OpPointer<AffineForOp> forOp,
/// Returns in 'sequentialLoops' all sequential loops in loop nest rooted
/// at 'forOp'.
void mlir::getSequentialLoops(
OpPointer<AffineForOp> forOp,
llvm::SmallDenseSet<Value *, 8> *sequentialLoops) {
AffineForOp forOp, llvm::SmallDenseSet<Value *, 8> *sequentialLoops) {
forOp->getInstruction()->walk([&](Instruction *inst) {
if (auto innerFor = inst->dyn_cast<AffineForOp>())
if (!isLoopParallel(innerFor))
@ -749,7 +746,7 @@ void mlir::getSequentialLoops(
}

/// Returns true if 'forOp' is parallel.
bool mlir::isLoopParallel(OpPointer<AffineForOp> forOp) {
bool mlir::isLoopParallel(AffineForOp forOp) {
// Collect all load and store ops in loop nest rooted at 'forOp'.
SmallVector<Instruction *, 8> loadAndStoreOpInsts;
forOp->getInstruction()->walk([&](Instruction *opInst) {

@ -155,8 +155,8 @@ static llvm::Optional<ValueHandle> emitStaticFor(ArrayRef<ValueHandle> lbs,
if (!lbConst || !ubConst)
return llvm::Optional<ValueHandle>();

return ValueHandle::create<AffineForOp>(lbConst->getValue(),
ubConst->getValue(), step);
return ValueHandle::create<AffineForOp>(lbConst.getValue(),
ubConst.getValue(), step);
}

mlir::edsc::LoopBuilder::LoopBuilder(ValueHandle *iv,
@ -268,10 +268,9 @@ categorizeValueByAffineType(MLIRContext *context, Value *val, unsigned &numDims,
AffineExpr d;
Value *resultVal = nullptr;
auto *inst = val->getDefiningInst();
auto constant =
inst ? inst->dyn_cast<ConstantIndexOp>() : OpPointer<ConstantIndexOp>();
auto constant = inst ? inst->dyn_cast<ConstantIndexOp>() : ConstantIndexOp();
if (constant) {
d = getAffineConstantExpr(constant->getValue(), context);
d = getAffineConstantExpr(constant.getValue(), context);
} else if (isValidSymbol(val) && !isValidDim(val)) {
d = getAffineSymbolExpr(numSymbols++, context);
resultVal = val;

@ -94,25 +94,24 @@ static void checkAffineProvenance(ArrayRef<Value *> values) {
}
}

static OpPointer<AffineForOp> emitStaticFor(FuncBuilder &builder, Location loc,
ArrayRef<Value *> lbs,
ArrayRef<Value *> ubs,
uint64_t step) {
static AffineForOp emitStaticFor(FuncBuilder &builder, Location loc,
ArrayRef<Value *> lbs, ArrayRef<Value *> ubs,
uint64_t step) {
if (lbs.size() != 1 || ubs.size() != 1)
return OpPointer<AffineForOp>();
return AffineForOp();

auto *lbDef = lbs.front()->getDefiningInst();
auto *ubDef = ubs.front()->getDefiningInst();
if (!lbDef || !ubDef)
return OpPointer<AffineForOp>();
return AffineForOp();

auto lbConst = lbDef->dyn_cast<ConstantIndexOp>();
auto ubConst = ubDef->dyn_cast<ConstantIndexOp>();
if (!lbConst || !ubConst)
return OpPointer<AffineForOp>();
return AffineForOp();

return builder.create<AffineForOp>(loc, lbConst->getValue(),
ubConst->getValue(), step);
return builder.create<AffineForOp>(loc, lbConst.getValue(),
ubConst.getValue(), step);
}

Value *mlir::edsc::MLIREmitter::emitExpr(Expr e) {
@ -166,11 +165,10 @@ Value *mlir::edsc::MLIREmitter::emitExpr(Expr e) {

// Step must be a static constant.
auto step =
stepExpr->getDefiningInst()->cast<ConstantIndexOp>()->getValue();
stepExpr->getDefiningInst()->cast<ConstantIndexOp>().getValue();

// Special case with more concise emitted code for static bounds.
OpPointer<AffineForOp> forOp =
emitStaticFor(*builder, location, lbs, ubs, step);
AffineForOp forOp = emitStaticFor(*builder, location, lbs, ubs, step);

// General case.
if (!forOp)
@ -387,7 +385,7 @@ mlir::edsc::MLIREmitter::makeBoundMemRefView(Expr boundMemRef) {
return makeBoundMemRefView(v);
}

OpPointer<AffineForOp> mlir::edsc::MLIREmitter::getAffineForOp(Expr e) {
AffineForOp mlir::edsc::MLIREmitter::getAffineForOp(Expr e) {
auto *value = ssaBindings.lookup(e);
assert(value && "Expr not bound");
return getForInductionVarOwner(value);

@ -319,10 +319,10 @@ struct SimplifyAllocConst : public RewritePattern {
continue;
}
auto *defOp = allocOp->getOperand(dynamicDimPos)->getDefiningInst();
OpPointer<ConstantIndexOp> constantIndexOp;
ConstantIndexOp constantIndexOp;
if (defOp && (constantIndexOp = defOp->dyn_cast<ConstantIndexOp>())) {
// Dynamic shape dimension will be folded.
newShapeConstants.push_back(constantIndexOp->getValue());
newShapeConstants.push_back(constantIndexOp.getValue());
// Record to check for zero uses later below.
droppedOperands.push_back(constantIndexOp);
} else {

@ -187,7 +187,7 @@ static bool getFullMemRefAsRegion(Instruction *opInst, unsigned numParamLoopIVs,

// Just get the first numSymbols IVs, which the memref region is parametric
// on.
SmallVector<OpPointer<AffineForOp>, 4> ivs;
SmallVector<AffineForOp, 4> ivs;
getLoopIVs(*opInst, &ivs);
ivs.resize(numParamLoopIVs);
SmallVector<Value *, 4> symbols;
@ -485,7 +485,7 @@ bool DmaGeneration::runOnBlock(Block *block) {
for (auto it = curBegin; it != block->end(); ++it) {
if (auto forOp = it->dyn_cast<AffineForOp>()) {
// Returns true if the footprint is known to exceed capacity.
auto exceedsCapacity = [&](OpPointer<AffineForOp> forOp) {
auto exceedsCapacity = [&](AffineForOp forOp) {
Optional<int64_t> footprint =
getMemoryFootprintBytes(forOp,
/*memorySpace=*/0);
@ -553,7 +553,7 @@ findHighestBlockForPlacement(const MemRefRegion &region, Block &block,
SmallVector<Value *, 4> symbols;
cst->getIdValues(cst->getNumDimIds(), cst->getNumDimAndSymbolIds(), &symbols);

SmallVector<OpPointer<AffineForOp>, 4> enclosingFors;
SmallVector<AffineForOp, 4> enclosingFors;
getLoopIVs(*block.begin(), &enclosingFors);
// Walk up loop parents till we find an IV on which this region is
// symbolic/variant.
@ -733,7 +733,7 @@ uint64_t DmaGeneration::runOnBlock(Block::iterator begin, Block::iterator end) {

// For a range of operation instructions, a note will be emitted at the
// caller.
OpPointer<AffineForOp> forOp;
AffineForOp forOp;
uint64_t sizeInKib = llvm::divideCeil(totalDmaBuffersSizeInBytes, 1024);
if (llvm::DebugFlag && (forOp = begin->dyn_cast<AffineForOp>())) {
forOp->emitNote(

@ -122,7 +122,7 @@ namespace {
// LoopNestStateCollector walks loop nests and collects load and store
// operations, and whether or not an IfInst was encountered in the loop nest.
struct LoopNestStateCollector {
SmallVector<OpPointer<AffineForOp>, 4> forOps;
SmallVector<AffineForOp, 4> forOps;
SmallVector<Instruction *, 4> loadOpInsts;
SmallVector<Instruction *, 4> storeOpInsts;
bool hasNonForRegion = false;
@ -691,7 +691,7 @@ bool MemRefDependenceGraph::init(Function *f) {
auto *opInst = node.inst;
for (auto *value : opInst->getResults()) {
for (auto &use : value->getUses()) {
SmallVector<OpPointer<AffineForOp>, 4> loops;
SmallVector<AffineForOp, 4> loops;
getLoopIVs(*use.getOwner(), &loops);
if (loops.empty())
continue;
@ -727,7 +727,7 @@ namespace {
// and operation count) for a loop nest up until the innermost loop body.
struct LoopNestStats {
// Map from AffineForOp to immediate child AffineForOps in its loop body.
DenseMap<Instruction *, SmallVector<OpPointer<AffineForOp>, 2>> loopMap;
DenseMap<Instruction *, SmallVector<AffineForOp, 2>> loopMap;
// Map from AffineForOp to count of operations in its loop body.
DenseMap<Instruction *, uint64_t> opCountMap;
// Map from AffineForOp to its constant trip count.
@ -743,7 +743,7 @@ struct LoopNestStatsCollector {
LoopNestStatsCollector(LoopNestStats *stats) : stats(stats) {}

void collect(Instruction *inst) {
inst->walk<AffineForOp>([&](OpPointer<AffineForOp> forOp) {
inst->walk<AffineForOp>([&](AffineForOp forOp) {
auto *forInst = forOp->getInstruction();
auto *parentInst = forOp->getInstruction()->getParentInst();
if (parentInst != nullptr) {
@ -844,7 +844,7 @@ static Optional<uint64_t> getConstDifference(AffineMap lbMap, AffineMap ubMap) {
static bool buildSliceTripCountMap(
Instruction *srcOpInst, ComputationSliceState *sliceState,
llvm::SmallDenseMap<Instruction *, uint64_t, 8> *tripCountMap) {
SmallVector<OpPointer<AffineForOp>, 4> srcLoopIVs;
SmallVector<AffineForOp, 4> srcLoopIVs;
getLoopIVs(*srcOpInst, &srcLoopIVs);
unsigned numSrcLoopIVs = srcLoopIVs.size();
// Populate map from AffineForOp -> trip count
@ -892,7 +892,7 @@ static unsigned getInnermostCommonLoopDepth(ArrayRef<Instruction *> ops) {
unsigned numOps = ops.size();
assert(numOps > 0);

std::vector<SmallVector<OpPointer<AffineForOp>, 4>> loops(numOps);
std::vector<SmallVector<AffineForOp, 4>> loops(numOps);
unsigned loopDepthLimit = std::numeric_limits<unsigned>::max();
for (unsigned i = 0; i < numOps; ++i) {
getLoopIVs(*ops[i], &loops[i]);
@ -1056,8 +1056,8 @@ static void sinkSequentialLoops(MemRefDependenceGraph::Node *node) {
assert(node->inst->isa<AffineForOp>());
// Get perfectly nested sequence of loops starting at root of loop nest.
// TODO(andydavis,bondhugula) Share this with similar code in loop tiling.
SmallVector<OpPointer<AffineForOp>, 4> loops;
OpPointer<AffineForOp> curr = node->inst->cast<AffineForOp>();
SmallVector<AffineForOp, 4> loops;
AffineForOp curr = node->inst->cast<AffineForOp>();
loops.push_back(curr);
auto *currBody = curr->getBody();
while (!currBody->empty() &&
@ -1113,7 +1113,7 @@ unsigned getMemRefEltSizeInBytes(MemRefType memRefType) {
// MemRefRegion written to by 'srcStoreOpInst' at depth 'dstLoopDepth'.
// TODO(bondhugula): consider refactoring the common code from generateDma and
// this one.
static Value *createPrivateMemRef(OpPointer<AffineForOp> forOp,
static Value *createPrivateMemRef(AffineForOp forOp,
Instruction *srcStoreOpInst,
unsigned dstLoopDepth,
Optional<unsigned> fastMemorySpace,
@ -1429,7 +1429,7 @@ static bool isFusionProfitable(Instruction *srcOpInst,
});

// Compute cost of sliced and unsliced src loop nest.
SmallVector<OpPointer<AffineForOp>, 4> srcLoopIVs;
SmallVector<AffineForOp, 4> srcLoopIVs;
getLoopIVs(*srcOpInst, &srcLoopIVs);
unsigned numSrcLoopIVs = srcLoopIVs.size();

@ -1443,7 +1443,7 @@ static bool isFusionProfitable(Instruction *srcOpInst,
return false;
}
// Compute cost of dst loop nest.
SmallVector<OpPointer<AffineForOp>, 4> dstLoopIVs;
SmallVector<AffineForOp, 4> dstLoopIVs;
getLoopIVs(*dstLoadOpInsts[0], &dstLoopIVs);

LoopNestStats dstLoopNestStats;
@ -1933,7 +1933,7 @@ public:
// Fuse computation slice of 'srcLoopNest' into 'dstLoopNest'.
auto sliceLoopNest = mlir::insertBackwardComputationSlice(
srcStoreOpInst, dstLoadOpInsts[0], bestDstLoopDepth, &sliceState);
if (sliceLoopNest != nullptr) {
if (sliceLoopNest) {
LLVM_DEBUG(llvm::dbgs()
<< "\tslice loop nest:\n"
<< *sliceLoopNest->getInstruction() << "\n");
@ -2182,8 +2182,8 @@ public:
return false;
}

void updateStateAfterSiblingFusion(OpPointer<AffineForOp> sliceLoopNest,
Node *sibNode, Node *dstNode) {
void updateStateAfterSiblingFusion(AffineForOp sliceLoopNest, Node *sibNode,
Node *dstNode) {
// Update 'sibNode' and 'dstNode' input/output edges to reflect fusion.
mdg->updateEdges(sibNode->id, dstNode->id);

@ -67,7 +67,7 @@ struct LoopTiling : public FunctionPass<LoopTiling> {
: cacheSizeBytes(cacheSizeBytes), avoidMaxMinBounds(avoidMaxMinBounds) {}

void runOnFunction() override;
void getTileSizes(ArrayRef<OpPointer<AffineForOp>> band,
void getTileSizes(ArrayRef<AffineForOp> band,
SmallVectorImpl<unsigned> *tileSizes);

// Default tile size if nothing is provided.
@ -90,7 +90,7 @@ FunctionPassBase *mlir::createLoopTilingPass(uint64_t cacheSizeBytes) {

// Move the loop body of AffineForOp 'src' from 'src' into the specified
// location in destination's body.
static inline void moveLoopBody(AffineForOp *src, AffineForOp *dest,
static inline void moveLoopBody(AffineForOp src, AffineForOp dest,
Block::iterator loc) {
dest->getBody()->getInstructions().splice(loc,
src->getBody()->getInstructions());
@ -98,7 +98,7 @@ static inline void moveLoopBody(AffineForOp *src, AffineForOp *dest,

// Move the loop body of AffineForOp 'src' from 'src' to the start of dest's
// body.
static inline void moveLoopBody(AffineForOp *src, AffineForOp *dest) {
static inline void moveLoopBody(AffineForOp src, AffineForOp dest) {
moveLoopBody(src, dest, dest->getBody()->begin());
}

@ -107,10 +107,10 @@ static inline void moveLoopBody(AffineForOp *src, AffineForOp *dest) {
/// depend on other dimensions. Bounds of each dimension can thus be treated
/// independently, and deriving the new bounds is much simpler and faster
/// than for the case of tiling arbitrary polyhedral shapes.
static void constructTiledIndexSetHyperRect(
MutableArrayRef<OpPointer<AffineForOp>> origLoops,
MutableArrayRef<OpPointer<AffineForOp>> newLoops,
ArrayRef<unsigned> tileSizes) {
static void
constructTiledIndexSetHyperRect(MutableArrayRef<AffineForOp> origLoops,
MutableArrayRef<AffineForOp> newLoops,
ArrayRef<unsigned> tileSizes) {
assert(!origLoops.empty());
assert(origLoops.size() == tileSizes.size());

@ -174,7 +174,7 @@ static void constructTiledIndexSetHyperRect(
/// Tiles the specified band of perfectly nested loops creating tile-space loops
/// and intra-tile loops. A band is a contiguous set of loops.
// TODO(bondhugula): handle non hyper-rectangular spaces.
LogicalResult mlir::tileCodeGen(MutableArrayRef<OpPointer<AffineForOp>> band,
LogicalResult mlir::tileCodeGen(MutableArrayRef<AffineForOp> band,
ArrayRef<unsigned> tileSizes) {
assert(!band.empty());
assert(band.size() == tileSizes.size() && "Incorrect number of tile sizes");
@ -187,13 +187,13 @@ LogicalResult mlir::tileCodeGen(MutableArrayRef<OpPointer<AffineForOp>> band,

auto origLoops = band;

OpPointer<AffineForOp> rootAffineForOp = origLoops[0];
AffineForOp rootAffineForOp = origLoops[0];
auto loc = rootAffineForOp->getLoc();
// Note that width is at least one since band isn't empty.
unsigned width = band.size();

SmallVector<OpPointer<AffineForOp>, 12> newLoops(2 * width);
OpPointer<AffineForOp> innermostPointLoop;
SmallVector<AffineForOp, 12> newLoops(2 * width);
AffineForOp innermostPointLoop;

// The outermost among the loops as we add more..
auto *topLoop = rootAffineForOp->getInstruction();
@ -256,13 +256,12 @@ LogicalResult mlir::tileCodeGen(MutableArrayRef<OpPointer<AffineForOp>> band,
// Identify valid and profitable bands of loops to tile. This is currently just
// a temporary placeholder to test the mechanics of tiled code generation.
// Returns all maximal outermost perfect loop nests to tile.
static void
getTileableBands(Function *f,
std::vector<SmallVector<OpPointer<AffineForOp>, 6>> *bands) {
static void getTileableBands(Function *f,
std::vector<SmallVector<AffineForOp, 6>> *bands) {
// Get maximal perfect nest of 'for' insts starting from root (inclusive).
auto getMaximalPerfectLoopNest = [&](OpPointer<AffineForOp> root) {
SmallVector<OpPointer<AffineForOp>, 6> band;
OpPointer<AffineForOp> currInst = root;
auto getMaximalPerfectLoopNest = [&](AffineForOp root) {
SmallVector<AffineForOp, 6> band;
AffineForOp currInst = root;
do {
band.push_back(currInst);
} while (currInst->getBody()->getInstructions().size() == 1 &&
@ -278,7 +277,7 @@ getTileableBands(Function *f,

// Reduce each tile size to the largest divisor of the corresponding trip count
// (if the trip count is known).
static void adjustToDivisorsOfTripCounts(ArrayRef<OpPointer<AffineForOp>> band,
static void adjustToDivisorsOfTripCounts(ArrayRef<AffineForOp> band,
SmallVectorImpl<unsigned> *tileSizes) {
assert(band.size() == tileSizes->size() && "invalid tile size count");
for (unsigned i = 0, e = band.size(); i < e; i++) {
@ -302,7 +301,7 @@ static void adjustToDivisorsOfTripCounts(ArrayRef<OpPointer<AffineForOp>> band,
// along each of the dimensions being tiled.
// TODO(mlir-team): evolve this model. Tile size determination is a large area
// to play with in general.
void LoopTiling::getTileSizes(ArrayRef<OpPointer<AffineForOp>> band,
void LoopTiling::getTileSizes(ArrayRef<AffineForOp> band,
SmallVectorImpl<unsigned> *tileSizes) {
if (band.empty())
return;
@ -383,7 +382,7 @@ void LoopTiling::runOnFunction() {
cacheSizeBytes = clCacheSizeKiB * 1024;

// Bands of loops to tile.
std::vector<SmallVector<OpPointer<AffineForOp>, 6>> bands;
std::vector<SmallVector<AffineForOp, 6>> bands;
getTileableBands(getFunction(), &bands);

for (auto &band : bands) {

@ -69,19 +69,18 @@ struct LoopUnroll : public FunctionPass<LoopUnroll> {
const Optional<bool> unrollFull;
// Callback to obtain unroll factors; if this has a callable target, takes
// precedence over command-line argument or passed argument.
const std::function<unsigned(OpPointer<AffineForOp>)> getUnrollFactor;
const std::function<unsigned(AffineForOp)> getUnrollFactor;

explicit LoopUnroll(Optional<unsigned> unrollFactor = None,
Optional<bool> unrollFull = None,
const std::function<unsigned(OpPointer<AffineForOp>)>
&getUnrollFactor = nullptr)
explicit LoopUnroll(
Optional<unsigned> unrollFactor = None, Optional<bool> unrollFull = None,
const std::function<unsigned(AffineForOp)> &getUnrollFactor = nullptr)
: unrollFactor(unrollFactor), unrollFull(unrollFull),
getUnrollFactor(getUnrollFactor) {}

void runOnFunction() override;

/// Unroll this for inst. Returns failure if nothing was done.
LogicalResult runOnAffineForOp(OpPointer<AffineForOp> forOp);
LogicalResult runOnAffineForOp(AffineForOp forOp);

static const unsigned kDefaultUnrollFactor = 4;
};
@ -91,7 +90,7 @@ void LoopUnroll::runOnFunction() {
// Gathers all innermost loops through a post order pruned walk.
struct InnermostLoopGatherer {
// Store innermost loops as we walk.
std::vector<OpPointer<AffineForOp>> loops;
std::vector<AffineForOp> loops;

void walkPostOrder(Function *f) {
for (auto &b : *f)
@ -124,18 +123,16 @@ void LoopUnroll::runOnFunction() {
if (clUnrollFull.getNumOccurrences() > 0 &&
clUnrollFullThreshold.getNumOccurrences() > 0) {
// Store short loops as we walk.
std::vector<OpPointer<AffineForOp>> loops;
std::vector<AffineForOp> loops;

// Gathers all loops with trip count <= minTripCount. Do a post order walk
// so that loops are gathered from innermost to outermost (or else unrolling
// an outer one may delete gathered inner ones).
getFunction()->walkPostOrder<AffineForOp>(
[&](OpPointer<AffineForOp> forOp) {
Optional<uint64_t> tripCount = getConstantTripCount(forOp);
if (tripCount.hasValue() &&
tripCount.getValue() <= clUnrollFullThreshold)
loops.push_back(forOp);
});
getFunction()->walkPostOrder<AffineForOp>([&](AffineForOp forOp) {
Optional<uint64_t> tripCount = getConstantTripCount(forOp);
if (tripCount.hasValue() && tripCount.getValue() <= clUnrollFullThreshold)
loops.push_back(forOp);
});
for (auto forOp : loops)
loopUnrollFull(forOp);
return;
@ -163,7 +160,7 @@ void LoopUnroll::runOnFunction() {

/// Unrolls a 'for' inst. Returns success if the loop was unrolled, failure
/// otherwise. The default unroll factor is 4.
LogicalResult LoopUnroll::runOnAffineForOp(OpPointer<AffineForOp> forOp) {
LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) {
// Use the function callback if one was provided.
if (getUnrollFactor) {
return loopUnrollByFactor(forOp, getUnrollFactor(forOp));
@ -185,7 +182,7 @@ LogicalResult LoopUnroll::runOnAffineForOp(OpPointer<AffineForOp> forOp) {

FunctionPassBase *mlir::createLoopUnrollPass(
int unrollFactor, int unrollFull,
const std::function<unsigned(OpPointer<AffineForOp>)> &getUnrollFactor) {
const std::function<unsigned(AffineForOp)> &getUnrollFactor) {
return new LoopUnroll(
unrollFactor == -1 ? None : Optional<unsigned>(unrollFactor),
unrollFull == -1 ? None : Optional<bool>(unrollFull), getUnrollFactor);

@ -78,7 +78,7 @@ struct LoopUnrollAndJam : public FunctionPass<LoopUnrollAndJam> {
: unrollJamFactor(unrollJamFactor) {}

void runOnFunction() override;
LogicalResult runOnAffineForOp(OpPointer<AffineForOp> forOp);
LogicalResult runOnAffineForOp(AffineForOp forOp);
};
} // end anonymous namespace

@ -98,7 +98,7 @@ void LoopUnrollAndJam::runOnFunction() {

/// Unroll and jam a 'for' inst. Default unroll jam factor is
/// kDefaultUnrollJamFactor. Return failure if nothing was done.
LogicalResult LoopUnrollAndJam::runOnAffineForOp(OpPointer<AffineForOp> forOp) {
LogicalResult LoopUnrollAndJam::runOnAffineForOp(AffineForOp forOp) {
// Unroll and jam by the factor that was passed if any.
if (unrollJamFactor.hasValue())
return loopUnrollJamByFactor(forOp, unrollJamFactor.getValue());
@ -110,7 +110,7 @@ LogicalResult LoopUnrollAndJam::runOnAffineForOp(OpPointer<AffineForOp> forOp) {
return loopUnrollJamByFactor(forOp, kDefaultUnrollJamFactor);
}

LogicalResult mlir::loopUnrollJamUpToFactor(OpPointer<AffineForOp> forOp,
LogicalResult mlir::loopUnrollJamUpToFactor(AffineForOp forOp,
uint64_t unrollJamFactor) {
Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);

@ -121,7 +121,7 @@ LogicalResult mlir::loopUnrollJamUpToFactor(OpPointer<AffineForOp> forOp,
}

/// Unrolls and jams this loop by the specified factor.
LogicalResult mlir::loopUnrollJamByFactor(OpPointer<AffineForOp> forOp,
LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
uint64_t unrollJamFactor) {
// Gathers all maximal sub-blocks of instructions that do not themselves
// include a for inst (a instruction could have a descendant for inst though

@ -244,9 +244,9 @@ namespace {
struct LowerAffinePass : public FunctionPass<LowerAffinePass> {
void runOnFunction() override;

bool lowerAffineFor(OpPointer<AffineForOp> forOp);
bool lowerAffineIf(AffineIfOp *ifOp);
bool lowerAffineApply(AffineApplyOp *op);
bool lowerAffineFor(AffineForOp forOp);
bool lowerAffineIf(AffineIfOp ifOp);
bool lowerAffineApply(AffineApplyOp op);
};
} // end anonymous namespace

@ -319,7 +319,7 @@ static Value *buildMinMaxReductionSeq(Location loc, CmpIPredicate predicate,
// | <code after the AffineForOp> |
// +--------------------------------+
//
bool LowerAffinePass::lowerAffineFor(OpPointer<AffineForOp> forOp) {
bool LowerAffinePass::lowerAffineFor(AffineForOp forOp) {
auto loc = forOp->getLoc();
auto *forInst = forOp->getInstruction();

@ -452,7 +452,7 @@ bool LowerAffinePass::lowerAffineFor(OpPointer<AffineForOp> forOp) {
// | <code after the AffineIfOp> |
// +--------------------------------+
//
bool LowerAffinePass::lowerAffineIf(AffineIfOp *ifOp) {
bool LowerAffinePass::lowerAffineIf(AffineIfOp ifOp) {
auto *ifInst = ifOp->getInstruction();
auto loc = ifInst->getLoc();

@ -568,7 +568,7 @@ bool LowerAffinePass::lowerAffineIf(AffineIfOp *ifOp) {

// Convert an "affine.apply" operation into a sequence of arithmetic
// instructions using the StandardOps dialect. Return true on error.
bool LowerAffinePass::lowerAffineApply(AffineApplyOp *op) {
bool LowerAffinePass::lowerAffineApply(AffineApplyOp op) {
FuncBuilder builder(op->getInstruction());
auto maybeExpandedMap =
expandAffineMap(&builder, op->getLoc(), op->getAffineMap(),

@ -102,7 +102,7 @@ namespace {
/// a VectorTransferWriteOp is rewritten.
template <typename VectorTransferOpTy> class VectorTransferRewriter {
public:
VectorTransferRewriter(VectorTransferOpTy *transfer,
VectorTransferRewriter(VectorTransferOpTy transfer,
MLFuncLoweringRewriter *rewriter,
MLFuncGlobalLoweringState *state);

@ -121,7 +121,7 @@ public:
void rewrite();

private:
VectorTransferOpTy *transfer;
VectorTransferOpTy transfer;
MLFuncLoweringRewriter *rewriter;
MLFuncGlobalLoweringState *state;
};
@ -132,7 +132,7 @@ private:
/// `pivs` and `vectorView` are swapped so that the invocation of
/// LoopNestBuilder captures it in the innermost loop.
template <typename VectorTransferOpTy>
void coalesceCopy(VectorTransferOpTy *transfer,
void coalesceCopy(VectorTransferOpTy transfer,
SmallVectorImpl<edsc::ValueHandle *> *pivs,
edsc::VectorView *vectorView) {
// rank of the remote memory access, coalescing behavior occurs on the
@ -166,7 +166,7 @@ void coalesceCopy(VectorTransferOpTy *transfer,
/// MemRef.
template <typename VectorTransferOpTy>
static llvm::SmallVector<edsc::ValueHandle, 8>
clip(VectorTransferOpTy *transfer, edsc::MemRefView &view,
clip(VectorTransferOpTy transfer, edsc::MemRefView &view,
ArrayRef<edsc::IndexHandle> ivs) {
using namespace mlir::edsc;
using namespace edsc::op;
@ -216,7 +216,7 @@ clip(VectorTransferOpTy *transfer, edsc::MemRefView &view,

template <typename VectorTransferOpTy>
VectorTransferRewriter<VectorTransferOpTy>::VectorTransferRewriter(
VectorTransferOpTy *transfer, MLFuncLoweringRewriter *rewriter,
VectorTransferOpTy transfer, MLFuncLoweringRewriter *rewriter,
MLFuncGlobalLoweringState *state)
: transfer(transfer), rewriter(rewriter), state(state){};

@ -368,7 +368,7 @@ public:
std::unique_ptr<PatternState> opState,
MLFuncLoweringRewriter *rewriter) const override {
VectorTransferRewriter<VectorTransferOpTy>(
&*op->dyn_cast<VectorTransferOpTy>(), rewriter, funcWiseState)
op->dyn_cast<VectorTransferOpTy>(), rewriter, funcWiseState)
.rewrite();
}
};

@ -441,7 +441,7 @@ static Instruction *instantiate(FuncBuilder *b, Instruction *opInst,
/// In particular, if a dimension is fully instantiated (i.e. unrolled) then it
/// is projected out in the final result.
template <typename VectorTransferOpTy>
static AffineMap projectedPermutationMap(VectorTransferOpTy *transfer,
static AffineMap projectedPermutationMap(VectorTransferOpTy transfer,
VectorType hwVectorType) {
static_assert(
std::is_same<VectorTransferOpTy, VectorTransferReadOp>::value ||
@ -481,7 +481,7 @@ static AffineMap projectedPermutationMap(VectorTransferOpTy *transfer,
/// `hwVectorType` int the covering of the super-vector type. For a more
/// detailed description of the problem, see the description of
/// reindexAffineIndices.
static Instruction *instantiate(FuncBuilder *b, VectorTransferReadOp *read,
static Instruction *instantiate(FuncBuilder *b, VectorTransferReadOp read,
VectorType hwVectorType,
ArrayRef<unsigned> hwVectorInstance,
DenseMap<Value *, Value *> *substitutionsMap) {
@ -505,7 +505,7 @@ static Instruction *instantiate(FuncBuilder *b, VectorTransferReadOp *read,
/// `hwVectorType` int the covering of th3e super-vector type. For a more
/// detailed description of the problem, see the description of
/// reindexAffineIndices.
static Instruction *instantiate(FuncBuilder *b, VectorTransferWriteOp *write,
static Instruction *instantiate(FuncBuilder *b, VectorTransferWriteOp write,
VectorType hwVectorType,
ArrayRef<unsigned> hwVectorInstance,
DenseMap<Value *, Value *> *substitutionsMap) {

@ -72,7 +72,7 @@ namespace {
struct MemRefDataFlowOpt : public FunctionPass<MemRefDataFlowOpt> {
void runOnFunction() override;

void forwardStoreToLoad(OpPointer<LoadOp> loadOp);
void forwardStoreToLoad(LoadOp loadOp);

// A list of memref's that are potentially dead / could be eliminated.
SmallPtrSet<Value *, 4> memrefsToErase;
@ -93,7 +93,7 @@ FunctionPassBase *mlir::createMemRefDataFlowOptPass() {

// This is a straightforward implementation not optimized for speed. Optimize
// this in the future if needed.
void MemRefDataFlowOpt::forwardStoreToLoad(OpPointer<LoadOp> loadOp) {
void MemRefDataFlowOpt::forwardStoreToLoad(LoadOp loadOp) {
Instruction *lastWriteStoreOp = nullptr;
Instruction *loadOpInst = loadOp->getInstruction();

@ -224,8 +224,7 @@ void MemRefDataFlowOpt::runOnFunction() {
memrefsToErase.clear();

// Walk all load's and perform load/store forwarding.
f->walk<LoadOp>(
[&](OpPointer<LoadOp> loadOp) { forwardStoreToLoad(loadOp); });
f->walk<LoadOp>([&](LoadOp loadOp) { forwardStoreToLoad(loadOp); });

// Erase all load op's whose results were replaced with store fwd'ed ones.
for (auto *loadOp : loadOpsToErase) {

@ -40,9 +40,9 @@ namespace {

struct PipelineDataTransfer : public FunctionPass<PipelineDataTransfer> {
void runOnFunction() override;
void runOnAffineForOp(OpPointer<AffineForOp> forOp);
void runOnAffineForOp(AffineForOp forOp);

std::vector<OpPointer<AffineForOp>> forOps;
std::vector<AffineForOp> forOps;
};

} // end anonymous namespace
@ -71,7 +71,7 @@ static unsigned getTagMemRefPos(Instruction &dmaInst) {
/// of the old memref by the new one while indexing the newly added dimension by
/// the loop IV of the specified 'for' instruction modulo 2. Returns false if
/// such a replacement cannot be performed.
static bool doubleBuffer(Value *oldMemRef, OpPointer<AffineForOp> forOp) {
static bool doubleBuffer(Value *oldMemRef, AffineForOp forOp) {
auto *forBody = forOp->getBody();
FuncBuilder bInner(forBody, forBody->begin());
bInner.setInsertionPoint(forBody, forBody->begin());
@ -145,14 +145,13 @@ void PipelineDataTransfer::runOnFunction() {
// epilogue).
forOps.clear();
getFunction()->walkPostOrder<AffineForOp>(
[&](OpPointer<AffineForOp> forOp) { forOps.push_back(forOp); });
[&](AffineForOp forOp) { forOps.push_back(forOp); });
for (auto forOp : forOps)
runOnAffineForOp(forOp);
}

// Check if tags of the dma start op and dma wait op match.
static bool checkTagMatch(OpPointer<DmaStartOp> startOp,
OpPointer<DmaWaitOp> waitOp) {
static bool checkTagMatch(DmaStartOp startOp, DmaWaitOp waitOp) {
if (startOp->getTagMemRef() != waitOp->getTagMemRef())
return false;
auto startIndices = startOp->getTagIndices();
@ -176,15 +175,14 @@ static bool checkTagMatch(OpPointer<DmaStartOp> startOp,

// Identify matching DMA start/finish instructions to overlap computation with.
static void findMatchingStartFinishInsts(
OpPointer<AffineForOp> forOp,
AffineForOp forOp,
SmallVectorImpl<std::pair<Instruction *, Instruction *>> &startWaitPairs) {

// Collect outgoing DMA instructions - needed to check for dependences below.
SmallVector<OpPointer<DmaStartOp>, 4> outgoingDmaOps;
SmallVector<DmaStartOp, 4> outgoingDmaOps;
for (auto &inst : *forOp->getBody()) {
OpPointer<DmaStartOp> dmaStartOp;
if ((dmaStartOp = inst.dyn_cast<DmaStartOp>()) &&
dmaStartOp->isSrcMemorySpaceFaster())
auto dmaStartOp = inst.dyn_cast<DmaStartOp>();
if (dmaStartOp && dmaStartOp->isSrcMemorySpaceFaster())
outgoingDmaOps.push_back(dmaStartOp);
}

@ -195,9 +193,10 @@ static void findMatchingStartFinishInsts(
dmaFinishInsts.push_back(&inst);
continue;
}
OpPointer<DmaStartOp> dmaStartOp;
if (!(dmaStartOp = inst.dyn_cast<DmaStartOp>()))
auto dmaStartOp = inst.dyn_cast<DmaStartOp>();
if (!dmaStartOp)
continue;

// Only DMAs incoming into higher memory spaces are pipelined for now.
// TODO(bondhugula): handle outgoing DMA pipelining.
if (!dmaStartOp->isDestMemorySpaceFaster())
@ -247,7 +246,7 @@ static void findMatchingStartFinishInsts(
/// Overlap DMA transfers with computation in this loop. If successful,
/// 'forOp' is deleted, and a prologue, a new pipelined loop, and epilogue are
/// inserted right before where it was.
void PipelineDataTransfer::runOnAffineForOp(OpPointer<AffineForOp> forOp) {
void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
auto mayBeConstTripCount = getConstantTripCount(forOp);
if (!mayBeConstTripCount.hasValue()) {
LLVM_DEBUG(
@ -329,7 +328,7 @@ void PipelineDataTransfer::runOnAffineForOp(OpPointer<AffineForOp> forOp) {
assert(dmaStartInst->isa<DmaStartOp>());
instShiftMap[dmaStartInst] = 0;
// Set shifts for DMA start inst's affine operand computation slices to 0.
SmallVector<OpPointer<AffineApplyOp>, 4> sliceOps;
SmallVector<AffineApplyOp, 4> sliceOps;
mlir::createAffineComputationSlice(dmaStartInst, &sliceOps);
if (!sliceOps.empty()) {
for (auto sliceOp : sliceOps) {

@ -43,8 +43,8 @@ using namespace mlir;
|
|||
/// part of the unrolled loop. Computes the bound as an AffineMap with its
|
||||
/// operands or a null map when the trip count can't be expressed as an affine
|
||||
/// expression.
|
||||
void mlir::getCleanupLoopLowerBound(OpPointer<AffineForOp> forOp,
|
||||
unsigned unrollFactor, AffineMap *map,
|
||||
void mlir::getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
|
||||
AffineMap *map,
|
||||
SmallVectorImpl<Value *> *operands,
|
||||
FuncBuilder *b) {
|
||||
auto lbMap = forOp->getLowerBoundMap();
|
||||
|
@ -67,11 +67,8 @@ void mlir::getCleanupLoopLowerBound(OpPointer<AffineForOp> forOp,
|
|||
|
||||
unsigned step = forOp->getStep();
|
||||
|
||||
// We need to get non-const operands; we aren't changing them here.
|
||||
auto ncForOp = *reinterpret_cast<OpPointer<AffineForOp> *>(&forOp);
|
||||
|
||||
SmallVector<Value *, 4> lbOperands(ncForOp->getLowerBoundOperands());
|
||||
auto lb = b->create<AffineApplyOp>(ncForOp->getLoc(), lbMap, lbOperands);
|
||||
SmallVector<Value *, 4> lbOperands(forOp->getLowerBoundOperands());
|
||||
auto lb = b->create<AffineApplyOp>(forOp->getLoc(), lbMap, lbOperands);
|
||||
|
||||
// For each upper bound expr, get the range.
|
||||
// Eg: for %i = lb to min (ub1, ub2),
|
||||
|
@ -115,7 +112,7 @@ void mlir::getCleanupLoopLowerBound(OpPointer<AffineForOp> forOp,
|
|||
/// Promotes the loop body of a forOp to its containing block if the forOp
|
||||
/// was known to have a single iteration.
|
||||
// TODO(bondhugula): extend this for arbitrary affine bounds.
|
||||
LogicalResult mlir::promoteIfSingleIteration(OpPointer<AffineForOp> forOp) {
|
||||
LogicalResult mlir::promoteIfSingleIteration(AffineForOp forOp) {
|
||||
Optional<uint64_t> tripCount = getConstantTripCount(forOp);
|
||||
if (!tripCount.hasValue() || tripCount.getValue() != 1)
|
||||
return failure();
|
||||
|
@ -161,7 +158,7 @@ LogicalResult mlir::promoteIfSingleIteration(OpPointer<AffineForOp> forOp) {
|
|||
void mlir::promoteSingleIterationLoops(Function *f) {
|
||||
// Gathers all innermost loops through a post order pruned walk.
|
||||
f->walkPostOrder<AffineForOp>(
|
||||
[](OpPointer<AffineForOp> forOp) { promoteIfSingleIteration(forOp); });
|
||||
[](AffineForOp forOp) { promoteIfSingleIteration(forOp); });
|
||||
}
|
||||
|
||||
/// Generates a 'for' inst with the specified lower and upper bounds while
|
||||
|
@ -171,12 +168,11 @@ void mlir::promoteSingleIterationLoops(Function *f) {
|
|||
/// the pair specifies the shift applied to that group of instructions; note
|
||||
/// that the shift is multiplied by the loop step before being applied. Returns
|
||||
/// nullptr if the generated loop simplifies to a single iteration one.
|
||||
static OpPointer<AffineForOp>
|
||||
static AffineForOp
|
||||
generateLoop(AffineMap lbMap, AffineMap ubMap,
|
||||
const std::vector<std::pair<uint64_t, ArrayRef<Instruction *>>>
|
||||
&instGroupQueue,
|
||||
unsigned offset, OpPointer<AffineForOp> srcForInst,
|
||||
FuncBuilder *b) {
|
||||
unsigned offset, AffineForOp srcForInst, FuncBuilder *b) {
|
||||
SmallVector<Value *, 4> lbOperands(srcForInst->getLowerBoundOperands());
|
||||
SmallVector<Value *, 4> ubOperands(srcForInst->getUpperBoundOperands());
|
||||
|
||||
|
@ -216,7 +212,7 @@ generateLoop(AffineMap lbMap, AffineMap ubMap,
|
|||
}
|
||||
}
|
||||
if (succeeded(promoteIfSingleIteration(loopChunk)))
|
||||
return OpPointer<AffineForOp>();
|
||||
return AffineForOp();
|
||||
return loopChunk;
|
||||
}
|
||||
|
||||
|
@ -235,8 +231,7 @@ generateLoop(AffineMap lbMap, AffineMap ubMap,
|
|||
// asserts preservation of SSA dominance. A check for that as well as that for
|
||||
// memory-based depedence preservation check rests with the users of this
|
||||
// method.
|
||||
LogicalResult mlir::instBodySkew(OpPointer<AffineForOp> forOp,
|
||||
ArrayRef<uint64_t> shifts,
|
||||
LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
|
||||
bool unrollPrologueEpilogue) {
|
||||
if (forOp->getBody()->empty())
|
||||
return success();
|
||||
|
@ -285,8 +280,8 @@ LogicalResult mlir::instBodySkew(OpPointer<AffineForOp> forOp,
|
|||
// Nevertheless, if 'unrollPrologueEpilogue' is set, we will treat the first
|
||||
// loop generated as the prologue and the last as epilogue and unroll these
|
||||
// fully.
|
||||
OpPointer<AffineForOp> prologue;
|
||||
OpPointer<AffineForOp> epilogue;
|
||||
AffineForOp prologue;
|
||||
AffineForOp epilogue;
|
||||
|
||||
// Do a sweep over the sorted shifts while storing open groups in a
|
||||
// vector, and generating loop portions as necessary during the sweep. A block
|
||||
|
@ -306,7 +301,7 @@ LogicalResult mlir::instBodySkew(OpPointer<AffineForOp> forOp,
|
|||
// The interval for which the loop needs to be generated here is:
|
||||
// [lbShift, min(lbShift + tripCount, d)) and the body of the
|
||||
// loop needs to have all instructions in instQueue in that order.
|
||||
OpPointer<AffineForOp> res;
|
||||
AffineForOp res;
|
||||
if (lbShift + tripCount * step < d * step) {
|
||||
res = generateLoop(
|
||||
b.getShiftedAffineMap(origLbMap, lbShift),
|
||||
|
@ -357,7 +352,7 @@ LogicalResult mlir::instBodySkew(OpPointer<AffineForOp> forOp,
|
|||
}
|
||||
|
||||
/// Unrolls this loop completely.
|
||||
LogicalResult mlir::loopUnrollFull(OpPointer<AffineForOp> forOp) {
|
||||
LogicalResult mlir::loopUnrollFull(AffineForOp forOp) {
|
||||
Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
|
||||
if (mayBeConstantTripCount.hasValue()) {
|
||||
uint64_t tripCount = mayBeConstantTripCount.getValue();
|
||||
|
@ -371,7 +366,7 @@ LogicalResult mlir::loopUnrollFull(OpPointer<AffineForOp> forOp) {
|
|||
|
||||
/// Unrolls and jams this loop by the specified factor or by the trip count (if
|
||||
/// constant) whichever is lower.
|
||||
LogicalResult mlir::loopUnrollUpToFactor(OpPointer<AffineForOp> forOp,
|
||||
LogicalResult mlir::loopUnrollUpToFactor(AffineForOp forOp,
|
||||
uint64_t unrollFactor) {
|
||||
Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
|
||||
|
||||
|
@ -383,7 +378,7 @@ LogicalResult mlir::loopUnrollUpToFactor(OpPointer<AffineForOp> forOp,
|
|||
|
||||
/// Unrolls this loop by the specified factor. Returns success if the loop
|
||||
/// is successfully unrolled.
|
||||
LogicalResult mlir::loopUnrollByFactor(OpPointer<AffineForOp> forOp,
|
||||
LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
|
||||
uint64_t unrollFactor) {
|
||||
assert(unrollFactor >= 1 && "unroll factor should be >= 1");
|
||||
|
||||
|
@ -471,8 +466,7 @@ LogicalResult mlir::loopUnrollByFactor(OpPointer<AffineForOp> forOp,
|
|||
|
||||
/// Performs loop interchange on 'forOpA' and 'forOpB', where 'forOpB' is
|
||||
/// nested within 'forOpA' as the only instruction in its block.
|
||||
void mlir::interchangeLoops(OpPointer<AffineForOp> forOpA,
|
||||
OpPointer<AffineForOp> forOpB) {
|
||||
void mlir::interchangeLoops(AffineForOp forOpA, AffineForOp forOpB) {
|
||||
auto *forOpAInst = forOpA->getInstruction();
|
||||
// 1) Slice forOpA's instruction list (which is just forOpB) just before
|
||||
// forOpA (in forOpA's parent's block) this should leave 'forOpA's
|
||||
|
@ -492,11 +486,10 @@ void mlir::interchangeLoops(OpPointer<AffineForOp> forOpA,
|
|||
|
||||
/// Performs a series of loop interchanges to sink 'forOp' 'loopDepth' levels
|
||||
/// deeper in the loop nest.
|
||||
void mlir::sinkLoop(OpPointer<AffineForOp> forOp, unsigned loopDepth) {
|
||||
void mlir::sinkLoop(AffineForOp forOp, unsigned loopDepth) {
|
||||
for (unsigned i = 0; i < loopDepth; ++i) {
|
||||
assert(forOp->getBody()->front().isa<AffineForOp>());
|
||||
OpPointer<AffineForOp> nextForOp =
|
||||
forOp->getBody()->front().cast<AffineForOp>();
|
||||
AffineForOp nextForOp = forOp->getBody()->front().cast<AffineForOp>();
|
||||
interchangeLoops(forOp, nextForOp);
|
||||
}
|
||||
}
|
||||
|
@ -525,8 +518,8 @@ static void augmentMapAndBounds(FuncBuilder *b, Value *iv, AffineMap *map,
// substituting `oldIv` in place of
// `forOp.getInductionVariable()`.
// Note: `newForOp` may be nested under `forOp`.
static void cloneLoopBodyInto(OpPointer<AffineForOp> forOp, Value *oldIv,
OpPointer<AffineForOp> newForOp) {
static void cloneLoopBodyInto(AffineForOp forOp, Value *oldIv,
AffineForOp newForOp) {
BlockAndValueMapping map;
map.map(oldIv, newForOp->getInductionVar());
FuncBuilder b(newForOp->getBody(), newForOp->getBody()->end());

@ -554,9 +547,9 @@ static void cloneLoopBodyInto(OpPointer<AffineForOp> forOp, Value *oldIv,
// responsibility to specify `targets` that are dominated by `forOp`.
// Returns the new AffineForOps, one per `targets`, nested immediately under
// each of the `targets`.
static SmallVector<OpPointer<AffineForOp>, 8>
stripmineSink(OpPointer<AffineForOp> forOp, uint64_t factor,
ArrayRef<OpPointer<AffineForOp>> targets) {
static SmallVector<AffineForOp, 8>
stripmineSink(AffineForOp forOp, uint64_t factor,
ArrayRef<AffineForOp> targets) {
// TODO(ntv): Use cheap structural assertions that targets are nested under
// forOp and that targets are not nested under each other when DominanceInfo
// exposes the capability. It seems overkill to construct a whole function

@ -579,7 +572,7 @@ stripmineSink(OpPointer<AffineForOp> forOp, uint64_t factor,
augmentMapAndBounds(&b, forOp->getInductionVar(), &ubMap, &ubOperands,
/*offset=*/scaledStep);
SmallVector<OpPointer<AffineForOp>, 8> innerLoops;
SmallVector<AffineForOp, 8> innerLoops;
for (auto t : targets) {
// Insert newForOp at the end of `t`.
FuncBuilder b(t->getBody(), t->getBody()->end());

@ -601,21 +594,18 @@ stripmineSink(OpPointer<AffineForOp> forOp, uint64_t factor,

// Stripmines a `forOp` by `factor` and sinks it under a single `target`.
// Returns the new AffineForOps, nested immediately under `target`.
OpPointer<AffineForOp> stripmineSink(OpPointer<AffineForOp> forOp,
uint64_t factor,
OpPointer<AffineForOp> target) {
auto res =
stripmineSink(forOp, factor, ArrayRef<OpPointer<AffineForOp>>{target});
AffineForOp stripmineSink(AffineForOp forOp, uint64_t factor,
AffineForOp target) {
auto res = stripmineSink(forOp, factor, ArrayRef<AffineForOp>{target});
assert(res.size() == 1 && "Expected 1 inner forOp");
return res[0];
}

SmallVector<SmallVector<OpPointer<AffineForOp>, 8>, 8>
mlir::tile(ArrayRef<OpPointer<AffineForOp>> forOps, ArrayRef<uint64_t> sizes,
ArrayRef<OpPointer<AffineForOp>> targets) {
SmallVector<SmallVector<OpPointer<AffineForOp>, 8>, 8> res;
SmallVector<OpPointer<AffineForOp>, 8> currentTargets(targets.begin(),
targets.end());
SmallVector<SmallVector<AffineForOp, 8>, 8>
mlir::tile(ArrayRef<AffineForOp> forOps, ArrayRef<uint64_t> sizes,
ArrayRef<AffineForOp> targets) {
SmallVector<SmallVector<AffineForOp, 8>, 8> res;
SmallVector<AffineForOp, 8> currentTargets(targets.begin(), targets.end());
for (auto it : llvm::zip(forOps, sizes)) {
auto step = stripmineSink(std::get<0>(it), std::get<1>(it), currentTargets);
res.push_back(step);

@ -624,8 +614,8 @@ mlir::tile(ArrayRef<OpPointer<AffineForOp>> forOps, ArrayRef<uint64_t> sizes,
return res;
}

SmallVector<OpPointer<AffineForOp>, 8>
mlir::tile(ArrayRef<OpPointer<AffineForOp>> forOps, ArrayRef<uint64_t> sizes,
OpPointer<AffineForOp> target) {
return tile(forOps, sizes, ArrayRef<OpPointer<AffineForOp>>{target})[0];
SmallVector<AffineForOp, 8> mlir::tile(ArrayRef<AffineForOp> forOps,
ArrayRef<uint64_t> sizes,
AffineForOp target) {
return tile(forOps, sizes, ArrayRef<AffineForOp>{target})[0];
}
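A hedged usage sketch (not part of this diff) of the value-based tiling overloads; the loop names, tile sizes, and include paths are invented for the example.

#include "mlir/AffineOps/AffineOps.h"   // assumed header locations
#include "mlir/Transforms/LoopUtils.h"
using namespace mlir;

// Hypothetical call: stripmine 'outer' and 'inner' by 32 each, sinking the
// resulting intra-tile loops under 'inner', the innermost loop of the band.
static void tileBandBy32(AffineForOp outer, AffineForOp inner) {
  SmallVector<AffineForOp, 8> intraTileLoops =
      tile({outer, inner}, {32, 32}, /*target=*/inner);
  (void)intraTileLoops;
}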
@ -221,7 +221,7 @@ bool mlir::replaceAllMemRefUsesWith(Value *oldMemRef, Value *newMemRef,
/// uses besides this opInst; otherwise returns the list of affine.apply
/// operations created in output argument `sliceOps`.
void mlir::createAffineComputationSlice(
Instruction *opInst, SmallVectorImpl<OpPointer<AffineApplyOp>> *sliceOps) {
Instruction *opInst, SmallVectorImpl<AffineApplyOp> *sliceOps) {
// Collect all operands that are results of affine apply ops.
SmallVector<Value *, 4> subOperands;
subOperands.reserve(opInst->getNumOperands());
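For illustration, a hedged sketch of how the new output-argument type reads at a call site: the vector now holds AffineApplyOp values directly rather than OpPointer wrappers. The helper name and header paths are assumptions for the example, not part of this diff.

#include "mlir/AffineOps/AffineOps.h"   // assumed header for AffineApplyOp
#include "mlir/Transforms/Utils.h"      // assumed header for createAffineComputationSlice
#include "llvm/Support/raw_ostream.h"
using namespace mlir;

// Hypothetical helper: isolate the affine.apply computation feeding 'opInst'
// and report how many affine.apply ops were created.
static void sliceAffineOperands(Instruction *opInst) {
  SmallVector<AffineApplyOp, 4> sliceOps;
  createAffineComputationSlice(opInst, &sliceOps);
  llvm::errs() << "created " << sliceOps.size() << " affine.apply ops\n";
}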
@ -853,7 +853,7 @@ static LogicalResult vectorizeRootOrTerminal(Value *iv,

/// Coarsens the loops bounds and transforms all remaining load and store
/// operations into the appropriate vector_transfer.
static LogicalResult vectorizeAffineForOp(AffineForOp *loop, int64_t step,
static LogicalResult vectorizeAffineForOp(AffineForOp loop, int64_t step,
VectorizationState *state) {
using namespace functional;
loop->setStep(step);

@ -936,7 +936,7 @@ vectorizeLoopsAndLoadsRecursively(NestedMatch oneMatch,
LLVM_DEBUG(dbgs() << "\n[early-vect] vectorizeForOp by " << vectorSize
<< " : ");
LLVM_DEBUG(loopInst->print(dbgs()));
return vectorizeAffineForOp(loop, loop->getStep() * vectorSize, state);
return vectorizeAffineForOp(loop, loop.getStep() * vectorSize, state);
}

/// Tries to transform a scalar constant into a vector splat of that constant.

@ -1012,7 +1012,7 @@ static Value *vectorizeOperand(Value *operand, Instruction *inst,
// 3. vectorize constant.
if (auto constant = operand->getDefiningInst()->dyn_cast<ConstantOp>()) {
return vectorizeConstant(
inst, *constant,
inst, constant,
VectorType::get(state->strategy->vectorSizes, operand->getType()));
}
// 4. currently non-vectorizable.

@ -1178,8 +1178,8 @@ static LogicalResult vectorizeRootMatch(NestedMatch m,
clonedLoop->erase();
return mlir::success();
}
OpPointer<AffineForOp> loop;
OpPointer<AffineForOp> clonedLoop;
AffineForOp loop;
AffineForOp clonedLoop;
} guard{loop, clonedLoop};

//////////////////////////////////////////////////////////////////////////////
@ -30,6 +30,7 @@ def NS_AOp : Op<"a_op", [NoSideEffect]> {

// CHECK: class AOp : public Op<AOp, OpTrait::AtLeastNResults<1>::Impl, OpTrait::HasNoSideEffect, OpTrait::AtLeastNOperands<1>::Impl> {
// CHECK: public:
// CHECK: using Op::Op;
// CHECK: static StringRef getOperationName();
// CHECK: Value *a();
// CHECK: Instruction::operand_range b();

@ -45,7 +46,4 @@ def NS_AOp : Op<"a_op", [NoSideEffect]> {
// CHECK: static void getCanonicalizationPatterns(OwningRewritePatternList &results, MLIRContext *context);
// CHECK: LogicalResult constantFold(ArrayRef<Attribute> operands, SmallVectorImpl<Attribute> &results, MLIRContext *context);
// CHECK: bool fold(SmallVectorImpl<Value *> &results);
// CHECK: private:
// CHECK: friend class ::mlir::Instruction;
// CHECK: explicit AOp(Instruction *state) : Op(state) {}
// CHECK: };

@ -315,14 +315,12 @@ void OpClass::writeDeclTo(raw_ostream &os) const {
for (const auto &trait : traits)
os << ", " << trait;
os << "> {\npublic:\n";
os << " using Op::Op;\n";
for (const auto &method : methods) {
method.writeDeclTo(os);
os << "\n";
}
os << "\nprivate:\n"
<< " friend class ::mlir::Instruction;\n";
os << " explicit " << className << "(Instruction *state) : Op(state) {}\n"
<< "};";
os << "};";
}
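Purely as an illustration of the net effect of this change on generated code, and following the CHECK lines above, a rough sketch of the declaration shape OpDefinitionsGen now emits; the op name 'MyOp', its trait, and the header path are invented for the example.

#include "mlir/IR/OpDefinition.h"   // assumed header providing Op and OpTrait
using namespace mlir;

// Illustrative generated-code shape only, not verbatim generator output.
class MyOp : public Op<MyOp, OpTrait::OneResult> {
public:
  using Op::Op;                         // inherited constructors, now public
  static StringRef getOperationName();
  // ... other generated methods ...
};
// No private 'friend class ::mlir::Instruction;' declaration or
// 'explicit MyOp(Instruction *state)' constructor is emitted anymore.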
void OpClass::writeDefTo(raw_ostream &os) const {