forked from OSchip/llvm-project
NFC: Rename the 'for' operation in the AffineOps dialect to 'affine.for'. This is the second step toward adding a namespace to the AffineOps dialect.
PiperOrigin-RevId: 232717775
This commit is contained in:
parent
905d84851d
commit
90d10b4e00
@@ -15,7 +15,7 @@ loops and if instructions), the result of a
 [`affine.apply` operation](#'affine.apply'-operation) that recursively takes as
 arguments any symbolic identifiers. Dimensions may be bound not only to anything
 that a symbol is bound to, but also to induction variables of enclosing
-[for instructions](#'for'-operation), and the result of an
+['affine.for' operations](#'affine.for'-operation), and the result of an
 [`affine.apply` operation](#'affine.apply'-operation) (which recursively may use
 other dimensions and symbols).

@@ -47,12 +47,12 @@ Example:
 %2 = affine.apply (i)[s0] -> (i+s0) (%42)[%n]
 ```

-#### 'for' operation {#'for'-operation}
+#### 'affine.for' operation {#'affine.for'-operation}

 Syntax:

 ``` {.ebnf}
-operation ::= `for` ssa-id `=` lower-bound `to` upper-bound
+operation ::= `affine.for` ssa-id `=` lower-bound `to` upper-bound
               (`step` integer-literal)? `{` inst* `}`

 lower-bound ::= `max`? affine-map dim-and-symbol-use-list | shorthand-bound
@@ -60,17 +60,17 @@ upper-bound ::= `min`? affine-map dim-and-symbol-use-list | shorthand-bound
 shorthand-bound ::= ssa-id | `-`? integer-literal
 ```

-The `for` operation represents an affine loop nest, defining an SSA value for
-its induction variable. This SSA value always has type
+The `affine.for` operation represents an affine loop nest, defining an SSA value
+for its induction variable. This SSA value always has type
 [`index`](LangRef.md#index-type), which is the size of the machine word.

-The `for` operation executes its body a number of times iterating from a lower
-bound to an upper bound by a stride. The stride, represented by `step`, is a
-positive constant integer which defaults to "1" if not present. The lower and
+The `affine.for` operation executes its body a number of times iterating from a
+lower bound to an upper bound by a stride. The stride, represented by `step`, is
+a positive constant integer which defaults to "1" if not present. The lower and
 upper bounds specify a half-open range: the range includes the lower bound but
 does not include the upper bound.

-The lower and upper bounds of a `for` operation are represented as an
+The lower and upper bounds of an `affine.for` operation are represented as an
 application of an affine mapping to a list of SSA values passed to the map. The
 [same restrictions](#restrictions-on-dimensions-and-symbols) hold for these SSA
 values as for all bindings of SSA values to dimensions and symbols.
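Putting the grammar above together, a minimal sketch of the renamed syntax (illustrative only; the bounds and names below are invented, not taken from this commit):

```mlir
// Shorthand constant bounds with an explicit step.
affine.for %i = 0 to 10 step 2 {
  ...
}

// Map-based compound bounds: iterate from max(0, %N - 8) to min(%N, 64).
affine.for %j = max (d0) -> (0, d0 - 8) (%N) to min (d0) -> (d0, 64) (%N) {
  ...
}
```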
@@ -94,8 +94,8 @@ Example showing reverse iteration of the inner loop:

 func @simple_example(%A: memref<?x?xf32>, %B: memref<?x?xf32>) {
   %N = dim %A, 0 : memref<?x?xf32>
-  for %i = 0 to %N step 1 {
-    for %j = 0 to %N { // implicitly steps by 1
+  affine.for %i = 0 to %N step 1 {
+    affine.for %j = 0 to %N { // implicitly steps by 1
       %0 = affine.apply #map57(%j)[%N]
       %tmp = call @F1(%A, %i, %0) : (memref<?x?xf32>, index, index)->(f32)
       call @F2(%tmp, %B, %i, %0) : (f32, memref<?x?xf32>, index, index)->()

@@ -130,8 +130,8 @@ Example:

 #set = (d0, d1)[s0]: (d0 - 10 >= 0, s0 - d0 - 9 >= 0,
                       d1 - 10 >= 0, s0 - d1 - 9 >= 0)
 func @reduced_domain_example(%A, %X, %N) : (memref<10xi32>, i32, i32) {
-  for %i = 0 to %N {
-    for %j = 0 to %N {
+  affine.for %i = 0 to %N {
+    affine.for %j = 0 to %N {
       %0 = affine.apply #map42(%j)
       %tmp = call @S1(%X, %i, %0)
       if #set(%i, %j)[%N] {
@@ -22,9 +22,9 @@ Examples:

 // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into vector<32x256xf32> and
 // pad with %f0 to handle the boundary case:
 %f0 = constant 0.0f : f32
-for %i0 = 0 to %0 {
-  for %i1 = 0 to %1 step 256 {
-    for %i2 = 0 to %2 step 32 {
+affine.for %i0 = 0 to %0 {
+  affine.for %i1 = 0 to %1 step 256 {
+    affine.for %i2 = 0 to %2 step 32 {
       %v = vector_transfer_read %A, %i0, %i1, %i2, %f0
            {permutation_map: (d0, d1, d2) -> (d2, d1)} :
            (memref<?x?x?xf32>, index, index, f32) -> vector<32x256xf32>

@@ -33,8 +33,8 @@ for %i0 = 0 to %0 {

 // Read the slice `%A[%i0, %i1]` (i.e. the element `%A[%i0, %i1]`) into
 // vector<128xf32>. The underlying implementation will require a 1-D vector
 // broadcast:
-for %i0 = 0 to %0 {
-  for %i1 = 0 to %1 {
+affine.for %i0 = 0 to %0 {
+  affine.for %i1 = 0 to %1 {
     %3 = vector_transfer_read %A, %i0, %i1
          {permutation_map: (d0, d1) -> (0)} :
          (memref<?x?xf32>, index, index) -> vector<128xf32>

@@ -80,9 +80,9 @@ A notional lowering of vector_transfer_read could generate code resembling:

 // %expr1, %expr2, %expr3, %expr4 defined before this point
 %tmp = alloc() : vector<3x4x5xf32>
 %view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>>
-for %i = 0 to 3 {
-  for %j = 0 to 4 {
-    for %k = 0 to 5 {
+affine.for %i = 0 to 3 {
+  affine.for %j = 0 to 4 {
+    affine.for %k = 0 to 5 {
       %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] : memref<?x?x?x?xf32>
       store %tmp[%i, %j, %k] : vector<3x4x5xf32>
 }}}

@@ -101,8 +101,8 @@ lowered code would resemble:

 // %expr1, %expr2, %expr3, %expr4 defined before this point
 %tmp = alloc() : vector<3x4x5xf32>
 %view_in_tmp = "element_type_cast"(%tmp) : memref<1xvector<3x4x5xf32>>
-for %i = 0 to 3 {
-  for %k = 0 to 5 {
+affine.for %i = 0 to 3 {
+  affine.for %k = 0 to 5 {
     %a = load %A[%expr1 + %k, %expr2, %expr3 + %i, %expr4] : memref<?x?x?x?xf32>
     store %tmp[%i, 0, %k] : vector<3x4x5xf32>
 }}

@@ -129,10 +129,10 @@ Examples:

 ```mlir {.mlir}
 // write vector<16x32x64xf32> into the slice `%A[%i0, %i1:%i1+32, %i2:%i2+64, %i3:%i3+16]`:
-for %i0 = 0 to %0 {
-  for %i1 = 0 to %1 step 32 {
-    for %i2 = 0 to %2 step 64 {
-      for %i3 = 0 to %3 step 16 {
+affine.for %i0 = 0 to %0 {
+  affine.for %i1 = 0 to %1 step 32 {
+    affine.for %i2 = 0 to %2 step 64 {
+      affine.for %i3 = 0 to %3 step 16 {
         %val = `ssa-value` : vector<16x32x64xf32>
         vector_transfer_write %val, %A, %i0, %i1, %i2, %i3
           {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d2)} :
@@ -40,7 +40,7 @@ which means that values are defined before use and have scope defined by their
 dominance relations. Operations may produce zero or more results, and each is a
 distinct SSA value with its own type defined by the [type system](#type-system).

-MLIR incorporates polyhedral compiler concepts, including `for` and `if`
+MLIR incorporates polyhedral compiler concepts, including `affine.for` and `if`
 operations defined by the [affine dialect](Dialects/Affine.md), which model
 affine loops and affine conditionals. It also includes affine maps integrated
 into the type system - they are key to the representation of data and

@@ -99,10 +99,10 @@ func @multiply(%A: memref<100x?xf32>, %B: memref<?x50xf32>)
   %C = alloc memref<100x50xf32>()

   // Multiplication loop nest.
-  for %i = 0 to 100 {
-    for %j = 0 to 50 {
+  affine.for %i = 0 to 100 {
+    affine.for %j = 0 to 50 {
       store 0 to %C[%i, %j] : memref<100x50xf32>
-      for %k = 0 to %n {
+      affine.for %k = 0 to %n {
         %a_v = load %A[%i, %k] : memref<100x?xf32>
         %b_v = load %B[%k, %j] : memref<?x50xf32>
         %prod = mulf %a_v, %b_v : f32

@@ -1434,8 +1434,8 @@ The arity of indices is the rank of the memref (i.e., if the memref loaded from
 is of rank 3, then 3 indices are required for the load following the memref
 identifier).

-In an `if` or `for` body, the indices of a load are restricted to SSA values
-bound to surrounding loop induction variables,
+In an `if` or `affine.for` body, the indices of a load are restricted to SSA
+values bound to surrounding loop induction variables,
 [symbols](#dimensions-and-symbols), results of a
 [`constant` operation](#'constant'-operation), or the result of an
 `affine.apply` operation that can in turn take as arguments all of the
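A small sketch of what that restriction permits (illustrative only; `%A` and the symbol `%M` are assumed to be defined appropriately and are not from the diff):

```mlir
affine.for %i = 0 to 100 {
  %c1 = constant 1 : index                            // result of a 'constant' op
  %idx = affine.apply (d0)[s0] -> (d0 + s0) (%i)[%M]  // affine.apply of the IV and a symbol
  %v = load %A[%idx, %c1] : memref<?x128xf32>         // every index is of a permitted kind
}
```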
@@ -1456,7 +1456,7 @@ Example:

 **Context:** The `load` and `store` instructions are specifically crafted to
 fully resolve a reference to an element of a memref, and (in affine `if` and
-`for` instructions) the compiler can follow use-def chains (e.g. through
+`affine.for` instructions) the compiler can follow use-def chains (e.g. through
 [`affine.apply`](Dialects/Affine.md#'affine.apply'-operation) operations) to
 precisely analyze references at compile-time using polyhedral techniques. This
 is possible because of the

@@ -1492,7 +1492,7 @@ store %100, %A[%1, 1023] : memref<4x?xf32, #layout, hbm>

 **Context:** The `load` and `store` instructions are specifically crafted to
 fully resolve a reference to an element of a memref, and (in polyhedral `if` and
-`for` instructions) the compiler can follow use-def chains (e.g. through
+`affine.for` instructions) the compiler can follow use-def chains (e.g. through
 [`affine.apply`](Dialects/Affine.md#'affine.apply'-operation) operations) to
 precisely analyze references at compile-time using polyhedral techniques. This
 is possible because of the
@@ -39,8 +39,8 @@ These restrictions may be lifted in the future.

 ### Output IR

-Functions with `for` and `if` instructions eliminated. These functions may
-contain operations from the Standard dialect in addition to those already
+Functions with `affine.for` and `if` instructions eliminated. These functions
+may contain operations from the Standard dialect in addition to those already
 present before the pass.

 ### Invariants
@@ -150,8 +150,8 @@ func bar(%A : memref<8x?xf32, #lmap>) {
   // dynamically using dim instruction.
   %N = dim %A, 1 : memref<8x?xf32, #lmap>

-  for %i = 0 to 8 {
-    for %j = 0 to %N {
+  affine.for %i = 0 to 8 {
+    affine.for %j = 0 to %N {
       // A[i,j] += 1
       %s1 = load %A [%i, %j] : memref<8x?xf32, #lmap>
       %s2 = add %s1, 1

@@ -534,7 +534,7 @@ nested in an outer function that using affine loops.
 func @search(memref<?x?xi32 %A, <?xi32> %S, i32 %key) {
   %ni = dim %A, 0 : memref<?x?xi32>
   // This loop can be parallelized
-  for %i = 0 to %ni {
+  affine.for %i = 0 to %ni {
     call @search_body (%A, %S, %i) : (memref<?x?xi32>, memref<?xi32>, i32)
   }
   return

@@ -568,10 +568,10 @@ func @search_body(%A: memref<?x?xi32>, %S: memref<?xi32>, %key: i32) {

 As per the [MLIR spec](LangRef.md), the restrictions on dimensions and symbol
 identifiers to be used with the affine.apply instruction only apply to accesses
-inside `for` and `if` instructions. However, an analysis of accesses inside the
-called function (`@search_body`) is necessary to determine if the `%i` loop
-could be parallelized: such function access analysis is calling context
-sensitive.
+inside `affine.for` and `if` instructions. However, an analysis of accesses
+inside the called function (`@search_body`) is necessary to determine if the
+`%i` loop could be parallelized: such function access analysis is calling
+context sensitive.

 ### Non-affine loop bounds {#non-affine-loop-bounds}

@@ -590,8 +590,8 @@ for (i=0; i <N; i++)

 ```mlir {.mlir}
 func @outer_nest(%n) : (i32) {
-  for %i = 0 to %n {
-    for %j = 0 to %n {
+  affine.for %i = 0 to %n {
+    affine.for %j = 0 to %n {
       call @inner_nest(%i, %j, %n)
     }
   }

@@ -606,8 +606,8 @@ func @inner_nest(%i: i32, %j: i32, %n: i32) {
 }

 func @inner_nest2(%m, %n) -> i32 {
-  for %k = 0 to %m {
-    for %l = 0 to %n {
+  affine.for %k = 0 to %m {
+    affine.for %l = 0 to %n {
       ...
     }
   }

@@ -649,13 +649,13 @@ in a dilated convolution.
 func @conv2d(memref<16x1024x1024x3xf32, #lm0, vmem> %input,
              memref<5x5x3x32xf32, #lm0, vmem> %kernel,
              memref<16x512x512x32xf32, #lm0, vmem> %output) {
-  for %b = 0 to %batch {
-    for %oh = 0 to %output_height {
-      for %ow = 0 to %output_width {
-        for %of = 0 to %output_feature {
-          for %kh = 0 to %kernel_height {
-            for %kw = 0 to %kernel_width {
-              for %if = 0 to %input_feature {
+  affine.for %b = 0 to %batch {
+    affine.for %oh = 0 to %output_height {
+      affine.for %ow = 0 to %output_width {
+        affine.for %of = 0 to %output_feature {
+          affine.for %kh = 0 to %kernel_height {
+            affine.for %kw = 0 to %kernel_width {
+              affine.for %if = 0 to %input_feature {
                 // Calculate input indices.
                 %1_0 = affine.apply #map1_0 (%0#1, %0#2, %0#4, %0#5)
                   [%h_stride, %w_stride, %h_kernel_dilation, %w_kernel_dilation,

@@ -899,14 +899,14 @@ func @dma_hbm_to_vmem(memref<1024 x f32, #layout_map0, hbm> %a,
 representation. 2(b) requires no change, but impacts how cost models look at
 index and layout maps.

-### `if` and `for` Extensions for "Escaping Scalars" {#extensions-for-"escaping-scalars"}
+### `if` and `affine.for` Extensions for "Escaping Scalars" {#extensions-for-"escaping-scalars"}

 We considered providing a representation for SSA values that are live out of
-`if/else` conditional bodies and loop carried in `for` loops. We ultimately
-abandoned this approach due to its complexity. In the current design of MLIR,
-scalar variables cannot escape for loops or if instructions. In situations,
-where escaping is necessary, we use zero-dimensional tensors and memrefs instead
-of scalars.
+`if/else` conditional bodies and loop carried in `affine.for` loops. We
+ultimately abandoned this approach due to its complexity. In the current design
+of MLIR, scalar variables cannot escape for loops or if instructions. In
+situations where escaping is necessary, we use zero-dimensional tensors and
+memrefs instead of scalars.

 **TODO**: This whole section is obsolete and should be updated to use block
 arguments and a yield like terminator in for/if instructions.
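A minimal sketch of that zero-dimensional-memref workaround (illustrative; the rank-0 memref syntax and all names here are assumptions, not taken from this commit):

```mlir
// Carry a running value across iterations through a rank-0 memref
// instead of an escaping scalar.
%acc = alloc() : memref<f32>
store %zero, %acc[] : memref<f32>
affine.for %i = 0 to 100 {
  %cur = load %acc[] : memref<f32>
  %next = addf %cur, %one : f32
  store %next, %acc[] : memref<f32>
}
%result = load %acc[] : memref<f32>
```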
@@ -919,7 +919,7 @@ Syntax:

 ``` {.ebnf}
 [<out-var-list> =]
-for %<index-variable-name> = <lower-bound> ... <upper-bound> step <step>
+affine.for %<index-variable-name> = <lower-bound> ... <upper-bound> step <step>
   [with <in-var-list>] { <loop-instruction-list> }
 ```

@@ -934,7 +934,7 @@ Example:

 // Return sum of elements in 1-dimensional memref A
 func int32 @sum(%A : memref<?xi32>, %N : i32) -> (i32) {
   %init = 0
-  %result = for %i = 0 to N with %tmp(%init) {
+  %result = affine.for %i = 0 to N with %tmp(%init) {
     %value = load %A[%i]
     %sum = %value + %tmp
     yield %sum

@@ -964,7 +964,7 @@ Example:

 // Compute sum of half of the array
 func int32 @sum_half(%A, %N) {
   %s0 = 0
-  %s1 = for %i = 1 ... N step 1 with %s2 (%s0) {
+  %s1 = affine.for %i = 1 ... N step 1 with %s2 (%s0) {
     %s3 = if (%i >= %N / 2) {
       %v0 = load %A[%i]
       %s4 = %s2 + %v0
@@ -184,8 +184,8 @@ Our simple example above would be represented as:

 ```mlir
 mlfunc @simple_example(... %N) {
-  for %i = 0 ... %N step 1 {
-    for %j = 0 ... %N step 1 {
+  affine.for %i = 0 ... %N step 1 {
+    affine.for %j = 0 ... %N step 1 {
       // identity noop in this case, but can exist in general.
       %0,%1 = affine.apply #57(%i, %j)

@@ -203,8 +203,8 @@ The example with the reduced domain would be represented with an if instruction:

 ```mlir
 mlfunc @reduced_domain_example(... %N) {
-  for %i = 0 ... %N step 1 {
-    for %j = 0 ... %N step 1 {
+  affine.for %i = 0 ... %N step 1 {
+    affine.for %j = 0 ... %N step 1 {
       // identity noop in this case, but can exist in general.
       %0,%1 = affinecall #57(%i, %j)

@@ -233,8 +233,8 @@ that transformations call into):

 ```mlir
 mlfunc @skewed_domain_example(... %N) {
-  for %t1 = 0 ... 2*N-2 step 1 {
-    for %t2 = max(0, t1-N+1) ... min(N, t1) step 1 {
+  affine.for %t1 = 0 ... 2*N-2 step 1 {
+    affine.for %t2 = max(0, t1-N+1) ... min(N, t1) step 1 {
       (%i, %j) = (%t1-%t2, %t2)
       ...
     }

@@ -373,7 +373,7 @@ mlfunc's (if we support them) will also have to have domains.
 ### Lack of redundancy in IR

 The traditional form has multiple encodings for the same sorts of behavior: you
-end up having bits on `for` loops to specify whether codegen should use
+end up having bits on `affine.for` loops to specify whether codegen should use
 "atomic/separate" policies, unroll loops, etc. Instructions can be split or can
 generate multiple copies of their instruction because of overlapping domains,
 etc.
@@ -90,15 +90,15 @@ private:
   explicit AffineApplyOp(const Instruction *state) : Op(state) {}
 };

-/// The "for" instruction represents an affine loop nest, defining an SSA value
-/// for its induction variable. The induction variable is represented as a
+/// The "affine.for" instruction represents an affine loop nest, defining an SSA
+/// value for its induction variable. The induction variable is represented as a
 /// BlockArgument to the entry block of the body. The body and induction
-/// variable can be created automatically for new "for" ops with 'createBody'.
-/// This SSA value always has type index, which is the size of the machine word.
-/// The stride, represented by step, is a positive constant integer which
-/// defaults to "1" if not present. The lower and upper bounds specify a
-/// half-open range: the range includes the lower bound but does not include the
-/// upper bound.
+/// variable can be created automatically for new "affine.for" ops with
+/// 'createBody'. This SSA value always has type index, which is the size of the
+/// machine word. The stride, represented by step, is a positive constant
+/// integer which defaults to "1" if not present. The lower and upper bounds
+/// specify a half-open range: the range includes the lower bound but does not
+/// include the upper bound.
 ///
 /// The lower and upper bounds of a for operation are represented as an
 /// application of an affine mapping to a list of SSA values passed to the map.

@@ -110,7 +110,7 @@ private:
 ///
 /// Example:
 ///
-///   for %i = 1 to 10 {
+///   affine.for %i = 1 to 10 {
 ///     ...
 ///   }
 ///

@@ -131,7 +131,7 @@ public:
   static void getCanonicalizationPatterns(OwningRewritePatternList &results,
                                           MLIRContext *context);

-  static StringRef getOperationName() { return "for"; }
+  static StringRef getOperationName() { return "affine.for"; }
   static StringRef getStepAttrName() { return "step"; }
   static StringRef getLowerBoundAttrName() { return "lower_bound"; }
   static StringRef getUpperBoundAttrName() { return "upper_bound"; }

@@ -253,15 +253,15 @@ ConstOpPointer<AffineForOp> getForInductionVarOwner(const Value *val);
 void extractForInductionVars(ArrayRef<OpPointer<AffineForOp>> forInsts,
                              SmallVectorImpl<Value *> *ivs);

-/// Adds constraints (lower and upper bounds) for the specified 'for'
+/// Adds constraints (lower and upper bounds) for the specified 'affine.for'
 /// instruction's Value using IR information stored in its bound maps. The
 /// right identifier is first looked up using forOp's Value. Returns
 /// false for the yet unimplemented/unsupported cases, and true if the
 /// information is successfully added. Asserts if the Value corresponding to
-/// the 'for' instruction isn't found in the constraint system. Any new
-/// identifiers that are found in the bound operands of the 'for' instruction
-/// are added as trailing identifiers (either dimensional or symbolic
-/// depending on whether the operand is a valid ML Function symbol).
+/// the 'affine.for' instruction isn't found in the constraint system. Any new
+/// identifiers that are found in the bound operands of the 'affine.for'
+/// instruction are added as trailing identifiers (either dimensional or
+/// symbolic depending on whether the operand is a valid ML Function symbol).
 // TODO(bondhugula): add support for non-unit strides.
 bool addAffineForOpDomain(ConstOpPointer<AffineForOp> forOp,
                           FlatAffineConstraints *constraints);

@@ -297,10 +297,10 @@ public:
   operand_range getOperands() const { return {operand_begin(), operand_end()}; }

 private:
-  // 'for' instruction that contains this bound.
+  // 'affine.for' instruction that contains this bound.
   ConstOpPointer<AffineForOp> inst;
   // Start and end positions of this affine bound operands in the list of
-  // the containing 'for' instruction operands.
+  // the containing 'affine.for' instruction operands.
   unsigned opStart, opEnd;
   // Affine map for this bound.
   AffineMap map;
@@ -52,7 +52,7 @@ bool dominates(const Instruction &a, const Instruction &b);
 bool properlyDominates(const Instruction &a, const Instruction &b);

 /// Populates 'loops' with IVs of the loops surrounding 'inst' ordered from
-/// the outermost 'for' instruction to the innermost one.
+/// the outermost 'affine.for' instruction to the innermost one.
 // TODO(bondhugula): handle 'if' inst's.
 void getLoopIVs(const Instruction &inst,
                 SmallVectorImpl<OpPointer<AffineForOp>> *loops);

@@ -105,8 +105,8 @@ insertBackwardComputationSlice(Instruction *srcOpInst, Instruction *dstOpInst,
 /// surrounding such op's.
 // For example, the memref region for a load operation at loop depth = 1:
 //
-//   for %i = 0 to 32 {
-//     for %ii = %i to (d0) -> (d0 + 8) (%i) {
+//   affine.for %i = 0 to 32 {
+//     affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
 //       load %A[%ii]
 //     }
 //   }

@@ -139,8 +139,8 @@ struct MemRefRegion {
 /// For example, the memref region for this operation at loopDepth = 1 will
 /// be:
 ///
-///   for %i = 0 to 32 {
-///     for %ii = %i to (d0) -> (d0 + 8) (%i) {
+///   affine.for %i = 0 to 32 {
+///     affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
 ///       load %A[%ii]
 ///     }
 ///   }
@@ -76,9 +76,9 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType);
 /// The following MLIR snippet:
 ///
 /// ```mlir
-///   for %i3 = 0 to %0 {
-///     for %i4 = 0 to %1 {
-///       for %i5 = 0 to %2 {
+///   affine.for %i3 = 0 to %0 {
+///     affine.for %i4 = 0 to %1 {
+///       affine.for %i5 = 0 to %2 {
 ///         %a5 = load %arg0[%i4, %i5, %i3] : memref<?x?x?xf32>
 ///   }}}
 /// ```

@@ -86,9 +86,9 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType);
 /// may vectorize with {permutation_map: (d0, d1, d2) -> (d2, d1)} into:
 ///
 /// ```mlir
-///   for %i3 = 0 to %0 step 32 {
-///     for %i4 = 0 to %1 {
-///       for %i5 = 0 to %2 step 256 {
+///   affine.for %i3 = 0 to %0 step 32 {
+///     affine.for %i4 = 0 to %1 {
+///       affine.for %i5 = 0 to %2 step 256 {
 ///         %4 = vector_transfer_read %arg0, %i4, %i5, %i3
 ///              {permutation_map: (d0, d1, d2) -> (d2, d1)} :
 ///              (memref<?x?x?xf32>, index, index) -> vector<32x256xf32>

@@ -103,7 +103,7 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType);
 ///
 /// ```mlir
 ///   %cst0 = constant 0 : index
-///   for %i0 = 0 to %0 {
+///   affine.for %i0 = 0 to %0 {
 ///     %a0 = load %arg0[%cst0, %cst0] : memref<?x?xf32>
 ///   }
 /// ```

@@ -111,7 +111,7 @@ shapeRatio(VectorType superVectorType, VectorType subVectorType);
 /// may vectorize with {permutation_map: (d0) -> (0)} into:
 ///
 /// ```mlir
-///   for %i0 = 0 to %0 step 128 {
+///   affine.for %i0 = 0 to %0 step 128 {
 ///     %3 = vector_transfer_read %arg0, %c0_0, %c0_0
 ///          {permutation_map: (d0, d1) -> (0)} :
 ///          (memref<?x?xf32>, index, index) -> vector<128xf32>
@@ -83,9 +83,10 @@ AffineMap getUnrolledLoopUpperBound(ConstOpPointer<AffineForOp> forOp,
                                     unsigned unrollFactor,
                                     FuncBuilder *builder);

-/// Skew the instructions in the body of a 'for' instruction with the specified
-/// instruction-wise shifts. The shifts are with respect to the original
-/// execution order, and are multiplied by the loop 'step' before being applied.
+/// Skew the instructions in the body of an 'affine.for' instruction with the
+/// specified instruction-wise shifts. The shifts are with respect to the
+/// original execution order, and are multiplied by the loop 'step' before being
+/// applied.
 UtilResult instBodySkew(OpPointer<AffineForOp> forOp, ArrayRef<uint64_t> shifts,
                         bool unrollPrologueEpilogue = false);
@@ -94,14 +94,14 @@ Instruction *createComposedAffineApplyOp(FuncBuilder *builder, Location loc,
 ///
 /// Before
 ///
-///   for %i = 0 to #map(%N)
+///   affine.for %i = 0 to #map(%N)
 ///     %idx = affine.apply (d0) -> (d0 mod 2) (%i)
 ///     send %A[%idx], ...
 ///     %v = "compute"(%idx, ...)
 ///
 /// After
 ///
-///   for %i = 0 to #map(%N)
+///   affine.for %i = 0 to #map(%N)
 ///     %idx = affine.apply (d0) -> (d0 mod 2) (%i)
 ///     send %A[%idx], ...
 ///     %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
@@ -716,7 +716,7 @@ static void printBound(AffineBound bound, const char *prefix, OpAsmPrinter *p) {
 }

 void AffineForOp::print(OpAsmPrinter *p) const {
-  *p << "for ";
+  *p << "affine.for ";
   p->printOperand(getBody()->getArgument(0));
   *p << " = ";
   printBound(getLowerBound(), "max", p);

@@ -756,8 +756,8 @@ void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const {
 // For example, given the following MLIR code with "source" and
 // "destination" accesses to the same memref labeled, and symbols %M, %N, %K:
 //
-//   for %i0 = 0 to 100 {
-//     for %i1 = 0 to 50 {
+//   affine.for %i0 = 0 to 100 {
+//     affine.for %i1 = 0 to 50 {
 //       %a0 = affine.apply
 //         (d0, d1) -> (d0 * 2 - d1 * 4 + s1, d1 * 3 - s0) (%i0, %i1)[%M, %N]
 //       // Source memref access.

@@ -765,8 +765,8 @@ void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const {
 //     }
 //   }
 //
-//   for %i2 = 0 to 100 {
-//     for %i3 = 0 to 50 {
+//   affine.for %i2 = 0 to 100 {
+//     affine.for %i3 = 0 to 50 {
 //       %a1 = affine.apply
 //         (d0, d1) -> (d0 * 7 + d1 * 9 - s1, d1 * 11 + s0) (%i2, %i3)[%K, %M]
 //       // Destination memref access.
@@ -36,13 +36,13 @@
 using namespace mlir;

 /// Populates 'loops' with IVs of the loops surrounding 'inst' ordered from
-/// the outermost 'for' instruction to the innermost one.
+/// the outermost 'affine.for' instruction to the innermost one.
 void mlir::getLoopIVs(const Instruction &inst,
                       SmallVectorImpl<OpPointer<AffineForOp>> *loops) {
   auto *currInst = inst.getParentInst();
   OpPointer<AffineForOp> currAffineForOp;
-  // Traverse up the hierarchy collecing all 'for' instruction while skipping
-  // over 'if' instructions.
+  // Traverse up the hierarchy collecting all 'affine.for' instructions while
+  // skipping over 'if' instructions.
   while (currInst && ((currAffineForOp = currInst->dyn_cast<AffineForOp>()) ||
                       currInst->isa<AffineIfOp>())) {
    if (currAffineForOp)
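To make the traversal concrete, consider a hypothetical nest (invented for illustration, not from this file): querying from the load below, getLoopIVs collects [%i, %j], with the intervening 'if' skipped over:

```mlir
affine.for %i = 0 to 32 {
  if #set0(%i) {                 // skipped: not a loop
    affine.for %j = 0 to 8 {
      %v = load %A[%i, %j] : memref<32x8xf32>   // query starts here
    }
  }
}
```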
@@ -111,8 +111,8 @@ bool MemRefRegion::unionBoundingBox(const MemRefRegion &other) {
 // For example, the memref region for this load operation at loopDepth = 1 will
 // be as below:
 //
-//   for %i = 0 to 32 {
-//     for %ii = %i to (d0) -> (d0 + 8) (%i) {
+//   affine.for %i = 0 to 32 {
+//     affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
 //       load %A[%ii]
 //     }
 //   }

@@ -614,7 +614,7 @@ Optional<int64_t> mlir::getMemoryFootprintBytes(const Block &block,
                                                 int memorySpace) {
   std::vector<std::unique_ptr<MemRefRegion>> regions;

-  // Walk this 'for' instruction to gather all memory regions.
+  // Walk this 'affine.for' instruction to gather all memory regions.
   bool error = false;
   const_cast<Block *>(&block)->walk([&](Instruction *opInst) {
     if (!opInst->isa<LoadOp>() && !opInst->isa<StoreOp>()) {
@@ -189,7 +189,7 @@ unsigned Block::getNumSuccessors() const {
     return terminator->getNumSuccessors();
   }
   assert(getParent() && "top-level block with no terminator");
-  // Blocks inside 'for'/'if' instructions don't have successors.
+  // Blocks inside 'affine.for'/'if' instructions don't have successors.
   return 0;
 }
@@ -338,7 +338,8 @@ bool DmaGeneration::generateDma(const MemRefRegion &region, Block *block,
   auto fastMemRefType = top.getMemRefType(
       fastBufferShape, memRefType.getElementType(), {}, fastMemorySpace);

-  // Create the fast memory space buffer just before the 'for' instruction.
+  // Create the fast memory space buffer just before the 'affine.for'
+  // instruction.
   fastMemRef = prologue.create<AllocOp>(loc, fastMemRefType)->getResult();
   // Record it.
   fastBufferMap[memref] = fastMemRef;

@@ -456,7 +457,7 @@ bool DmaGeneration::runOnBlock(Block *block, uint64_t consumedCapacityBytes) {
   // approach is conservative in some cases at the moment, we do a check later
   // and report an error with location info.
   // TODO(bondhugula): An 'if' instruction is being treated similar to an
-  // operation instruction. 'if''s could have 'for's in them; treat them
+  // operation instruction. 'if''s could have 'affine.for's in them; treat them
   // separately.

   // Get to the first load, store, or for op.

@@ -470,9 +471,9 @@ bool DmaGeneration::runOnBlock(Block *block, uint64_t consumedCapacityBytes) {
   if (auto forOp = it->dyn_cast<AffineForOp>()) {
     // We'll assume for now that loops with steps are tiled loops, and so DMAs
     // are not performed for that depth, but only further inside.
-    // If the memory footprint of the 'for' loop is higher than fast memory
-    // capacity (when provided), we recurse to DMA at an inner level until
-    // we find a depth at which footprint fits in the capacity. If the
+    // If the memory footprint of the 'affine.for' loop is higher than fast
+    // memory capacity (when provided), we recurse to DMA at an inner level
+    // until we find a depth at which footprint fits in the capacity. If the
     // footprint can't be calculated, we assume for now it fits.

     // Returns true if the footprint is known to exceed capacity.

@@ -489,13 +490,13 @@ bool DmaGeneration::runOnBlock(Block *block, uint64_t consumedCapacityBytes) {
       consumedCapacityBytes += runOnBlock(/*begin=*/curBegin, /*end=*/it);
       // Recurse onto the body of this loop.
       runOnBlock(forOp->getBody(), consumedCapacityBytes);
-      // The next region starts right after the 'for' instruction.
+      // The next region starts right after the 'affine.for' instruction.
       curBegin = std::next(it);
     } else {
       // We have enough capacity, i.e., DMAs will be computed for the portion
-      // of the block until 'it', and for the 'for' loop. For the latter, they
-      // are placed just before this loop (for incoming DMAs) and right after
-      // (for outgoing ones).
+      // of the block until 'it', and for the 'affine.for' loop. For the
+      // latter, they are placed just before this loop (for incoming DMAs) and
+      // right after (for outgoing ones).
       consumedCapacityBytes += runOnBlock(/*begin=*/curBegin, /*end=*/it);

       // Inner loop DMAs have their own scope - thus we don't update consumed
@@ -510,7 +510,8 @@ bool MemRefDependenceGraph::init(Function *f) {
     // all loads and store accesses it contains.
     LoopNestStateCollector collector;
     collector.collect(&inst);
-    // Return false if a non 'for' region was found (not currently supported).
+    // Return false if a non 'affine.for' region was found (not currently
+    // supported).
    if (collector.hasNonForRegion)
      return false;
    Node node(nextNodeId++, &inst);
@@ -231,7 +231,8 @@ UtilResult mlir::tileCodeGen(MutableArrayRef<OpPointer<AffineForOp>> band,
 static void
 getTileableBands(Function *f,
                  std::vector<SmallVector<OpPointer<AffineForOp>, 6>> *bands) {
-  // Get maximal perfect nest of 'for' insts starting from root (inclusive).
+  // Get maximal perfect nest of 'affine.for' insts starting from root
+  // (inclusive).
   auto getMaximalPerfectLoopNest = [&](OpPointer<AffineForOp> root) {
     SmallVector<OpPointer<AffineForOp>, 6> band;
     OpPointer<AffineForOp> currInst = root;
@@ -164,7 +164,7 @@ PassResult LoopUnroll::runOnFunction(Function *f) {
   return success();
 }

-/// Unrolls a 'for' inst. Returns true if the loop was unrolled, false
+/// Unrolls an 'affine.for' inst. Returns true if the loop was unrolled, false
 /// otherwise. The default unroll factor is 4.
 bool LoopUnroll::runOnAffineForOp(OpPointer<AffineForOp> forOp) {
   // Use the function callback if one was provided.
@@ -105,7 +105,7 @@ PassResult LoopUnrollAndJam::runOnFunction(Function *f) {
   return success();
 }

-/// Unroll and jam a 'for' inst. Default unroll jam factor is
+/// Unroll and jam an 'affine.for' inst. Default unroll jam factor is
 /// kDefaultUnrollJamFactor. Return false if nothing was done.
 bool LoopUnrollAndJam::runOnAffineForOp(OpPointer<AffineForOp> forOp) {
   // Unroll and jam by the factor that was passed if any.
@@ -283,7 +283,8 @@ static Value *buildMinMaxReductionSeq(Location loc, CmpIPredicate predicate,
   return value;
 }

-// Convert a "for" loop to a flow of blocks. Return `false` on success.
+// Convert an "affine.for" loop to a flow of blocks. Return `false` on
+// success.
 //
 // Create an SESE region for the loop (including its body) and append it to the
 // end of the current region. The loop region consists of the initialization
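Roughly, the lowering described here produces explicit branches; a hand-written sketch of the output shape (the block names and exact op sequence are assumptions, not the pass's verbatim output):

```mlir
// 'affine.for %i = 0 to %N { ... }' becomes approximately:
  br ^cond(%c0 : index)
^cond(%i : index):               // loop header: test the exit condition
  %cmp = cmpi "slt", %i, %N : index
  cond_br %cmp, ^body, ^end
^body:                           // original loop body, then the increment
  ...
  %next = addi %i, %c1 : index
  br ^cond(%next : index)
^end:
```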
@@ -330,8 +331,9 @@ bool LowerAffinePass::lowerAffineFor(OpPointer<AffineForOp> forOp) {
   auto loc = forOp->getLoc();
   auto *forInst = forOp->getInstruction();

-  // Start by splitting the block containing the 'for' into two parts. The part
-  // before will get the init code, the part after will be the end point.
+  // Start by splitting the block containing the 'affine.for' into two parts.
+  // The part before will get the init code, the part after will be the end
+  // point.
   auto *initBlock = forInst->getBlock();
   auto *endBlock = initBlock->splitBlock(forInst);
@@ -126,9 +126,9 @@ private:
 ///   // Read the slice `%A[%i0, %i1:%i1+256, %i2:%i2+32]` into
 ///   // vector<32x256xf32> and pad with %f0 to handle the boundary case:
 ///   %f0 = constant 0.0f : f32
-///   for %i0 = 0 to %0 {
-///     for %i1 = 0 to %1 step 256 {
-///       for %i2 = 0 to %2 step 32 {
+///   affine.for %i0 = 0 to %0 {
+///     affine.for %i1 = 0 to %1 step 256 {
+///       affine.for %i2 = 0 to %2 step 32 {
 ///         %v = vector_transfer_read %A, %i0, %i1, %i2, %f0
 ///              {permutation_map: (d0, d1, d2) -> (d2, d1)} :
 ///              (memref<?x?x?xf32>, index, index, f32) -> vector<32x256xf32>

@@ -139,8 +139,8 @@ private:
 /// MLIR resembling:
 ///
 /// ```mlir
-///   for %d1 = 0 to 256 {
-///     for %d2 = 0 to 32 {
+///   affine.for %d1 = 0 to 256 {
+///     affine.for %d2 = 0 to 32 {
 ///       %s = %A[%i0, %i1 + %d1, %i2 + %d2] : f32
 ///       %tmp[%d2, %d1] = %s
 ///     }
@@ -101,10 +101,10 @@
 /// mlfunc @materialize(%M : index, %N : index, %O : index, %P : index) {
 ///   %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
 ///   %f1 = constant splat<vector<4x4x4xf32>, 1.000000e+00> :
-///   vector<4x4x4xf32> for %i0 = 0 to %M step 4 {
-///     for %i1 = 0 to %N step 4 {
-///       for %i2 = 0 to %O {
-///         for %i3 = 0 to %P step 4 {
+///   vector<4x4x4xf32> affine.for %i0 = 0 to %M step 4 {
+///     affine.for %i1 = 0 to %N step 4 {
+///       affine.for %i2 = 0 to %O {
+///         affine.for %i3 = 0 to %P step 4 {
 ///           vector_transfer_write %f1, %A, %i0, %i1, %i2, %i3
 ///             {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} :
 ///             vector<4x4x4xf32>, memref<?x?x?x?xf32, 0>,

@@ -120,10 +120,10 @@
 /// mlfunc @materialize(%M : index, %N : index, %O : index, %P : index) {
 ///   %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
 ///   %f1 = constant splat<vector<4x4xf32>, 1.000000e+00> : vector<4x4x4xf32>
-///   for %i0 = 0 to %arg0 step 4 {
-///     for %i1 = 0 to %arg1 step 4 {
-///       for %i2 = 0 to %arg2 {
-///         for %i3 = 0 to %arg3 step 4 {
+///   affine.for %i0 = 0 to %arg0 step 4 {
+///     affine.for %i1 = 0 to %arg1 step 4 {
+///       affine.for %i2 = 0 to %arg2 {
+///         affine.for %i3 = 0 to %arg3 step 4 {
 ///           %1 = affine.apply (d0, d1, d2, d3) -> (d0, d1, d2, d3)
 ///                (%i0, %i1, %i2, %i3)
 ///           vector_transfer_write f1, %0, %1#0, %1#1, %1#2, %1#3

@@ -293,10 +293,10 @@ static Value *substitute(Value *v, VectorType hwVectorType,
 /// super-vectorization has been applied:
 ///
 /// ```mlir
-///   for %i0 = 0 to %M {
-///     for %i1 = 0 to %N step 3 {
-///       for %i2 = 0 to %O {
-///         for %i3 = 0 to %P step 32 {
+///   affine.for %i0 = 0 to %M {
+///     affine.for %i1 = 0 to %N step 3 {
+///       affine.for %i2 = 0 to %O {
+///         affine.for %i3 = 0 to %P step 32 {
 ///           %r = vector_transfer_read(%A, map(%i..)#0, map(%i..)#1, map(%i..)#2)
 ///                -> vector<3x32xf32>
 ///           ...
@@ -19,7 +19,7 @@
 // potentially getting rid of intermediate memref's entirely.
 // TODO(mlir-team): In the future, similar techniques could be used to eliminate
 // dead memref store's and perform more complex forwarding when support for
-// SSA scalars live out of 'for'/'if' statements is available.
+// SSA scalars live out of 'affine.for'/'if' statements is available.
 //===----------------------------------------------------------------------===//

 #include "mlir/Analysis/AffineAnalysis.h"

@@ -55,7 +55,7 @@ namespace {
 //
 // (* A dependence being satisfied at a block: a dependence that is satisfied by
 // virtue of the destination instruction appearing textually / lexically after
-// the source instruction within the body of a 'for' instruction; thus, a
+// the source instruction within the body of an 'affine.for' instruction; thus, a
 // dependence is always either satisfied by a loop or by a block).
 //
 // The above conditions are simple to check, sufficient, and powerful for most

@@ -145,8 +145,8 @@ void MemRefDataFlowOpt::forwardStoreToLoad(OpPointer<LoadOp> loadOp) {
   // Check if this store is a candidate for forwarding; we only forward if
   // the dependence from the store is carried by the *body* of innermost
   // common surrounding loop. As an example this filters out cases like:
-  // for %i0
-  //   for %i1
+  // affine.for %i0
+  //   affine.for %i1
   //     %idx = affine.apply (d0) -> (d0 + 1) (%i0)
   //     store %A[%idx]
   //     load %A[%i0]
@@ -71,11 +71,11 @@ static unsigned getTagMemRefPos(const Instruction &dmaInst) {
   return 0;
 }

-/// Doubles the buffer of the supplied memref on the specified 'for' instruction
-/// by adding a leading dimension of size two to the memref. Replaces all uses
-/// of the old memref by the new one while indexing the newly added dimension by
-/// the loop IV of the specified 'for' instruction modulo 2. Returns false if
-/// such a replacement cannot be performed.
+/// Doubles the buffer of the supplied memref on the specified 'affine.for'
+/// instruction by adding a leading dimension of size two to the memref.
+/// Replaces all uses of the old memref by the new one while indexing the newly
+/// added dimension by the loop IV of the specified 'affine.for' instruction
+/// modulo 2. Returns false if such a replacement cannot be performed.
 static bool doubleBuffer(Value *oldMemRef, OpPointer<AffineForOp> forOp) {
   auto *forBody = forOp->getBody();
   FuncBuilder bInner(forBody, forBody->begin());
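In IR terms, the effect described by that comment looks roughly like the following (an illustrative sketch with invented names and shapes, not output of the pass):

```mlir
// Before: one buffer shared by all iterations.
%buf = alloc() : memref<256xf32>
affine.for %i = 0 to %N {
  ... uses of %buf ...
}

// After doubleBuffer: a leading dimension of size two, indexed by %i mod 2.
%buf2 = alloc() : memref<2x256xf32>
affine.for %i = 0 to %N {
  %idx = affine.apply (d0) -> (d0 mod 2) (%i)
  ... uses of %buf2[%idx, ...] ...
}
```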
@@ -108,7 +108,7 @@ static bool doubleBuffer(Value *oldMemRef, OpPointer<AffineForOp> forOp) {
                                             dynamicDimCount++));
   }

-  // Create and place the alloc right before the 'for' instruction.
+  // Create and place the alloc right before the 'affine.for' instruction.
   // TODO(mlir-team): we are assuming scoped allocation here, and aren't
   // inserting a dealloc -- this isn't the right thing.
   Value *newMemRef =

@@ -137,9 +137,9 @@ static bool doubleBuffer(Value *oldMemRef, OpPointer<AffineForOp> forOp) {
 /// Returns success if the IR is in a valid state.
 PassResult PipelineDataTransfer::runOnFunction(Function *f) {
   // Do a post order walk so that inner loop DMAs are processed first. This is
-  // necessary since 'for' instructions nested within would otherwise become
-  // invalid (erased) when the outer loop is pipelined (the pipelined one gets
-  // deleted and replaced by a prologue, a new steady-state loop and an
+  // necessary since 'affine.for' instructions nested within would otherwise
+  // become invalid (erased) when the outer loop is pipelined (the pipelined one
+  // gets deleted and replaced by a prologue, a new steady-state loop and an
   // epilogue).
   forOps.clear();
   f->walkPostOrder<AffineForOp>(
@@ -138,8 +138,8 @@ void mlir::promoteSingleIterationLoops(Function *f) {
       [](OpPointer<AffineForOp> forOp) { promoteIfSingleIteration(forOp); });
 }

-/// Generates a 'for' inst with the specified lower and upper bounds while
-/// generating the right IV remappings for the shifted instructions. The
+/// Generates an 'affine.for' inst with the specified lower and upper bounds
+/// while generating the right IV remappings for the shifted instructions. The
 /// instruction blocks that go into the loop are specified in instGroupQueue
 /// starting from the specified offset, and in that order; the first element of
 /// the pair specifies the shift applied to that group of instructions; note
@@ -194,10 +194,10 @@ generateLoop(AffineMap lbMap, AffineMap ubMap,
   return loopChunk;
 }

-/// Skew the instructions in the body of a 'for' instruction with the specified
-/// instruction-wise shifts. The shifts are with respect to the original
-/// execution order, and are multiplied by the loop 'step' before being applied.
-/// A shift of zero for each instruction will lead to no change.
+/// Skew the instructions in the body of an 'affine.for' instruction with the
+/// specified instruction-wise shifts. The shifts are with respect to the
+/// original execution order, and are multiplied by the loop 'step' before being
+/// applied. A shift of zero for each instruction will lead to no change.
 // The skewing of instructions with respect to one another can be used for
 // example to allow overlap of asynchronous operations (such as DMA
 // communication) with computation, or just relative shifting of instructions
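For intuition about the shift semantics, a hand-written sketch (not produced by instBodySkew; the op names are invented and the real prologue/epilogue generation is more involved): with shifts = [0, 1] on a step-1 loop over [0, 4), the second instruction ends up one iteration behind the first:

```mlir
// Before:
affine.for %i = 0 to 4 {
  "produce"(%i) : (index) -> ()
  "consume"(%i) : (index) -> ()
}

// After skewing "consume" by one iteration:
"produce"(%c0) : (index) -> ()              // prologue
affine.for %i = 1 to 4 {
  "produce"(%i) : (index) -> ()
  %im1 = affine.apply (d0) -> (d0 - 1) (%i)
  "consume"(%im1) : (index) -> ()
}
"consume"(%c3) : (index) -> ()              // epilogue
```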
@@ -246,7 +246,7 @@ UtilResult mlir::instBodySkew(OpPointer<AffineForOp> forOp,

   // An array of instruction groups sorted by shift amount; each group has all
   // instructions with the same shift in the order in which they appear in the
-  // body of the 'for' inst.
+  // body of the 'affine.for' inst.
   std::vector<std::vector<Instruction *>> sortedInstGroups(maxShift + 1);
   unsigned pos = 0;
   for (auto &inst : *forOp->getBody()) {
@@ -194,14 +194,14 @@ bool mlir::replaceAllMemRefUsesWith(const Value *oldMemRef, Value *newMemRef,
 ///
 /// Before
 ///
-///   for %i = 0 to #map(%N)
+///   affine.for %i = 0 to #map(%N)
 ///     %idx = affine.apply (d0) -> (d0 mod 2) (%i)
 ///     "send"(%idx, %A, ...)
 ///     "compute"(%idx)
 ///
 /// After
 ///
-///   for %i = 0 to #map(%N)
+///   affine.for %i = 0 to #map(%N)
 ///     %idx = affine.apply (d0) -> (d0 mod 2) (%i)
 ///     "send"(%idx, %A, ...)
 ///     %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
@@ -113,7 +113,7 @@ using namespace mlir;
 ///
 /// At a high level, a vectorized load in a loop will resemble:
 /// ```mlir
-///   for %i = ? to ? step ? {
+///   affine.for %i = ? to ? step ? {
 ///     %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) ->
 ///            vector<128xf32>
 ///   }

@@ -309,7 +309,7 @@ using namespace mlir;
 /// ```mlir
 /// mlfunc @fill(%A : memref<128xf32>) -> () {
 ///   %f1 = constant 1.0 : f32
-///   for %i0 = 0 to 32 {
+///   affine.for %i0 = 0 to 32 {
 ///     store %f1, %A[%i0] : memref<128xf32, 0>
 ///   }
 ///   return

@@ -322,7 +322,7 @@ using namespace mlir;
 /// is still subject to exploratory tradeoffs. In particular, say we want to
 /// vectorize by a factor 128, we want to transform the following input:
 /// ```mlir
-///   for %i = %M to %N {
+///   affine.for %i = %M to %N {
 ///     %a = load A[%i] : memref<?xf32>
 ///   }
 /// ```

@@ -331,8 +331,8 @@ using namespace mlir;
 /// memory promotion etc) say after stripmining (and potentially unrolling in
 /// the case of LLVM's SLP vectorizer):
 /// ```mlir
-///   for %i = floor(%M, 128) to ceil(%N, 128) {
-///     for %ii = max(%M, 128 * %i) to min(%N, 128*%i + 127) {
+///   affine.for %i = floor(%M, 128) to ceil(%N, 128) {
+///     affine.for %ii = max(%M, 128 * %i) to min(%N, 128*%i + 127) {
 ///       %a = load A[%ii] : memref<?xf32>
 ///     }
 ///   }

@@ -341,7 +341,7 @@ using namespace mlir;
 /// Instead, we seek to vectorize early and freeze vector types before
 /// scheduling, so we want to generate a pattern that resembles:
 /// ```mlir
-///   for %i = ? to ? step ? {
+///   affine.for %i = ? to ? step ? {
 ///     %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) ->
 ///            vector<128xf32>
 ///   }

@@ -362,7 +362,7 @@ using namespace mlir;
 /// For the simple strawman example above, vectorizing for a 1-D vector
 /// abstraction of size 128 returns code similar to:
 /// ```mlir
-///   for %i = %M to %N step 128 {
+///   affine.for %i = %M to %N step 128 {
 ///     %v_a = "vector_transfer_read" (A, %i) : (memref<?xf32>, index) ->
 ///            vector<128xf32>
 ///   }

@@ -391,20 +391,20 @@ using namespace mlir;
 ///   %C = alloc (%M, %N) : memref<?x?xf32, 0>
 ///   %f1 = constant 1.0 : f32
 ///   %f2 = constant 2.0 : f32
-///   for %i0 = 0 to %M {
-///     for %i1 = 0 to %N {
+///   affine.for %i0 = 0 to %M {
+///     affine.for %i1 = 0 to %N {
 ///       // non-scoped %f1
 ///       store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
 ///     }
 ///   }
-///   for %i2 = 0 to %M {
-///     for %i3 = 0 to %N {
+///   affine.for %i2 = 0 to %M {
+///     affine.for %i3 = 0 to %N {
 ///       // non-scoped %f2
 ///       store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
 ///     }
 ///   }
-///   for %i4 = 0 to %M {
-///     for %i5 = 0 to %N {
+///   affine.for %i4 = 0 to %M {
+///     affine.for %i5 = 0 to %N {
 ///       %a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
 ///       %b5 = load %B[%i4, %i5] : memref<?x?xf32, 0>
 ///       %s5 = addf %a5, %b5 : f32

@@ -438,24 +438,24 @@ using namespace mlir;
 ///   %2 = alloc(%arg0, %arg1) : memref<?x?xf32>
 ///   %cst = constant 1.0 : f32
 ///   %cst_0 = constant 2.0 : f32
-///   for %i0 = 0 to %arg0 {
-///     for %i1 = 0 to %arg1 step 256 {
+///   affine.for %i0 = 0 to %arg0 {
+///     affine.for %i1 = 0 to %arg1 step 256 {
 ///       %cst_1 = constant splat<vector<256xf32>, 1.0> :
 ///                vector<256xf32>
 ///       "vector_transfer_write"(%cst_1, %0, %i0, %i1) :
 ///         (vector<256xf32>, memref<?x?xf32>, index, index) -> ()
 ///     }
 ///   }
-///   for %i2 = 0 to %arg0 {
-///     for %i3 = 0 to %arg1 step 256 {
+///   affine.for %i2 = 0 to %arg0 {
+///     affine.for %i3 = 0 to %arg1 step 256 {
 ///       %cst_2 = constant splat<vector<256xf32>, 2.0> :
 ///                vector<256xf32>
 ///       "vector_transfer_write"(%cst_2, %1, %i2, %i3) :
 ///         (vector<256xf32>, memref<?x?xf32>, index, index) -> ()
 ///     }
 ///   }
-///   for %i4 = 0 to %arg0 {
-///     for %i5 = 0 to %arg1 step 256 {
+///   affine.for %i4 = 0 to %arg0 {
+///     affine.for %i5 = 0 to %arg1 step 256 {
 ///       %3 = "vector_transfer_read"(%0, %i4, %i5) :
 ///            (memref<?x?xf32>, index, index) -> vector<256xf32>
 ///       %4 = "vector_transfer_read"(%1, %i4, %i5) :

@@ -494,24 +494,24 @@ using namespace mlir;
 ///   %2 = alloc(%arg0, %arg1) : memref<?x?xf32>
 ///   %cst = constant 1.0 : f32
 ///   %cst_0 = constant 2.0 : f32
-///   for %i0 = 0 to %arg0 step 32 {
-///     for %i1 = 0 to %arg1 step 256 {
+///   affine.for %i0 = 0 to %arg0 step 32 {
+///     affine.for %i1 = 0 to %arg1 step 256 {
 ///       %cst_1 = constant splat<vector<32x256xf32>, 1.0> :
 ///                vector<32x256xf32>
 ///       "vector_transfer_write"(%cst_1, %0, %i0, %i1) :
 ///         (vector<32x256xf32>, memref<?x?xf32>, index, index) -> ()
 ///     }
 ///   }
-///   for %i2 = 0 to %arg0 step 32 {
-///     for %i3 = 0 to %arg1 step 256 {
+///   affine.for %i2 = 0 to %arg0 step 32 {
+///     affine.for %i3 = 0 to %arg1 step 256 {
 ///       %cst_2 = constant splat<vector<32x256xf32>, 2.0> :
 ///                vector<32x256xf32>
 ///       "vector_transfer_write"(%cst_2, %1, %i2, %i3) :
 ///         (vector<32x256xf32>, memref<?x?xf32>, index, index) -> ()
 ///     }
 ///   }
-///   for %i4 = 0 to %arg0 step 32 {
-///     for %i5 = 0 to %arg1 step 256 {
+///   affine.for %i4 = 0 to %arg0 step 32 {
+///     affine.for %i5 = 0 to %arg1 step 256 {
 ///       %3 = "vector_transfer_read"(%0, %i4, %i5) :
 ///            (memref<?x?xf32>, index, index) -> vector<32x256xf32>
 ///       %4 = "vector_transfer_read"(%1, %i4, %i5) :
@@ -32,7 +32,7 @@
 func @compose_affine_maps_1dto2d_no_symbols() {
   %0 = alloc() : memref<4x4xf32>

-  for %i0 = 0 to 15 {
+  affine.for %i0 = 0 to 15 {
     // Test load[%x, %x]

     %x0 = affine.apply (d0) -> (d0 - 1) (%i0)

@@ -78,7 +78,7 @@ func @compose_affine_maps_1dto2d_no_symbols() {
 func @compose_affine_maps_1dto2d_with_symbols() {
   %0 = alloc() : memref<4x4xf32>

-  for %i0 = 0 to 15 {
+  affine.for %i0 = 0 to 15 {
     // Test load[%x0, %x0] with symbol %c4
     %c4 = constant 4 : index
     %x0 = affine.apply (d0)[s0] -> (d0 - s0) (%i0)[%c4]

@@ -119,13 +119,13 @@ func @compose_affine_maps_2d_tile() {
   %c4 = constant 4 : index
   %c8 = constant 8 : index

-  for %i0 = 0 to 3 {
+  affine.for %i0 = 0 to 3 {
     %x0 = affine.apply (d0)[s0] -> (d0 ceildiv s0) (%i0)[%c4]
-    for %i1 = 0 to 3 {
+    affine.for %i1 = 0 to 3 {
       %x1 = affine.apply (d0)[s0] -> (d0 ceildiv s0) (%i1)[%c8]
-      for %i2 = 0 to 3 {
+      affine.for %i2 = 0 to 3 {
        %x2 = affine.apply (d0)[s0] -> (d0 mod s0) (%i2)[%c4]
-        for %i3 = 0 to 3 {
+        affine.for %i3 = 0 to 3 {
          %x3 = affine.apply (d0)[s0] -> (d0 mod s0) (%i3)[%c8]

          %x40 = affine.apply (d0, d1, d2, d3)[s0, s1] ->

@@ -151,9 +151,9 @@ func @compose_affine_maps_dependent_loads() {
   %0 = alloc() : memref<16x32xf32>
   %1 = alloc() : memref<16x32xf32>

-  for %i0 = 0 to 3 {
-    for %i1 = 0 to 3 {
-      for %i2 = 0 to 3 {
+  affine.for %i0 = 0 to 3 {
+    affine.for %i1 = 0 to 3 {
+      affine.for %i2 = 0 to 3 {
         %c3 = constant 3 : index
         %c7 = constant 7 : index

@@ -197,7 +197,7 @@ func @compose_affine_maps_dependent_loads() {
 func @compose_affine_maps_diamond_dependency() {
   %0 = alloc() : memref<4x4xf32>

-  for %i0 = 0 to 15 {
+  affine.for %i0 = 0 to 15 {
     %a = affine.apply (d0) -> (d0 - 1) (%i0)
     %b = affine.apply (d0) -> (d0 + 7) (%a)
     %c = affine.apply (d0) -> (d0 * 4) (%a)

@@ -217,8 +217,8 @@ func @arg_used_as_dim_and_symbol(%arg0: memref<100x100xf32>, %arg1: index) {
   %c9 = constant 9 : index
   %1 = alloc() : memref<100x100xf32, 1>
   %2 = alloc() : memref<1xi32>
-  for %i0 = 0 to 100 {
-    for %i1 = 0 to 100 {
+  affine.for %i0 = 0 to 100 {
+    affine.for %i1 = 0 to 100 {
       %3 = affine.apply (d0, d1)[s0, s1] -> (d1 + s0 + s1)
            (%i0, %i1)[%arg1, %c9]
       %4 = affine.apply (d0, d1, d3) -> (d3 - (d0 + d1))

@@ -238,7 +238,7 @@ func @trivial_maps() {
   %0 = alloc() : memref<10xf32>
   %c0 = constant 0 : index
   %cst = constant 0.000000e+00 : f32
-  for %i1 = 0 to 10 {
+  affine.for %i1 = 0 to 10 {
     %1 = affine.apply ()[s0] -> (s0)()[%c0]
     store %cst, %0[%1] : memref<10xf32>
     %2 = load %0[%c0] : memref<10xf32>

@@ -277,20 +277,20 @@ func @constant_fold_bounds(%N : index) {
   %c3 = affine.apply (d0, d1) -> (d0 + d1) (%c1, %c2)
   %l = "foo"() : () -> index

-  // CHECK: for %i0 = 5 to 7 {
-  for %i = max (d0, d1) -> (0, d0 + d1)(%c2, %c3) to min (d0, d1) -> (d0 - 2, 32*d1) (%c9, %c1) {
+  // CHECK: affine.for %i0 = 5 to 7 {
+  affine.for %i = max (d0, d1) -> (0, d0 + d1)(%c2, %c3) to min (d0, d1) -> (d0 - 2, 32*d1) (%c9, %c1) {
     "foo"(%i, %c3) : (index, index) -> ()
   }

   // Bound takes a non-constant argument but can still be folded.
-  // CHECK: for %i1 = 1 to 7 {
-  for %j = max (d0) -> (0, 1)(%N) to min (d0, d1) -> (7, 9)(%N, %l) {
+  // CHECK: affine.for %i1 = 1 to 7 {
+  affine.for %j = max (d0) -> (0, 1)(%N) to min (d0, d1) -> (7, 9)(%N, %l) {
     "foo"(%j, %c3) : (index, index) -> ()
   }

   // None of the bounds can be folded.
-  // CHECK: for %i2 = max [[MAP0]]()[%0] to min [[MAP1]]()[%arg0] {
-  for %k = max ()[s0] -> (0, s0) ()[%l] to min ()[s0] -> (100, s0)()[%N] {
+  // CHECK: affine.for %i2 = max [[MAP0]]()[%0] to min [[MAP1]]()[%arg0] {
+  affine.for %k = max ()[s0] -> (0, s0) ()[%l] to min ()[s0] -> (100, s0)()[%N] {
     "foo"(%k, %c3) : (index, index) -> ()
   }
   return
|
@@ -204,35 +204,35 @@ func @illegaltype(i0) // expected-error {{invalid integer width}}
// -----

func @malformed_for_percent() {
-  for i = 1 to 10 { // expected-error {{expected SSA operand}}
+  affine.for i = 1 to 10 { // expected-error {{expected SSA operand}}

// -----

func @malformed_for_equal() {
-  for %i 1 to 10 { // expected-error {{expected '='}}
+  affine.for %i 1 to 10 { // expected-error {{expected '='}}

// -----

func @malformed_for_to() {
-  for %i = 1 too 10 { // expected-error {{expected 'to' between bounds}}
+  affine.for %i = 1 too 10 { // expected-error {{expected 'to' between bounds}}
  }
}

// -----

func @incomplete_for() {
-  for %i = 1 to 10 step 2
+  affine.for %i = 1 to 10 step 2
} // expected-error {{expected '{' to begin block list}}

// -----

func @nonconstant_step(%1 : i32) {
-  for %2 = 1 to 5 step %1 { // expected-error {{expected non-function type}}
+  affine.for %2 = 1 to 5 step %1 { // expected-error {{expected non-function type}}

// -----

func @for_negative_stride() {
-  for %i = 1 to 10 step -1
+  affine.for %i = 1 to 10 step -1
} // expected-error@-1 {{expected step to be representable as a positive signed integer}}

// -----

@@ -244,7 +244,7 @@ func @non_instruction() {
// -----

func @invalid_if_conditional2() {
-  for %i = 1 to 10 {
+  affine.for %i = 1 to 10 {
    if (i)[N] : (i >= ) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}}
  }
}
@@ -252,7 +252,7 @@ func @invalid_if_conditional2() {
// -----

func @invalid_if_conditional3() {
-  for %i = 1 to 10 {
+  affine.for %i = 1 to 10 {
    if (i)[N] : (i == 1) // expected-error {{expected '0' after '=='}}
  }
}
@@ -260,7 +260,7 @@ func @invalid_if_conditional3() {
// -----

func @invalid_if_conditional4() {
-  for %i = 1 to 10 {
+  affine.for %i = 1 to 10 {
    if (i)[N] : (i >= 2) // expected-error {{expected '0' after '>='}}
  }
}
@@ -268,7 +268,7 @@ func @invalid_if_conditional4() {
// -----

func @invalid_if_conditional5() {
-  for %i = 1 to 10 {
+  affine.for %i = 1 to 10 {
    if (i)[N] : (i <= 0 ) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}}
  }
}
@@ -276,7 +276,7 @@ func @invalid_if_conditional5() {
// -----

func @invalid_if_conditional6() {
-  for %i = 1 to 10 {
+  affine.for %i = 1 to 10 {
    if (i) : (i) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}}
  }
}
@@ -284,7 +284,7 @@ func @invalid_if_conditional6() {
// -----
// TODO (support if (1)?
func @invalid_if_conditional7() {
-  for %i = 1 to 10 {
+  affine.for %i = 1 to 10 {
    if (i) : (1) // expected-error {{expected '== 0' or '>= 0' at end of affine constraint}}
  }
}
@@ -438,8 +438,8 @@ func @undef() {
// -----

func @duplicate_induction_var() {
-  for %i = 1 to 10 { // expected-error {{previously defined here}}
-    for %i = 1 to 10 { // expected-error {{redefinition of SSA value '%i'}}
+  affine.for %i = 1 to 10 { // expected-error {{previously defined here}}
+    affine.for %i = 1 to 10 { // expected-error {{redefinition of SSA value '%i'}}
    }
  }
  return
@@ -448,7 +448,7 @@ func @duplicate_induction_var() {
// -----

func @dominance_failure() {
-  for %i = 1 to 10 {
+  affine.for %i = 1 to 10 {
  }
  "xxx"(%i) : (index)->() // expected-error {{operand #0 does not dominate this use}}
  return
@@ -475,7 +475,7 @@ func @return_type_mismatch() -> i32 {
// -----

func @return_inside_loop() -> i8 {
-  for %i = 1 to 100 {
+  affine.for %i = 1 to 100 {
    %a = "foo"() : ()->i8
    return %a : i8
    // expected-error@-1 {{'return' op may only be at the top level of a function}}
@@ -521,7 +521,7 @@ func @referer() {
#map1 = (i)[j] -> (i+j)

func @bound_symbol_mismatch(%N : index) {
-  for %i = #map1(%N) to 100 {
+  affine.for %i = #map1(%N) to 100 {
  // expected-error@-1 {{symbol operand count and integer set symbol count must match}}
  }
  return
@@ -532,7 +532,7 @@ func @bound_symbol_mismatch(%N : index) {
#map1 = (i)[j] -> (i+j)

func @bound_dim_mismatch(%N : index) {
-  for %i = #map1(%N, %N)[%N] to 100 {
+  affine.for %i = #map1(%N, %N)[%N] to 100 {
  // expected-error@-1 {{dim operand count and integer set dim count must match}}
  }
  return
@@ -541,7 +541,7 @@ func @bound_dim_mismatch(%N : index) {
// -----

func @large_bound() {
-  for %i = 1 to 9223372036854775810 {
+  affine.for %i = 1 to 9223372036854775810 {
  // expected-error@-1 {{integer constant out of range for attribute}}
  }
  return
@@ -550,7 +550,7 @@ func @large_bound() {
// -----

func @max_in_upper_bound(%N : index) {
-  for %i = 1 to max (i)->(N, 100) { //expected-error {{expected non-function type}}
+  affine.for %i = 1 to max (i)->(N, 100) { //expected-error {{expected non-function type}}
  }
  return
}
@@ -558,7 +558,7 @@ func @max_in_upper_bound(%N : index) {
// -----

func @step_typo() {
-  for %i = 1 to 100 step -- 1 { //expected-error {{expected constant integer}}
+  affine.for %i = 1 to 100 step -- 1 { //expected-error {{expected constant integer}}
  }
  return
}
@@ -566,7 +566,7 @@ func @step_typo() {
// -----

func @invalid_bound_map(%N : i32) {
-  for %i = 1 to (i)->(j)(%N) { //expected-error {{use of undeclared identifier}}
+  affine.for %i = 1 to (i)->(j)(%N) { //expected-error {{use of undeclared identifier}}
  }
  return
}
@@ -579,7 +579,7 @@ func @invalid_bound_map(%N : i32) {
#set0 = (i)[N] : (i >= 0, N - i >= 0)

func @invalid_if_operands1(%N : index) {
-  for %i = 1 to 10 {
+  affine.for %i = 1 to 10 {
    if #set0(%i) {
    // expected-error@-1 {{symbol operand count and integer set symbol count must match}}

@@ -587,7 +587,7 @@ func @invalid_if_operands1(%N : index) {
#set0 = (i)[N] : (i >= 0, N - i >= 0)

func @invalid_if_operands2(%N : index) {
-  for %i = 1 to 10 {
+  affine.for %i = 1 to 10 {
    if #set0()[%N] {
    // expected-error@-1 {{dim operand count and integer set dim count must match}}

@@ -595,7 +595,7 @@ func @invalid_if_operands2(%N : index) {
#set0 = (i)[N] : (i >= 0, N - i >= 0)

func @invalid_if_operands3(%N : index) {
-  for %i = 1 to 10 {
+  affine.for %i = 1 to 10 {
    if #set0(%i)[%i] {
    // expected-error@-1 {{operand cannot be used as a symbol}}
    }
@@ -736,11 +736,11 @@ func @f(f32) {
// -----

func @f(%m : memref<?x?xf32>) {
-  for %i0 = 0 to 42 {
+  affine.for %i0 = 0 to 42 {
    // expected-error@+1 {{operand #2 does not dominate this use}}
    %x = load %m[%i0, %i1] : memref<?x?xf32>
  }
-  for %i1 = 0 to 42 {
+  affine.for %i1 = 0 to 42 {
  }
  return
}
@@ -790,7 +790,7 @@ func @type_alias_unknown(!unknown_alias) -> () { // expected-error {{undefined t

// Check ill-formed opaque tensor.
func @complex_loops() {
-  for %i1 = 1 to 100 {
+  affine.for %i1 = 1 to 100 {
  // expected-error @+1 {{expected '"' in string literal}}
  "opaqueIntTensor"(){bar: opaque<tensor<2x1x4xi32>, "0x686]>} : () -> ()

@@ -824,7 +824,7 @@ func @invalid_affine_structure() {

func @missing_for_max(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
  // expected-error @+1 {{lower loop bound affine map with multiple results requires 'max' prefix}}
-  for %i0 = ()[s]->(0,s-1)()[%arg0] to %arg1 {
+  affine.for %i0 = ()[s]->(0,s-1)()[%arg0] to %arg1 {
  }
  return
}
@@ -833,7 +833,7 @@ func @missing_for_max(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {

func @missing_for_min(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
  // expected-error @+1 {{upper loop bound affine map with multiple results requires 'min' prefix}}
-  for %i0 = %arg0 to ()[s]->(100,s+1)()[%arg1] {
+  affine.for %i0 = %arg0 to ()[s]->(100,s+1)()[%arg1] {
  }
  return
}
@@ -13,7 +13,7 @@ func @inline_notation() -> i32 loc("mysource.cc":10:8) {
  %2 = constant 4 : index loc(callsite("foo" at "mysource.cc":10:8))

  // CHECK: } loc(fused["foo", "mysource.cc":10:8])
-  for %i0 = 0 to 8 {
+  affine.for %i0 = 0 to 8 {
  } loc(fused["foo", "mysource.cc":10:8])

  // CHECK: } loc(fused<"myPass">["foo", "foo2"])
@@ -208,8 +208,8 @@ func @identity_functor(%a : () -> ()) -> (() -> ()) {
func @func_ops_in_loop() {
  // CHECK: %0 = "foo"() : () -> i64
  %a = "foo"() : ()->i64
-  // CHECK: for %i0 = 1 to 10 {
-  for %i = 1 to 10 {
+  // CHECK: affine.for %i0 = 1 to 10 {
+  affine.for %i = 1 to 10 {
    // CHECK: %1 = "doo"() : () -> f32
    %b = "doo"() : ()->f32
    // CHECK: "bar"(%0, %1) : (i64, f32) -> ()
@@ -224,10 +224,10 @@ func @func_ops_in_loop() {

// CHECK-LABEL: func @loops() {
func @loops() {
-  // CHECK: for %i0 = 1 to 100 step 2 {
-  for %i = 1 to 100 step 2 {
-    // CHECK: for %i1 = 1 to 200 {
-    for %j = 1 to 200 {
+  // CHECK: affine.for %i0 = 1 to 100 step 2 {
+  affine.for %i = 1 to 100 step 2 {
+    // CHECK: affine.for %i1 = 1 to 200 {
+    affine.for %j = 1 to 200 {
    } // CHECK: }
  } // CHECK: }
  return // CHECK: return
@@ -235,14 +235,14 @@ func @loops() {

// CHECK-LABEL: func @complex_loops() {
func @complex_loops() {
-  for %i1 = 1 to 100 { // CHECK: for %i0 = 1 to 100 {
-    for %j1 = 1 to 100 { // CHECK: for %i1 = 1 to 100 {
+  affine.for %i1 = 1 to 100 { // CHECK: affine.for %i0 = 1 to 100 {
+    affine.for %j1 = 1 to 100 { // CHECK: affine.for %i1 = 1 to 100 {
      // CHECK: "foo"(%i0, %i1) : (index, index) -> ()
      "foo"(%i1, %j1) : (index,index) -> ()
    } // CHECK: }
    "boo"() : () -> () // CHECK: "boo"() : () -> ()
-    for %j2 = 1 to 10 { // CHECK: for %i2 = 1 to 10 {
-      for %k2 = 1 to 10 { // CHECK: for %i3 = 1 to 10 {
+    affine.for %j2 = 1 to 10 { // CHECK: affine.for %i2 = 1 to 10 {
+      affine.for %k2 = 1 to 10 { // CHECK: affine.for %i3 = 1 to 10 {
        "goo"() : () -> () // CHECK: "goo"() : () -> ()
      } // CHECK: }
    } // CHECK: }
@@ -253,8 +253,8 @@ func @complex_loops() {
// CHECK: func @triang_loop(%arg0: index, %arg1: memref<?x?xi32>) {
func @triang_loop(%arg0: index, %arg1: memref<?x?xi32>) {
  %c = constant 0 : i32 // CHECK: %c0_i32 = constant 0 : i32
-  for %i0 = 1 to %arg0 { // CHECK: for %i0 = 1 to %arg0 {
-    for %i1 = (d0)[]->(d0)(%i0)[] to %arg0 { // CHECK: for %i1 = #map{{[0-9]+}}(%i0) to %arg0 {
+  affine.for %i0 = 1 to %arg0 { // CHECK: affine.for %i0 = 1 to %arg0 {
+    affine.for %i1 = (d0)[]->(d0)(%i0)[] to %arg0 { // CHECK: affine.for %i1 = #map{{[0-9]+}}(%i0) to %arg0 {
      store %c, %arg1[%i0, %i1] : memref<?x?xi32> // CHECK: store %c0_i32, %arg1[%i0, %i1]
    } // CHECK: }
  } // CHECK: }
@@ -263,8 +263,8 @@ func @triang_loop(%arg0: index, %arg1: memref<?x?xi32>) {

// CHECK: func @minmax_loop(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
func @minmax_loop(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
-  // CHECK: for %i0 = max #map{{.*}}()[%arg0] to min #map{{.*}}()[%arg1] {
-  for %i0 = max()[s]->(0,s-1)()[%arg0] to min()[s]->(100,s+1)()[%arg1] {
+  // CHECK: affine.for %i0 = max #map{{.*}}()[%arg0] to min #map{{.*}}()[%arg1] {
+  affine.for %i0 = max()[s]->(0,s-1)()[%arg0] to min()[s]->(100,s+1)()[%arg1] {
    // CHECK: "foo"(%arg2, %i0) : (memref<100xf32>, index) -> ()
    "foo"(%arg2, %i0) : (memref<100xf32>, index) -> ()
  } // CHECK: }
@@ -275,24 +275,24 @@ func @minmax_loop(%arg0: index, %arg1: index, %arg2: memref<100xf32>) {
func @loop_bounds(%N : index) {
  // CHECK: %0 = "foo"(%arg0) : (index) -> index
  %s = "foo"(%N) : (index) -> index
-  // CHECK: for %i0 = %0 to %arg0
-  for %i = %s to %N {
-    // CHECK: for %i1 = #map{{[0-9]+}}(%i0) to 0
-    for %j = (d0)[]->(d0)(%i)[] to 0 step 1 {
+  // CHECK: affine.for %i0 = %0 to %arg0
+  affine.for %i = %s to %N {
+    // CHECK: affine.for %i1 = #map{{[0-9]+}}(%i0) to 0
+    affine.for %j = (d0)[]->(d0)(%i)[] to 0 step 1 {
      // CHECK: %1 = affine.apply #map{{.*}}(%i0, %i1)[%0]
      %w1 = affine.apply(d0, d1)[s0] -> (d0+d1) (%i, %j) [%s]
      // CHECK: %2 = affine.apply #map{{.*}}(%i0, %i1)[%0]
      %w2 = affine.apply(d0, d1)[s0] -> (s0+1) (%i, %j) [%s]
-      // CHECK: for %i2 = #map{{.*}}(%1, %i0)[%arg0] to #map{{.*}}(%2, %i1)[%0] {
-      for %k = #bound_map1 (%w1, %i)[%N] to (i, j)[s] -> (i + j + s) (%w2, %j)[%s] {
+      // CHECK: affine.for %i2 = #map{{.*}}(%1, %i0)[%arg0] to #map{{.*}}(%2, %i1)[%0] {
+      affine.for %k = #bound_map1 (%w1, %i)[%N] to (i, j)[s] -> (i + j + s) (%w2, %j)[%s] {
        // CHECK: "foo"(%i0, %i1, %i2) : (index, index, index) -> ()
        "foo"(%i, %j, %k) : (index, index, index)->()
        // CHECK: %c30 = constant 30 : index
        %c = constant 30 : index
        // CHECK: %3 = affine.apply #map{{.*}}(%arg0, %c30)
        %u = affine.apply (d0, d1)->(d0+d1) (%N, %c)
-        // CHECK: for %i3 = max #map{{.*}}(%i0)[%3] to min #map{{.*}}(%i2)[%c30] {
-        for %l = max #bound_map2(%i)[%u] to min #bound_map2(%k)[%c] {
+        // CHECK: affine.for %i3 = max #map{{.*}}(%i0)[%3] to min #map{{.*}}(%i2)[%c30] {
+        affine.for %l = max #bound_map2(%i)[%u] to min #bound_map2(%k)[%c] {
          // CHECK: "bar"(%i3) : (index) -> ()
          "bar"(%l) : (index) -> ()
        } // CHECK: }
@@ -305,7 +305,7 @@ func @loop_bounds(%N : index) {
// CHECK-LABEL: func @ifinst(%arg0: index) {
func @ifinst(%N: index) {
  %c = constant 200 : index // CHECK %c200 = constant 200
-  for %i = 1 to 10 { // CHECK for %i0 = 1 to 10 {
+  affine.for %i = 1 to 10 { // CHECK affine.for %i0 = 1 to 10 {
    if #set0(%i)[%N, %c] { // CHECK if #set0(%i0)[%arg0, %c200] {
      %x = constant 1 : i32
      // CHECK: %c1_i32 = constant 1 : i32
@@ -328,7 +328,7 @@ func @ifinst(%N: index) {
// CHECK-LABEL: func @simple_ifinst(%arg0: index) {
func @simple_ifinst(%N: index) {
  %c = constant 200 : index // CHECK %c200 = constant 200
-  for %i = 1 to 10 { // CHECK for %i0 = 1 to 10 {
+  affine.for %i = 1 to 10 { // CHECK affine.for %i0 = 1 to 10 {
    if #set0(%i)[%N, %c] { // CHECK if #set0(%i0)[%arg0, %c200] {
      %x = constant 1 : i32
      // CHECK: %c1_i32 = constant 1 : i32
@@ -544,18 +544,18 @@ func @funcattrwithblock() -> ()
#map_non_simple2 = ()[s0, s1] -> (s0 + s1)
#map_non_simple3 = ()[s0] -> (s0 + 3)
func @funcsimplemap(%arg0: index, %arg1: index) -> () {
-  for %i0 = 0 to #map_simple0()[] {
-  // CHECK: for %i0 = 0 to 10 {
-    for %i1 = 0 to #map_simple1()[%arg1] {
-    // CHECK: for %i1 = 0 to %arg1 {
-      for %i2 = 0 to #map_non_simple0(%i0)[] {
-      // CHECK: for %i2 = 0 to #map{{[a-z_0-9]*}}(%i0) {
-        for %i3 = 0 to #map_non_simple1(%i0)[%arg1] {
-        // CHECK: for %i3 = 0 to #map{{[a-z_0-9]*}}(%i0)[%arg1] {
-          for %i4 = 0 to #map_non_simple2()[%arg1, %arg0] {
-          // CHECK: for %i4 = 0 to #map{{[a-z_0-9]*}}()[%arg1, %arg0] {
-            for %i5 = 0 to #map_non_simple3()[%arg0] {
-            // CHECK: for %i5 = 0 to #map{{[a-z_0-9]*}}()[%arg0] {
+  affine.for %i0 = 0 to #map_simple0()[] {
+  // CHECK: affine.for %i0 = 0 to 10 {
+    affine.for %i1 = 0 to #map_simple1()[%arg1] {
+    // CHECK: affine.for %i1 = 0 to %arg1 {
+      affine.for %i2 = 0 to #map_non_simple0(%i0)[] {
+      // CHECK: affine.for %i2 = 0 to #map{{[a-z_0-9]*}}(%i0) {
+        affine.for %i3 = 0 to #map_non_simple1(%i0)[%arg1] {
+        // CHECK: affine.for %i3 = 0 to #map{{[a-z_0-9]*}}(%i0)[%arg1] {
+          affine.for %i4 = 0 to #map_non_simple2()[%arg1, %arg0] {
+          // CHECK: affine.for %i4 = 0 to #map{{[a-z_0-9]*}}()[%arg1, %arg0] {
+            affine.for %i5 = 0 to #map_non_simple3()[%arg0] {
+            // CHECK: affine.for %i5 = 0 to #map{{[a-z_0-9]*}}()[%arg0] {
              %c42_i32 = constant 42 : i32
            }
          }
@@ -749,9 +749,9 @@ func @sparsevectorattr() -> () {
// CHECK-LABEL: func @loops_with_blockids() {
func @loops_with_blockids() {
^block0:
-  for %i = 1 to 100 step 2 {
+  affine.for %i = 1 to 100 step 2 {
  ^block1:
-    for %j = 1 to 200 {
+    affine.for %j = 1 to 200 {
    ^block2:
    }
  }
@@ -18,7 +18,7 @@ func @inline_notation() -> i32 loc("mysource.cc":10:8) {
  %3 = constant 4 : index loc(callsite("foo" at callsite("mysource1.cc":10:8 at callsite("mysource2.cc":13:8 at "mysource3.cc":100:10))))

  // CHECK: } ["foo", mysource.cc:10:8]
-  for %i0 = 0 to 8 {
+  affine.for %i0 = 0 to 8 {
  } loc(fused["foo", "mysource.cc":10:8])

  // CHECK: } <"myPass">["foo", "foo2"]
@@ -6,8 +6,8 @@
// CHECK-LABEL: func @materialize_read_1d() {
func @materialize_read_1d() {
  %A = alloc () : memref<7x42xf32>
-  for %i0 = 0 to 7 step 4 {
-    for %i1 = 0 to 42 step 4 {
+  affine.for %i0 = 0 to 7 step 4 {
+    affine.for %i1 = 0 to 42 step 4 {
      %f1 = vector_transfer_read %A, %i0, %i1 {permutation_map: (d0, d1) -> (d0)} : (memref<7x42xf32>, index, index) -> vector<4xf32>
      %ip1 = affine.apply (d0) -> (d0 + 1) (%i1)
      %f2 = vector_transfer_read %A, %i0, %ip1 {permutation_map: (d0, d1) -> (d0)} : (memref<7x42xf32>, index, index) -> vector<4xf32>
@@ -29,11 +29,11 @@ func @materialize_read_1d() {
// CHECK-LABEL: func @materialize_read_1d_partially_specialized
func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %dyn4 : index) {
  %A = alloc (%dyn1, %dyn2, %dyn4) : memref<7x?x?x42x?xf32>
-  for %i0 = 0 to 7 {
-    for %i1 = 0 to %dyn1 {
-      for %i2 = 0 to %dyn2 {
-        for %i3 = 0 to 42 step 2 {
-          for %i4 = 0 to %dyn4 {
+  affine.for %i0 = 0 to 7 {
+    affine.for %i1 = 0 to %dyn1 {
+      affine.for %i2 = 0 to %dyn2 {
+        affine.for %i3 = 0 to 42 step 2 {
+          affine.for %i4 = 0 to %dyn4 {
            %f1 = vector_transfer_read %A, %i0, %i1, %i2, %i3, %i4 {permutation_map: (d0, d1, d2, d3, d4) -> (d3)} : ( memref<7x?x?x42x?xf32>, index, index, index, index, index) -> vector<4xf32>
            %i3p1 = affine.apply (d0) -> (d0 + 1) (%i3)
            %f2 = vector_transfer_read %A, %i0, %i1, %i2, %i3p1, %i4 {permutation_map: (d0, d1, d2, d3, d4) -> (d3)} : ( memref<7x?x?x42x?xf32>, index, index, index, index, index) -> vector<4xf32>
@@ -54,10 +54,10 @@ func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %d
// CHECK-LABEL: func @materialize_read(%arg0: index, %arg1: index, %arg2: index, %arg3: index) {
func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
  // CHECK-NEXT: %0 = alloc(%arg0, %arg1, %arg2, %arg3) : memref<?x?x?x?xf32>
-  // CHECK-NEXT: for %[[I0:.*]] = 0 to %arg0 step 3 {
-  // CHECK-NEXT: for %[[I1:.*]] = 0 to %arg1 {
-  // CHECK-NEXT: for %[[I2:.*]] = 0 to %arg2 {
-  // CHECK-NEXT: for %[[I3:.*]] = 0 to %arg3 step 5 {
+  // CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %arg0 step 3 {
+  // CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %arg1 {
+  // CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %arg2 {
+  // CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %arg3 step 5 {
  // CHECK-NEXT: %[[C0:.*]] = constant 0 : index
  // CHECK-NEXT: %[[C1:.*]] = constant 1 : index
  // CHECK: {{.*}} = dim %0, 0 : memref<?x?x?x?xf32>
@@ -66,9 +66,9 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
  // CHECK-NEXT: {{.*}} = dim %0, 3 : memref<?x?x?x?xf32>
  // CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32>
  // CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector_type_cast %[[ALLOC]] : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>>
-  // CHECK-NEXT: for %[[I4:.*]] = 0 to 3 {
-  // CHECK-NEXT: for %[[I5:.*]] = 0 to 4 {
-  // CHECK-NEXT: for %[[I6:.*]] = 0 to 5 {
+  // CHECK-NEXT: affine.for %[[I4:.*]] = 0 to 3 {
+  // CHECK-NEXT: affine.for %[[I5:.*]] = 0 to 4 {
+  // CHECK-NEXT: affine.for %[[I6:.*]] = 0 to 5 {
  // CHECK-NEXT: {{.*}} = affine.apply #[[ADD]]
  // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
  // CHECK-NEXT: {{.*}} = affine.apply #[[ADD]]
@@ -109,10 +109,10 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
  // CHECK-NEXT: return
  // CHECK-NEXT:}
  %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
-  for %i0 = 0 to %M step 3 {
-    for %i1 = 0 to %N {
-      for %i2 = 0 to %O {
-        for %i3 = 0 to %P step 5 {
+  affine.for %i0 = 0 to %M step 3 {
+    affine.for %i1 = 0 to %N {
+      affine.for %i2 = 0 to %O {
+        affine.for %i3 = 0 to %P step 5 {
          %f = vector_transfer_read %A, %i0, %i1, %i2, %i3 {permutation_map: (d0, d1, d2, d3) -> (d3, 0, d0)} : (memref<?x?x?x?xf32, 0>, index, index, index, index) -> vector<5x4x3xf32>
        }
      }
@@ -125,10 +125,10 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) {
func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
  // CHECK-NEXT: %0 = alloc(%arg0, %arg1, %arg2, %arg3) : memref<?x?x?x?xf32>
  // CHECK-NEXT: %cst = constant splat<vector<5x4x3xf32>, 1.000000e+00> : vector<5x4x3xf32>
-  // CHECK-NEXT: for %[[I0:.*]] = 0 to %arg0 step 3 {
-  // CHECK-NEXT: for %[[I1:.*]] = 0 to %arg1 step 4 {
-  // CHECK-NEXT: for %[[I2:.*]] = 0 to %arg2 {
-  // CHECK-NEXT: for %[[I3:.*]] = 0 to %arg3 step 5 {
+  // CHECK-NEXT: affine.for %[[I0:.*]] = 0 to %arg0 step 3 {
+  // CHECK-NEXT: affine.for %[[I1:.*]] = 0 to %arg1 step 4 {
+  // CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %arg2 {
+  // CHECK-NEXT: affine.for %[[I3:.*]] = 0 to %arg3 step 5 {
  // CHECK-NEXT: %[[C0:.*]] = constant 0 : index
  // CHECK-NEXT: %[[C1:.*]] = constant 1 : index
  // CHECK: {{.*}} = dim %0, 0 : memref<?x?x?x?xf32>
@@ -138,9 +138,9 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
  // CHECK: %[[ALLOC:.*]] = alloc() : memref<5x4x3xf32>
  // CHECK-NEXT: %[[VECTOR_VIEW:.*]] = vector_type_cast {{.*}} : memref<5x4x3xf32>, memref<1xvector<5x4x3xf32>>
  // CHECK-NEXT: store %cst, {{.*}}[%[[C0]]] : memref<1xvector<5x4x3xf32>>
-  // CHECK-NEXT: for %[[I4:.*]] = 0 to 3 {
-  // CHECK-NEXT: for %[[I5:.*]] = 0 to 4 {
-  // CHECK-NEXT: for %[[I6:.*]] = 0 to 5 {
+  // CHECK-NEXT: affine.for %[[I4:.*]] = 0 to 3 {
+  // CHECK-NEXT: affine.for %[[I5:.*]] = 0 to 4 {
+  // CHECK-NEXT: affine.for %[[I6:.*]] = 0 to 5 {
  // CHECK-NEXT: {{.*}} = load {{.*}}[%[[I6]], %[[I5]], %[[I4]]] : memref<5x4x3xf32>
  // CHECK-NEXT: {{.*}} = affine.apply #[[ADD]](%[[I0]], %[[I4]])
  // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index
@@ -184,10 +184,10 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
  // CHECK-NEXT:}
  %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
  %f1 = constant splat<vector<5x4x3xf32>, 1.000000e+00> : vector<5x4x3xf32>
-  for %i0 = 0 to %M step 3 {
-    for %i1 = 0 to %N step 4 {
-      for %i2 = 0 to %O {
-        for %i3 = 0 to %P step 5 {
+  affine.for %i0 = 0 to %M step 3 {
+    affine.for %i1 = 0 to %N step 4 {
+      affine.for %i2 = 0 to %O {
+        affine.for %i3 = 0 to %P step 5 {
          vector_transfer_write %f1, %A, %i0, %i1, %i2, %i3 {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} : vector<5x4x3xf32>, memref<?x?x?x?xf32, 0>, index, index, index, index
        }
      }
@@ -10,10 +10,10 @@
func @materialize(%M : index, %N : index, %O : index, %P : index) {
  %A = alloc (%M, %N, %O, %P) : memref<?x?x?x?xf32, 0>
  %f1 = constant splat<vector<4x4x4xf32>, 1.000000e+00> : vector<4x4x4xf32>
-  // CHECK: for %i0 = 0 to %arg0 step 4 {
-  // CHECK-NEXT: for %i1 = 0 to %arg1 step 4 {
-  // CHECK-NEXT: for %i2 = 0 to %arg2 {
-  // CHECK-NEXT: for %i3 = 0 to %arg3 step 4 {
+  // CHECK: affine.for %i0 = 0 to %arg0 step 4 {
+  // CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 4 {
+  // CHECK-NEXT: affine.for %i2 = 0 to %arg2 {
+  // CHECK-NEXT: affine.for %i3 = 0 to %arg3 step 4 {
  // CHECK-NEXT: %[[a:[0-9]+]] = {{.*}}[[ID1]](%i0)
  // CHECK-NEXT: %[[b:[0-9]+]] = {{.*}}[[ID1]](%i1)
  // CHECK-NEXT: %[[c:[0-9]+]] = {{.*}}[[ID1]](%i2)
@@ -25,10 +25,10 @@ func @materialize(%M : index, %N : index, %O : index, %P : index) {
  // CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b2]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
  // CHECK: %[[b3:[0-9]+]] = {{.*}}[[D0P3]](%i1)
  // CHECK: vector_transfer_write {{.*}}, %0, {{.*}}, %[[b3]], {{.*}} {permutation_map: #[[D0D1D2D3TOD1D0]]} : vector<4x4xf32>, memref<?x?x?x?xf32>, index, index, index, index
-  for %i0 = 0 to %M step 4 {
-    for %i1 = 0 to %N step 4 {
-      for %i2 = 0 to %O {
-        for %i3 = 0 to %P step 4 {
+  affine.for %i0 = 0 to %M step 4 {
+    affine.for %i1 = 0 to %N step 4 {
+      affine.for %i2 = 0 to %O {
+        affine.for %i3 = 0 to %P step 4 {
          "vector_transfer_write"(%f1, %A, %i0, %i1, %i2, %i3) {permutation_map: (d0, d1, d2, d3) -> (d3, d1, d0)} : (vector<4x4x4xf32>, memref<?x?x?x?xf32, 0>, index, index, index, index) -> ()
        }
      }
@@ -15,8 +15,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
  %f1 = constant 1.0 : f32
  %f2 = constant 2.0 : f32
  // 4x unroll (jammed by construction).
-  // CHECK: for %i0 = 0 to %arg0 {
-  // CHECK-NEXT: for %i1 = 0 to %arg1 step 32 {
+  // CHECK: affine.for %i0 = 0 to %arg0 {
+  // CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 32 {
  // CHECK-NEXT: [[CST0:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
  // CHECK-NEXT: [[CST1:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
  // CHECK-NEXT: [[CST2:%.*]] = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
@@ -34,15 +34,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
  // CHECK-NEXT: [[VAL31:%.*]] = affine.apply [[D0P24]]{{.*}}
  // CHECK-NEXT: vector_transfer_write [[CST3]], {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
  //
-  for %i0 = 0 to %M {
-    for %i1 = 0 to %N {
+  affine.for %i0 = 0 to %M {
+    affine.for %i1 = 0 to %N {
      // non-scoped %f1
      store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
    }
  }
  // 4x unroll (jammed by construction).
-  // CHECK: for %i2 = 0 to %arg0 {
-  // CHECK-NEXT: for %i3 = 0 to %arg1 step 32 {
+  // CHECK: affine.for %i2 = 0 to %arg0 {
+  // CHECK-NEXT: affine.for %i3 = 0 to %arg1 step 32 {
  // CHECK-NEXT: [[CST0:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
  // CHECK-NEXT: [[CST1:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
  // CHECK-NEXT: [[CST2:%.*]] = constant splat<vector<8xf32>, 2.000000e+00> : vector<8xf32>
@@ -60,15 +60,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
  // CHECK-NEXT: [[VAL31:%.*]] = affine.apply [[D0P24]]{{.*}}
  // CHECK-NEXT: vector_transfer_write [[CST3]], {{.*}}, [[VAL30]], [[VAL31]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
  //
-  for %i2 = 0 to %M {
-    for %i3 = 0 to %N {
+  affine.for %i2 = 0 to %M {
+    affine.for %i3 = 0 to %N {
      // non-scoped %f2
      store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
    }
  }
  // 4x unroll (jammed by construction).
-  // CHECK: for %i4 = 0 to %arg0 {
-  // CHECK-NEXT: for %i5 = 0 to %arg1 step 32 {
+  // CHECK: affine.for %i4 = 0 to %arg0 {
+  // CHECK-NEXT: affine.for %i5 = 0 to %arg1 step 32 {
  // CHECK-NEXT: {{.*}} = affine.apply
  // CHECK-NEXT: {{.*}} = affine.apply
  // CHECK-NEXT: {{.*}} = vector_transfer_read
@@ -110,8 +110,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
  // CHECK-NEXT: {{.*}} = affine.apply
  // CHECK-NEXT: vector_transfer_write
  //
-  for %i4 = 0 to %M {
-    for %i5 = 0 to %N {
+  affine.for %i4 = 0 to %M {
+    affine.for %i5 = 0 to %N {
      %a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
      %b5 = load %B[%i4, %i5] : memref<?x?xf32, 0>
      %s5 = addf %a5, %b5 : f32
@@ -15,8 +15,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
  %f1 = constant 1.0 : f32
  %f2 = constant 2.0 : f32
  // (3x2)x unroll (jammed by construction).
-  // CHECK: for %i0 = 0 to %arg0 step 3 {
-  // CHECK-NEXT: for %i1 = 0 to %arg1 step 16 {
+  // CHECK: affine.for %i0 = 0 to %arg0 step 3 {
+  // CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 16 {
  // CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
  // CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
  // CHECK-NEXT: {{.*}} = constant splat<vector<8xf32>, 1.000000e+00> : vector<8xf32>
@@ -41,26 +41,26 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
  // CHECK-NEXT: [[VAL50:%.*]] = affine.apply [[D0P2]](%i0)
  // CHECK-NEXT: [[VAL51:%.*]] = affine.apply [[D0P8]](%i1)
  // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL50]], [[VAL51]] {permutation_map: [[D0D1TOD1]]} : vector<8xf32>
-  for %i0 = 0 to %M {
-    for %i1 = 0 to %N {
+  affine.for %i0 = 0 to %M {
+    affine.for %i1 = 0 to %N {
      // non-scoped %f1
      store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
    }
  }
  // (3x2)x unroll (jammed by construction).
-  // CHECK: for %i2 = 0 to %arg0 step 3 {
-  // CHECK-NEXT: for %i3 = 0 to %arg1 step 16 {
+  // CHECK: affine.for %i2 = 0 to %arg0 step 3 {
+  // CHECK-NEXT: affine.for %i3 = 0 to %arg1 step 16 {
  // .....
-  for %i2 = 0 to %M {
-    for %i3 = 0 to %N {
+  affine.for %i2 = 0 to %M {
+    affine.for %i3 = 0 to %N {
      // non-scoped %f2
      // CHECK does (3x4)x unrolling.
      store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
    }
  }
  // (3x2)x unroll (jammed by construction).
-  // CHECK: for %i4 = 0 to %arg0 step 3 {
-  // CHECK-NEXT: for %i5 = 0 to %arg1 step 16 {
+  // CHECK: affine.for %i4 = 0 to %arg0 step 3 {
+  // CHECK-NEXT: affine.for %i5 = 0 to %arg1 step 16 {
  // CHECK-NEXT: {{.*}} = affine.apply
  // CHECK-NEXT: {{.*}} = affine.apply
  // CHECK-NEXT: {{.*}} = vector_transfer_read
@@ -122,8 +122,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
  // CHECK-NEXT: {{.*}} = affine.apply
  // CHECK-NEXT: vector_transfer_write
  //
-  for %i4 = 0 to %M {
-    for %i5 = 0 to %N {
+  affine.for %i4 = 0 to %M {
+    affine.for %i5 = 0 to %N {
      %a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
      %b5 = load %B[%i4, %i5] : memref<?x?xf32, 0>
      %s5 = addf %a5, %b5 : f32
@@ -13,8 +13,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
  %f1 = constant 1.0 : f32
  %f2 = constant 2.0 : f32
  // 2x unroll (jammed by construction).
-  // CHECK: for %i0 = 0 to %arg0 step 3 {
-  // CHECK-NEXT: for %i1 = 0 to %arg1 step 32 {
+  // CHECK: affine.for %i0 = 0 to %arg0 step 3 {
+  // CHECK-NEXT: affine.for %i1 = 0 to %arg1 step 32 {
  // CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32>
  // CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 1.000000e+00> : vector<3x16xf32>
  // CHECK-NEXT: [[VAL00:%.*]] = affine.apply [[ID1]](%i0)
@@ -24,15 +24,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
  // CHECK-NEXT: [[VAL11:%.*]] = affine.apply [[D0P16]](%i1)
  // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[ID2]]} : vector<3x16xf32>
  //
-  for %i0 = 0 to %M {
-    for %i1 = 0 to %N {
+  affine.for %i0 = 0 to %M {
+    affine.for %i1 = 0 to %N {
      // non-scoped %f1
      store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
    }
  }
  // 2x unroll (jammed by construction).
-  // CHECK: for %i2 = 0 to %arg0 step 3 {
-  // CHECK-NEXT: for %i3 = 0 to %arg1 step 32 {
+  // CHECK: affine.for %i2 = 0 to %arg0 step 3 {
+  // CHECK-NEXT: affine.for %i3 = 0 to %arg1 step 32 {
  // CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32>
  // CHECK-NEXT: {{.*}} = constant splat<vector<3x16xf32>, 2.000000e+00> : vector<3x16xf32>
  // CHECK-NEXT: [[VAL00:%.*]] = affine.apply [[ID1]](%i2)
@@ -42,15 +42,15 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
  // CHECK-NEXT: [[VAL11:%.*]] = affine.apply [[D0P16]](%i3)
  // CHECK-NEXT: vector_transfer_write {{.*}}, {{.*}}, [[VAL10]], [[VAL11]] {permutation_map: [[ID2]]} : vector<3x16xf32>
  //
-  for %i2 = 0 to %M {
-    for %i3 = 0 to %N {
+  affine.for %i2 = 0 to %M {
+    affine.for %i3 = 0 to %N {
      // non-scoped %f2
      store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
    }
  }
  // 2x unroll (jammed by construction).
-  // CHECK: for %i4 = 0 to %arg0 step 3 {
-  // CHECK-NEXT: for %i5 = 0 to %arg1 step 32 {
+  // CHECK: affine.for %i4 = 0 to %arg0 step 3 {
+  // CHECK-NEXT: affine.for %i5 = 0 to %arg1 step 32 {
  // CHECK-NEXT: {{.*}} = affine.apply
  // CHECK-NEXT: {{.*}} = affine.apply
  // CHECK-NEXT: {{.*}} = vector_transfer_read
@@ -72,8 +72,8 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
  // CHECK-NEXT: {{.*}} = affine.apply
  // CHECK-NEXT: vector_transfer_write
  //
-  for %i4 = 0 to %M {
-    for %i5 = 0 to %N {
+  affine.for %i4 = 0 to %M {
+    affine.for %i5 = 0 to %N {
      %a5 = load %A[%i4, %i5] : memref<?x?xf32, 0>
      %b5 = load %B[%i4, %i5] : memref<?x?xf32, 0>
      %s5 = addf %a5, %b5 : f32
@@ -9,19 +9,19 @@

// CHECK-LABEL: func @simple()
func @simple() {
-  for %i0 = 0 to 7 {
+  affine.for %i0 = 0 to 7 {
    %0 = affine.apply (d0) -> (d0) (%i0)
    %1 = affine.apply (d0) -> (d0) (%0)
    %2 = affine.apply (d0, d1) -> (d0 + d1) (%0, %0)
    %3 = affine.apply (d0, d1) -> (d0 - d1) (%0, %0)
  }
-  // CHECK-NEXT: for %i0 = 0 to 7
+  // CHECK-NEXT: affine.for %i0 = 0 to 7
  // CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i0)
  // CHECK-NEXT: {{.*}} affine.apply #[[D0TIMES2]](%i0)
  // CHECK-NEXT: {{.*}} affine.apply #[[ZERO]]()

-  for %i1 = 0 to 7 {
-    for %i2 = 0 to 42 {
+  affine.for %i1 = 0 to 7 {
+    affine.for %i2 = 0 to 42 {
      %20 = affine.apply (d0, d1) -> (d1) (%i1, %i2)
      %21 = affine.apply (d0, d1) -> (d0) (%i1, %i2)
      %22 = affine.apply (d0, d1) -> (d0 + d1) (%20, %21)
@@ -29,15 +29,15 @@ func @simple() {
      %24 = affine.apply (d0, d1) -> (-d0 + d1) (%20, %21)
    }
  }
-  // CHECK: for %i1 = 0 to 7
-  // CHECK-NEXT: for %i2 = 0 to 42
+  // CHECK: affine.for %i1 = 0 to 7
+  // CHECK-NEXT: affine.for %i2 = 0 to 42
  // CHECK-NEXT: {{.*}} affine.apply #[[D0PLUSD1]](%i1, %i2)
  // CHECK-NEXT: {{.*}} affine.apply #[[MINSD0PLUSD1]](%i1, %i2)
  // CHECK-NEXT: {{.*}} affine.apply #[[D0MINUSD1]](%i1, %i2)

-  for %i3 = 0 to 16 {
-    for %i4 = 0 to 47 step 2 {
-      for %i5 = 0 to 78 step 16 {
+  affine.for %i3 = 0 to 16 {
+    affine.for %i4 = 0 to 47 step 2 {
+      affine.for %i5 = 0 to 78 step 16 {
        %50 = affine.apply (d0) -> (d0) (%i3)
        %51 = affine.apply (d0) -> (d0) (%i4)
        %52 = affine.apply (d0) -> (d0) (%i5)
@@ -47,9 +47,9 @@ func @simple() {
      }
    }
  }
-  // CHECK: for %i3 = 0 to 16
-  // CHECK-NEXT: for %i4 = 0 to 47 step 2
-  // CHECK-NEXT: for %i5 = 0 to 78 step 16
+  // CHECK: affine.for %i3 = 0 to 16
+  // CHECK-NEXT: affine.for %i4 = 0 to 47 step 2
+  // CHECK-NEXT: affine.for %i5 = 0 to 78 step 16
  // CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i3)
  // CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i4)
  // CHECK-NEXT: {{.*}} affine.apply #[[ID1]](%i5)
@@ -23,17 +23,17 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
  //
  // CHECK: for {{.*}} step 128
  // CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[C0]], [[C0]] {permutation_map: #[[map_proj_d0d1_0]]} : (memref<?x?xf32>, index, index) -> vector<128xf32>
-  for %i0 = 0 to %M { // vectorized due to scalar -> vector
+  affine.for %i0 = 0 to %M { // vectorized due to scalar -> vector
    %a0 = load %A[%cst0, %cst0] : memref<?x?xf32>
  }
  //
  // CHECK:for {{.*}} [[ARG_M]] {
-  for %i1 = 0 to %M { // not vectorized
+  affine.for %i1 = 0 to %M { // not vectorized
    %a1 = load %A[%i1, %i1] : memref<?x?xf32>
  }
  //
-  // CHECK: for %i{{[0-9]*}} = 0 to [[ARG_M]] {
-  for %i2 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1
+  // CHECK: affine.for %i{{[0-9]*}} = 0 to [[ARG_M]] {
+  affine.for %i2 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1
    %r2 = affine.apply (d0) -> (d0) (%i2)
    %a2 = load %A[%r2#0, %cst0] : memref<?x?xf32>
  }
@@ -41,7 +41,7 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
  // CHECK:for [[IV3:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128
  // CHECK-NEXT: [[APP3:%[a-zA-Z0-9]+]] = affine.apply {{.*}}[[IV3]]
  // CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[C0]], [[APP3]] {permutation_map: #[[map_proj_d0d1_d1]]} : {{.*}} -> vector<128xf32>
-  for %i3 = 0 to %M { // vectorized
+  affine.for %i3 = 0 to %M { // vectorized
    %r3 = affine.apply (d0) -> (d0) (%i3)
    %a3 = load %A[%cst0, %r3#0] : memref<?x?xf32>
  }
@@ -51,8 +51,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
  // CHECK-NEXT: [[APP50:%[0-9]+]] = affine.apply {{.*}}([[IV4]], [[IV5]])
  // CHECK-NEXT: [[APP51:%[0-9]+]] = affine.apply {{.*}}([[IV4]], [[IV5]])
  // CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[APP50]], [[APP51]] {permutation_map: #[[map_proj_d0d1_d1]]} : {{.*}} -> vector<128xf32>
-  for %i4 = 0 to %M { // vectorized
-    for %i5 = 0 to %N { // not vectorized, would vectorize with --test-fastest-varying=1
+  affine.for %i4 = 0 to %M { // vectorized
+    affine.for %i5 = 0 to %N { // not vectorized, would vectorize with --test-fastest-varying=1
      %r50 = affine.apply (d0, d1) -> (d1) (%i4, %i5)
      %r51 = affine.apply (d0, d1) -> (d0) (%i4, %i5)
      %a5 = load %A[%r50, %r51] : memref<?x?xf32>
@@ -61,8 +61,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
  //
  // CHECK: for [[IV6:%[i0-9]*]] = 0 to [[ARG_M]] {
  // CHECK-NEXT: for [[IV7:%[i0-9]*]] = 0 to [[ARG_N]] {
-  for %i6 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1
-    for %i7 = 0 to %N { // not vectorized, can never vectorize
+  affine.for %i6 = 0 to %M { // not vectorized, would vectorize with --test-fastest-varying=1
+    affine.for %i7 = 0 to %N { // not vectorized, can never vectorize
      %r70 = affine.apply (d0, d1) -> (d1 + d0) (%i6, %i7)
      %r71 = affine.apply (d0, d1) -> (d0) (%i6, %i7)
      %a7 = load %A[%r70, %r71] : memref<?x?xf32>
@@ -74,8 +74,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
  // CHECK-NEXT: [[APP9_0:%[0-9]+]] = affine.apply {{.*}}([[IV8]], [[IV9]])
  // CHECK-NEXT: [[APP9_1:%[0-9]+]] = affine.apply {{.*}}([[IV8]], [[IV9]])
  // CHECK-NEXT: {{.*}} = vector_transfer_read %arg0, [[APP9_0]], [[APP9_1]] {permutation_map: #[[map_proj_d0d1_d1]]} : {{.*}} -> vector<128xf32>
-  for %i8 = 0 to %M { // vectorized
-    for %i9 = 0 to %N {
+  affine.for %i8 = 0 to %M { // vectorized
+    affine.for %i9 = 0 to %N {
      %r90 = affine.apply (d0, d1) -> (d1) (%i8, %i9)
      %r91 = affine.apply (d0, d1) -> (d0 + d1) (%i8, %i9)
      %a9 = load %A[%r90, %r91] : memref<?x?xf32>
@@ -84,8 +84,8 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
  //
  // CHECK: for [[IV10:%[i0-9]*]] = 0 to %{{[0-9]*}} {
  // CHECK: for [[IV11:%[i0-9]*]] = 0 to %{{[0-9]*}} {
-  for %i10 = 0 to %M { // not vectorized, need per load transposes
-    for %i11 = 0 to %N { // not vectorized, need per load transposes
+  affine.for %i10 = 0 to %M { // not vectorized, need per load transposes
+    affine.for %i11 = 0 to %N { // not vectorized, need per load transposes
      %r11_0 = affine.apply (d0, d1) -> (d0) (%i10, %i11)
      %r11_1 = affine.apply (d0, d1) -> (d1) (%i10, %i11)
      %a11 = load %A[%r11_0, %r11_1] : memref<?x?xf32>
@@ -98,9 +98,9 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
  // CHECK: for [[IV12:%[i0-9]*]] = 0 to %{{[0-9]*}} {
  // CHECK: for [[IV13:%[i0-9]*]] = 0 to %{{[0-9]*}} {
  // CHECK: for [[IV14:%[i0-9]+]] = 0 to [[ARG_P]] step 128
-  for %i12 = 0 to %M { // not vectorized, can never vectorize
-    for %i13 = 0 to %N { // not vectorized, can never vectorize
-      for %i14 = 0 to %P { // vectorized
+  affine.for %i12 = 0 to %M { // not vectorized, can never vectorize
+    affine.for %i13 = 0 to %N { // not vectorized, can never vectorize
+      affine.for %i14 = 0 to %P { // vectorized
        %r14_0 = affine.apply (d0, d1, d2) -> (d1) (%i12, %i13, %i14)
        %r14_1 = affine.apply (d0, d1, d2) -> (d0 + d1) (%i12, %i13, %i14)
        %r14_2 = affine.apply (d0, d1, d2) -> (d0 + d2) (%i12, %i13, %i14)
@@ -109,24 +109,24 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
    }
  }
  //
-  // CHECK: for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
-  for %i15 = 0 to %M { // not vectorized due to condition below
+  // CHECK: affine.for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
+  affine.for %i15 = 0 to %M { // not vectorized due to condition below
    if #set0(%i15) {
      %a15 = load %A[%cst0, %cst0] : memref<?x?xf32>
    }
  }
  //
-  // CHECK: for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
-  for %i16 = 0 to %M { // not vectorized, can't vectorize a vector load
+  // CHECK: affine.for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
+  affine.for %i16 = 0 to %M { // not vectorized, can't vectorize a vector load
    %a16 = alloc(%M) : memref<?xvector<2xf32>>
    %l16 = load %a16[%i16] : memref<?xvector<2xf32>>
  }
  //
-  // CHECK: for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
+  // CHECK: affine.for %i{{[0-9]*}} = 0 to %{{[0-9]*}} {
  // CHECK: for [[IV18:%[a-zA-Z0-9]+]] = 0 to [[ARG_M]] step 128
  // CHECK: {{.*}} = vector_transfer_read %arg0, [[C0]], [[C0]] {permutation_map: #[[map_proj_d0d1_0]]} : {{.*}} -> vector<128xf32>
-  for %i17 = 0 to %M { // not vectorized, the 1-D pattern that matched %i18 in DFS post-order prevents vectorizing %i17
-    for %i18 = 0 to %M { // vectorized due to scalar -> vector
+  affine.for %i17 = 0 to %M { // not vectorized, the 1-D pattern that matched %i18 in DFS post-order prevents vectorizing %i17
+    affine.for %i18 = 0 to %M { // vectorized due to scalar -> vector
      %a18 = load %A[%cst0, %cst0] : memref<?x?xf32>
    }
  }
@@ -139,24 +139,24 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
  %C = alloc (%M, %N) : memref<?x?xf32, 0>
  %f1 = constant 1.0 : f32
  %f2 = constant 2.0 : f32
-  for %i0 = 0 to %M {
-    for %i1 = 0 to %N {
+  affine.for %i0 = 0 to %M {
+    affine.for %i1 = 0 to %N {
      // CHECK: [[C1:%.*]] = constant splat<vector<128xf32>, 1.000000e+00> : vector<128xf32>
      // CHECK: vector_transfer_write [[C1]], {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : vector<128xf32>, memref<?x?xf32>, index, index
      // non-scoped %f1
      store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
    }
  }
-  for %i2 = 0 to %M {
-    for %i3 = 0 to %N {
+  affine.for %i2 = 0 to %M {
+    affine.for %i3 = 0 to %N {
      // CHECK: [[C3:%.*]] = constant splat<vector<128xf32>, 2.000000e+00> : vector<128xf32>
      // CHECK: vector_transfer_write [[C3]], {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : vector<128xf32>, memref<?x?xf32>, index, index
      // non-scoped %f2
      store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
    }
  }
-  for %i4 = 0 to %M {
-    for %i5 = 0 to %N {
+  affine.for %i4 = 0 to %M {
+    affine.for %i5 = 0 to %N {
      // CHECK: [[A5:%.*]] = vector_transfer_read %0, {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<128xf32>
      // CHECK: [[B5:%.*]] = vector_transfer_read %1, {{.*}} {permutation_map: #[[map_proj_d0d1_d1]]} : (memref<?x?xf32>, index, index) -> vector<128xf32>
      // CHECK: [[S5:%.*]] = addf [[A5]], [[B5]] : vector<128xf32>
@@ -188,10 +188,10 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
// CHECK-LABEL: @vec_rejected
func @vec_rejected(%A : memref<?x?xf32>, %C : memref<?x?xf32>) {
  %N = dim %A, 0 : memref<?x?xf32>
-  for %i = 0 to %N {
+  affine.for %i = 0 to %N {
    // CHECK-NOT: vector
    %a = load %A[%i, %i] : memref<?x?xf32> // not vectorized
-    for %j = 0 to %N {
+    affine.for %j = 0 to %N {
      %b = load %A[%i, %j] : memref<?x?xf32> // may be vectorized
      // CHECK-NOT: vector
      %c = addf %a, %b : f32 // not vectorized because %a wasn't
@@ -11,13 +11,13 @@ func @vec2d(%A : memref<?x?x?xf32>) {
  // CHECK: for {{.*}} = 0 to %1 step 32
  // CHECK: for {{.*}} = 0 to %2 step 256
  // Example:
-  // for %i0 = 0 to %0 {
-  //   for %i1 = 0 to %1 step 32 {
-  //     for %i2 = 0 to %2 step 256 {
+  // affine.for %i0 = 0 to %0 {
+  //   affine.for %i1 = 0 to %1 step 32 {
+  //     affine.for %i2 = 0 to %2 step 256 {
  //       %3 = "vector_transfer_read"(%arg0, %i0, %i1, %i2) : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
-  for %i0 = 0 to %M {
-    for %i1 = 0 to %N {
-      for %i2 = 0 to %P {
+  affine.for %i0 = 0 to %M {
+    affine.for %i1 = 0 to %N {
+      affine.for %i2 = 0 to %P {
        %a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
      }
    }
@@ -27,9 +27,9 @@ func @vec2d(%A : memref<?x?x?xf32>) {
  // CHECK: for {{.*}} = 0 to %2 {
  // For the case: --test-fastest-varying=1 --test-fastest-varying=0 no
  // vectorization happens because of loop nesting order .
-  for %i3 = 0 to %M {
-    for %i4 = 0 to %N {
-      for %i5 = 0 to %P {
+  affine.for %i3 = 0 to %M {
+    affine.for %i4 = 0 to %N {
+      affine.for %i5 = 0 to %P {
        %a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
      }
    }
@@ -43,24 +43,24 @@ func @vector_add_2d(%M : index, %N : index) -> f32 {
  %C = alloc (%M, %N) : memref<?x?xf32, 0>
  %f1 = constant 1.0 : f32
  %f2 = constant 2.0 : f32
-  for %i0 = 0 to %M {
-    for %i1 = 0 to %N {
+  affine.for %i0 = 0 to %M {
+    affine.for %i1 = 0 to %N {
      // CHECK: [[C1:%.*]] = constant splat<vector<32x256xf32>, 1.000000e+00> : vector<32x256xf32>
      // CHECK: vector_transfer_write [[C1]], {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : vector<32x256xf32>, memref<?x?xf32>, index, index
      // non-scoped %f1
      store %f1, %A[%i0, %i1] : memref<?x?xf32, 0>
    }
  }
-  for %i2 = 0 to %M {
-    for %i3 = 0 to %N {
+  affine.for %i2 = 0 to %M {
+    affine.for %i3 = 0 to %N {
      // CHECK: [[C3:%.*]] = constant splat<vector<32x256xf32>, 2.000000e+00> : vector<32x256xf32>
      // CHECK: vector_transfer_write [[C3]], {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : vector<32x256xf32>, memref<?x?xf32>, index, index
      // non-scoped %f2
      store %f2, %B[%i2, %i3] : memref<?x?xf32, 0>
    }
  }
-  for %i4 = 0 to %M {
-    for %i5 = 0 to %N {
+  affine.for %i4 = 0 to %M {
+    affine.for %i5 = 0 to %N {
      // CHECK: [[A5:%.*]] = vector_transfer_read %0, {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : (memref<?x?xf32>, index, index) -> vector<32x256xf32>
      // CHECK: [[B5:%.*]] = vector_transfer_read %1, {{.*}} {permutation_map: #[[map_proj_d0d1_d0d1]]} : (memref<?x?xf32>, index, index) -> vector<32x256xf32>
      // CHECK: [[S5:%.*]] = addf [[A5]], [[B5]] : vector<32x256xf32>
@@ -7,17 +7,17 @@ func @vec3d(%A : memref<?x?x?xf32>) {
  %0 = dim %A, 0 : memref<?x?x?xf32>
  %1 = dim %A, 1 : memref<?x?x?xf32>
  %2 = dim %A, 2 : memref<?x?x?xf32>
-  // CHECK: for %i0 = 0 to %0 {
-  // CHECK: for %i1 = 0 to %0 {
-  // CHECK: for %i2 = 0 to %0 step 32 {
-  // CHECK: for %i3 = 0 to %1 step 64 {
-  // CHECK: for %i4 = 0 to %2 step 256 {
+  // CHECK: affine.for %i0 = 0 to %0 {
+  // CHECK: affine.for %i1 = 0 to %0 {
+  // CHECK: affine.for %i2 = 0 to %0 step 32 {
+  // CHECK: affine.for %i3 = 0 to %1 step 64 {
+  // CHECK: affine.for %i4 = 0 to %2 step 256 {
  // CHECK: %3 = vector_transfer_read %arg0, %i2, %i3, %i4 {permutation_map: #[[map_proj_d0d1d2_d0d1d2]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x64x256xf32>
-  for %t0 = 0 to %0 {
-    for %t1 = 0 to %0 {
-      for %i0 = 0 to %0 {
-        for %i1 = 0 to %1 {
-          for %i2 = 0 to %2 {
+  affine.for %t0 = 0 to %0 {
+    affine.for %t1 = 0 to %0 {
+      affine.for %i0 = 0 to %0 {
+        affine.for %i1 = 0 to %1 {
+          affine.for %i2 = 0 to %2 {
            %a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
          }
        }
@@ -7,13 +7,13 @@ func @vec2d(%A : memref<?x?x?xf32>) {
  %M = dim %A, 0 : memref<?x?x?xf32>
  %N = dim %A, 1 : memref<?x?x?xf32>
  %P = dim %A, 2 : memref<?x?x?xf32>
-  // CHECK: for %i0 = 0 to %0 step 32
-  // CHECK: for %i1 = 0 to %1 {
-  // CHECK: for %i2 = 0 to %2 step 256
+  // CHECK: affine.for %i0 = 0 to %0 step 32
+  // CHECK: affine.for %i1 = 0 to %1 {
+  // CHECK: affine.for %i2 = 0 to %2 step 256
  // CHECK: {{.*}} = vector_transfer_read %arg0, %i0, %i1, %i2 {permutation_map: #[[map_proj_d0d1d2_d0d2]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
-  for %i0 = 0 to %M {
-    for %i1 = 0 to %N {
-      for %i2 = 0 to %P {
+  affine.for %i0 = 0 to %M {
+    affine.for %i1 = 0 to %N {
+      affine.for %i2 = 0 to %P {
        %a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
      }
    }
@@ -23,9 +23,9 @@ func @vec2d(%A : memref<?x?x?xf32>) {
  // CHECK: for {{.*}} = 0 to %2 {
  // For the case: --test-fastest-varying=2 --test-fastest-varying=0 no
  // vectorization happens because of loop nesting order
-  for %i3 = 0 to %M {
-    for %i4 = 0 to %N {
-      for %i5 = 0 to %P {
+  affine.for %i3 = 0 to %M {
+    affine.for %i4 = 0 to %N {
+      affine.for %i5 = 0 to %P {
        %a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
      }
    }
@@ -12,20 +12,20 @@ func @vec2d(%A : memref<?x?x?xf32>) {
  // CHECK: for {{.*}} = 0 to %2 {
  // For the case: --test-fastest-varying=0 --test-fastest-varying=2 no
  // vectorization happens because of loop nesting order.
-  for %i0 = 0 to %M {
-    for %i1 = 0 to %N {
-      for %i2 = 0 to %P {
+  affine.for %i0 = 0 to %M {
+    affine.for %i1 = 0 to %N {
+      affine.for %i2 = 0 to %P {
        %a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
      }
    }
  }
-  // CHECK: for %i3 = 0 to %0 step 32
-  // CHECK: for %i4 = 0 to %1 step 256
-  // CHECK: for %i5 = 0 to %2 {
+  // CHECK: affine.for %i3 = 0 to %0 step 32
+  // CHECK: affine.for %i4 = 0 to %1 step 256
+  // CHECK: affine.for %i5 = 0 to %2 {
  // CHECK: {{.*}} = vector_transfer_read %arg0, %i4, %i5, %i3 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
-  for %i3 = 0 to %M {
-    for %i4 = 0 to %N {
-      for %i5 = 0 to %P {
+  affine.for %i3 = 0 to %M {
+    affine.for %i4 = 0 to %N {
+      affine.for %i5 = 0 to %P {
        %a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
      }
    }
@@ -37,26 +37,26 @@ func @vec2d_imperfectly_nested(%A : memref<?x?x?xf32>) {
  %0 = dim %A, 0 : memref<?x?x?xf32>
  %1 = dim %A, 1 : memref<?x?x?xf32>
  %2 = dim %A, 2 : memref<?x?x?xf32>
-  // CHECK: for %i0 = 0 to %0 step 32 {
-  // CHECK: for %i1 = 0 to %1 {
-  // CHECK: for %i2 = 0 to %2 step 256 {
+  // CHECK: affine.for %i0 = 0 to %0 step 32 {
+  // CHECK: affine.for %i1 = 0 to %1 {
+  // CHECK: affine.for %i2 = 0 to %2 step 256 {
  // CHECK: %3 = vector_transfer_read %arg0, %i2, %i1, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
-  // CHECK: for %i3 = 0 to %1 step 256 {
-  // CHECK: for %i4 = 0 to %2 {
+  // CHECK: affine.for %i3 = 0 to %1 step 256 {
+  // CHECK: affine.for %i4 = 0 to %2 {
  // CHECK: %4 = vector_transfer_read %arg0, %i3, %i4, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
-  // CHECK: for %i5 = 0 to %2 {
+  // CHECK: affine.for %i5 = 0 to %2 {
  // CHECK: %5 = vector_transfer_read %arg0, %i3, %i5, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d0]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
-  for %i0 = 0 to %0 {
-    for %i1 = 0 to %1 {
-      for %i2 = 0 to %2 {
+  affine.for %i0 = 0 to %0 {
+    affine.for %i1 = 0 to %1 {
+      affine.for %i2 = 0 to %2 {
        %a2 = load %A[%i2, %i1, %i0] : memref<?x?x?xf32>
      }
    }
-    for %i3 = 0 to %1 {
-      for %i4 = 0 to %2 {
+    affine.for %i3 = 0 to %1 {
+      affine.for %i4 = 0 to %2 {
        %a4 = load %A[%i3, %i4, %i0] : memref<?x?x?xf32>
      }
-      for %i5 = 0 to %2 {
+      affine.for %i5 = 0 to %2 {
        %a5 = load %A[%i3, %i5, %i0] : memref<?x?x?xf32>
      }
    }
@@ -12,20 +12,20 @@ func @vec2d(%A : memref<?x?x?xf32>) {
  // CHECK: for {{.*}} = 0 to %2 {
  // For the case: --test-fastest-varying=0 --test-fastest-varying=1 no
  // vectorization happens because of loop nesting order.
-  for %i0 = 0 to %M {
-    for %i1 = 0 to %N {
-      for %i2 = 0 to %P {
+  affine.for %i0 = 0 to %M {
+    affine.for %i1 = 0 to %N {
+      affine.for %i2 = 0 to %P {
        %a2 = load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
      }
    }
  }
-  // CHECK: for %i3 = 0 to %0 step 32
-  // CHECK: for %i4 = 0 to %1 {
-  // CHECK: for %i5 = 0 to %2 step 256
+  // CHECK: affine.for %i3 = 0 to %0 step 32
+  // CHECK: affine.for %i4 = 0 to %1 {
+  // CHECK: affine.for %i5 = 0 to %2 step 256
  // CHECK: {{.*}} = vector_transfer_read %arg0, %i4, %i5, %i3 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
-  for %i3 = 0 to %M {
-    for %i4 = 0 to %N {
-      for %i5 = 0 to %P {
+  affine.for %i3 = 0 to %M {
+    affine.for %i4 = 0 to %N {
+      affine.for %i5 = 0 to %P {
        %a5 = load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
      }
    }
@@ -37,26 +37,26 @@ func @vec2d_imperfectly_nested(%A : memref<?x?x?xf32>) {
  %0 = dim %A, 0 : memref<?x?x?xf32>
  %1 = dim %A, 1 : memref<?x?x?xf32>
  %2 = dim %A, 2 : memref<?x?x?xf32>
-  // CHECK: for %i0 = 0 to %0 step 32 {
-  // CHECK: for %i1 = 0 to %1 step 256 {
-  // CHECK: for %i2 = 0 to %2 {
+  // CHECK: affine.for %i0 = 0 to %0 step 32 {
+  // CHECK: affine.for %i1 = 0 to %1 step 256 {
+  // CHECK: affine.for %i2 = 0 to %2 {
  // CHECK: %3 = vector_transfer_read %arg0, %i2, %i1, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
-  // CHECK: for %i3 = 0 to %1 {
-  // CHECK: for %i4 = 0 to %2 step 256 {
+  // CHECK: affine.for %i3 = 0 to %1 {
+  // CHECK: affine.for %i4 = 0 to %2 step 256 {
  // CHECK: %4 = vector_transfer_read %arg0, %i3, %i4, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
-  // CHECK: for %i5 = 0 to %2 step 256 {
+  // CHECK: affine.for %i5 = 0 to %2 step 256 {
  // CHECK: %5 = vector_transfer_read %arg0, %i3, %i5, %i0 {permutation_map: #[[map_proj_d0d1d2_d2d1]]} : (memref<?x?x?xf32>, index, index, index) -> vector<32x256xf32>
-  for %i0 = 0 to %0 {
-    for %i1 = 0 to %1 {
-      for %i2 = 0 to %2 {
+  affine.for %i0 = 0 to %0 {
+    affine.for %i1 = 0 to %1 {
+      affine.for %i2 = 0 to %2 {
        %a2 = load %A[%i2, %i1, %i0] : memref<?x?x?xf32>
      }
    }
-    for %i3 = 0 to %1 {
-      for %i4 = 0 to %2 {
+    affine.for %i3 = 0 to %1 {
+      affine.for %i4 = 0 to %2 {
        %a4 = load %A[%i3, %i4, %i0] : memref<?x?x?xf32>
      }
-      for %i5 = 0 to %2 {
+      affine.for %i5 = 0 to %2 {
        %a5 = load %A[%i3, %i5, %i0] : memref<?x?x?xf32>
      }
    }
@@ -213,10 +213,10 @@ func @dyn_shape_fold(%L : index, %M : index) -> (memref<? x ? x i32>, memref<? x
// CHECK-NEXT: %2 = alloc() : memref<512x1024xi32>
%c = alloc(%K, %N) : memref<? x ? x i32>

// CHECK: for %i0 =
for %i = 0 to %L {
// CHECK-NEXT: for %i1 =
for %j = 0 to 10 {
// CHECK: affine.for %i0 =
affine.for %i = 0 to %L {
// CHECK-NEXT: affine.for %i1 =
affine.for %j = 0 to 10 {
// CHECK-NEXT: %4 = load %0[%i0, %i1] : memref<?x1024xf32>
// CHECK-NEXT: store %4, %1[%c0, %c0, %i0, %i1, %c0] : memref<4x1024x8x512x?xf32>
%v = load %a[%i, %j] : memref<?x?xf32>
@@ -242,8 +242,8 @@ func @merge_constants() -> (index, index) {
// CHECK-LABEL: func @hoist_constant
func @hoist_constant(%arg0: memref<8xi32>) {
// CHECK-NEXT: %c42_i32 = constant 42 : i32
// CHECK-NEXT: for %i0 = 0 to 8 {
for %i0 = 0 to 8 {
// CHECK-NEXT: affine.for %i0 = 0 to 8 {
affine.for %i0 = 0 to 8 {
// CHECK-NEXT: store %c42_i32, %arg0[%i0]
%c42_i32 = constant 42 : i32
store %c42_i32, %arg0[%i0] : memref<8xi32>
@@ -2,8 +2,8 @@

// CHECK-LABEL: @test(%arg0: memref<f32>) {
func @test(%p : memref<f32>) {
for %i0 = 0 to 128 {
for %i1 = 0 to 8 { // CHECK: for %i1 = 0 to 8 {
affine.for %i0 = 0 to 128 {
affine.for %i1 = 0 to 8 { // CHECK: affine.for %i1 = 0 to 8 {
%0 = constant 4.5 : f32
%1 = constant 1.5 : f32

@@ -123,8 +123,8 @@ func @down_propagate_for_ml() {
// CHECK: %c1_i32 = constant 1 : i32
%0 = constant 1 : i32

// CHECK-NEXT: for %i0 = 0 to 4 {
for %i = 0 to 4 {
// CHECK-NEXT: affine.for %i0 = 0 to 4 {
affine.for %i = 0 to 4 {
// CHECK-NEXT: "foo"(%c1_i32, %c1_i32) : (i32, i32) -> ()
%1 = constant 1 : i32
"foo"(%0, %1) : (i32, i32) -> ()
@@ -155,8 +155,8 @@ func @down_propagate_cfg() -> i32 {
/// Check that operation definitions are NOT propagated up the dominance tree.
// CHECK-LABEL: @up_propagate_ml
func @up_propagate_ml() -> i32 {
// CHECK: for %i0 = 0 to 4 {
for %i = 0 to 4 {
// CHECK: affine.for %i0 = 0 to 4 {
affine.for %i = 0 to 4 {
// CHECK-NEXT: %c1_i32 = constant 1 : i32
// CHECK-NEXT: "foo"(%c1_i32) : (i32) -> ()
%0 = constant 1 : i32
@@ -32,7 +32,7 @@ func @loop_nest_1d() {
// Second DMA transfer.
// CHECK: dma_start %1[%c256], %5[%c0], %c256_0, %6[%c0] : memref<512xf32>, memref<256xf32, 1>, memref<1xi32>
// CHECK-NEXT: dma_wait %6[%c0], %c256_0 : memref<1xi32>
// CHECK: for %i0 = 0 to 256 {
// CHECK: affine.for %i0 = 0 to 256 {
// CHECK-NEXT: %7 = load %3[%i0] : memref<256xf32, 1>
// CHECK: %8 = affine.apply [[MAP_PLUS_256]](%i0)
// CHECK: %9 = affine.apply [[MAP_MINUS_256]](%8)
@@ -41,7 +41,7 @@ func @loop_nest_1d() {
// CHECK: %11 = load %2[%i0] : memref<256xf32, 1>
// CHECK-NEXT: }
// CHECK-NEXT: return
for %i = 0 to 256 {
affine.for %i = 0 to 256 {
load %A[%i] : memref<256 x f32>
%idx = affine.apply (d0) -> (d0 + 256)(%i)
load %B[%idx] : memref<512 x f32>
@@ -68,20 +68,20 @@ func @loop_nest_1d() {
// INCOMING DMA for C.
// CHECK-DAG: dma_start %arg2[%c0, %c0], [[BUFC]][%c0, %c0], %c16384_0, [[TAGC]][%c0] : memref<512x32xf32>, memref<512x32xf32, 1>, memref<1xi32>
// CHECK-DAG: dma_wait [[TAGC]][%c0], %c16384_0 : memref<1xi32>
// CHECK-NEXT: for %i0 = 0 to 32 {
// CHECK-NEXT: for %i1 = 0 to 32 {
// CHECK-NEXT: for %i2 = 0 to 32 {
// CHECK-NEXT: for %i3 = 0 to 16 {
// CHECK-NEXT: affine.for %i0 = 0 to 32 {
// CHECK-NEXT: affine.for %i1 = 0 to 32 {
// CHECK-NEXT: affine.for %i2 = 0 to 32 {
// CHECK-NEXT: affine.for %i3 = 0 to 16 {
// CHECK-NEXT: %7 = affine.apply #map{{[0-9]+}}(%i1, %i3)
// CHECK-NEXT: %8 = load [[BUFB]][%7, %i0] : memref<512x32xf32, 1>
// CHECK-NEXT: "foo"(%8) : (f32) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: for %i4 = 0 to 16 {
// CHECK-NEXT: affine.for %i4 = 0 to 16 {
// CHECK-NEXT: %9 = affine.apply #map{{[0-9]+}}(%i2, %i4)
// CHECK-NEXT: %10 = load [[BUFA]][%9, %i1] : memref<512x32xf32, 1>
// CHECK-NEXT: "bar"(%10) : (f32) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: for %i5 = 0 to 16 {
// CHECK-NEXT: affine.for %i5 = 0 to 16 {
// CHECK-NEXT: %11 = "abc_compute"() : () -> f32
// CHECK-NEXT: %12 = affine.apply #map{{[0-9]+}}(%i2, %i5)
// CHECK-NEXT: %13 = load [[BUFC]][%12, %i0] : memref<512x32xf32, 1>
@@ -102,20 +102,20 @@ func @loop_nest_high_d(%A: memref<512 x 32 x f32>,
// DMAs will be performed at this level (jT is the first loop without a stride).
// A and B are read, while C is both read and written. A total of three new buffers
// are allocated and existing loads/stores are replaced by accesses to those buffers.
for %jT = 0 to 32 {
for %kT = 0 to 32 {
for %iT = 0 to 32 {
for %kk = 0 to 16 { // k intratile
affine.for %jT = 0 to 32 {
affine.for %kT = 0 to 32 {
affine.for %iT = 0 to 32 {
affine.for %kk = 0 to 16 { // k intratile
%k = affine.apply (d0, d1) -> (16*d0 + d1) (%kT, %kk)
%v0 = load %B[%k, %jT] : memref<512 x 32 x f32>
"foo"(%v0) : (f32) -> ()
}
for %ii = 0 to 16 { // i intratile.
affine.for %ii = 0 to 16 { // i intratile.
%i = affine.apply (d0, d1) -> (16*d0 + d1)(%iT, %ii)
%v1 = load %A[%i, %kT] : memref<512 x 32 x f32>
"bar"(%v1) : (f32) -> ()
}
for %ii_ = 0 to 16 { // i intratile.
affine.for %ii_ = 0 to 16 { // i intratile.
%v2 = "abc_compute"() : () -> f32
%i_ = affine.apply (d0, d1) -> (16*d0 + d1)(%iT, %ii_)
%v3 = load %C[%i_, %jT] : memref<512 x 32 x f32>
@@ -134,13 +134,13 @@ func @loop_nest_high_d(%A: memref<512 x 32 x f32>,
//
// CHECK-LABEL: func @loop_nest_modulo() {
// CHECK: %0 = alloc() : memref<256x8xf32>
// CHECK-NEXT: for %i0 = 0 to 32 step 4 {
// CHECK-NEXT: affine.for %i0 = 0 to 32 step 4 {
// CHECK-NEXT: %1 = affine.apply #map{{[0-9]+}}(%i0)
// CHECK-NEXT: %2 = alloc() : memref<1x2xf32, 1>
// CHECK-NEXT: %3 = alloc() : memref<1xi32>
// CHECK-NEXT: dma_start %0[%1, %c0], %2[%c0, %c0], %c2, %3[%c0] : memref<256x8xf32>, memref<1x2xf32, 1>, memref<1xi32>
// CHECK-NEXT: dma_wait %3[%c0], %c2 : memref<1xi32>
// CHECK-NEXT: for %i1 = 0 to 8 {
// CHECK-NEXT: affine.for %i1 = 0 to 8 {
// ...
// ...
// CHECK: }
@@ -148,9 +148,9 @@ func @loop_nest_high_d(%A: memref<512 x 32 x f32>,
// CHECK-NEXT: return
func @loop_nest_modulo() {
%A = alloc() : memref<256 x 8 x f32>
for %i = 0 to 32 step 4 {
affine.for %i = 0 to 32 step 4 {
// DMAs will be performed at this level (%j is the first unit stride loop)
for %j = 0 to 8 {
affine.for %j = 0 to 8 {
%idx = affine.apply (d0) -> (d0 mod 2) (%j)
// A buffer of size 32 x 2 will be allocated (original buffer was 256 x 8).
%v = load %A[%i, %idx] : memref<256 x 8 x f32>
@@ -164,17 +164,17 @@ func @loop_nest_modulo() {
// CHECK-LABEL: func @loop_nest_tiled() -> memref<256x1024xf32> {
func @loop_nest_tiled() -> memref<256x1024xf32> {
%0 = alloc() : memref<256x1024xf32>
for %i0 = 0 to 256 step 32 {
for %i1 = 0 to 1024 step 32 {
affine.for %i0 = 0 to 256 step 32 {
affine.for %i1 = 0 to 1024 step 32 {
// CHECK: %3 = alloc() : memref<32x32xf32, 1>
// CHECK-NEXT: %4 = alloc() : memref<1xi32>
// Strided DMA here: 32 x 32 tile in a 256 x 1024 memref.
// CHECK-NEXT: dma_start %0[%1, %2], %3[%c0, %c0], %c1024, %4[%c0], %c1024_0, %c32 : memref<256x1024xf32>, memref<32x32xf32, 1>, memref<1xi32>
// CHECK-NEXT: dma_wait
// CHECK-NEXT: for %i2 = #map
// CHECK-NEXT: for %i3 = #map
for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 32)(%i0) {
for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 32)(%i1) {
// CHECK-NEXT: affine.for %i2 = #map
// CHECK-NEXT: affine.for %i3 = #map
affine.for %i2 = (d0) -> (d0)(%i0) to (d0) -> (d0 + 32)(%i0) {
affine.for %i3 = (d0) -> (d0)(%i1) to (d0) -> (d0 + 32)(%i1) {
// CHECK-NEXT: %5 = affine.apply [[MAP_INDEX_DIFF_EVEN]](%i0, %i1, %i2, %i3)
// CHECK-NEXT: %6 = affine.apply [[MAP_INDEX_DIFF_ODD]](%i0, %i1, %i2, %i3)
// CHECK-NEXT: %7 = load %3[%5, %6] : memref<32x32xf32, 1>
@@ -196,8 +196,8 @@ func @dma_constant_dim_access(%A : memref<100x100xf32>) {
// No strided DMA needed here.
// CHECK: dma_start %arg0[%c1, %c0], %0[%c0, %c0], %c100, %1[%c0] : memref<100x100xf32>, memref<1x100xf32, 1>,
// CHECK-NEXT: dma_wait %1[%c0], %c100 : memref<1xi32>
for %i = 0 to 100 {
for %j = 0 to ()[s0] -> (s0) ()[%N] {
affine.for %i = 0 to 100 {
affine.for %j = 0 to ()[s0] -> (s0) ()[%N] {
// CHECK: %2 = affine.apply [[MAP_D0_MINUS_ONE]](%c1_0, %i1)
// CHECK: %3 = affine.apply [[MAP_D1]](%c1_0, %i1)
// CHECK-NEXT: %4 = load %0[%2, %3] : memref<1x100xf32, 1>
@@ -210,8 +210,8 @@ func @dma_constant_dim_access(%A : memref<100x100xf32>) {
// CHECK-LABEL: func @dma_with_symbolic_accesses
func @dma_with_symbolic_accesses(%A : memref<100x100xf32>, %M : index) {
%N = constant 9 : index
for %i = 0 to 100 {
for %j = 0 to 100 {
affine.for %i = 0 to 100 {
affine.for %j = 0 to 100 {
%idy = affine.apply (d0, d1) [s0, s1] -> (d1 + s0 + s1)(%i, %j)[%M, %N]
load %A[%i, %idy] : memref<100 x 100 x f32>
}
@@ -221,8 +221,8 @@ func @dma_with_symbolic_accesses(%A : memref<100x100xf32>, %M : index) {
// CHECK-NEXT: %2 = alloc() : memref<1xi32>
// CHECK-NEXT: dma_start %arg0[%c0, %0], %1[%c0, %c0], %c10000, %2[%c0]
// CHECK-NEXT: dma_wait %2[%c0], %c10000
// CHECK-NEXT: for %i0 = 0 to 100 {
// CHECK-NEXT: for %i1 = 0 to 100 {
// CHECK-NEXT: affine.for %i0 = 0 to 100 {
// CHECK-NEXT: affine.for %i1 = 0 to 100 {
// CHECK-NEXT: %3 = affine.apply [[MAP_SYM_SHIFT]](%i0, %i1)[%arg1, %c9]
// CHECK-NEXT: %4 = affine.apply [[MAP_3D_D1]](%arg1, %i0, %3)
// CHECK-NEXT: %5 = affine.apply [[MAP_SUB_OFFSET]](%arg1, %i0, %3)
@@ -241,8 +241,8 @@ func @dma_with_symbolic_loop_bounds(%A : memref<100x100xf32>, %M : index, %N: in
// CHECK-NEXT: %1 = alloc() : memref<1xi32>
// CHECK-NEXT: dma_start %arg0[%c0, %c0], %0[%c0, %c0], %c10000, %1[%c0] : memref<100x100xf32>, memref<100x100xf32, 1>, memref<1xi32>
// CHECK-NEXT: dma_wait %1[%c0], %c10000 : memref<1xi32>
for %i = 0 to 100 {
for %j = %M to %N {
affine.for %i = 0 to 100 {
affine.for %j = %M to %N {
%idy = affine.apply (d1) [s0] -> (d1 + s0)(%j)[%K]
load %A[%i, %idy] : memref<100 x 100 x f32>
}
@@ -256,8 +256,8 @@ func @dma_with_symbolic_loop_bounds(%A : memref<100x100xf32>, %M : index, %N: in
func @dma_unknown_size(%arg0: memref<?x?xf32>) {
%M = dim %arg0, 0 : memref<? x ? x f32>
%N = dim %arg0, 0 : memref<? x ? x f32>
for %i = 0 to %M {
for %j = 0 to %N {
affine.for %i = 0 to %M {
affine.for %j = 0 to %N {
// If this loop nest isn't tiled, the access requires a non-constant DMA
// size -- not yet implemented.
// CHECK: %2 = load %arg0[%i0, %i1] : memref<?x?xf32>
@@ -272,9 +272,9 @@ func @dma_unknown_size(%arg0: memref<?x?xf32>) {

// CHECK-LABEL: func @dma_memref_3d
func @dma_memref_3d(%arg0: memref<1024x1024x1024xf32>) {
for %i = 0 to 1024 {
for %j = 0 to 1024 {
for %k = 0 to 1024 {
affine.for %i = 0 to 1024 {
affine.for %j = 0 to 1024 {
affine.for %k = 0 to 1024 {
%idx = affine.apply (d0) -> (d0 mod 128)(%i)
%idy = affine.apply (d0) -> (d0 mod 128)(%j)
%idz = affine.apply (d0) -> (d0 mod 128)(%k)
@@ -308,8 +308,8 @@ func @dma_memref_3d(%arg0: memref<1024x1024x1024xf32>) {
// CHECK-LABEL: func @multi_load_store_union() {
func @multi_load_store_union() {
%A = alloc() : memref<512 x 512 x f32>
for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
%idx = affine.apply (d0) -> (d0 + 64)(%i)
%idy = affine.apply (d0) -> (d0 + 128)(%j)
%ishift = affine.apply (d0) -> (d0 + 2)(%i)
@@ -333,8 +333,8 @@ func @multi_load_store_union() {
// CHECK-NEXT: dma_start %0[%c2_1, %c2_2], %1[%c0, %c0], %c170372_3, %2[%c0], %c512_4, %c446_5 : memref<512x512xf32>, memref<382x446xf32, 1>, memref<1xi32>
// CHECK-NEXT: dma_wait %2[%c0], %c170372_3 : memref<1xi32>
// CHECK-NEXT: %3 = alloc() : memref<1xi32>
// CHECK-NEXT: for %i0 = 0 to 256 {
// CHECK-NEXT: for %i1 = 0 to 256 {
// CHECK-NEXT: affine.for %i0 = 0 to 256 {
// CHECK-NEXT: affine.for %i1 = 0 to 256 {
// CHECK-NEXT: %4 = affine.apply [[MAP_PLUS_64]](%i0)
// CHECK-NEXT: %5 = affine.apply [[MAP_PLUS_128]](%i1)
// CHECK-NEXT: %6 = affine.apply [[MAP_PLUS_2]](%i0)
@@ -370,7 +370,7 @@ func @dma_loop_straightline_interspersed() {
%c255 = constant 255 : index
%A = alloc() : memref<256 x f32>
%v = load %A[%c0] : memref<256 x f32>
for %i = 1 to 255 {
affine.for %i = 1 to 255 {
load %A[%i] : memref<256 x f32>
}
%l = load %A[%c255] : memref<256 x f32>
@@ -389,7 +389,7 @@ func @dma_loop_straightline_interspersed() {
// CHECK-NEXT: %5 = alloc() : memref<1xi32>
// CHECK-NEXT: dma_start %0[%c1_0], %4[%c0], %c254, %5[%c0] : memref<256xf32>, memref<254xf32, 1>, memref<1xi32>
// CHECK-NEXT: dma_wait %5[%c0], %c254 : memref<1xi32>
// CHECK-NEXT: for %i0 = 1 to 255 {
// CHECK-NEXT: affine.for %i0 = 1 to 255 {
// CHECK-NEXT: %6 = affine.apply [[MAP_MINUS_ONE]](%i0)
// CHECK-NEXT: %7 = load %4[%6] : memref<254xf32, 1>
// CHECK-NEXT: }
@@ -410,10 +410,10 @@ func @dma_loop_straightline_interspersed() {
func @dma_mixed_loop_blocks() {
%c0 = constant 0 : index
%A = alloc() : memref<256 x 256 x vector<8 x f32>>
for %i = 0 to 256 {
affine.for %i = 0 to 256 {
%v = load %A[%c0, %c0] : memref<256 x 256 x vector<8 x f32>>
"foo"(%v) : (vector<8 x f32>) -> ()
for %j = 0 to 256 {
affine.for %j = 0 to 256 {
%w = load %A[%i, %j] : memref<256 x 256 x vector<8 x f32>>
"bar"(%w) : (vector<8 x f32>) -> ()
}
@@ -425,7 +425,7 @@ func @dma_mixed_loop_blocks() {
// CHECK-DAG: [[TAG:%[0-9]+]] = alloc() : memref<1xi32>
// CHECK: dma_start [[MEM]][%c0, %c0], [[BUF]][%c0, %c0], %c65536, [[TAG]][%c0] : memref<256x256xvector<8xf32>>, memref<256x256xvector<8xf32>, 1>, memref<1xi32>
// CHECK-NEXT: dma_wait [[TAG]][%c0], %c65536 : memref<1xi32>
// CHECK-NEXT: for %i0 = 0 to 256 {
// CHECK-NEXT: affine.for %i0 = 0 to 256 {
// CHECK-NEXT: %3 = load [[BUF]][%c0_0, %c0_0] : memref<256x256xvector<8xf32>, 1>
// CHECK: for %i1 = 0 to 256 {
// CHECK: affine.for %i1 = 0 to 256 {
// CHECK-NEXT: %4 = load [[BUF]][%i0, %i1] : memref<256x256xvector<8xf32>, 1>
(File diff suppressed because it is too large.)
@@ -8,12 +8,12 @@
// CHECK-DAG: [[UB_INTRA_TILE:#map[0-9]+]] = (d0, d1, d2) -> (d2 + 32, s0, 4096 floordiv s1)

// CHECK-LABEL: func @loop_tiling()
// CHECK-NEXT: for %i0 = 0 to 256 step 32 {
// CHECK-NEXT: for %i1 = 0 to 512 step 32 {
// CHECK-NEXT: for %i2 = 0 to 1024 step 32 {
// CHECK-NEXT: for %i3 = [[IDENTITY]](%i0) to [[MAP0]](%i0) {
// CHECK-NEXT: for %i4 = [[IDENTITY]](%i1) to [[MAP0]](%i1) {
// CHECK-NEXT: for %i5 = [[IDENTITY]](%i2) to [[MAP0]](%i2) {
// CHECK-NEXT: affine.for %i0 = 0 to 256 step 32 {
// CHECK-NEXT: affine.for %i1 = 0 to 512 step 32 {
// CHECK-NEXT: affine.for %i2 = 0 to 1024 step 32 {
// CHECK-NEXT: affine.for %i3 = [[IDENTITY]](%i0) to [[MAP0]](%i0) {
// CHECK-NEXT: affine.for %i4 = [[IDENTITY]](%i1) to [[MAP0]](%i1) {
// CHECK-NEXT: affine.for %i5 = [[IDENTITY]](%i2) to [[MAP0]](%i2) {
// CHECK-NEXT: "foo"(%i3, %i4, %i5) : (index, index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
@@ -21,32 +21,32 @@
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: for %i6 = 0 to 50 step 32 {
// CHECK-NEXT: for %i7 = [[IDENTITY]](%i6) to min [[MAP1]](%i6) {
// CHECK-NEXT: affine.for %i6 = 0 to 50 step 32 {
// CHECK-NEXT: affine.for %i7 = [[IDENTITY]](%i6) to min [[MAP1]](%i6) {
// CHECK-NEXT: "bar"(%i7, %i7) : (index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: for %i8 = 0 to 21 step 32 {
// CHECK-NEXT: for %i9 = [[IDENTITY]](%i8) to 21 {
// CHECK-NEXT: affine.for %i8 = 0 to 21 step 32 {
// CHECK-NEXT: affine.for %i9 = [[IDENTITY]](%i8) to 21 {
// CHECK-NEXT: "foobar"(%i9) : (index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: return
func @loop_tiling() {
for %i = 0 to 256 {
for %j = 0 to 512 {
for %k = 0 to 1024 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 512 {
affine.for %k = 0 to 1024 {
"foo"(%i, %j, %k) : (index, index, index) -> ()
}
}
}

for %x = 0 to 50 {
affine.for %x = 0 to 50 {
"bar"(%x, %x) : (index, index) -> ()
}

// Intra-tile loop won't need a min expression.
for %y = 0 to 21 {
affine.for %y = 0 to 21 {
"foobar"(%y) : (index) -> ()
}

@@ -58,12 +58,12 @@ func @loop_tiling() {
// CHECK-LABEL: func @loop_max_min_bound(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
func @loop_max_min_bound(%A : memref<? x i32>, %L : index, %U : index) {
%M = dim %A, 0 : memref<? x i32>
for %iTT = max #lb()[%L] to min #ub()[%M, %U] {
affine.for %iTT = max #lb()[%L] to min #ub()[%M, %U] {
%out = affine.apply (d0) -> (d0) (%iTT)
}
return
// CHECK: for %i0 = max [[LB]]()[%arg1] to min [[UB]]()[%0, %arg2] step 32 {
// CHECK-NEXT: for %i1 = [[IDENTITY]](%i0) to min [[UB_INTRA_TILE]](%0, %arg2, %i0) {
// CHECK: affine.for %i0 = max [[LB]]()[%arg1] to min [[UB]]()[%0, %arg2] step 32 {
// CHECK-NEXT: affine.for %i1 = [[IDENTITY]](%i0) to min [[UB_INTRA_TILE]](%0, %arg2, %i0) {
// CHECK-NEXT: %1 = affine.apply [[IDENTITY]](%i1)
// CHECK-NEXT: }
// CHECK-NEXT: }
@@ -24,7 +24,7 @@ func @body(index) -> ()
// CHECK-NEXT: return
// CHECK-NEXT: }
func @simple_loop() {
for %i = 1 to 42 {
affine.for %i = 1 to 42 {
call @body(%i) : (index) -> ()
}
return
@@ -65,9 +65,9 @@ func @post(index) -> ()
// CHECK-NEXT: return
// CHECK-NEXT: }
func @imperfectly_nested_loops() {
for %i = 0 to 42 {
affine.for %i = 0 to 42 {
call @pre(%i) : (index) -> ()
for %j = 7 to 56 step 2 {
affine.for %j = 7 to 56 step 2 {
call @body2(%i, %j) : (index, index) -> ()
}
call @post(%i) : (index) -> ()
@@ -122,13 +122,13 @@ func @body3(index, index) -> ()
// CHECK-NEXT: return
// CHECK-NEXT: }
func @more_imperfectly_nested_loops() {
for %i = 0 to 42 {
affine.for %i = 0 to 42 {
call @pre(%i) : (index) -> ()
for %j = 7 to 56 step 2 {
affine.for %j = 7 to 56 step 2 {
call @body2(%i, %j) : (index, index) -> ()
}
call @mid(%i) : (index) -> ()
for %k = 18 to 37 step 3 {
affine.for %k = 18 to 37 step 3 {
call @body3(%i, %k) : (index, index) -> ()
}
call @post(%i) : (index) -> ()
@@ -161,8 +161,8 @@ func @more_imperfectly_nested_loops() {
// CHECK-NEXT: return
// CHECK-NEXT: }
func @affine_apply_loops_shorthand(%N : index) {
for %i = 0 to %N {
for %j = %i to 42 {
affine.for %i = 0 to %N {
affine.for %j = %i to 42 {
call @body2(%i, %j) : (index, index) -> ()
}
}
@@ -360,7 +360,7 @@ func @if_for() {
// CHECK-NEXT: [[outerEndBB]]:
// CHECK-NEXT: br [[outerLoopInit:\^bb[0-9]+]]
if #set1(%i) {
for %j = 0 to 42 {
affine.for %j = 0 to 42 {
if #set2(%j) {
call @body2(%i, %j) : (index, index) -> ()
}
@@ -397,9 +397,9 @@ func @if_for() {
// CHECK-NEXT: %c1_9 = constant 1 : index
// CHECK-NEXT: %16 = addi %9, %c1_9 : index
// CHECK-NEXT: br [[outerLoopCond]](%16 : index)
for %k = 0 to 42 {
affine.for %k = 0 to 42 {
if #set2(%k) {
for %l = 0 to 42 {
affine.for %l = 0 to 42 {
call @body3(%k, %l) : (index, index) -> ()
}
}
@@ -446,8 +446,8 @@ func @if_for() {
// CHECK-NEXT: return
// CHECK-NEXT: }
func @loop_min_max(%N : index) {
for %i = 0 to 42 {
for %j = max #lbMultiMap(%i)[%N] to min #ubMultiMap(%i)[%N] {
affine.for %i = 0 to 42 {
affine.for %j = max #lbMultiMap(%i)[%N] to min #ubMultiMap(%i)[%N] {
call @body2(%i, %j) : (index, index) -> ()
}
}
@@ -486,7 +486,7 @@ func @loop_min_max(%N : index) {
// CHECK-NEXT: return
// CHECK-NEXT: }
func @min_reduction_tree(%v : index) {
for %i = 0 to min #map_7_values(%v)[] {
affine.for %i = 0 to min #map_7_values(%v)[] {
call @body(%i) : (index) -> ()
}
return
@@ -11,8 +11,8 @@ func @test() {
%A = alloc() : memref<9 x 9 x i32>
%B = alloc() : memref<111 x i32>

for %i = -1 to 10 {
for %j = -1 to 10 {
affine.for %i = -1 to 10 {
affine.for %j = -1 to 10 {
%idx0 = affine.apply (d0, d1) -> (d0)(%i, %j)
%idx1 = affine.apply (d0, d1) -> (d1)(%i, %j)
// Out of bound access.
@@ -27,7 +27,7 @@ func @test() {
}
}

for %k = 0 to 10 {
affine.for %k = 0 to 10 {
// In bound.
%u = load %B[%zero] : memref<111 x i32>
// Out of bounds.
@@ -43,8 +43,8 @@ func @test_mod_floordiv_ceildiv() {
%zero = constant 0 : index
%A = alloc() : memref<128 x 64 x 64 x i32>

for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
%idx0 = affine.apply (d0, d1, d2) -> (d0 mod 128 + 1)(%i, %j, %j)
%idx1 = affine.apply (d0, d1, d2) -> (d1 floordiv 4 + 1)(%i, %j, %j)
%idx2 = affine.apply (d0, d1, d2) -> (d2 ceildiv 4)(%i, %j, %j)
@@ -69,8 +69,8 @@ func @test_no_out_of_bounds() {
%C = alloc() : memref<257 x i32>
%B = alloc() : memref<1 x i32>

for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
// All of these accesses are in bound; check that no errors are emitted.
// CHECK: %3 = affine.apply {{#map.*}}(%i0, %i1)
// CHECK-NEXT: %4 = load %0[%3, %c0] : memref<257x256xi32>
@@ -93,8 +93,8 @@ func @mod_div() {
%zero = constant 0 : index
%A = alloc() : memref<128 x 64 x 64 x i32>

for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
%idx0 = affine.apply (d0, d1, d2) -> (d0 mod 128 + 1)(%i, %j, %j)
%idx1 = affine.apply (d0, d1, d2) -> (d1 floordiv 4 + 1)(%i, %j, %j)
%idx2 = affine.apply (d0, d1, d2) -> (d2 ceildiv 4)(%i, %j, %j)
@@ -115,8 +115,8 @@ func @mod_div() {
// CHECK-LABEL: func @mod_floordiv_nested() {
func @mod_floordiv_nested() {
%A = alloc() : memref<256 x 256 x i32>
for %i = 0 to 256 {
for %j = 0 to 256 {
affine.for %i = 0 to 256 {
affine.for %j = 0 to 256 {
%idx0 = affine.apply (d0, d1) -> ((d0 mod 1024) floordiv 4)(%i, %j)
%idx1 = affine.apply (d0, d1) -> ((((d1 mod 128) mod 32) ceildiv 4) * 32)(%i, %j)
load %A[%idx0, %idx1] : memref<256 x 256 x i32> // expected-error {{'load' op memref out of upper bound access along dimension #2}}
@@ -128,7 +128,7 @@ func @mod_floordiv_nested() {
// CHECK-LABEL: func @test_semi_affine_bailout
func @test_semi_affine_bailout(%N : index) {
%B = alloc() : memref<10 x i32>
for %i = 0 to 10 {
affine.for %i = 0 to 10 {
%idx = affine.apply (d0)[s0] -> (d0 * s0)(%i)[%N]
%y = load %B[%idx] : memref<10 x i32>
}
@@ -138,7 +138,7 @@ func @test_semi_affine_bailout(%N : index) {
// CHECK-LABEL: func @multi_mod_floordiv
func @multi_mod_floordiv() {
%A = alloc() : memref<2x2xi32>
for %ii = 0 to 64 {
affine.for %ii = 0 to 64 {
%idx0 = affine.apply (d0) -> ((d0 mod 147456) floordiv 1152) (%ii)
%idx1 = affine.apply (d0) -> (((d0 mod 147456) mod 1152) floordiv 384) (%ii)
%v = load %A[%idx0, %idx1] : memref<2x2xi32>
@@ -153,8 +153,8 @@ func @delinearize_mod_floordiv() {
%out = alloc() : memref<64x9xi32>

// Reshape '%in' into '%out'.
for %ii = 0 to 64 {
for %jj = 0 to 9 {
affine.for %ii = 0 to 64 {
affine.for %jj = 0 to 9 {
%a0 = affine.apply (d0, d1) -> (d0 * (9 * 1024) + d1 * 128) (%ii, %jj)
%a10 = affine.apply (d0) ->
(d0 floordiv (2 * 3 * 3 * 128 * 128)) (%a0)
@@ -189,7 +189,7 @@ func @out_of_bounds() {
%in = alloc() : memref<1xi32>
%c9 = constant 9 : i32

for %i0 = 10 to 11 {
affine.for %i0 = 10 to 11 {
%idy = affine.apply (d0) -> (100 * d0 floordiv 1000) (%i0)
store %c9, %in[%idy] : memref<1xi32> // expected-error {{'store' op memref out of upper bound access along dimension #1}}
}
@@ -10,14 +10,14 @@
func @simple_store_load() {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
}
return
// CHECK: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: %0 = addf %cst, %cst : f32
// CHECK-NEXT: }
// CHECK-NEXT: return
@@ -30,7 +30,7 @@ func @multi_store_load() {
%cf8 = constant 8.0 : f32
%cf9 = constant 9.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@@ -45,7 +45,7 @@ func @multi_store_load() {
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %cst_1 = constant 9.000000e+00 : f32
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: %0 = addf %cst, %cst : f32
// CHECK-NEXT: %1 = mulf %cst_1, %cst_1 : f32
// CHECK-NEXT: }
@@ -59,8 +59,8 @@ func @multi_store_load() {
func @store_load_affine_apply() -> memref<10x10xf32> {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10x10xf32>
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%t0 = affine.apply (d0, d1) -> (d1 + 1)(%i0, %i1)
%t1 = affine.apply (d0, d1) -> (d0)(%i0, %i1)
%idx0 = affine.apply (d0, d1) -> (d1) (%t0, %t1)
@@ -75,8 +75,8 @@ func @store_load_affine_apply() -> memref<10x10xf32> {
return %m : memref<10x10xf32>
// CHECK: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %0 = alloc() : memref<10x10xf32>
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: for %i1 = 0 to 10 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i1 = 0 to 10 {
// CHECK-NEXT: %1 = affine.apply [[MAP0]](%i0, %i1)
// CHECK-NEXT: %2 = affine.apply [[MAP1]](%i0, %i1)
// CHECK-NEXT: %3 = affine.apply [[MAP2]](%1, %2)
@@ -92,17 +92,17 @@ func @store_load_affine_apply() -> memref<10x10xf32> {
func @store_load_nested(%N : index) {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
}
}
return
// CHECK: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: for %i1 = 0 to %arg0 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg0 {
// CHECK-NEXT: %0 = addf %cst, %cst : f32
// CHECK-NEXT: }
// CHECK-NEXT: }
@@ -117,12 +117,12 @@ func @multi_store_load_nested_no_fwd(%N : index) {
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
store %cf8, %m[%i1] : memref<10xf32>
}
for %i2 = 0 to %N {
affine.for %i2 = 0 to %N {
// CHECK: %{{[0-9]+}} = load %0[%i0] : memref<10xf32>
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@@ -138,9 +138,9 @@ func @store_load_store_nested_no_fwd(%N : index) {
%cf7 = constant 7.0 : f32
%cf9 = constant 9.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
// CHECK: %{{[0-9]+}} = load %0[%i0] : memref<10xf32>
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@@ -159,16 +159,16 @@ func @multi_store_load_nested_fwd(%N : index) {
%cf9 = constant 9.0 : f32
%cf10 = constant 10.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
store %cf8, %m[%i1] : memref<10xf32>
}
for %i2 = 0 to %N {
affine.for %i2 = 0 to %N {
store %cf9, %m[%i2] : memref<10xf32>
}
store %cf10, %m[%i0] : memref<10xf32>
for %i3 = 0 to %N {
affine.for %i3 = 0 to %N {
// CHECK-NOT: %{{[0-9]+}} = load
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@@ -182,10 +182,10 @@ func @multi_store_load_nested_fwd(%N : index) {
func @store_load_no_fwd() {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to 10 {
for %i2 = 0 to 10 {
affine.for %i1 = 0 to 10 {
affine.for %i2 = 0 to 10 {
// CHECK: load %{{[0-9]+}}
%v0 = load %m[%i2] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@@ -202,9 +202,9 @@ func @store_load_fwd() {
%c0 = constant 0 : index
%m = alloc() : memref<10xf32>
store %cf7, %m[%c0] : memref<10xf32>
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
for %i2 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
affine.for %i2 = 0 to 10 {
// CHECK-NOT: load %{{[0-9]}}+
%v0 = load %m[%c0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
@@ -223,9 +223,9 @@ func @store_load_store_nested_fwd(%N : index) -> f32 {
%c0 = constant 0 : index
%c1 = constant 1 : index
%m = alloc() : memref<10xf32>
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
for %i1 = 0 to %N {
affine.for %i1 = 0 to %N {
%v0 = load %m[%i0] : memref<10xf32>
%v1 = addf %v0, %v0 : f32
%idx = affine.apply (d0) -> (d0 + 1) (%i0)
@@ -236,9 +236,9 @@ func @store_load_store_nested_fwd(%N : index) -> f32 {
%v3 = load %m[%c1] : memref<10xf32>
return %v3 : f32
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: for %i0 = 0 to 10 {
// CHECK-NEXT: affine.for %i0 = 0 to 10 {
// CHECK-NEXT: store %cst, %0[%i0] : memref<10xf32>
// CHECK-NEXT: for %i1 = 0 to %arg0 {
// CHECK-NEXT: affine.for %i1 = 0 to %arg0 {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: %2 = affine.apply [[MAP4]](%i0)
// CHECK-NEXT: store %cst_0, %0[%2] : memref<10xf32>
@@ -13,14 +13,14 @@ func @store_may_execute_before_load() {
// ancestor IfOp of the store, dominates the ancestor ForInst of the load,
// and thus the store "may" conditionally execute before the load.
if #set0(%c0) {
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cf7, %m[%i0] : memref<10xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
// expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
// expected-note@-3 {{dependence from 0 to 1 at depth 1 = true}}
}
}
for %i1 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%v0 = load %m[%i1] : memref<10xf32>
// expected-note@-1 {{dependence from 1 to 1 at depth 1 = false}}
// expected-note@-2 {{dependence from 1 to 1 at depth 2 = false}}
@@ -37,13 +37,13 @@ func @dependent_loops() {
%cst = constant 7.000000e+00 : f32
// There is a dependence from 0 to 1 at depth 1 (common surrounding loops 0)
// because the first loop with the store dominates the second loop.
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
store %cst, %0[%i0] : memref<10xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
// expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
// expected-note@-3 {{dependence from 0 to 1 at depth 1 = true}}
}
for %i1 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%1 = load %0[%i1] : memref<10xf32>
// expected-note@-1 {{dependence from 1 to 1 at depth 1 = false}}
// expected-note@-2 {{dependence from 1 to 1 at depth 2 = false}}
@@ -231,7 +231,7 @@ func @store_range_load_after_range() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c10 = constant 10 : index
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
%a0 = affine.apply (d0) -> (d0) (%i0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@@ -254,7 +254,7 @@ func @store_load_func_symbol(%arg0: index, %arg1: index) {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c10 = constant 10 : index
for %i0 = 0 to %arg1 {
affine.for %i0 = 0 to %arg1 {
%a0 = affine.apply (d0) -> (d0) (%arg0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = [1, +inf]}}
@@ -277,7 +277,7 @@ func @store_range_load_last_in_range() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c10 = constant 10 : index
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
%a0 = affine.apply (d0) -> (d0) (%i0)
// For dependence from 0 to 1, we do not have a loop carried dependence
// because only the final write in the loop accesses the same element as the
@@ -305,7 +305,7 @@ func @store_range_load_before_range() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c0 = constant 0 : index
for %i0 = 1 to 11 {
affine.for %i0 = 1 to 11 {
%a0 = affine.apply (d0) -> (d0) (%i0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@@ -328,7 +328,7 @@ func @store_range_load_first_in_range() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
%c0 = constant 0 : index
for %i0 = 1 to 11 {
affine.for %i0 = 1 to 11 {
%a0 = affine.apply (d0) -> (d0) (%i0)
// Dependence from 0 to 1 at depth 1 is a range because all loads at
// constant index zero are reads after first store at index zero during
@@ -353,7 +353,7 @@ func @store_range_load_first_in_range() {
func @store_plus_3() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
for %i0 = 1 to 11 {
affine.for %i0 = 1 to 11 {
%a0 = affine.apply (d0) -> (d0 + 3) (%i0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@@ -375,7 +375,7 @@ func @store_plus_3() {
func @load_minus_2() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
for %i0 = 2 to 11 {
affine.for %i0 = 2 to 11 {
%a0 = affine.apply (d0) -> (d0) (%i0)
store %c7, %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@@ -397,8 +397,8 @@ func @load_minus_2() {
func @perfectly_nested_loops_loop_independent() {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 11 {
for %i1 = 0 to 11 {
affine.for %i0 = 0 to 11 {
affine.for %i1 = 0 to 11 {
// Dependence from access 0 to 1 is loop independent at depth = 3.
%a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1)
@@ -428,8 +428,8 @@ func @perfectly_nested_loops_loop_independent() {
func @perfectly_nested_loops_loop_carried_at_depth1() {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 9 {
for %i1 = 0 to 9 {
affine.for %i0 = 0 to 9 {
affine.for %i1 = 0 to 9 {
// Dependence from access 0 to 1 is loop carried at depth 1.
%a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1)
@@ -459,8 +459,8 @@ func @perfectly_nested_loops_loop_carried_at_depth2() {
func @perfectly_nested_loops_loop_carried_at_depth2() {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
// Dependence from access 0 to 1 is loop carried at depth 2.
%a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1)
@@ -491,8 +491,8 @@ func @one_common_loop() {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
// There is a loop-independent dependence from access 0 to 1 at depth 2.
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%a00 = affine.apply (d0, d1) -> (d0) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1) (%i0, %i1)
store %c7, %m[%a00, %a01] : memref<10x10xf32>
@@ -502,7 +502,7 @@ func @one_common_loop() {
// expected-note@-4 {{dependence from 0 to 1 at depth 1 = false}}
// expected-note@-5 {{dependence from 0 to 1 at depth 2 = true}}
}
for %i2 = 0 to 9 {
affine.for %i2 = 0 to 9 {
%a10 = affine.apply (d0, d1) -> (d0) (%i0, %i2)
%a11 = affine.apply (d0, d1) -> (d1) (%i0, %i2)
%v0 = load %m[%a10, %a11] : memref<10x10xf32>
@@ -525,7 +525,7 @@ func @dependence_cycle() {
// Dependences:
// *) loop-independent dependence from access 1 to 2 at depth 2.
// *) loop-carried dependence from access 3 to 0 at depth 1.
for %i0 = 0 to 9 {
affine.for %i0 = 0 to 9 {
%a0 = affine.apply (d0) -> (d0) (%i0)
%v0 = load %m.a[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@@ -575,8 +575,8 @@ func @dependence_cycle() {
func @negative_and_positive_direction_vectors(%arg0: index, %arg1: index) {
%m = alloc() : memref<10x10xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to %arg0 {
for %i1 = 0 to %arg1 {
affine.for %i0 = 0 to %arg0 {
affine.for %i1 = 0 to %arg1 {
%a00 = affine.apply (d0, d1) -> (d0 - 1) (%i0, %i1)
%a01 = affine.apply (d0, d1) -> (d1 + 1) (%i0, %i1)
%v0 = load %m[%a00, %a01] : memref<10x10xf32>
@@ -605,8 +605,8 @@ func @negative_and_positive_direction_vectors(%arg0: index, %arg1: index) {
func @war_raw_waw_deps() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 10 {
for %i1 = 0 to 10 {
affine.for %i0 = 0 to 10 {
affine.for %i1 = 0 to 10 {
%a0 = affine.apply (d0) -> (d0 + 1) (%i1)
%v0 = load %m[%a0] : memref<100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
@@ -633,7 +633,7 @@ func @war_raw_waw_deps() {
func @mod_deps() {
%m = alloc() : memref<100xf32>
%c7 = constant 7.0 : f32
for %i0 = 0 to 10 {
affine.for %i0 = 0 to 10 {
%a0 = affine.apply (d0) -> (d0 mod 2) (%i0)
// Results are conservative here since we currently don't have a way to
// represent strided sets in FlatAffineConstraints.
@@ -658,8 +658,8 @@ func @loop_nest_depth() {
%0 = alloc() : memref<100x100xf32>
%c7 = constant 7.0 : f32

for %i0 = 0 to 128 {
for %i1 = 0 to 8 {
affine.for %i0 = 0 to 128 {
affine.for %i1 = 0 to 8 {
store %c7, %0[%i0, %i1] : memref<100x100xf32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
// expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
@@ -667,10 +667,10 @@ func @loop_nest_depth() {
// expected-note@-4 {{dependence from 0 to 1 at depth 1 = true}}
}
}
for %i2 = 0 to 8 {
for %i3 = 0 to 8 {
for %i4 = 0 to 8 {
for %i5 = 0 to 16 {
affine.for %i2 = 0 to 8 {
affine.for %i3 = 0 to 8 {
affine.for %i4 = 0 to 8 {
affine.for %i5 = 0 to 16 {
%8 = affine.apply (d0, d1) -> (d0 * 16 + d1)(%i4, %i5)
%9 = load %0[%8, %i3] : memref<100x100xf32>
// expected-note@-1 {{dependence from 1 to 0 at depth 1 = false}}
@@ -693,9 +693,9 @@ func @loop_nest_depth() {
func @mod_div_3d() {
%M = alloc() : memref<2x2x2xi32>
%c0 = constant 0 : i32
for %i0 = 0 to 8 {
for %i1 = 0 to 8 {
for %i2 = 0 to 8 {
affine.for %i0 = 0 to 8 {
affine.for %i1 = 0 to 8 {
affine.for %i2 = 0 to 8 {
%idx0 = affine.apply (d0, d1, d2) -> (d0 floordiv 4) (%i0, %i1, %i2)
%idx1 = affine.apply (d0, d1, d2) -> (d1 mod 2) (%i0, %i1, %i2)
%idx2 = affine.apply (d0, d1, d2) -> (d2 floordiv 4) (%i0, %i1, %i2)
@@ -719,12 +719,12 @@ func @delinearize_mod_floordiv() {
%in = alloc() : memref<2x2x3x3x16x1xi32>
%out = alloc() : memref<64x9xi32>

for %i0 = 0 to 2 {
for %i1 = 0 to 2 {
for %i2 = 0 to 3 {
for %i3 = 0 to 3 {
for %i4 = 0 to 16 {
for %i5 = 0 to 1 {
affine.for %i0 = 0 to 2 {
affine.for %i1 = 0 to 2 {
affine.for %i2 = 0 to 3 {
affine.for %i3 = 0 to 3 {
affine.for %i4 = 0 to 16 {
affine.for %i5 = 0 to 1 {
store %val, %in[%i0, %i1, %i2, %i3, %i4, %i5] : memref<2x2x3x3x16x1xi32>
// expected-note@-1 {{dependence from 0 to 0 at depth 1 = false}}
// expected-note@-2 {{dependence from 0 to 0 at depth 2 = false}}
@@ -742,8 +742,8 @@ func @delinearize_mod_floordiv() {
}
}

for %ii = 0 to 64 {
for %jj = 0 to 9 {
affine.for %ii = 0 to 64 {
affine.for %jj = 0 to 9 {
%a0 = affine.apply (d0, d1) -> (d0 * (9 * 1024) + d1 * 128) (%ii, %jj)
%a10 = affine.apply (d0) ->
(d0 floordiv (2 * 3 * 3 * 128 * 128)) (%a0)
@@ -16,13 +16,13 @@ func @loop_nest_dma() {
%zero = constant 0 : index
%num_elts = constant 128 : index

for %i = 0 to 8 {
affine.for %i = 0 to 8 {
dma_start %A[%i], %Ah[%i], %num_elts, %tag[%zero] : memref<256 x f32>, memref<32 x f32, 1>, memref<1 x f32>
dma_wait %tag[%zero], %num_elts : memref<1 x f32>
%v = load %Ah[%i] : memref<32 x f32, (d0) -> (d0), 1>
%r = "compute"(%v) : (f32) -> (f32)
store %r, %Ah[%i] : memref<32 x f32, (d0) -> (d0), 1>
for %j = 0 to 128 {
affine.for %j = 0 to 128 {
"do_more_compute"(%i, %j) : (index, index) -> ()
}
}
@@ -34,7 +34,7 @@ func @loop_nest_dma() {
// CHECK-NEXT: %3 = affine.apply [[MOD_2]](%c0)
// CHECK-NEXT: %4 = affine.apply [[MOD_2]](%c0)
// CHECK-NEXT: dma_start %0[%c0], %1[%3, %c0], %c128, %2[%4, %c0_0] : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
// CHECK-NEXT: for %i0 = 1 to 8 {
// CHECK-NEXT: affine.for %i0 = 1 to 8 {
// CHECK-NEXT: %5 = affine.apply [[MOD_2]](%i0)
// CHECK-NEXT: %6 = affine.apply [[MOD_2]](%i0)
// CHECK-NEXT: dma_start %0[%i0], %1[%5, %i0], %c128, %2[%6, %c0_0] : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
@@ -45,7 +45,7 @@ func @loop_nest_dma() {
// CHECK-NEXT: %10 = load %1[%9, %7] : memref<2x32xf32, 1>
// CHECK-NEXT: %11 = "compute"(%10) : (f32) -> f32
// CHECK-NEXT: store %11, %1[%9, %7] : memref<2x32xf32, 1>
// CHECK-NEXT: for %i1 = 0 to 128 {
// CHECK-NEXT: affine.for %i1 = 0 to 128 {
// CHECK-NEXT: "do_more_compute"(%7, %i1) : (index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: }
@@ -56,7 +56,7 @@ func @loop_nest_dma() {
// CHECK-NEXT: %15 = load %1[%14, %12] : memref<2x32xf32, 1>
// CHECK-NEXT: %16 = "compute"(%15) : (f32) -> f32
// CHECK-NEXT: store %16, %1[%14, %12] : memref<2x32xf32, 1>
// CHECK-NEXT: for %i2 = 0 to 128 {
// CHECK-NEXT: affine.for %i2 = 0 to 128 {
// CHECK-NEXT: "do_more_compute"(%12, %i2) : (index, index) -> ()
// CHECK-NEXT: }
// CHECK-NEXT: return
@@ -68,7 +68,7 @@ func @loop_step(%arg0: memref<512xf32>,
%arg1: memref<512xf32>) {
%c0 = constant 0 : index
%c4 = constant 4 : index
for %i0 = 0 to 512 step 4 {
affine.for %i0 = 0 to 512 step 4 {
%1 = alloc() : memref<4xf32, 1>
%2 = alloc() : memref<1xi32>
dma_start %arg0[%i0], %1[%c0], %c4, %2[%c0]
@@ -82,7 +82,7 @@ func @loop_step(%arg0: memref<512xf32>,
// CHECK: %2 = affine.apply [[FLOOR_MOD_2]](%c0)
// CHECK: %3 = affine.apply [[FLOOR_MOD_2]](%c0)
// CHECK-NEXT: dma_start %arg0[%c0], %0[%2, %c0_0], %c4, [[TAG]][%3, %c0_0] : memref<512xf32>, memref<2x4xf32, 1>, memref<2x1xi32>
// CHECK-NEXT: for %i0 = 4 to 512 step 4 {
// CHECK-NEXT: affine.for %i0 = 4 to 512 step 4 {
// CHECK-NEXT: %4 = affine.apply [[FLOOR_MOD_2]](%i0)
// CHECK-NEXT: %5 = affine.apply [[FLOOR_MOD_2]](%i0)
// CHECK-NEXT: dma_start %arg0[%i0], %0[%4, %c0_0], %c4, [[TAG]][%5, %c0_0] : memref<512xf32>, memref<2x4xf32, 1>, memref<2x1xi32>
@@ -114,8 +114,8 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref<
// Prologue for DMA overlap on arg2.
// CHECK:[[TAG_ARG2:%[0-9]+]] = alloc() : memref<2x2xi32>
// CHECK: dma_start %arg2[
// CHECK: for %i0 = 1 to 8 {
for %i0 = 0 to 8 {
// CHECK: affine.for %i0 = 1 to 8 {
affine.for %i0 = 0 to 8 {
%6 = affine.apply #map2(%i0)
dma_start %arg2[%6, %c0], %2[%c0, %c0], %num_elts, %5[%c0] : memref<512x32xvector<8xf32>, #map0>, memref<64x4xvector<8xf32>, #map0, 2>, memref<2xi32>
dma_wait %5[%c0], %num_elts : memref<2xi32>
@@ -127,8 +127,8 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref<
// CHECK: [[TAG_ARG1:%[0-9]+]] = alloc() : memref<2x2xi32>
// CHECK: dma_start %arg0[
// CHECK: dma_start %arg1[
// CHECK-NEXT for %i1 = 1 to 8 {
for %i1 = 0 to 8 {
// CHECK-NEXT affine.for %i1 = 1 to 8 {
affine.for %i1 = 0 to 8 {
%7 = affine.apply #map1(%i0, %i1)
%8 = affine.apply #map2(%i1)
dma_start %arg0[%7, %c0], %0[%c0, %c0], %num_elts, %3[%c0] : memref<512x32xvector<8xf32>, #map0>, memref<64x4xvector<8xf32>, #map0, 2>, memref<2xi32>
@@ -140,8 +140,8 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref<
// CHECK: dma_start %arg1[
// CHECK: dma_wait [[TAG_ARG0]]
// CHECK: dma_wait [[TAG_ARG1]]
// CHECK-NEXT: for %i2 = 0 to 4 {
for %i2 = 0 to 4 {
// CHECK-NEXT: affine.for %i2 = 0 to 4 {
affine.for %i2 = 0 to 4 {
"foo"() : () -> ()
}
}
@@ -155,16 +155,16 @@ func @loop_dma_nested(%arg0: memref<512x32xvector<8xf32>, #map0>, %arg1: memref<
// CHECK: [[TAG_ARG1_NESTED:%[0-9]+]] = alloc() : memref<2x2xi32>
// CHECK: dma_start %arg0[
// CHECK: dma_start %arg1[
// CHECK: for %i4 = 1 to 8 {
// CHECK: affine.for %i4 = 1 to 8 {
// CHECK: dma_start %arg0[
// CHECK: dma_start %arg1[
// CHECK: dma_wait [[TAG_ARG0_NESTED]]
// CHECK: dma_wait [[TAG_ARG1_NESTED]]
// CHECK: for %i5 = 0 to 4 {
// CHECK: affine.for %i5 = 0 to 4 {
// CHECK: "foo"() : () -> ()
// CHECK: dma_wait [[TAG_ARG0_NESTED]]
// CHECK: dma_wait [[TAG_ARG1_NESTED]]
// CHECK: for %i6 = 0 to 4 {
// CHECK: affine.for %i6 = 0 to 4 {
}
return
// CHECK: }
@@ -185,8 +185,8 @@ func @loop_dma_dependent(%arg2: memref<512x32xvector<8xf32>>) {
// The two DMAs below are dependent (incoming and outgoing on the same
// memref) in the same iteration; so no pipelining here.
// CHECK-NOT: dma_start
// CHECK: for %i0 = 0 to 8 {
for %i0 = 0 to 8 {
// CHECK: affine.for %i0 = 0 to 8 {
affine.for %i0 = 0 to 8 {
%6 = affine.apply #map2(%i0)
dma_start %arg2[%6, %c0], %2[%c0, %c0], %num_elts, %5[%c0] : memref<512x32xvector<8xf32>>, memref<64x4xvector<8xf32>, 2>, memref<2xi32>
dma_wait %5[%c0], %num_elts : memref<2xi32>
@@ -206,8 +206,8 @@ func @escaping_use(%arg0: memref<512 x 32 x f32>) {
%tag = alloc() : memref<1 x i32>

// CHECK-NOT: dma_start
// CHECK: for %i0 = 0 to 16 {
for %kTT = 0 to 16 {
// CHECK: affine.for %i0 = 0 to 16 {
affine.for %kTT = 0 to 16 {
dma_start %arg0[%zero, %zero], %Av[%zero, %zero], %num_elt, %tag[%zero] :
memref<512 x 32 x f32>,
memref<32 x 32 x f32, 2>, memref<1 x i32>
@@ -230,14 +230,14 @@ func @live_out_use(%arg0: memref<512 x 32 x f32>) -> f32 {
%tag = alloc() : memref<1 x i32>

// CHECK-NOT: dma_start
// CHECK: for %i0 = 0 to 16 {
for %kTT = 0 to 16 {
// CHECK: affine.for %i0 = 0 to 16 {
affine.for %kTT = 0 to 16 {
dma_start %arg0[%zero, %zero], %Av[%zero, %zero], %num_elt, %tag[%zero] :
memref<512 x 32 x f32>,
memref<32 x 32 x f32, 2>, memref<1 x i32>
dma_wait %tag[%zero], %num_elt : memref<1 x i32>
}
// Use live out of 'for' inst; no DMA pipelining will be done.
// Use live out of 'affine.for' inst; no DMA pipelining will be done.
%v = load %Av[%zero, %zero] : memref<32 x 32 x f32, 2>
return %v : f32
// CHECK: %{{[0-9]+}} = load %{{[0-9]+}}[%c0, %c0] : memref<32x32xf32, 2>
@@ -261,14 +261,14 @@ func @dynamic_shape_dma_buffer(%arg0: memref<512 x 32 x f32>) {
// CHECK: %5 = affine.apply [[MOD_2]](%c0)
// CHECK: %6 = affine.apply [[MOD_2]](%c0)
// CHECK: dma_start %arg0[%c0_0, %c0_0], %3[%5, %c0_0, %c0_0], %c512, %4[%6, %c0_0]
for %kTT = 0 to 16 {
affine.for %kTT = 0 to 16 {
dma_start %arg0[%zero, %zero], %Av[%zero, %zero], %num_elt, %tag[%zero] :
memref<512 x 32 x f32>,
memref<? x ? x f32, 2>, memref<1 x i32>
dma_wait %tag[%zero], %num_elt : memref<1 x i32>
}
return
// CHECK-NEXT: for %i0 = 1 to 16 {
// CHECK-NEXT: affine.for %i0 = 1 to 16 {
// CHECK: %7 = affine.apply [[MOD_2]](%i0)
// CHECK: %8 = affine.apply [[MOD_2]](%i0)
// CHECK: dma_start %arg0[%c0_0, %c0_0], %3[%7, %c0_0, %c0_0], %c512, %4[%8, %c0_0]
@@ -73,8 +73,8 @@

 // CHECK-LABEL: func @test_gaussian_elimination_empty_set0() {
 func @test_gaussian_elimination_empty_set0() {
-for %i0 = 1 to 10 {
-for %i1 = 1 to 100 {
+affine.for %i0 = 1 to 10 {
+affine.for %i1 = 1 to 100 {
 // CHECK: [[SET_EMPTY_2D]](%i0, %i1)
 if (d0, d1) : (2 == 0)(%i0, %i1) {
 }

@@ -85,8 +85,8 @@ func @test_gaussian_elimination_empty_set0() {

 // CHECK-LABEL: func @test_gaussian_elimination_empty_set1() {
 func @test_gaussian_elimination_empty_set1() {
-for %i0 = 1 to 10 {
-for %i1 = 1 to 100 {
+affine.for %i0 = 1 to 10 {
+affine.for %i1 = 1 to 100 {
 // CHECK: [[SET_EMPTY_2D]](%i0, %i1)
 if (d0, d1) : (1 >= 0, -1 >= 0) (%i0, %i1) {
 }

@@ -97,8 +97,8 @@ func @test_gaussian_elimination_empty_set1() {

 // CHECK-LABEL: func @test_gaussian_elimination_non_empty_set2() {
 func @test_gaussian_elimination_non_empty_set2() {
-for %i0 = 1 to 10 {
-for %i1 = 1 to 100 {
+affine.for %i0 = 1 to 10 {
+affine.for %i1 = 1 to 100 {
 // CHECK: #set1(%i0, %i1)
 if #set2(%i0, %i1) {
 }

@@ -111,8 +111,8 @@ func @test_gaussian_elimination_non_empty_set2() {
 func @test_gaussian_elimination_empty_set3() {
 %c7 = constant 7 : index
 %c11 = constant 11 : index
-for %i0 = 1 to 10 {
-for %i1 = 1 to 100 {
+affine.for %i0 = 1 to 10 {
+affine.for %i1 = 1 to 100 {
 // CHECK: #set2(%i0, %i1)[%c7, %c11]
 if #set3(%i0, %i1)[%c7, %c11] {
 }

@@ -125,8 +125,8 @@ func @test_gaussian_elimination_empty_set3() {
 func @test_gaussian_elimination_non_empty_set4() {
 %c7 = constant 7 : index
 %c11 = constant 11 : index
-for %i0 = 1 to 10 {
-for %i1 = 1 to 100 {
+affine.for %i0 = 1 to 10 {
+affine.for %i1 = 1 to 100 {
 // CHECK: #set3(%i0, %i1)[%c7, %c11]
 if #set4(%i0, %i1)[%c7, %c11] {
 }

@@ -139,8 +139,8 @@ func @test_gaussian_elimination_non_empty_set4() {
 func @test_gaussian_elimination_empty_set5() {
 %c7 = constant 7 : index
 %c11 = constant 11 : index
-for %i0 = 1 to 10 {
-for %i1 = 1 to 100 {
+affine.for %i0 = 1 to 10 {
+affine.for %i1 = 1 to 100 {
 // CHECK: #set2(%i0, %i1)[%c7, %c11]
 if #set5(%i0, %i1)[%c7, %c11] {
 }

@@ -151,8 +151,8 @@ func @test_gaussian_elimination_empty_set5() {

 // CHECK-LABEL: func @test_fuzz_explosion
 func @test_fuzz_explosion(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index) {
-for %i0 = 1 to 10 {
-for %i1 = 1 to 100 {
+affine.for %i0 = 1 to 10 {
+affine.for %i1 = 1 to 100 {
 if #set_fuzz_virus(%i0, %i1, %arg0, %arg1, %arg2, %arg3) {
 }
 }

@@ -163,8 +163,8 @@ func @test_fuzz_explosion(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : i

 // CHECK-LABEL: func @test_empty_set(%arg0: index) {
 func @test_empty_set(%N : index) {
-for %i = 0 to 10 {
-for %j = 0 to 10 {
+affine.for %i = 0 to 10 {
+affine.for %j = 0 to 10 {
 // CHECK: if [[SET_EMPTY_2D]](%i0, %i1)
 if (d0, d1) : (d0 - d1 >= 0, d1 - d0 - 1 >= 0)(%i, %j) {
 "foo"() : () -> ()

@@ -198,8 +198,8 @@ func @test_empty_set(%N : index) {
 }
 }
 // The tests below test GCDTightenInequalities().
-for %k = 0 to 10 {
-for %l = 0 to 10 {
+affine.for %k = 0 to 10 {
+affine.for %l = 0 to 10 {
 // Empty because no multiple of 8 lies between 4 and 7.
 // CHECK: if [[SET_EMPTY_1D]](%i2)
 if (d0) : (8*d0 - 4 >= 0, -8*d0 + 7 >= 0)(%k) {

@@ -226,7 +226,7 @@ func @test_empty_set(%N : index) {
 }
 }

-for %m = 0 to 10 {
+affine.for %m = 0 to 10 {
 // CHECK: if [[SET_EMPTY_1D]](%i{{[0-9]+}})
 if (d0) : (d0 mod 2 - 3 == 0) (%m) {
 "foo"() : () -> ()
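The `GCDTightenInequalities()` tests rely on one arithmetic step worth spelling out: when every variable coefficient of an inequality is divisible by g, the inequality can be divided through by g with the constant rounded toward the feasible side. For `8*d0 - 4 >= 0` and `-8*d0 + 7 >= 0` this yields `d0 >= 1` (from ceil(4/8)) and `d0 <= 0` (from floor(7/8)), an empty range. A sketch of the same idea with different constants, not one of the tests above:

```mlir
affine.for %i = 0 to 10 {
  // 3*d0 must lie in [1, 2]; tightening gives 1 <= d0 <= 0, so the
  // simplifier can rewrite the condition to the canonical empty set.
  if (d0) : (3*d0 - 1 >= 0, -3*d0 + 2 >= 0)(%i) {
    "foo"() : () -> ()
  }
}
```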
@@ -10,7 +10,7 @@ func @inline_notation() -> i32 loc("mysource.cc":10:8) {
 %1 = "foo"() : () -> i32 loc("foo")

 // CHECK: } loc(unknown)
-for %i0 = 0 to 8 {
+affine.for %i0 = 0 to 8 {
 } loc(fused["foo", "mysource.cc":10:8])

 // CHECK: } loc(unknown)
@@ -7,13 +7,13 @@

 // CHECK-LABEL: func @unroll_jam_imperfect_nest() {
 func @unroll_jam_imperfect_nest() {
 // CHECK: %c100 = constant 100 : index
-// CHECK-NEXT: for %i0 = 0 to 99 step 2 {
-for %i = 0 to 101 {
+// CHECK-NEXT: affine.for %i0 = 0 to 99 step 2 {
+affine.for %i = 0 to 101 {
 // CHECK: %0 = "addi32"(%i0, %i0) : (index, index) -> i32
 // CHECK-NEXT: %1 = affine.apply [[MAP_PLUS_1]](%i0)
 // CHECK-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32
 %x = "addi32"(%i, %i) : (index, index) -> i32
-for %j = 0 to 17 {
+affine.for %j = 0 to 17 {
 // CHECK: %3 = "addi32"(%i0, %i0) : (index, index) -> i32
 // CHECK-NEXT: %4 = "addi32"(%3, %3) : (i32, i32) -> i32
 // CHECK-NEXT: %5 = affine.apply [[MAP_PLUS_1]](%i0)

@@ -29,7 +29,7 @@ func @unroll_jam_imperfect_nest() {
 } // CHECK }
 // cleanup loop (single iteration)
 // CHECK: %11 = "addi32"(%c100, %c100) : (index, index) -> i32
-// CHECK-NEXT: for %i2 = 0 to 17 {
+// CHECK-NEXT: affine.for %i2 = 0 to 17 {
 // CHECK-NEXT: %12 = "addi32"(%c100, %c100) : (index, index) -> i32
 // CHECK-NEXT: %13 = "addi32"(%12, %12) : (i32, i32) -> i32
 // CHECK-NEXT: }

@@ -39,8 +39,8 @@ func @unroll_jam_imperfect_nest() {

 // UNROLL-BY-4-LABEL: func @loop_nest_unknown_count_1(%arg0: index) {
 func @loop_nest_unknown_count_1(%N : index) {
-// UNROLL-BY-4-NEXT: for %i0 = 1 to #map{{[0-9]+}}()[%arg0] step 4 {
-// UNROLL-BY-4-NEXT: for %i1 = 1 to 100 {
+// UNROLL-BY-4-NEXT: affine.for %i0 = 1 to #map{{[0-9]+}}()[%arg0] step 4 {
+// UNROLL-BY-4-NEXT: affine.for %i1 = 1 to 100 {
 // UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32

@@ -48,14 +48,14 @@ func @loop_nest_unknown_count_1(%N : index) {
 // UNROLL-BY-4-NEXT: }
 // UNROLL-BY-4-NEXT: }
 // A cleanup loop should be generated here.
-// UNROLL-BY-4-NEXT: for %i2 = #map{{[0-9]+}}()[%arg0] to %arg0 {
-// UNROLL-BY-4-NEXT: for %i3 = 1 to 100 {
+// UNROLL-BY-4-NEXT: affine.for %i2 = #map{{[0-9]+}}()[%arg0] to %arg0 {
+// UNROLL-BY-4-NEXT: affine.for %i3 = 1 to 100 {
 // UNROLL-BY-4-NEXT: %4 = "foo"() : () -> i32
 // UNROLL-BY-4_NEXT: }
 // UNROLL-BY-4_NEXT: }
 // Specify the lower bound in a form so that both lb and ub operands match.
-for %i = ()[s0] -> (1)()[%N] to %N {
-for %j = 1 to 100 {
+affine.for %i = ()[s0] -> (1)()[%N] to %N {
+affine.for %j = 1 to 100 {
 %x = "foo"() : () -> i32
 }
 }

@@ -64,8 +64,8 @@ func @loop_nest_unknown_count_1(%N : index) {

 // UNROLL-BY-4-LABEL: func @loop_nest_unknown_count_2(%arg0: index) {
 func @loop_nest_unknown_count_2(%arg : index) {
-// UNROLL-BY-4-NEXT: for %i0 = %arg0 to #map{{[0-9]+}}()[%arg0] step 4 {
-// UNROLL-BY-4-NEXT: for %i1 = 1 to 100 {
+// UNROLL-BY-4-NEXT: affine.for %i0 = %arg0 to #map{{[0-9]+}}()[%arg0] step 4 {
+// UNROLL-BY-4-NEXT: affine.for %i1 = 1 to 100 {
 // UNROLL-BY-4-NEXT: %0 = "foo"(%i0) : (index) -> i32
 // UNROLL-BY-4-NEXT: %1 = affine.apply #map{{[0-9]+}}(%i0)
 // UNROLL-BY-4-NEXT: %2 = "foo"(%1) : (index) -> i32

@@ -77,12 +77,12 @@ func @loop_nest_unknown_count_2(%arg : index) {
 // UNROLL-BY-4-NEXT: }
 // The cleanup loop is a single iteration one and is promoted.
 // UNROLL-BY-4-NEXT: %7 = affine.apply [[M1:#map{{[0-9]+}}]]()[%arg0]
-// UNROLL-BY-4-NEXT: for %i3 = 1 to 100 {
+// UNROLL-BY-4-NEXT: affine.for %i3 = 1 to 100 {
 // UNROLL-BY-4-NEXT: %8 = "foo"() : () -> i32
 // UNROLL-BY-4_NEXT: }
 // Specify the lower bound in a form so that both lb and ub operands match.
-for %i = ()[s0] -> (s0) ()[%arg] to ()[s0] -> (s0+8) ()[%arg] {
-for %j = 1 to 100 {
+affine.for %i = ()[s0] -> (s0) ()[%arg] to ()[s0] -> (s0+8) ()[%arg] {
+affine.for %j = 1 to 100 {
 %x = "foo"(%i) : (index) -> i32
 }
 }
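As a reference point for the CHECK lines above, `unroll_jam_imperfect_nest` pins down the trip-count arithmetic: with factor 2 and trip count 101, the jammed loop `0 to 99 step 2` executes the body for `%i` and `%i + 1` on each step (covering iterations 0 through 99), and the single leftover iteration becomes a promoted cleanup at `%c100`. A conceptual before/after sketch; the "S1"/"S2" ops are placeholders, not taken from the test file:

```mlir
// Before unroll-and-jam:
affine.for %i = 0 to 101 {
  "S1"(%i) : (index) -> ()
  affine.for %j = 0 to 17 {
    "S2"(%i, %j) : (index, index) -> ()
  }
}

// After unroll-and-jam by 2: the two body copies are fused ("jammed")
// inside the inner loop, and the odd iteration is peeled and promoted.
affine.for %i = 0 to 99 step 2 {
  %i1 = affine.apply (d0) -> (d0 + 1) (%i)
  "S1"(%i) : (index) -> ()
  "S1"(%i1) : (index) -> ()
  affine.for %j = 0 to 17 {
    "S2"(%i, %j) : (index, index) -> ()
    "S2"(%i1, %j) : (index, index) -> ()
  }
}
%c100 = constant 100 : index
"S1"(%c100) : (index) -> ()
affine.for %j = 0 to 17 {
  "S2"(%c100, %j) : (index, index) -> ()
}
```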
@@ -46,13 +46,13 @@

 // CHECK-LABEL: func @loop_nest_simplest() {
 func @loop_nest_simplest() {
-// CHECK: for %i0 = 0 to 100 step 2 {
-for %i = 0 to 100 step 2 {
+// CHECK: affine.for %i0 = 0 to 100 step 2 {
+affine.for %i = 0 to 100 step 2 {
 // CHECK: %c1_i32 = constant 1 : i32
 // CHECK-NEXT: %c1_i32_0 = constant 1 : i32
 // CHECK-NEXT: %c1_i32_1 = constant 1 : i32
 // CHECK-NEXT: %c1_i32_2 = constant 1 : i32
-for %j = 0 to 4 {
+affine.for %j = 0 to 4 {
 %x = constant 1 : i32
 }
 } // CHECK: }

@@ -62,8 +62,8 @@ func @loop_nest_simplest() {
 // CHECK-LABEL: func @loop_nest_simple_iv_use() {
 func @loop_nest_simple_iv_use() {
 // CHECK: %c0 = constant 0 : index
-// CHECK-NEXT: for %i0 = 0 to 100 step 2 {
-for %i = 0 to 100 step 2 {
+// CHECK-NEXT: affine.for %i0 = 0 to 100 step 2 {
+affine.for %i = 0 to 100 step 2 {
 // CHECK: %0 = "addi32"(%c0, %c0) : (index, index) -> i32
 // CHECK: %1 = affine.apply [[MAP0]](%c0)
 // CHECK-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32

@@ -71,7 +71,7 @@ func @loop_nest_simple_iv_use() {
 // CHECK-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> i32
 // CHECK: %5 = affine.apply [[MAP2]](%c0)
 // CHECK-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
-for %j = 0 to 4 {
+affine.for %j = 0 to 4 {
 %x = "addi32"(%j, %j) : (index, index) -> i32
 }
 } // CHECK: }

@@ -82,8 +82,8 @@ func @loop_nest_simple_iv_use() {
 // CHECK-LABEL: func @loop_nest_body_def_use() {
 func @loop_nest_body_def_use() {
 // CHECK: %c0 = constant 0 : index
-// CHECK-NEXT: for %i0 = 0 to 100 step 2 {
-for %i = 0 to 100 step 2 {
+// CHECK-NEXT: affine.for %i0 = 0 to 100 step 2 {
+affine.for %i = 0 to 100 step 2 {
 // CHECK: %c0_0 = constant 0 : index
 %c0 = constant 0 : index
 // CHECK: %0 = affine.apply [[MAP0]](%c0)

@@ -97,7 +97,7 @@ func @loop_nest_body_def_use() {
 // CHECK-NEXT: %8 = affine.apply [[MAP2]](%c0)
 // CHECK-NEXT: %9 = affine.apply [[MAP0]](%8)
 // CHECK-NEXT: %10 = "addi32"(%9, %c0_0) : (index, index) -> index
-for %j = 0 to 4 {
+affine.for %j = 0 to 4 {
 %x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
 (index) -> (index)
 %y = "addi32"(%x, %c0) : (index, index) -> index
@@ -110,14 +110,14 @@ func @loop_nest_body_def_use() {
 func @loop_nest_strided() {
 // CHECK: %c2 = constant 2 : index
 // CHECK-NEXT: %c2_0 = constant 2 : index
-// CHECK-NEXT: for %i0 = 0 to 100 {
-for %i = 0 to 100 {
+// CHECK-NEXT: affine.for %i0 = 0 to 100 {
+affine.for %i = 0 to 100 {
 // CHECK: %0 = affine.apply [[MAP0]](%c2_0)
 // CHECK-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
 // CHECK-NEXT: %2 = affine.apply [[MAP1]](%c2_0)
 // CHECK-NEXT: %3 = affine.apply [[MAP0]](%2)
 // CHECK-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> index
-for %j = 2 to 6 step 2 {
+affine.for %j = 2 to 6 step 2 {
 %x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
 (index) -> (index)
 %y = "addi32"(%x, %x) : (index, index) -> index

@@ -130,7 +130,7 @@ func @loop_nest_strided() {
 // CHECK-NEXT: %10 = affine.apply [[MAP3]](%c2)
 // CHECK-NEXT: %11 = affine.apply [[MAP0]](%10)
 // CHECK-NEXT: %12 = "addi32"(%11, %11) : (index, index) -> index
-for %k = 2 to 7 step 2 {
+affine.for %k = 2 to 7 step 2 {
 %z = "affine.apply" (%k) { map: (d0) -> (d0 + 1) } :
 (index) -> (index)
 %w = "addi32"(%z, %z) : (index, index) -> index

@@ -142,8 +142,8 @@ func @loop_nest_strided() {
 // CHECK-LABEL: func @loop_nest_multiple_results() {
 func @loop_nest_multiple_results() {
 // CHECK: %c0 = constant 0 : index
-// CHECK-NEXT: for %i0 = 0 to 100 {
-for %i = 0 to 100 {
+// CHECK-NEXT: affine.for %i0 = 0 to 100 {
+affine.for %i = 0 to 100 {
 // CHECK: %0 = affine.apply [[MAP4]](%i0, %c0)
 // CHECK-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
 // CHECK-NEXT: %2 = affine.apply #map{{.*}}(%i0, %c0)

@@ -153,7 +153,7 @@ func @loop_nest_multiple_results() {
 // CHECK-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> index
 // CHECK-NEXT: %7 = affine.apply #map{{.*}}(%i0, %4)
 // CHECK-NEXT: %8 = "fma"(%7, %5, %5) : (index, index, index) -> (index, index)
-for %j = 0 to 2 step 1 {
+affine.for %j = 0 to 2 step 1 {
 %x = affine.apply (d0, d1) -> (d0 + 1) (%i, %j)
 %y = "addi32"(%x, %x) : (index, index) -> index
 %z = affine.apply (d0, d1) -> (d0 + 3) (%i, %j)

@@ -170,8 +170,8 @@ func @loop_nest_seq_imperfect(%a : memref<128x128xf32>) {
 // CHECK: %c0 = constant 0 : index
 // CHECK-NEXT: %c128 = constant 128 : index
 %c128 = constant 128 : index
-// CHECK: for %i0 = 0 to 100 {
-for %i = 0 to 100 {
+// CHECK: affine.for %i0 = 0 to 100 {
+affine.for %i = 0 to 100 {
 // CHECK: %0 = "vld"(%i0) : (index) -> i32
 %ld = "vld"(%i) : (index) -> i32
 // CHECK: %1 = affine.apply [[MAP0]](%c0)

@@ -189,7 +189,7 @@ func @loop_nest_seq_imperfect(%a : memref<128x128xf32>) {
 // CHECK-NEXT: %13 = affine.apply [[MAP0]](%12)
 // CHECK-NEXT: %14 = "vmulf"(%12, %13) : (index, index) -> index
 // CHECK-NEXT: %15 = "vaddf"(%14, %14) : (index, index) -> index
-for %j = 0 to 4 {
+affine.for %j = 0 to 4 {
 %x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
 (index) -> (index)
 %y = "vmulf"(%j, %x) : (index, index) -> index

@@ -218,7 +218,7 @@ func @loop_nest_seq_multiple() {
 // CHECK-NEXT: %5 = affine.apply [[MAP2]](%c0_0)
 // CHECK-NEXT: %6 = affine.apply [[MAP0]](%5)
 // CHECK-NEXT: "mul"(%6, %6) : (index, index) -> ()
-for %j = 0 to 4 {
+affine.for %j = 0 to 4 {
 %x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
 (index) -> (index)
 "mul"(%x, %x) : (index, index) -> ()

@@ -226,8 +226,8 @@ func @loop_nest_seq_multiple() {

 // CHECK: %c99 = constant 99 : index
 %k = "constant"(){value: 99} : () -> index
-// CHECK: for %i0 = 0 to 100 step 2 {
-for %m = 0 to 100 step 2 {
+// CHECK: affine.for %i0 = 0 to 100 step 2 {
+affine.for %m = 0 to 100 step 2 {
 // CHECK: %7 = affine.apply [[MAP0]](%c0)
 // CHECK-NEXT: %8 = affine.apply [[MAP6]](%c0)[%c99]
 // CHECK-NEXT: %9 = affine.apply [[MAP0]](%c0)

@@ -239,7 +239,7 @@ func @loop_nest_seq_multiple() {
 // CHECK-NEXT: %15 = affine.apply [[MAP2]](%c0)
 // CHECK-NEXT: %16 = affine.apply [[MAP0]](%15)
 // CHECK-NEXT: %17 = affine.apply [[MAP6]](%15)[%c99]
-for %n = 0 to 4 {
+affine.for %n = 0 to 4 {
 %y = "affine.apply" (%n) { map: (d0) -> (d0 + 1) } :
 (index) -> (index)
 %z = "affine.apply" (%n, %k) { map: (d0) [s0] -> (d0 + s0 + 1) } :

@@ -251,16 +251,16 @@ func @loop_nest_seq_multiple() {

 // SHORT-LABEL: func @loop_nest_outer_unroll() {
 func @loop_nest_outer_unroll() {
-// SHORT: for %i0 = 0 to 4 {
+// SHORT: affine.for %i0 = 0 to 4 {
 // SHORT-NEXT: %0 = affine.apply [[MAP0]](%i0)
 // SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
 // SHORT-NEXT: }
-// SHORT-NEXT: for %i1 = 0 to 4 {
+// SHORT-NEXT: affine.for %i1 = 0 to 4 {
 // SHORT-NEXT: %2 = affine.apply [[MAP0]](%i1)
 // SHORT-NEXT: %3 = "addi32"(%2, %2) : (index, index) -> index
 // SHORT-NEXT: }
-for %i = 0 to 2 {
-for %j = 0 to 4 {
+affine.for %i = 0 to 2 {
+affine.for %j = 0 to 4 {
 %x = "affine.apply" (%j) { map: (d0) -> (d0 + 1) } :
 (index) -> (index)
 %y = "addi32"(%x, %x) : (index, index) -> index
@@ -284,28 +284,28 @@ func @loop_nest_seq_long() -> i32 {

 %zero_idx = constant 0 : index

-for %n0 = 0 to 512 {
-for %n1 = 0 to 8 {
+affine.for %n0 = 0 to 512 {
+affine.for %n1 = 0 to 8 {
 store %one, %A[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
 store %two, %B[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
 store %zero, %C[%n0, %n1] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
 }
 }

-for %i0 = 0 to 2 {
-for %i1 = 0 to 2 {
-for %i2 = 0 to 8 {
+affine.for %i0 = 0 to 2 {
+affine.for %i1 = 0 to 2 {
+affine.for %i2 = 0 to 8 {
 %b2 = "affine.apply" (%i1, %i2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
 %x = load %B[%i0, %b2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
 "op1"(%x) : (i32) -> ()
 }
-for %j1 = 0 to 8 {
-for %j2 = 0 to 8 {
+affine.for %j1 = 0 to 8 {
+affine.for %j2 = 0 to 8 {
 %a2 = "affine.apply" (%i1, %j2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
 %v203 = load %A[%j1, %a2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
 "op2"(%v203) : (i32) -> ()
 }
-for %k2 = 0 to 8 {
+affine.for %k2 = 0 to 8 {
 %s0 = "op3"() : () -> i32
 %c2 = "affine.apply" (%i0, %k2) {map: (d0, d1) -> (16*d0 + d1)} : (index, index) -> index
 %s1 = load %C[%j1, %c2] : memref<512 x 512 x i32, (d0, d1) -> (d0, d1), 2>
@@ -322,8 +322,8 @@ func @loop_nest_seq_long() -> i32 {

 // UNROLL-BY-4-LABEL: func @unroll_unit_stride_no_cleanup() {
 func @unroll_unit_stride_no_cleanup() {
-// UNROLL-BY-4: for %i0 = 0 to 100 {
-for %i = 0 to 100 {
+// UNROLL-BY-4: affine.for %i0 = 0 to 100 {
+affine.for %i = 0 to 100 {
 // UNROLL-BY-4: for [[L1:%i[0-9]+]] = 0 to 8 step 4 {
 // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
 // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32

@@ -337,13 +337,13 @@ func @unroll_unit_stride_no_cleanup() {
 // UNROLL-BY-4-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> i32
 // UNROLL-BY-4-NEXT: %10 = "addi32"(%9, %9) : (i32, i32) -> i32
 // UNROLL-BY-4-NEXT: }
-for %j = 0 to 8 {
+affine.for %j = 0 to 8 {
 %x = "addi32"(%j, %j) : (index, index) -> i32
 %y = "addi32"(%x, %x) : (i32, i32) -> i32
 }
 // empty loop
-// UNROLL-BY-4: for %i2 = 0 to 8 {
-for %k = 0 to 8 {
+// UNROLL-BY-4: affine.for %i2 = 0 to 8 {
+affine.for %k = 0 to 8 {
 }
 }
 return

@@ -351,8 +351,8 @@ func @unroll_unit_stride_no_cleanup() {

 // UNROLL-BY-4-LABEL: func @unroll_unit_stride_cleanup() {
 func @unroll_unit_stride_cleanup() {
-// UNROLL-BY-4: for %i0 = 0 to 100 {
-for %i = 0 to 100 {
+// UNROLL-BY-4: affine.for %i0 = 0 to 100 {
+affine.for %i = 0 to 100 {
 // UNROLL-BY-4: for [[L1:%i[0-9]+]] = 0 to 7 step 4 {
 // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
 // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32

@@ -370,7 +370,7 @@ func @unroll_unit_stride_cleanup() {
 // UNROLL-BY-4-NEXT: %11 = "addi32"([[L2]], [[L2]]) : (index, index) -> i32
 // UNROLL-BY-4-NEXT: %12 = "addi32"(%11, %11) : (i32, i32) -> i32
 // UNROLL-BY-4-NEXT: }
-for %j = 0 to 10 {
+affine.for %j = 0 to 10 {
 %x = "addi32"(%j, %j) : (index, index) -> i32
 %y = "addi32"(%x, %x) : (i32, i32) -> i32
 }

@@ -380,8 +380,8 @@ func @unroll_unit_stride_cleanup() {

 // UNROLL-BY-4-LABEL: func @unroll_non_unit_stride_cleanup() {
 func @unroll_non_unit_stride_cleanup() {
-// UNROLL-BY-4: for %i0 = 0 to 100 {
-for %i = 0 to 100 {
+// UNROLL-BY-4: affine.for %i0 = 0 to 100 {
+affine.for %i = 0 to 100 {
 // UNROLL-BY-4: for [[L1:%i[0-9]+]] = 2 to 37 step 20 {
 // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
 // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32

@@ -399,7 +399,7 @@ func @unroll_non_unit_stride_cleanup() {
 // UNROLL-BY-4-NEXT: %11 = "addi32"([[L2]], [[L2]]) : (index, index) -> i32
 // UNROLL-BY-4-NEXT: %12 = "addi32"(%11, %11) : (i32, i32) -> i32
 // UNROLL-BY-4-NEXT: }
-for %j = 2 to 48 step 5 {
+affine.for %j = 2 to 48 step 5 {
 %x = "addi32"(%j, %j) : (index, index) -> i32
 %y = "addi32"(%x, %x) : (i32, i32) -> i32
 }
@@ -411,8 +411,8 @@ func @loop_nest_single_iteration_after_unroll(%N: index) {
 func @loop_nest_single_iteration_after_unroll(%N: index) {
 // UNROLL-BY-4: %c0 = constant 0 : index
 // UNROLL-BY-4: %c4 = constant 4 : index
-// UNROLL-BY-4: for %i0 = 0 to %arg0 {
-for %i = 0 to %N {
+// UNROLL-BY-4: affine.for %i0 = 0 to %arg0 {
+affine.for %i = 0 to %N {
 // UNROLL-BY-4: %0 = "addi32"(%c0, %c0) : (index, index) -> i32
 // UNROLL-BY-4-NEXT: %1 = affine.apply [[MAP0]](%c0)
 // UNROLL-BY-4-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32

@@ -422,7 +422,7 @@ func @loop_nest_single_iteration_after_unroll(%N: index) {
 // UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
 // UNROLL-BY-4-NEXT: %7 = "addi32"(%c4, %c4) : (index, index) -> i32
 // UNROLL-BY-4-NOT: for
-for %j = 0 to 5 {
+affine.for %j = 0 to 5 {
 %x = "addi32"(%j, %j) : (index, index) -> i32
 } // UNROLL-BY-4-NOT: }
 } // UNROLL-BY-4: }

@@ -434,8 +434,8 @@ func @loop_nest_single_iteration_after_unroll(%N: index) {
 // No cleanup will be generated here.
 // UNROLL-BY-4-LABEL: func @loop_nest_operand1() {
 func @loop_nest_operand1() {
-// UNROLL-BY-4: for %i0 = 0 to 100 step 2 {
-// UNROLL-BY-4-NEXT: for %i1 = [[MAP10]](%i0) to #map{{[0-9]+}}(%i0) step 4
+// UNROLL-BY-4: affine.for %i0 = 0 to 100 step 2 {
+// UNROLL-BY-4-NEXT: affine.for %i1 = [[MAP10]](%i0) to #map{{[0-9]+}}(%i0) step 4
 // UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32

@@ -443,8 +443,8 @@ func @loop_nest_operand1() {
 // UNROLL-BY-4-NEXT: }
 // UNROLL-BY-4-NEXT: }
 // UNROLL-BY-4-NEXT: return
-for %i = 0 to 100 step 2 {
-for %j = (d0) -> (0) (%i) to (d0) -> (d0 - d0 mod 4) (%i) {
+affine.for %i = 0 to 100 step 2 {
+affine.for %j = (d0) -> (0) (%i) to (d0) -> (d0 - d0 mod 4) (%i) {
 %x = "foo"() : () -> i32
 }
 }

@@ -454,8 +454,8 @@ func @loop_nest_operand1() {
 // No cleanup will be generated here.
 // UNROLL-BY-4-LABEL: func @loop_nest_operand2() {
 func @loop_nest_operand2() {
-// UNROLL-BY-4: for %i0 = 0 to 100 step 2 {
-// UNROLL-BY-4-NEXT: for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 {
+// UNROLL-BY-4: affine.for %i0 = 0 to 100 step 2 {
+// UNROLL-BY-4-NEXT: affine.for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 {
 // UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32

@@ -463,8 +463,8 @@ func @loop_nest_operand2() {
 // UNROLL-BY-4-NEXT: }
 // UNROLL-BY-4-NEXT: }
 // UNROLL-BY-4-NEXT: return
-for %i = 0 to 100 step 2 {
-for %j = (d0) -> (d0) (%i) to (d0) -> (5*d0 + 4) (%i) {
+affine.for %i = 0 to 100 step 2 {
+affine.for %j = (d0) -> (d0) (%i) to (d0) -> (5*d0 + 4) (%i) {
 %x = "foo"() : () -> i32
 }
 }
@@ -475,16 +475,16 @@ func @loop_nest_operand2() {
 // factor. The cleanup loop happens to be a single iteration one and is promoted.
 // UNROLL-BY-4-LABEL: func @loop_nest_operand3() {
 func @loop_nest_operand3() {
-// UNROLL-BY-4: for %i0 = 0 to 100 step 2 {
-for %i = 0 to 100 step 2 {
-// UNROLL-BY-4: for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 {
+// UNROLL-BY-4: affine.for %i0 = 0 to 100 step 2 {
+affine.for %i = 0 to 100 step 2 {
+// UNROLL-BY-4: affine.for %i1 = [[MAP11]](%i0) to #map{{[0-9]+}}(%i0) step 4 {
 // UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: %3 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: }
 // UNROLL-BY-4-NEXT: %4 = "foo"() : () -> i32
-for %j = (d0) -> (d0) (%i) to (d0) -> (d0 + 9) (%i) {
+affine.for %j = (d0) -> (d0) (%i) to (d0) -> (d0 + 9) (%i) {
 %x = "foo"() : () -> i32
 }
 } // UNROLL-BY-4: }

@@ -493,20 +493,20 @@ func @loop_nest_operand3() {

 // UNROLL-BY-4-LABEL: func @loop_nest_operand4(%arg0: index) {
 func @loop_nest_operand4(%N : index) {
-// UNROLL-BY-4: for %i0 = 0 to 100 {
-for %i = 0 to 100 {
-// UNROLL-BY-4: for %i1 = [[MAP12]]()[%arg0] to #map{{[0-9]+}}()[%arg0] step 4 {
+// UNROLL-BY-4: affine.for %i0 = 0 to 100 {
+affine.for %i = 0 to 100 {
+// UNROLL-BY-4: affine.for %i1 = [[MAP12]]()[%arg0] to #map{{[0-9]+}}()[%arg0] step 4 {
 // UNROLL-BY-4: %0 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: %3 = "foo"() : () -> i32
 // UNROLL-BY-4-NEXT: }
 // A cleanup loop will be generated here.
-// UNROLL-BY-4-NEXT: for %i2 = #map{{[0-9]+}}()[%arg0] to %arg0 {
+// UNROLL-BY-4-NEXT: affine.for %i2 = #map{{[0-9]+}}()[%arg0] to %arg0 {
 // UNROLL-BY-4-NEXT: %4 = "foo"() : () -> i32
 // UNROLL-BY-4_NEXT: }
 // Specify the lower bound so that both lb and ub operands match.
-for %j = ()[s0] -> (0)()[%N] to %N {
+affine.for %j = ()[s0] -> (0)()[%N] to %N {
 %x = "foo"() : () -> i32
 }
 }

@@ -518,7 +518,7 @@ func @loop_nest_unroll_full() {
 // CHECK-NEXT: %0 = "foo"() : () -> i32
 // CHECK-NEXT: %1 = "bar"() : () -> i32
 // CHECK-NEXT: return
-for %i = 0 to 1 {
+affine.for %i = 0 to 1 {
 %x = "foo"() : () -> i32
 %y = "bar"() : () -> i32
 }

@@ -527,7 +527,7 @@ func @loop_nest_unroll_full() {

 // UNROLL-BY-1-LABEL: func @unroll_by_one_should_promote_single_iteration_loop()
 func @unroll_by_one_should_promote_single_iteration_loop() {
-for %i = 0 to 1 {
+affine.for %i = 0 to 1 {
 %x = "foo"(%i) : (index) -> i32
 }
 return
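The non-unit-stride test above is the easiest place to check the unrolling arithmetic: `affine.for %j = 2 to 48 step 5` has trip count ceil((48 - 2) / 5) = 10, so unroll-by-4 keeps floor(10 / 4) * 4 = 8 iterations in the main loop, which now advances by 4 * 5 = 20 with upper bound 37, and the remaining 2 iterations go to a cleanup loop starting at 42. A sketch of that transformation; the "use" op is a placeholder, not from the test file:

```mlir
// Before:
affine.for %j = 2 to 48 step 5 {
  "use"(%j) : (index) -> ()
}

// After unroll-by-4: %j takes 2 and 22, and the affine.apply ops
// materialize the three intermediate induction values per step.
affine.for %j = 2 to 37 step 20 {
  %j1 = affine.apply (d0) -> (d0 + 5) (%j)
  %j2 = affine.apply (d0) -> (d0 + 10) (%j)
  %j3 = affine.apply (d0) -> (d0 + 15) (%j)
  "use"(%j) : (index) -> ()
  "use"(%j1) : (index) -> ()
  "use"(%j2) : (index) -> ()
  "use"(%j3) : (index) -> ()
}
// Cleanup for the last two iterations (j = 42 and j = 47):
affine.for %j = 42 to 48 step 5 {
  "use"(%j) : (index) -> ()
}
```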
@@ -42,7 +42,7 @@
 ;; Keywords
 `(,(regexp-opt
 '(;; Toplevel entities
-"br" "ceildiv" "cfgfunc" "cond_br" "else" "extfunc" "false" "floordiv" "for" "if" "mlfunc" "mod" "return" "size" "step" "to" "true" "??" ) 'symbols) . font-lock-keyword-face))
+"br" "ceildiv" "func" "cond_br" "else" "extfunc" "false" "floordiv" "affine.for" "if" "mod" "return" "size" "step" "to" "true" "??" ) 'symbols) . font-lock-keyword-face))
 "Syntax highlighting for MLIR.")

 ;; Emacs 23 compatibility.
@@ -10,9 +10,9 @@ syn keyword mlirType index i1 i2 i4 i8 i13 i16 i32 i64
 \ f16 f32 tf_control
 syn keyword mlirType memref tensor vector

-syntax keyword mlirKeywords extfunc cfgfunc mlfunc for to step return
+syntax keyword mlirKeywords extfunc func to step return
 syntax keyword mlirConditional if else
-syntax keyword mlirCoreOps dim addf addi subf subi mulf muli cmpi select constant affine.apply call call_indirect extract_element getTensor memref_cast tensor_cast load store alloc dealloc dma_start dma_wait
+syntax keyword mlirCoreOps dim addf addi subf subi mulf muli cmpi select constant affine.apply affine.for call call_indirect extract_element getTensor memref_cast tensor_cast load store alloc dealloc dma_start dma_wait

 syn match mlirInt "-\=\<\d\+\>"
 syn match mlirFloat "-\=\<\d\+\.\d\+\>"