[mlir][NFC] Update textual references of `func` to `func.func` in examples+python scripts

The special case parsing of `func` operations is being removed.
River Riddle 2022-04-20 16:01:51 -07:00
parent 186d5c8af5
commit 2310ced874
37 changed files with 517 additions and 515 deletions
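Concretely, every example in these docs and scripts switches from the shorthand spelling to the fully qualified operation name. A minimal before/after sketch (illustrative only, not a hunk from this diff; the `@example` name is hypothetical):

```mlir
// Before: relied on the special-case parsing of `func`.
func @example(%arg0: i32) -> i32 {
  return %arg0 : i32
}

// After: uses the fully qualified `func.func` operation name.
func.func @example(%arg0: i32) -> i32 {
  return %arg0 : i32
}
```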

View File

@ -39,7 +39,7 @@ writes needs to dominate all buffer reads.
Example for breaking the invariant:
```mlir
func @condBranch(%arg0: i1, %arg1: memref<2xf32>) {
func.func @condBranch(%arg0: i1, %arg1: memref<2xf32>) {
%0 = memref.alloc() : memref<2xf32>
cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
@ -71,7 +71,7 @@ BufferDeallocation is fully compatible with “hybrid” setups in which tracked
and untracked allocations are mixed:
```mlir
func @mixedAllocation(%arg0: i1) {
func.func @mixedAllocation(%arg0: i1) {
%0 = memref.alloca() : memref<2xf32> // aliases: %2
%1 = memref.alloc() : memref<2xf32> // aliases: %2
cf.cond_br %arg0, ^bb1, ^bb2
@ -128,7 +128,7 @@ BufferHoisting pass:
![branch_example_pre_move](/includes/img/branch_example_pre_move.svg)
```mlir
func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
func.func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
cf.br ^bb3(%arg1 : memref<2xf32>)
@ -148,7 +148,7 @@ of code:
![branch_example_post_move](/includes/img/branch_example_post_move.svg)
```mlir
func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
func.func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
%0 = memref.alloc() : memref<2xf32> // moved to bb0
cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
@ -170,7 +170,7 @@ Due to the data dependency of the allocation to %0, we cannot move the
allocation out of bb2 in this case:
```mlir
func @condBranchDynamicType(
func.func @condBranchDynamicType(
%arg0: i1,
%arg1: memref<?xf32>,
%arg2: memref<?xf32>,
@ -199,7 +199,7 @@ copies to eliminate them. Consider the following example in which the
allocations have already been placed:
```mlir
func @branch(%arg0: i1) {
func.func @branch(%arg0: i1) {
%0 = memref.alloc() : memref<2xf32> // aliases: %2
cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
@ -231,7 +231,7 @@ Applying the BufferDeallocation pass to the program above yields the following
result:
```mlir
func @branch(%arg0: i1) {
func.func @branch(%arg0: i1) {
%0 = memref.alloc() : memref<2xf32>
cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
@ -268,7 +268,7 @@ and non-critical aliases:
![nested_branch_example_pre_move](/includes/img/nested_branch_example_pre_move.svg)
```mlir
func @condBranchDynamicTypeNested(
func.func @condBranchDynamicTypeNested(
%arg0: i1,
%arg1: memref<?xf32>, // aliases: %3, %4
%arg2: memref<?xf32>,
@ -301,7 +301,7 @@ Applying BufferDeallocation yields the following output:
![nested_branch_example_post_move](/includes/img/nested_branch_example_post_move.svg)
```mlir
func @condBranchDynamicTypeNested(
func.func @condBranchDynamicTypeNested(
%arg0: i1,
%arg1: memref<?xf32>,
%arg2: memref<?xf32>,
@ -379,7 +379,7 @@ the `RegionBranchOpInterface` to determine predecessors in order to infer the
high-level control flow:
```mlir
func @inner_region_control_flow(
func.func @inner_region_control_flow(
%arg0 : index,
%arg1 : index) -> memref<?x?xf32> {
%0 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
@ -403,7 +403,7 @@ dialect-specific operations. BufferDeallocation supports this behavior via the
operation to determine the value of %2 at runtime which creates an alias:
```mlir
func @nested_region_control_flow(%arg0 : index, %arg1 : index) -> memref<?x?xf32> {
func.func @nested_region_control_flow(%arg0 : index, %arg1 : index) -> memref<?x?xf32> {
%0 = arith.cmpi "eq", %arg0, %arg1 : index
%1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
%2 = scf.if %0 -> (memref<?x?xf32>) {
@ -424,7 +424,7 @@ block since it cannot be accessed by the remainder of the program. Accessing the
%1 which does not need to be tracked.
```mlir
func @nested_region_control_flow(%arg0: index, %arg1: index) -> memref<?x?xf32> {
func.func @nested_region_control_flow(%arg0: index, %arg1: index) -> memref<?x?xf32> {
%0 = arith.cmpi "eq", %arg0, %arg1 : index
%1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
%2 = scf.if %0 -> (memref<?x?xf32>) {
@ -448,7 +448,7 @@ Reconsider a slightly adapted version of the “custom.region_if” example from
above that uses a nested allocation:
```mlir
func @inner_region_control_flow_div(
func.func @inner_region_control_flow_div(
%arg0 : index,
%arg1 : index) -> memref<?x?xf32> {
%0 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
@ -471,7 +471,7 @@ Furthermore, %arg4 is returned to its parent operation and has an alias %1. This
causes BufferDeallocation to introduce additional copies:
```mlir
func @inner_region_control_flow_div(
func.func @inner_region_control_flow_div(
%arg0 : index,
%arg1 : index) -> memref<?x?xf32> {
%0 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
@ -509,7 +509,7 @@ Consider the following “scf.for” use case containing a nested structured
control-flow if:
```mlir
func @loop_nested_if(
func.func @loop_nested_if(
%lb: index,
%ub: index,
%step: index,
@ -547,7 +547,7 @@ buffer, we have to free the buffer from the previous iteration to avoid memory
leaks:
```mlir
func @loop_nested_if(
func.func @loop_nested_if(
%lb: index,
%ub: index,
%step: index,
@ -624,7 +624,7 @@ analysis of this sample reveals that the highlighted operations are redundant
and can be removed.
```mlir
func @dynamic_allocation(%arg0: index, %arg1: index) -> memref<?x?xf32> {
func.func @dynamic_allocation(%arg0: index, %arg1: index) -> memref<?x?xf32> {
%1 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
%2 = bufferization.clone %1 : (memref<?x?xf32>) -> (memref<?x?xf32>)
memref.dealloc %1 : memref<?x?xf32>
@ -635,7 +635,7 @@ func @dynamic_allocation(%arg0: index, %arg1: index) -> memref<?x?xf32> {
Will be transformed to:
```mlir
func @dynamic_allocation(%arg0: index, %arg1: index) -> memref<?x?xf32> {
func.func @dynamic_allocation(%arg0: index, %arg1: index) -> memref<?x?xf32> {
%1 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
return %1 : memref<?x?xf32>
}
@ -656,7 +656,7 @@ merged into a single step. Canonicalization removes the clone operation and
%temp, and replaces the uses of %temp with %result:
```mlir
func @reuseTarget(%arg0: memref<2xf32>, %result: memref<2xf32>){
func.func @reuseTarget(%arg0: memref<2xf32>, %result: memref<2xf32>){
%temp = memref.alloc() : memref<2xf32>
test.generic {
args_in = 1 : i64,
@ -676,7 +676,7 @@ func @reuseTarget(%arg0: memref<2xf32>, %result: memref<2xf32>){
Will be transformed to:
```mlir
func @reuseTarget(%arg0: memref<2xf32>, %result: memref<2xf32>){
func.func @reuseTarget(%arg0: memref<2xf32>, %result: memref<2xf32>){
test.generic {
args_in = 1 : i64,
args_out = 1 : i64,

View File

@ -243,7 +243,7 @@ diagnostic. Example usage of this handler can be seen in the `mlir-opt` tool.
$ mlir-opt foo.mlir
/tmp/test.mlir:6:24: error: expected non-function type
func @foo() -> (index, ind) {
func.func @foo() -> (index, ind) {
^
```
@ -306,12 +306,12 @@ A few examples are shown below:
```mlir
// Expect an error on the same line.
func @bad_branch() {
func.func @bad_branch() {
cf.br ^missing // expected-error {{reference to an undefined block}}
}
// Expect an error on an adjacent line.
func @foo(%a : f32) {
func.func @foo(%a : f32) {
// expected-error@+1 {{unknown comparison predicate "foo"}}
%result = arith.cmpf "foo", %a, %a : f32
return
@ -320,10 +320,10 @@ func @foo(%a : f32) {
// Expect an error on the next line that does not contain a designator.
// expected-remark@below {{remark on function below}}
// expected-remark@below {{another remark on function below}}
func @bar(%a : f32)
func.func @bar(%a : f32)
// Expect an error on the previous line that does not contain a designator.
func @baz(%a : f32)
func.func @baz(%a : f32)
// expected-remark@above {{remark on function above}}
// expected-remark@above {{another remark on function above}}
@ -336,7 +336,7 @@ any expected diagnostics weren't.
$ mlir-opt foo.mlir
/tmp/test.mlir:6:24: error: unexpected error: expected non-function type
func @foo() -> (index, ind) {
func.func @foo() -> (index, ind) {
^
/tmp/test.mlir:15:4: error: expected remark "expected some remark" was not produced

View File

@ -102,7 +102,7 @@ layout, and the second one is a `memref` of 4-element vectors with a 2-strided,
// memory layouts
#identity = affine_map<(d0) -> (d0)>
func @example(%A: memref<?xf32, #identity>,
func.func @example(%A: memref<?xf32, #identity>,
%B: memref<?xvector<4xf32>, offset: 1, strides: [2]>) {
linalg.generic #attrs
ins(%A: memref<?xf32, #identity>)
@ -124,7 +124,7 @@ materialized by a lowering into a form that will resemble:
// Its syntax can be found here: https://mlir.llvm.org/docs/Dialects/SCFDialect/
#map0 = affine_map<(d0) -> (d0 * 2 + 1)>
func @example(%arg0: memref<?xf32>, %arg1: memref<?xvector<4xf32>, #map0>) {
func.func @example(%arg0: memref<?xf32>, %arg1: memref<?xvector<4xf32>, #map0>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%0 = memref.dim %arg0, %c0 : memref<?xf32>
@ -186,7 +186,7 @@ uses an identity layout.
iterator_types = ["parallel", "parallel"]
}
func @example(%A: memref<8x?xf32, offset: 0, strides: [2, 2]>,
func.func @example(%A: memref<8x?xf32, offset: 0, strides: [2, 2]>,
%B: memref<?xvector<4xf32>>) {
linalg.generic #attrs
ins(%A: memref<8x?xf32, offset: 0, strides: [2, 2]>)
@ -206,7 +206,7 @@ materialized by a lowering into a form that will resemble:
// Run: mlir-opt example2.mlir -allow-unregistered-dialect -convert-linalg-to-loops
#map0 = affine_map<(d0, d1) -> (d0 * 2 + d1 * 2)>
func @example(%arg0: memref<8x?xf32, #map0>, %arg1: memref<?xvector<4xf32>>) {
func.func @example(%arg0: memref<8x?xf32, #map0>, %arg1: memref<?xvector<4xf32>>) {
%c8 = arith.constant 8 : index
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@ -309,7 +309,7 @@ be when using a concrete operation `addf`:
iterator_types = ["parallel", "parallel"]
}
func @example(%A: memref<?x?xf32>, %B: memref<?x?xf32>, %C: memref<?x?xf32>) {
func.func @example(%A: memref<?x?xf32>, %B: memref<?x?xf32>, %C: memref<?x?xf32>) {
linalg.generic #attrs
ins(%A, %B: memref<?x?xf32>, memref<?x?xf32>)
outs(%C: memref<?x?xf32>) {
@ -329,7 +329,7 @@ The property "*The Compute Payload is Specified With a Region*" is materialized
by a lowering into a form that will resemble:
```mlir
func @example(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) {
func.func @example(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%0 = memref.dim %arg0, %c0 : memref<?x?xf32>
@ -382,7 +382,7 @@ call we intend to use:
library_call = "pointwise_add"
}
func @example(%A: memref<?x?xf32>, %B: memref<?x?xf32>, %C: memref<?x?xf32>) {
func.func @example(%A: memref<?x?xf32>, %B: memref<?x?xf32>, %C: memref<?x?xf32>) {
linalg.generic #attrs
ins(%A, %B: memref<?x?xf32>, memref<?x?xf32>)
outs(%C: memref<?x?xf32>) {
@ -402,14 +402,14 @@ into a form that will resemble:
#map0 = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>
func @example(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) {
func.func @example(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) {
%0 = memref.cast %arg0 : memref<?x?xf32> to memref<?x?xf32, #map0>
%1 = memref.cast %arg1 : memref<?x?xf32> to memref<?x?xf32, #map0>
%2 = memref.cast %arg2 : memref<?x?xf32> to memref<?x?xf32, #map0>
call @pointwise_add(%0, %1, %2) : (memref<?x?xf32, #map0>, memref<?x?xf32, #map0>, memref<?x?xf32, #map0>) -> ()
return
}
func @pointwise_add(memref<?x?xf32, #map0>, memref<?x?xf32, #map0>, memref<?x?xf32, #map0>) attributes {llvm.emit_c_interface}
func.func @pointwise_add(memref<?x?xf32, #map0>, memref<?x?xf32, #map0>, memref<?x?xf32, #map0>) attributes {llvm.emit_c_interface}
```
Which, after lowering to LLVM resembles:
@ -417,7 +417,7 @@ Which, after lowering to LLVM resembles:
```mlir
// Run: mlir-opt example4.mlir -convert-linalg-to-std | mlir-opt -convert-func-to-llvm
// Some generated code is omitted here.
func @example(%arg0: !llvm<"float*">, ...) {
func.func @example(%arg0: !llvm<"float*">, ...) {
...
llvm.call @pointwise_add(...) : (!llvm<"float*">, ...) -> ()
return

View File

@ -422,7 +422,7 @@ the SPIR-V dialect. Instead, we reuse the builtin `func` op to express functions
more concisely:
```mlir
func @f(%arg: i32) -> i32 {
func.func @f(%arg: i32) -> i32 {
"spv.ReturnValue"(%arg) : (i32) -> (i32)
}
```
@ -580,7 +580,7 @@ void loop(bool cond) {
It will be represented as
```mlir
func @selection(%cond: i1) -> () {
func.func @selection(%cond: i1) -> () {
%zero = spv.Constant 0: i32
%one = spv.Constant 1: i32
%two = spv.Constant 2: i32
@ -668,7 +668,7 @@ void loop(int count) {
It will be represented as
```mlir
func @loop(%count : i32) -> () {
func.func @loop(%count : i32) -> () {
%zero = spv.Constant 0: i32
%one = spv.Constant 1: i32
%var = spv.Variable init(%zero) : !spv.ptr<i32, Function>
@ -728,7 +728,7 @@ example, for the following SPIR-V function `foo`:
It will be represented as:
```mlir
func @foo() -> () {
func.func @foo() -> () {
%var = spv.Variable : !spv.ptr<i32, Function>
spv.mlir.selection {

View File

@ -97,7 +97,7 @@ separate shape function library, while here we would normally reify it as part
of lowering, but for simplicity will show as a standalone shape function.
```mlir
func @matmul_shape1(%lhs: tensor<*xf32>, %rhs: tensor<*xindex>) -> tensor<?xindex> {
func.func @matmul_shape1(%lhs: tensor<*xf32>, %rhs: tensor<*xindex>) -> tensor<?xindex> {
%c1 = shape.const_size 1
%c2 = shape.const_size 2
// We allow `shape.shape_of` to return either a `!shape.shape` or
@ -136,7 +136,7 @@ We can now hoist computations of constraints where possible (which in the case
below is not too many as we need to verify the rank before we can split)
```mlir
func @matmul_shape2(%lhs: tensor<*xf32>, %lhs: tensor<*xf32>) -> tensor<?xindex> {
func.func @matmul_shape2(%lhs: tensor<*xf32>, %lhs: tensor<*xf32>) -> tensor<?xindex> {
%c1 = shape.const_size 1
%c2 = shape.const_size 2
%lhs_shape = shape.shape_of %lhs : tensor<*xf32> -> tensor<?xindex>
@ -167,7 +167,7 @@ The above form can now be lowered to the fully imperative form (see
for example).
```mlir
func @matmul_shape3(%lhs: tensor<*xf32>, %lhs: tensor<*xf32>) -> tensor<?xindex> {
func.func @matmul_shape3(%lhs: tensor<*xf32>, %lhs: tensor<*xf32>) -> tensor<?xindex> {
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%lhs_shape = shape.shape_of %lhs : tensor<*xf32> -> tensor<?xindex>

View File

@ -71,7 +71,7 @@ Here's an example of an MLIR module:
// Compute A*B using an implementation of multiply kernel and print the
// result using a TensorFlow op. The dimensions of A and B are partially
// known. The shapes are assumed to match.
func @mul(%A: tensor<100x?xf32>, %B: tensor<?x50xf32>) -> (tensor<100x50xf32>) {
func.func @mul(%A: tensor<100x?xf32>, %B: tensor<?x50xf32>) -> (tensor<100x50xf32>) {
// Compute the inner dimension of %A using the dim operation.
%n = memref.dim %A, 1 : tensor<100x?xf32>
@ -102,7 +102,7 @@ func @mul(%A: tensor<100x?xf32>, %B: tensor<?x50xf32>) -> (tensor<100x50xf32>) {
}
// A function that multiplies two memrefs and returns the result.
func @multiply(%A: memref<100x?xf32>, %B: memref<?x50xf32>)
func.func @multiply(%A: memref<100x?xf32>, %B: memref<?x50xf32>)
-> (memref<100x50xf32>) {
// Compute the inner dimension of %A.
%n = memref.dim %A, 1 : memref<100x?xf32>
@ -389,7 +389,7 @@ Here is a simple example function showing branches, returns, and block
arguments:
```mlir
func @simple(i64, i1) -> i64 {
func.func @simple(i64, i1) -> i64 {
^bb0(%a: i64, %cond: i1): // Code dominated by ^bb0 may refer to %a
cf.cond_br %cond, ^bb1, ^bb2
@ -529,7 +529,7 @@ region, for example if a function call does not return.
Example:
```mlir
func @accelerator_compute(i64, i1) -> i64 { // An SSACFG region
func.func @accelerator_compute(i64, i1) -> i64 { // An SSACFG region
^bb0(%a: i64, %cond: i1): // Code dominated by ^bb0 may refer to %a
cf.cond_br %cond, ^bb1, ^bb2

View File

@ -406,7 +406,7 @@ the concept, let's take a look at a quick example. Consider the `.mlir` snippet
below:
```mlir
func @baz(%arg: i32) {
func.func @baz(%arg: i32) {
%result = my_dialect.foo %arg, %arg -> i32
}
```

View File

@ -1091,7 +1091,7 @@ this instrumentation:
$ mlir-opt foo.mlir -pass-pipeline='func.func(cse)' -mlir-print-ir-before=cse
*** IR Dump Before CSE ***
func @simple_constant() -> (i32, i32) {
func.func @simple_constant() -> (i32, i32) {
%c1_i32 = arith.constant 1 : i32
%c1_i32_0 = arith.constant 1 : i32
return %c1_i32, %c1_i32_0 : i32, i32
@ -1107,7 +1107,7 @@ func @simple_constant() -> (i32, i32) {
$ mlir-opt foo.mlir -pass-pipeline='func.func(cse)' -mlir-print-ir-after=cse
*** IR Dump After CSE ***
func @simple_constant() -> (i32, i32) {
func.func @simple_constant() -> (i32, i32) {
%c1_i32 = arith.constant 1 : i32
return %c1_i32, %c1_i32 : i32, i32
}
@ -1128,7 +1128,7 @@ func @simple_constant() -> (i32, i32) {
$ mlir-opt foo.mlir -pass-pipeline='func.func(cse,cse)' -mlir-print-ir-after=cse -mlir-print-ir-after-change
*** IR Dump After CSE ***
func @simple_constant() -> (i32, i32) {
func.func @simple_constant() -> (i32, i32) {
%c1_i32 = arith.constant 1 : i32
return %c1_i32, %c1_i32 : i32, i32
}
@ -1143,7 +1143,7 @@ func @simple_constant() -> (i32, i32) {
$ mlir-opt foo.mlir -pass-pipeline='func.func(cse,bad-pass)' -mlir-print-ir-after-failure
*** IR Dump After BadPass Failed ***
func @simple_constant() -> (i32, i32) {
func.func @simple_constant() -> (i32, i32) {
%c1_i32 = arith.constant 1 : i32
return %c1_i32, %c1_i32 : i32, i32
}
@ -1159,22 +1159,22 @@ func @simple_constant() -> (i32, i32) {
$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='func.func(cse)' -mlir-print-ir-after=cse -mlir-print-ir-module-scope
*** IR Dump After CSE *** ('func.func' operation: @bar)
func @bar(%arg0: f32, %arg1: f32) -> f32 {
func.func @bar(%arg0: f32, %arg1: f32) -> f32 {
...
}
func @simple_constant() -> (i32, i32) {
func.func @simple_constant() -> (i32, i32) {
%c1_i32 = arith.constant 1 : i32
%c1_i32_0 = arith.constant 1 : i32
return %c1_i32, %c1_i32_0 : i32, i32
}
*** IR Dump After CSE *** ('func.func' operation: @simple_constant)
func @bar(%arg0: f32, %arg1: f32) -> f32 {
func.func @bar(%arg0: f32, %arg1: f32) -> f32 {
...
}
func @simple_constant() -> (i32, i32) {
func.func @simple_constant() -> (i32, i32) {
%c1_i32 = arith.constant 1 : i32
return %c1_i32, %c1_i32 : i32, i32
}
@ -1196,7 +1196,7 @@ reproducible may have the form:
// configuration: -pass-pipeline='func.func(cse,canonicalize),inline' -verify-each
module {
func @foo() {
func.func @foo() {
...
}
}
@ -1231,7 +1231,7 @@ the following reproducer will be generated:
// configuration: -pass-pipeline='func.func(canonicalize)' -verify-each -mlir-disable-threading
module {
func @foo() {
func.func @foo() {
...
}
}

View File

@ -155,7 +155,7 @@ turned into zero:
```mlir
// RUN: mlir-opt %s -canonicalize | FileCheck %s
func @test_subi_zero_cfg(%arg0: i32) -> i32 {
func.func @test_subi_zero_cfg(%arg0: i32) -> i32 {
%y = arith.subi %arg0, %arg0 : i32
return %y: i32
}
@ -209,7 +209,7 @@ write tests like this:
```mlir
// RUN: mlir-opt %s -memref-dependence-check -verify-diagnostics
func @different_memrefs() {
func.func @different_memrefs() {
%m.a = memref.alloc() : memref<100xf32>
%m.b = memref.alloc() : memref<100xf32>
%c0 = arith.constant 0 : index

View File

@ -137,13 +137,13 @@ unknown dimension can be queried using the "dim" builtin as shown below.
Example:
```mlir
func foo(...) {
func.func foo(...) {
%A = memref.alloc <8x?xf32, #lmap> (%N)
...
call bar(%A) : (memref<8x?xf32, #lmap>)
}
func bar(%A : memref<8x?xf32, #lmap>) {
func.func bar(%A : memref<8x?xf32, #lmap>) {
// Type of %A indicates that %A has dynamic shape with 8 rows
// and unknown number of columns. The number of columns is queried
// dynamically using dim instruction.
@ -549,7 +549,7 @@ The presence of dynamic control flow leads to an inner non-affine function
nested in an outer function that uses affine loops.
```mlir
func @search(%A: memref<?x?xi32>, %S: <?xi32>, %key : i32) {
func.func @search(%A: memref<?x?xi32>, %S: <?xi32>, %key : i32) {
%ni = memref.dim %A, 0 : memref<?x?xi32>
// This loop can be parallelized
affine.for %i = 0 to %ni {
@ -558,7 +558,7 @@ func @search(%A: memref<?x?xi32>, %S: <?xi32>, %key : i32) {
return
}
func @search_body(%A: memref<?x?xi32>, %S: memref<?xi32>, %key: i32, %i : i32) {
func.func @search_body(%A: memref<?x?xi32>, %S: memref<?xi32>, %key: i32, %i : i32) {
%nj = memref.dim %A, 1 : memref<?x?xi32>
cf.br ^bb1(0)
@ -607,7 +607,7 @@ for (i = 0; i < N; i++)
```
```mlir
func @outer_nest(%n : index) {
func.func @outer_nest(%n : index) {
affine.for %i = 0 to %n {
affine.for %j = 0 to %n {
%pow = call @pow(2, %j) : (index, index) -> index
@ -617,7 +617,7 @@ func @outer_nest(%n : index) {
return
}
func @inner_nest(%m : index, %n : index) {
func.func @inner_nest(%m : index, %n : index) {
affine.for %k = 0 to %m {
affine.for %l = 0 to %n {
...
@ -658,7 +658,7 @@ in a dilated convolution.
// input: [batch, input_height, input_width, input_feature]
// kernel: [kernel_height, kernel_width, input_feature, output_feature]
// output: [batch, output_height, output_width, output_feature]
func @conv2d(%input: memref<16x1024x1024x3xf32, #lm0, /*scratchpad=*/1>,
func.func @conv2d(%input: memref<16x1024x1024x3xf32, #lm0, /*scratchpad=*/1>,
%kernel: memref<5x5x3x32xf32, #lm0, /*scratchpad=*/1>,
%output: memref<16x512x512x32xf32, #lm0, /*scratchpad=*/1>) {
affine.for %b = 0 to %batch {
@ -756,7 +756,7 @@ instruction that appears in that branch. Each leaf node is an ML Instruction.
#intset_ij = (i, j) [M, N, K] : i >= 0, -i + N - 1 >= 0, j >= 0, -j + N-1 >= 0
#intset_ijk = (i, j, k) [M, N, K] : i >= 0, -i + N - 1 >= 0, j >= 0,
-j + M-1 >= 0, k >= 0, -k + N - 1 >= 0)
func @matmul(%A, %B, %C, %M, %N, %K) : (...) { // %M, N, K are symbols
func.func @matmul(%A, %B, %C, %M, %N, %K) : (...) { // %M, N, K are symbols
// t1, t2, t3, t4, t5, t6 are abstract polyhedral loops
mldim %t1 : {S1,S2,S3,S4,S5} floordiv (i, 128) {
mldim %t2 : {S1,S2,S3,S4,S5} floordiv (j, 128) {
@ -838,7 +838,7 @@ Example:
// read relation: two elements ( d0 <= r0 <= d0+1 )
##aff_rel9 = (d0) -> (r0) : r0 - d0 >= 0, d0 - r0 + 1 >= 0
func @count (%A : memref<128xf32>, %pos : i32) -> f32
func.func @count (%A : memref<128xf32>, %pos : i32) -> f32
reads: {%A ##aff_rel9 (%pos)}
writes: /* empty */
may_reads: /* empty */
@ -913,7 +913,7 @@ Example:
```mlir
##rel9 ( ) [s0] -> (r0, r1) : 0 <= r0 <= 1023, 0 <= r1 <= s0 - 1
func @cblas_reduce_ffi(%M: memref<1024 x ? x f32, #layout_map0, /*mem=*/0>)
func.func @cblas_reduce_ffi(%M: memref<1024 x ? x f32, #layout_map0, /*mem=*/0>)
-> f32 [
reads: {%M, ##rel9() }
writes: /* empty */
@ -921,7 +921,7 @@ func @cblas_reduce_ffi(%M: memref<1024 x ? x f32, #layout_map0, /*mem=*/0>)
may_writes: /* empty */
]
func @dma_mem_to_scratchpad(%a : memref<1024 x f32, #layout_map0, /*mem=*/0>,
func.func @dma_mem_to_scratchpad(%a : memref<1024 x f32, #layout_map0, /*mem=*/0>,
%b : memref<1024 x f32, #layout_map0, 1>, %c : memref<1024 x f32,
#layout_map0>) [
reads: {%M, ##rel9() }
@ -986,7 +986,7 @@ Example:
```mlir
// Return sum of elements in 1-dimensional memref A
func i32 @sum(%A : memref<?xi32>, %N : i32) -> (i32) {
func.func i32 @sum(%A : memref<?xi32>, %N : i32) -> (i32) {
%init = 0
%result = affine.for %i = 0 to N with %tmp(%init) {
%value = affine.load %A[%i]
@ -1016,7 +1016,7 @@ Example:
```mlir
// Compute sum of half of the array
func i32 @sum_half(%A : memref<?xi32>, %N : i32) -> (i32) {
func.func i32 @sum_half(%A : memref<?xi32>, %N : i32) -> (i32) {
%s0 = 0
%s1 = affine.for %i = 1 ... N step 1 with %s2 (%s0) {
%s3 = if (%i >= %N / 2) {

View File

@ -858,7 +858,7 @@ gpu.module @foo {
}
}
func @main() {
func.func @main() {
// Fill the buffer with some data
%buffer = memref.alloc : memref<8xi32>
%data = ...
@ -881,7 +881,7 @@ spv.module @__spv__foo /*VCE triple and other metadata here*/ {
spv.EntryPoint @bar, ...
}
func @main() {
func.func @main() {
// Fill the buffer with some data.
%buffer = memref.alloc : memref<8xi32>
%data = ...

View File

@ -78,7 +78,7 @@ Below is an example of how an operation can reference a symbol operation:
```mlir
// This `func.func` operation defines a symbol named `symbol`.
func @symbol()
func.func @symbol()
// Our `foo.user` operation contains a SymbolRefAttr with the name of the
// `symbol` func.
@ -86,7 +86,7 @@ func @symbol()
// Symbol references resolve to the nearest parent operation that defines a
// symbol table, so we can have references with arbitrary nesting levels.
func @other_symbol() {
func.func @other_symbol() {
affine.for %i0 = 0 to 10 {
// Our `foo.user` operation resolves to the same `symbol` func as defined
// above.
@ -107,7 +107,7 @@ module {
// a symbol.
module @module_symbol {
// This `func.func` operation defines a symbol named `nested_symbol`.
func @nested_symbol()
func.func @nested_symbol()
}
// Our `foo.user` operation may refer to the nested symbol, by resolving through
@ -207,17 +207,17 @@ quote. A few examples of what this looks like in the IR are shown below:
module @public_module {
// This function can be accessed by 'live.user', but cannot be referenced
// externally; all uses are known to reside within parent regions.
func nested @nested_function()
func.func nested @nested_function()
// This function cannot be accessed outside of 'public_module'.
func private @private_function()
func.func private @private_function()
}
// This function can only be accessed from within the top-level module.
func private @private_function()
func.func private @private_function()
// This function may be referenced externally.
func @public_function()
func.func @public_function()
"live.user"() {uses = [
@public_module::@nested_function,

View File

@ -301,10 +301,10 @@ defines and uses of the values being returned.
Example:
```mlir
func @foo(%arg0: i32, %arg1: i64) -> (i32, i64) {
func.func @foo(%arg0: i32, %arg1: i64) -> (i32, i64) {
return %arg0, %arg1 : i32, i64
}
func @bar() {
func.func @bar() {
%0 = arith.constant 42 : i32
%1 = arith.constant 17 : i64
%2:2 = call @foo(%0, %1) : (i32, i64) -> (i32, i64)
@ -360,7 +360,7 @@ aliasing attributes on the raw pointers underpinning the memref.
Examples:
```mlir
func @foo(%arg0: memref<?xf32>) -> () {
func.func @foo(%arg0: memref<?xf32>) -> () {
"use"(%arg0) : (memref<?xf32>) -> ()
return
}
@ -390,7 +390,7 @@ llvm.func @foo(%arg0: !llvm.ptr<f32>, // Allocated pointer.
```
```mlir
func @bar() {
func.func @bar() {
%0 = "get"() : () -> (memref<?xf32>)
call @foo(%0) : (memref<?xf32>) -> ()
return
@ -503,9 +503,9 @@ to the following.
Examples:
```
func @callee(memref<2x4xf32>) {
func.func @callee(memref<2x4xf32>) {
func @caller(%0 : memref<2x4xf32>) {
func.func @caller(%0 : memref<2x4xf32>) {
call @callee(%0) : (memref<2x4xf32>) -> ()
}
@ -615,7 +615,7 @@ Examples:
```mlir
func @qux(%arg0: memref<?x?xf32>)
func.func @qux(%arg0: memref<?x?xf32>)
// Gets converted into the following
// (using type alias for brevity):
@ -659,7 +659,7 @@ llvm.func @_mlir_ciface_qux(!llvm.ptr<struct<(ptr<f32>, ptr<f32>, i64,
```
```mlir
func @foo(%arg0: memref<?x?xf32>) {
func.func @foo(%arg0: memref<?x?xf32>) {
return
}
@ -698,7 +698,7 @@ llvm.func @_mlir_ciface_foo(%arg0: !llvm.memref_2d_ptr) {
```
```mlir
func @foo(%arg0: memref<?x?xf32>) -> memref<?x?xf32> {
func.func @foo(%arg0: memref<?x?xf32>) -> memref<?x?xf32> {
return %arg0 : memref<?x?xf32>
}

View File

@ -275,7 +275,7 @@ the legalization pass test in TensorFlow Lite) such as:
```mlir
// RUN: mlir-opt -tfl-legalize-tf %s | FileCheck %s
func @LeakyRelu(%arg0: tensor<1xf32>) -> tensor<1xf32> {
func.func @LeakyRelu(%arg0: tensor<1xf32>) -> tensor<1xf32> {
%2 = "tf.LeakyRelu"(%arg0) {alpha: 0.1} : (tensor<1xf32>) -> tensor<1xf32>
return %2: tensor<1xf32>

View File

@ -124,7 +124,7 @@ operation. For example, we could place our Toy operation from above into an
related dialect:
```mlir
func @toy_func(%tensor: tensor<2x3xf64>) -> tensor<3x2xf64> {
func.func @toy_func(%tensor: tensor<2x3xf64>) -> tensor<3x2xf64> {
%t_tensor = "toy.transpose"(%tensor) { inplace = true } : (tensor<2x3xf64>) -> tensor<3x2xf64>
return %t_tensor : tensor<3x2xf64>
}
@ -144,7 +144,7 @@ This handling can be observed by crafting what should be an invalid IR for Toy
and seeing it round-trip without tripping the verifier:
```mlir
func @main() {
func.func @main() {
%0 = "toy.print"() : () -> tensor<2x3xf64>
}
```

View File

@ -243,7 +243,7 @@ toy.func @main() {
With affine lowering added to our pipeline, we can now generate:
```mlir
func @main() {
func.func @main() {
%cst = arith.constant 1.000000e+00 : f64
%cst_0 = arith.constant 2.000000e+00 : f64
%cst_1 = arith.constant 3.000000e+00 : f64
@ -301,7 +301,7 @@ help clean this up. Adding the `LoopFusion` and `MemRefDataFlowOpt` passes to
the pipeline gives the following result:
```mlir
func @main() {
func.func @main() {
%cst = arith.constant 1.000000e+00 : f64
%cst_0 = arith.constant 2.000000e+00 : f64
%cst_1 = arith.constant 3.000000e+00 : f64

View File

@ -2,7 +2,7 @@
module {
// CHECK-LABEL: func @bar()
func @bar() {
func.func @bar() {
%0 = arith.constant 1 : i32
// CHECK: %{{.*}} = standalone.foo %{{.*}} : i32
%res = standalone.foo %0 : i32

View File

@ -276,7 +276,7 @@ def ReturnOp : Toy_Op<"return", [NoSideEffect, HasParent<"FuncOp">,
the operation. For example:
```mlir
func @foo() -> tensor<2xf64> {
toy.func @foo() -> tensor<2xf64> {
...
toy.return %0 : tensor<2xf64>
}

View File

@ -278,7 +278,7 @@ def ReturnOp : Toy_Op<"return", [NoSideEffect, HasParent<"FuncOp">,
the operation. For example:
```mlir
func @foo() -> tensor<2xf64> {
toy.func @foo() -> tensor<2xf64> {
...
toy.return %0 : tensor<2xf64>
}

View File

@ -310,7 +310,7 @@ def ReturnOp : Toy_Op<"return", [NoSideEffect, HasParent<"FuncOp">,
the operation. For example:
```mlir
func @foo() -> tensor<2xf64> {
toy.func @foo() -> tensor<2xf64> {
...
toy.return %0 : tensor<2xf64>
}

View File

@ -311,7 +311,7 @@ def ReturnOp : Toy_Op<"return", [NoSideEffect, HasParent<"FuncOp">,
the operation. For example:
```mlir
func @foo() -> tensor<2xf64> {
toy.func @foo() -> tensor<2xf64> {
...
toy.return %0 : tensor<2xf64>
}

View File

@ -311,7 +311,7 @@ def ReturnOp : Toy_Op<"return", [NoSideEffect, HasParent<"FuncOp">,
the operation. For example:
```mlir
func @foo() -> tensor<2xf64> {
toy.func @foo() -> tensor<2xf64> {
...
toy.return %0 : tensor<2xf64>
}

View File

@ -331,7 +331,7 @@ def ReturnOp : Toy_Op<"return", [NoSideEffect, HasParent<"FuncOp">,
the operation. For example:
```mlir
func @foo() -> tensor<2xf64> {
toy.func @foo() -> tensor<2xf64> {
...
toy.return %0 : tensor<2xf64>
}

View File

@ -24,37 +24,37 @@ def sddmm_dsl(
B=dsl.TensorDef(dsl.T, dsl.S.K, dsl.S.N),
S=dsl.TensorDef(dsl.T, dsl.S.M, dsl.S.N),
C=dsl.TensorDef(dsl.T, dsl.S.M, dsl.S.N, output=True)):
C[dsl.D.m,
dsl.D.n] += S[dsl.D.m, dsl.D.n] * A[dsl.D.m, dsl.D.k] * B[dsl.D.k, dsl.D.n]
C[dsl.D.m,
dsl.D.n] += S[dsl.D.m, dsl.D.n] * A[dsl.D.m, dsl.D.k] * B[dsl.D.k, dsl.D.n]
def build_SDDMM(attr: st.EncodingAttr):
"""Build SDDMM kernel.
"""Build SDDMM kernel.
This method generates a linalg op for matrix multiplication using
just the Python API. Effectively, a generic linalg op is constructed
that computes C(i,j) += S(i,j) SUM_k A(i,k) B(k,j) for sparse S.
"""
module = ir.Module.create()
f64 = ir.F64Type.get()
a = ir.RankedTensorType.get([8, 8], f64)
b = ir.RankedTensorType.get([8, 8], f64)
c = ir.RankedTensorType.get([8, 8], f64)
s = ir.RankedTensorType.get([8, 8], f64, attr)
arguments = [a, b, s, c]
with ir.InsertionPoint(module.body):
module = ir.Module.create()
f64 = ir.F64Type.get()
a = ir.RankedTensorType.get([8, 8], f64)
b = ir.RankedTensorType.get([8, 8], f64)
c = ir.RankedTensorType.get([8, 8], f64)
s = ir.RankedTensorType.get([8, 8], f64, attr)
arguments = [a, b, s, c]
with ir.InsertionPoint(module.body):
@func.FuncOp.from_py_func(*arguments)
def sddmm(*args):
return sddmm_dsl(args[0], args[1], args[2], outs=[args[3]])
@func.FuncOp.from_py_func(*arguments)
def sddmm(*args):
return sddmm_dsl(args[0], args[1], args[2], outs=[args[3]])
return module
return module
def boilerplate(attr: st.EncodingAttr):
"""Returns boilerplate code for main driver."""
return f"""
func @main(%a: tensor<8x8xf64>,
"""Returns boilerplate code for main driver."""
return f"""
func.func @main(%a: tensor<8x8xf64>,
%b: tensor<8x8xf64>,
%c: tensor<8x8xf64>) -> tensor<8x8xf64> attributes {{ llvm.emit_c_interface }} {{
%t = arith.constant sparse<[[0,0], [0,2], [4,1]], [1.0, 2.0, 3.0]> : tensor<8x8xf64>
@ -69,96 +69,96 @@ func @main(%a: tensor<8x8xf64>,
def build_compile_and_run_SDDMMM(attr: st.EncodingAttr, compiler):
# Build.
module = build_SDDMM(attr)
func = str(module.operation.regions[0].blocks[0].operations[0].operation)
module = ir.Module.parse(func + boilerplate(attr))
# Build.
module = build_SDDMM(attr)
func = str(module.operation.regions[0].blocks[0].operations[0].operation)
module = ir.Module.parse(func + boilerplate(attr))
# Compile.
engine = compiler.compile_and_jit(module)
# Compile.
engine = compiler.compile_and_jit(module)
# Set up numpy input and buffer for output.
a = np.array([[1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1],
[1.2, 2.2, 3.2, 4.2, 5.2, 6.2, 7.2, 8.2],
[1.3, 2.3, 3.3, 4.3, 5.3, 6.3, 7.3, 8.3],
[1.4, 2.4, 3.4, 4.4, 5.4, 6.4, 7.4, 8.4],
[1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5],
[1.6, 2.6, 3.6, 4.6, 5.6, 6.6, 7.6, 8.6],
[1.7, 2.7, 3.7, 4.7, 5.7, 6.7, 7.7, 8.7],
[1.8, 2.8, 3.8, 4.8, 5.8, 6.8, 7.8, 8.8]], np.float64)
b = np.ones((8, 8), np.float64)
c = np.zeros((8, 8), np.float64)
# Set up numpy input and buffer for output.
a = np.array([[1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1],
[1.2, 2.2, 3.2, 4.2, 5.2, 6.2, 7.2, 8.2],
[1.3, 2.3, 3.3, 4.3, 5.3, 6.3, 7.3, 8.3],
[1.4, 2.4, 3.4, 4.4, 5.4, 6.4, 7.4, 8.4],
[1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5],
[1.6, 2.6, 3.6, 4.6, 5.6, 6.6, 7.6, 8.6],
[1.7, 2.7, 3.7, 4.7, 5.7, 6.7, 7.7, 8.7],
[1.8, 2.8, 3.8, 4.8, 5.8, 6.8, 7.8, 8.8]], np.float64)
b = np.ones((8, 8), np.float64)
c = np.zeros((8, 8), np.float64)
mem_a = ctypes.pointer(ctypes.pointer(rt.get_ranked_memref_descriptor(a)))
mem_b = ctypes.pointer(ctypes.pointer(rt.get_ranked_memref_descriptor(b)))
mem_c = ctypes.pointer(ctypes.pointer(rt.get_ranked_memref_descriptor(c)))
mem_a = ctypes.pointer(ctypes.pointer(rt.get_ranked_memref_descriptor(a)))
mem_b = ctypes.pointer(ctypes.pointer(rt.get_ranked_memref_descriptor(b)))
mem_c = ctypes.pointer(ctypes.pointer(rt.get_ranked_memref_descriptor(c)))
# Allocate a MemRefDescriptor to receive the output tensor.
# The buffer itself is allocated inside the MLIR code generation.
ref_out = rt.make_nd_memref_descriptor(2, ctypes.c_double)()
mem_out = ctypes.pointer(ctypes.pointer(ref_out))
# Allocate a MemRefDescriptor to receive the output tensor.
# The buffer itself is allocated inside the MLIR code generation.
ref_out = rt.make_nd_memref_descriptor(2, ctypes.c_double)()
mem_out = ctypes.pointer(ctypes.pointer(ref_out))
# Invoke the kernel and get numpy output.
# Built-in bufferization uses in-out buffers.
# TODO: replace with inplace comprehensive bufferization.
engine.invoke('main', mem_out, mem_a, mem_b, mem_c)
# Invoke the kernel and get numpy output.
# Built-in bufferization uses in-out buffers.
# TODO: replace with inplace comprehensive bufferization.
engine.invoke('main', mem_out, mem_a, mem_b, mem_c)
# Sanity check on computed result. Only a few elements
# are sampled from the full dense matrix multiplication.
full_matmul = np.matmul(a, b)
expected = np.zeros((8, 8), np.float64)
expected[0, 0] = 1.0 * full_matmul[0, 0]
expected[0, 2] = 2.0 * full_matmul[0, 2]
expected[4, 1] = 3.0 * full_matmul[4, 1]
c = rt.ranked_memref_to_numpy(mem_out[0])
if np.allclose(c, expected):
pass
else:
quit(f'FAILURE')
# Sanity check on computed result. Only a few elements
# are sampled from the full dense matrix multiplication.
full_matmul = np.matmul(a, b)
expected = np.zeros((8, 8), np.float64)
expected[0, 0] = 1.0 * full_matmul[0, 0]
expected[0, 2] = 2.0 * full_matmul[0, 2]
expected[4, 1] = 3.0 * full_matmul[4, 1]
c = rt.ranked_memref_to_numpy(mem_out[0])
if np.allclose(c, expected):
pass
else:
quit(f'FAILURE')
def main():
support_lib = os.getenv('SUPPORT_LIB')
assert support_lib is not None, 'SUPPORT_LIB is undefined'
if not os.path.exists(support_lib):
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
support_lib)
support_lib = os.getenv('SUPPORT_LIB')
assert support_lib is not None, 'SUPPORT_LIB is undefined'
if not os.path.exists(support_lib):
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
support_lib)
# CHECK-LABEL: TEST: testSDDMMM
print('\nTEST: testSDDMMM')
with ir.Context() as ctx, ir.Location.unknown():
count = 0
# Loop over various ways to compile and annotate the SDDMM kernel with
# a *single* sparse tensor. Note that we deliberately do not exhaustively
# search the full state space to reduce runtime of the test. It is
# straightforward to adapt the code below to explore more combinations.
levels = [[st.DimLevelType.dense, st.DimLevelType.dense],
[st.DimLevelType.dense, st.DimLevelType.compressed],
[st.DimLevelType.compressed, st.DimLevelType.dense],
[st.DimLevelType.compressed, st.DimLevelType.compressed]]
orderings = [
ir.AffineMap.get_permutation([0, 1]),
ir.AffineMap.get_permutation([1, 0])
]
for level in levels:
for ordering in orderings:
for pwidth in [32]:
for iwidth in [32]:
for par in [0]:
for vec in [0, 1]:
for e in [True]:
vl = 1 if vec == 0 else 16
attr = st.EncodingAttr.get(level, ordering, pwidth, iwidth)
opt = (f'parallelization-strategy={par} '
f'vectorization-strategy={vec} '
f'vl={vl} enable-simd-index32={e}')
compiler = sparse_compiler.SparseCompiler(
options=opt, opt_level=0, shared_libs=[support_lib])
build_compile_and_run_SDDMMM(attr, compiler)
count = count + 1
# CHECK: Passed 16 tests
print('Passed ', count, 'tests')
# CHECK-LABEL: TEST: testSDDMMM
print('\nTEST: testSDDMMM')
with ir.Context() as ctx, ir.Location.unknown():
count = 0
# Loop over various ways to compile and annotate the SDDMM kernel with
# a *single* sparse tensor. Note that we deliberately do not exhaustively
# search the full state space to reduce runtime of the test. It is
# straightforward to adapt the code below to explore more combinations.
levels = [[st.DimLevelType.dense, st.DimLevelType.dense],
[st.DimLevelType.dense, st.DimLevelType.compressed],
[st.DimLevelType.compressed, st.DimLevelType.dense],
[st.DimLevelType.compressed, st.DimLevelType.compressed]]
orderings = [
ir.AffineMap.get_permutation([0, 1]),
ir.AffineMap.get_permutation([1, 0])
]
for level in levels:
for ordering in orderings:
for pwidth in [32]:
for iwidth in [32]:
for par in [0]:
for vec in [0, 1]:
for e in [True]:
vl = 1 if vec == 0 else 16
attr = st.EncodingAttr.get(level, ordering, pwidth, iwidth)
opt = (f'parallelization-strategy={par} '
f'vectorization-strategy={vec} '
f'vl={vl} enable-simd-index32={e}')
compiler = sparse_compiler.SparseCompiler(
options=opt, opt_level=0, shared_libs=[support_lib])
build_compile_and_run_SDDMMM(attr, compiler)
count = count + 1
# CHECK: Passed 16 tests
print('Passed ', count, 'tests')
if __name__ == '__main__':
main()
main()

View File

@ -23,41 +23,41 @@ def matmul_dsl(
A=dsl.TensorDef(dsl.T, dsl.S.M, dsl.S.K),
B=dsl.TensorDef(dsl.T, dsl.S.K, dsl.S.N),
C=dsl.TensorDef(dsl.T, dsl.S.M, dsl.S.N, output=True)):
C[dsl.D.m, dsl.D.n] += A[dsl.D.m, dsl.D.k] * B[dsl.D.k, dsl.D.n]
C[dsl.D.m, dsl.D.n] += A[dsl.D.m, dsl.D.k] * B[dsl.D.k, dsl.D.n]
def build_SpMM(attr: st.EncodingAttr):
"""Build SpMM kernel.
"""Build SpMM kernel.
This method generates a linalg op for matrix multiplication using
just the Python API. Effectively, a generic linalg op is constructed
that computes C(i,j) += A(i,k) * B(k,j) for annotated matrix A.
"""
module = ir.Module.create()
f64 = ir.F64Type.get()
a = ir.RankedTensorType.get([3, 4], f64, attr)
b = ir.RankedTensorType.get([4, 2], f64)
c = ir.RankedTensorType.get([3, 2], f64)
arguments = [a, b, c]
with ir.InsertionPoint(module.body):
module = ir.Module.create()
f64 = ir.F64Type.get()
a = ir.RankedTensorType.get([3, 4], f64, attr)
b = ir.RankedTensorType.get([4, 2], f64)
c = ir.RankedTensorType.get([3, 2], f64)
arguments = [a, b, c]
with ir.InsertionPoint(module.body):
@func.FuncOp.from_py_func(*arguments)
def spMxM(*args):
return matmul_dsl(args[0], args[1], outs=[args[2]])
@func.FuncOp.from_py_func(*arguments)
def spMxM(*args):
return matmul_dsl(args[0], args[1], outs=[args[2]])
return module
return module
def boilerplate(attr: st.EncodingAttr):
"""Returns boilerplate main method.
"""Returns boilerplate main method.
This method sets up a boilerplate main method that takes three tensors
(a, b, c), converts the first tensor a into a sparse tensor, and then
calls the sparse kernel for matrix multiplication. For convenience,
this part is purely done as string input.
"""
return f"""
func @main(%ad: tensor<3x4xf64>, %b: tensor<4x2xf64>, %c: tensor<3x2xf64>) -> tensor<3x2xf64>
return f"""
func.func @main(%ad: tensor<3x4xf64>, %b: tensor<4x2xf64>, %c: tensor<3x2xf64>) -> tensor<3x2xf64>
attributes {{ llvm.emit_c_interface }} {{
%a = sparse_tensor.convert %ad : tensor<3x4xf64> to tensor<3x4xf64, {attr}>
%0 = call @spMxM(%a, %b, %c) : (tensor<3x4xf64, {attr}>,
@ -69,85 +69,85 @@ func @main(%ad: tensor<3x4xf64>, %b: tensor<4x2xf64>, %c: tensor<3x2xf64>) -> te
def build_compile_and_run_SpMM(attr: st.EncodingAttr, compiler):
# Build.
module = build_SpMM(attr)
func = str(module.operation.regions[0].blocks[0].operations[0].operation)
module = ir.Module.parse(func + boilerplate(attr))
# Build.
module = build_SpMM(attr)
func = str(module.operation.regions[0].blocks[0].operations[0].operation)
module = ir.Module.parse(func + boilerplate(attr))
# Compile.
engine = compiler.compile_and_jit(module)
# Compile.
engine = compiler.compile_and_jit(module)
# Set up numpy input and buffer for output.
a = np.array(
[[1.1, 0.0, 0.0, 1.4], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 3.3, 0.0]],
np.float64)
b = np.array([[1.0, 2.0], [4.0, 3.0], [5.0, 6.0], [8.0, 7.0]], np.float64)
c = np.zeros((3, 2), np.float64)
# Set up numpy input and buffer for output.
a = np.array(
[[1.1, 0.0, 0.0, 1.4], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 3.3, 0.0]],
np.float64)
b = np.array([[1.0, 2.0], [4.0, 3.0], [5.0, 6.0], [8.0, 7.0]], np.float64)
c = np.zeros((3, 2), np.float64)
mem_a = ctypes.pointer(ctypes.pointer(rt.get_ranked_memref_descriptor(a)))
mem_b = ctypes.pointer(ctypes.pointer(rt.get_ranked_memref_descriptor(b)))
mem_c = ctypes.pointer(ctypes.pointer(rt.get_ranked_memref_descriptor(c)))
# Allocate a MemRefDescriptor to receive the output tensor.
# The buffer itself is allocated inside the MLIR code generation.
ref_out = rt.make_nd_memref_descriptor(2, ctypes.c_double)()
mem_out = ctypes.pointer(ctypes.pointer(ref_out))
mem_a = ctypes.pointer(ctypes.pointer(rt.get_ranked_memref_descriptor(a)))
mem_b = ctypes.pointer(ctypes.pointer(rt.get_ranked_memref_descriptor(b)))
mem_c = ctypes.pointer(ctypes.pointer(rt.get_ranked_memref_descriptor(c)))
# Allocate a MemRefDescriptor to receive the output tensor.
# The buffer itself is allocated inside the MLIR code generation.
ref_out = rt.make_nd_memref_descriptor(2, ctypes.c_double)()
mem_out = ctypes.pointer(ctypes.pointer(ref_out))
# Invoke the kernel and get numpy output.
# Built-in bufferization uses in-out buffers.
# TODO: replace with inplace comprehensive bufferization.
engine.invoke('main', mem_out, mem_a, mem_b, mem_c)
# Invoke the kernel and get numpy output.
# Built-in bufferization uses in-out buffers.
# TODO: replace with inplace comprehensive bufferization.
engine.invoke('main', mem_out, mem_a, mem_b, mem_c)
# Sanity check on computed result.
expected = np.matmul(a, b);
c = rt.ranked_memref_to_numpy(mem_out[0])
if np.allclose(c, expected):
pass
else:
quit(f'FAILURE')
# Sanity check on computed result.
expected = np.matmul(a, b);
c = rt.ranked_memref_to_numpy(mem_out[0])
if np.allclose(c, expected):
pass
else:
quit(f'FAILURE')
def main():
support_lib = os.getenv('SUPPORT_LIB')
assert support_lib is not None, 'SUPPORT_LIB is undefined'
if not os.path.exists(support_lib):
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), support_lib)
support_lib = os.getenv('SUPPORT_LIB')
assert support_lib is not None, 'SUPPORT_LIB is undefined'
if not os.path.exists(support_lib):
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), support_lib)
# CHECK-LABEL: TEST: testSpMM
print('\nTEST: testSpMM')
with ir.Context() as ctx, ir.Location.unknown():
count = 0
# Loop over various ways to compile and annotate the SpMM kernel with
# a *single* sparse tensor. Note that we deliberately do not exhaustively
# search the full state space to reduce runtime of the test. It is
# straightforward to adapt the code below to explore more combinations.
par = 0
vec = 0
vl = 1
e = False
opt = (f'parallelization-strategy={par} '
f'vectorization-strategy={vec} '
f'vl={vl} enable-simd-index32={e}')
levels = [[st.DimLevelType.dense, st.DimLevelType.dense],
[st.DimLevelType.dense, st.DimLevelType.compressed],
[st.DimLevelType.compressed, st.DimLevelType.dense],
[st.DimLevelType.compressed, st.DimLevelType.compressed]]
orderings = [
ir.AffineMap.get_permutation([0, 1]),
ir.AffineMap.get_permutation([1, 0])
]
bitwidths = [0]
compiler = sparse_compiler.SparseCompiler(
options=opt, opt_level=0, shared_libs=[support_lib])
for level in levels:
for ordering in orderings:
for pwidth in bitwidths:
for iwidth in bitwidths:
attr = st.EncodingAttr.get(level, ordering, pwidth, iwidth)
build_compile_and_run_SpMM(attr, compiler)
count = count + 1
# CHECK: Passed 8 tests
print('Passed ', count, 'tests')
# CHECK-LABEL: TEST: testSpMM
print('\nTEST: testSpMM')
with ir.Context() as ctx, ir.Location.unknown():
count = 0
# Loop over various ways to compile and annotate the SpMM kernel with
# a *single* sparse tensor. Note that we deliberately do not exhaustively
# search the full state space to reduce runtime of the test. It is
# straightforward to adapt the code below to explore more combinations.
par = 0
vec = 0
vl = 1
e = False
opt = (f'parallelization-strategy={par} '
f'vectorization-strategy={vec} '
f'vl={vl} enable-simd-index32={e}')
levels = [[st.DimLevelType.dense, st.DimLevelType.dense],
[st.DimLevelType.dense, st.DimLevelType.compressed],
[st.DimLevelType.compressed, st.DimLevelType.dense],
[st.DimLevelType.compressed, st.DimLevelType.compressed]]
orderings = [
ir.AffineMap.get_permutation([0, 1]),
ir.AffineMap.get_permutation([1, 0])
]
bitwidths = [0]
compiler = sparse_compiler.SparseCompiler(
options=opt, opt_level=0, shared_libs=[support_lib])
for level in levels:
for ordering in orderings:
for pwidth in bitwidths:
for iwidth in bitwidths:
attr = st.EncodingAttr.get(level, ordering, pwidth, iwidth)
build_compile_and_run_SpMM(attr, compiler)
count = count + 1
# CHECK: Passed 8 tests
print('Passed ', count, 'tests')
if __name__ == '__main__':
main()
main()

View File

@ -33,7 +33,7 @@ _KERNEL_STR = """
doc = "X(i,j) = A(i,j) + B(i,j)"
}
func @sparse_add_elt(
func.func @sparse_add_elt(
%arga: tensor<3x4xf64, #DCSR>, %argb: tensor<3x4xf64, #DCSR>) -> tensor<3x4xf64, #DCSR> {
%c3 = arith.constant 3 : index
%c4 = arith.constant 4 : index
@ -48,7 +48,7 @@ func @sparse_add_elt(
return %0 : tensor<3x4xf64, #DCSR>
}
func @main(%ad: tensor<3x4xf64>, %bd: tensor<3x4xf64>) -> tensor<3x4xf64, #DCSR>
func.func @main(%ad: tensor<3x4xf64>, %bd: tensor<3x4xf64>) -> tensor<3x4xf64, #DCSR>
attributes { llvm.emit_c_interface } {
%a = sparse_tensor.convert %ad : tensor<3x4xf64> to tensor<3x4xf64, #DCSR>
%b = sparse_tensor.convert %bd : tensor<3x4xf64> to tensor<3x4xf64, #DCSR>

View File

@ -18,9 +18,9 @@ from tools import sparse_compiler
# TODO: move more into actual IR building.
def boilerplate(attr: st.EncodingAttr):
"""Returns boilerplate main method."""
return f"""
func @main(%p : !llvm.ptr<i8>) -> () attributes {{ llvm.emit_c_interface }} {{
"""Returns boilerplate main method."""
return f"""
func.func @main(%p : !llvm.ptr<i8>) -> () attributes {{ llvm.emit_c_interface }} {{
%d = arith.constant sparse<[[0, 0], [1, 1], [0, 9], [9, 0], [4, 4]],
[1.0, 2.0, 3.0, 4.0, 5.0]> : tensor<10x10xf64>
%a = sparse_tensor.convert %d : tensor<10x10xf64> to tensor<10x10xf64, {attr}>
@ -31,13 +31,13 @@ func @main(%p : !llvm.ptr<i8>) -> () attributes {{ llvm.emit_c_interface }} {{
def expected():
"""Returns expected contents of output.
"""Returns expected contents of output.
Regardless of the dimension ordering, compression, and bitwidths that are
used in the sparse tensor, the output is always lexicographically sorted
by natural index order.
"""
return f"""; extended FROSTT format
return f"""; extended FROSTT format
2 5
10 10
1 1 1
@ -49,53 +49,53 @@ def expected():
def build_compile_and_run_output(attr: st.EncodingAttr, compiler):
# Build and Compile.
module = ir.Module.parse(boilerplate(attr))
engine = compiler.compile_and_jit(module)
# Build and Compile.
module = ir.Module.parse(boilerplate(attr))
engine = compiler.compile_and_jit(module)
# Invoke the kernel and compare output.
with tempfile.TemporaryDirectory() as test_dir:
out = os.path.join(test_dir, 'out.tns')
buf = out.encode('utf-8')
mem_a = ctypes.pointer(ctypes.pointer(ctypes.create_string_buffer(buf)))
engine.invoke('main', mem_a)
# Invoke the kernel and compare output.
with tempfile.TemporaryDirectory() as test_dir:
out = os.path.join(test_dir, 'out.tns')
buf = out.encode('utf-8')
mem_a = ctypes.pointer(ctypes.pointer(ctypes.create_string_buffer(buf)))
engine.invoke('main', mem_a)
actual = open(out).read()
if actual != expected():
quit('FAILURE')
actual = open(out).read()
if actual != expected():
quit('FAILURE')
def main():
support_lib = os.getenv('SUPPORT_LIB')
assert support_lib is not None, 'SUPPORT_LIB is undefined'
if not os.path.exists(support_lib):
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
support_lib)
support_lib = os.getenv('SUPPORT_LIB')
assert support_lib is not None, 'SUPPORT_LIB is undefined'
if not os.path.exists(support_lib):
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
support_lib)
# CHECK-LABEL: TEST: test_output
print('\nTEST: test_output')
count = 0
with ir.Context() as ctx, ir.Location.unknown():
# Loop over various sparse types: CSR, DCSR, CSC, DCSC.
levels = [[st.DimLevelType.dense, st.DimLevelType.compressed],
[st.DimLevelType.compressed, st.DimLevelType.compressed]]
orderings = [
ir.AffineMap.get_permutation([0, 1]),
ir.AffineMap.get_permutation([1, 0])
]
bitwidths = [8, 16, 32, 64]
compiler = sparse_compiler.SparseCompiler(
options='', opt_level=2, shared_libs=[support_lib])
for level in levels:
for ordering in orderings:
for bwidth in bitwidths:
attr = st.EncodingAttr.get(level, ordering, bwidth, bwidth)
build_compile_and_run_output(attr, compiler)
count = count + 1
# CHECK-LABEL: TEST: test_output
print('\nTEST: test_output')
count = 0
with ir.Context() as ctx, ir.Location.unknown():
# Loop over various sparse types: CSR, DCSR, CSC, DCSC.
levels = [[st.DimLevelType.dense, st.DimLevelType.compressed],
[st.DimLevelType.compressed, st.DimLevelType.compressed]]
orderings = [
ir.AffineMap.get_permutation([0, 1]),
ir.AffineMap.get_permutation([1, 0])
]
bitwidths = [8, 16, 32, 64]
compiler = sparse_compiler.SparseCompiler(
options='', opt_level=2, shared_libs=[support_lib])
for level in levels:
for ordering in orderings:
for bwidth in bitwidths:
attr = st.EncodingAttr.get(level, ordering, bwidth, bwidth)
build_compile_and_run_output(attr, compiler)
count = count + 1
# CHECK: Passed 16 tests
print('Passed', count, 'tests')
# CHECK: Passed 16 tests
print('Passed', count, 'tests')
if __name__ == '__main__':
main()
main()

View File

@ -37,29 +37,29 @@ _ENTRY_NAME = "main"
@functools.lru_cache()
def _get_support_lib_name() -> str:
"""Gets the string name for the supporting C shared library."""
return os.getenv(_SUPPORTLIB_ENV_VAR, _DEFAULT_SUPPORTLIB)
"""Gets the string name for the supporting C shared library."""
return os.getenv(_SUPPORTLIB_ENV_VAR, _DEFAULT_SUPPORTLIB)
@functools.lru_cache()
def _get_sparse_compiler() -> mlir_sparse_compiler.SparseCompiler:
"""Gets the MLIR sparse compiler with default setting."""
return mlir_sparse_compiler.SparseCompiler(
options="", opt_level=_OPT_LEVEL, shared_libs=[_get_support_lib_name()])
"""Gets the MLIR sparse compiler with default setting."""
return mlir_sparse_compiler.SparseCompiler(
options="", opt_level=_OPT_LEVEL, shared_libs=[_get_support_lib_name()])
def _record_support_funcs(
ty: np.dtype, to_func: _SupportFunc, from_func: _SupportFunc,
ty_to_funcs: Dict[np.dtype, Tuple[_SupportFunc, _SupportFunc]]) -> None:
"""Records the two supporting functions for a given data type."""
to_func.restype = ctypes.c_void_p
from_func.restype = ctypes.c_void_p
ty_to_funcs[ty] = (to_func, from_func)
"""Records the two supporting functions for a given data type."""
to_func.restype = ctypes.c_void_p
from_func.restype = ctypes.c_void_p
ty_to_funcs[ty] = (to_func, from_func)
@functools.lru_cache()
def _get_support_func_locator() -> _SupportFuncLocator:
"""Constructs a function to locate the supporting functions for a data type.
"""Constructs a function to locate the supporting functions for a data type.
Loads the supporting C shared library with the needed routines. Constructs a
dictionary from the supported data types to the routines for the data types,
@ -75,36 +75,36 @@ def _get_support_func_locator() -> _SupportFuncLocator:
OSError: If there is any problem in loading the shared library.
ValueError: If the shared library doesn't contain the needed routines.
"""
# This raises OSError exception if there is any problem in loading the shared
# library.
c_lib = ctypes.CDLL(_get_support_lib_name())
# This raises OSError exception if there is any problem in loading the shared
# library.
c_lib = ctypes.CDLL(_get_support_lib_name())
type_to_funcs = {}
try:
_record_support_funcs(np.float32, c_lib.convertToMLIRSparseTensorF32,
c_lib.convertFromMLIRSparseTensorF32, type_to_funcs)
except Exception as e:
raise ValueError(f"Missing supporting function: {e}") from e
type_to_funcs = {}
try:
_record_support_funcs(np.float32, c_lib.convertToMLIRSparseTensorF32,
c_lib.convertFromMLIRSparseTensorF32, type_to_funcs)
except Exception as e:
raise ValueError(f"Missing supporting function: {e}") from e
try:
_record_support_funcs(np.float64, c_lib.convertToMLIRSparseTensorF64,
c_lib.convertFromMLIRSparseTensorF64, type_to_funcs)
except Exception as e:
raise ValueError(f"Missing supporting function: {e}") from e
try:
_record_support_funcs(np.float64, c_lib.convertToMLIRSparseTensorF64,
c_lib.convertFromMLIRSparseTensorF64, type_to_funcs)
except Exception as e:
raise ValueError(f"Missing supporting function: {e}") from e
def get_support_funcs(ty: np.dtype):
funcs = type_to_funcs[ty]
assert funcs is not None
return funcs
def get_support_funcs(ty: np.dtype):
funcs = type_to_funcs[ty]
assert funcs is not None
return funcs
return get_support_funcs
return get_support_funcs
def sparse_tensor_to_coo_tensor(
sparse_tensor: ctypes.c_void_p,
dtype: np.dtype,
) -> Tuple[int, int, np.ndarray, np.ndarray, np.ndarray]:
"""Converts an MLIR sparse tensor to a COO-flavored format tensor.
"""Converts an MLIR sparse tensor to a COO-flavored format tensor.
Args:
sparse_tensor: A ctypes.c_void_p to the MLIR sparse tensor descriptor.
@ -124,26 +124,26 @@ def sparse_tensor_to_coo_tensor(
OSError: If there is any problem in loading the shared library.
ValueError: If the shared library doesn't contain the needed routines.
"""
convert_from = _get_support_func_locator()(dtype)[1]
rank = ctypes.c_ulonglong(0)
nse = ctypes.c_ulonglong(0)
shape = ctypes.POINTER(ctypes.c_ulonglong)()
values = ctypes.POINTER(np.ctypeslib.as_ctypes_type(dtype))()
indices = ctypes.POINTER(ctypes.c_ulonglong)()
convert_from(sparse_tensor, ctypes.byref(rank), ctypes.byref(nse),
ctypes.byref(shape), ctypes.byref(values), ctypes.byref(indices))
convert_from = _get_support_func_locator()(dtype)[1]
rank = ctypes.c_ulonglong(0)
nse = ctypes.c_ulonglong(0)
shape = ctypes.POINTER(ctypes.c_ulonglong)()
values = ctypes.POINTER(np.ctypeslib.as_ctypes_type(dtype))()
indices = ctypes.POINTER(ctypes.c_ulonglong)()
convert_from(sparse_tensor, ctypes.byref(rank), ctypes.byref(nse),
ctypes.byref(shape), ctypes.byref(values), ctypes.byref(indices))
# Convert the returned values to the corresponding numpy types.
shape = np.ctypeslib.as_array(shape, shape=[rank.value])
values = np.ctypeslib.as_array(values, shape=[nse.value])
indices = np.ctypeslib.as_array(indices, shape=[nse.value, rank.value])
return rank.value, nse.value, shape, values, indices
def coo_tensor_to_sparse_tensor(np_shape: np.ndarray, np_values: np.ndarray,
np_indices: np.ndarray, np_perm: np.ndarray,
np_sparse: np.ndarray) -> int:
"""Converts a COO-flavored format sparse tensor to an MLIR sparse tensor.
"""Converts a COO-flavored format sparse tensor to an MLIR sparse tensor.
Args:
np_shape: A 1D numpy array of integers, for the shape of the tensor.
@ -164,26 +164,26 @@ def coo_tensor_to_sparse_tensor(np_shape: np.ndarray, np_values: np.ndarray,
ValueError: If the shared library doesn't contain the needed routines.
"""
r = len(np_shape)
rank = ctypes.c_ulonglong(r)
nse = ctypes.c_ulonglong(len(np_values))
shape = np_shape.ctypes.data_as(ctypes.POINTER(ctypes.c_ulonglong))
values = np_values.ctypes.data_as(
ctypes.POINTER(np.ctypeslib.as_ctypes_type(np_values.dtype)))
indices = np_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_ulonglong))
perm = np_perm.ctypes.data_as(ctypes.POINTER(ctypes.c_ulonglong))
sparse = np_sparse.ctypes.data_as(ctypes.POINTER(ctypes.c_uint8))
convert_to = _get_support_func_locator()(np_values.dtype.type)[0]
ptr = convert_to(rank, nse, shape, values, indices, perm, sparse)
assert ptr is not None, "Problem with calling convertToMLIRSparseTensorF64"
return ptr
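For reference, a minimal round-trip sketch of how these two converters are typically combined (assuming this file is importable as `mlir_pytaco_utils` and that the sparse-tensor support library is discoverable; the module name and the data below are illustrative):

```python
import ctypes

import numpy as np

# Hypothetical import path; the module name is an assumption for illustration.
import mlir_pytaco_utils as utils

# A 2x3 tensor with two nonzeros, in COO form (uint64 shape/indices, as the
# C interface expects).
shape = np.array([2, 3], dtype=np.uint64)
values = np.array([1.0, 2.0], dtype=np.float64)
indices = np.array([[0, 0], [1, 2]], dtype=np.uint64)
perm = np.array([0, 1], dtype=np.uint64)    # identity dimension ordering
sparse = np.array([0, 1], dtype=np.uint8)   # dense outer dim, compressed inner dim

ptr = utils.coo_tensor_to_sparse_tensor(shape, values, indices, perm, sparse)
rank, nse, shape2, values2, indices2 = utils.sparse_tensor_to_coo_tensor(
    ctypes.c_void_p(ptr), np.float64)
assert rank == 2 and nse == 2
```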
def compile_and_build_engine(
module: ir.Module) -> execution_engine.ExecutionEngine:
"""Compiles an MLIR module and builds a JIT execution engine.
"""Compiles an MLIR module and builds a JIT execution engine.
Args:
module: The MLIR module.
@ -192,22 +192,22 @@ def compile_and_build_engine(
A JIT execution engine for the MLIR module.
"""
return _get_sparse_compiler().compile_and_jit(module)
class _SparseTensorDescriptor(ctypes.Structure):
"""A C structure for an MLIR sparse tensor."""
_fields_ = [
# A pointer for the MLIR sparse tensor storage.
("storage", ctypes.POINTER(ctypes.c_ulonglong)),
# An MLIR MemRef descriptor for the shape of the sparse tensor.
("shape", runtime.make_nd_memref_descriptor(1, ctypes.c_ulonglong)),
]
"""A C structure for an MLIR sparse tensor."""
_fields_ = [
# A pointer for the MLIR sparse tensor storage.
("storage", ctypes.POINTER(ctypes.c_ulonglong)),
# An MLIR MemRef descriptor for the shape of the sparse tensor.
("shape", runtime.make_nd_memref_descriptor(1, ctypes.c_ulonglong)),
]
def _output_one_dim(dim: int, rank: int, shape: str, type: str) -> str:
"""Produces the MLIR text code to output the size for the given dimension."""
return f"""
"""Produces the MLIR text code to output the size for the given dimension."""
return f"""
%c{dim} = arith.constant {dim} : index
%d{dim} = tensor.dim %t, %c{dim} : tensor<{shape}x{type}, #enc>
memref.store %d{dim}, %b[%c{dim}] : memref<{rank}xindex>
@ -222,30 +222,30 @@ def _output_one_dim(dim: int, rank: int, shape: str, type: str) -> str:
# when tensor.dim supports non-constant dimension value.
def _get_create_sparse_tensor_kernel(
sparsity_codes: Sequence[sparse_tensor.DimLevelType], type: str) -> str:
"""Creates an MLIR text kernel to contruct a sparse tensor from a file.
"""Creates an MLIR text kernel to contruct a sparse tensor from a file.
The kernel returns a _SparseTensorDescriptor structure.
"""
rank = len(sparsity_codes)
# Use ? to represent a dimension in the dynamic shape string representation.
shape = "x".join(map(lambda d: "?", range(rank)))
# Convert the encoded sparsity values to a string representation.
sparsity = ", ".join(
map(lambda s: '"compressed"' if s.value else '"dense"', sparsity_codes))
# Get the MLIR text code to write the dimension sizes to the output buffer.
output_dims = "\n".join(
map(lambda d: _output_one_dim(d, rank, shape, type), range(rank)))
# Return the MLIR text kernel.
return f"""
!Ptr = type !llvm.ptr<i8>
#enc = #sparse_tensor.encoding<{{
dimLevelType = [ {sparsity} ]
}}>
func.func @{_ENTRY_NAME}(%filename: !Ptr) -> (tensor<{shape}x{type}, #enc>, memref<{rank}xindex>)
attributes {{ llvm.emit_c_interface }} {{
%t = sparse_tensor.new %filename : !Ptr to tensor<{shape}x{type}, #enc>
%b = memref.alloc() : memref<{rank}xindex>
@ -257,7 +257,7 @@ attributes {{ llvm.emit_c_interface }} {{
def create_sparse_tensor(filename: str,
sparsity: Sequence[sparse_tensor.DimLevelType],
type: str) -> Tuple[ctypes.c_void_p, np.ndarray]:
"""Creates an MLIR sparse tensor from the input file.
"""Creates an MLIR sparse tensor from the input file.
Args:
filename: A string for the name of the file that contains the tensor data in
@ -274,52 +274,54 @@ def create_sparse_tensor(filename: str,
OSError: If there is any problem in loading the supporting C shared library.
ValueError: If the shared library doesn't contain the needed routine.
"""
with ir.Context() as ctx, ir.Location.unknown():
module = _get_create_sparse_tensor_kernel(sparsity, type)
module = ir.Module.parse(module)
engine = compile_and_build_engine(module)
# A sparse tensor descriptor to receive the kernel result.
c_tensor_desc = _SparseTensorDescriptor()
# Convert the filename to a byte stream.
c_filename = ctypes.c_char_p(bytes(filename, "utf-8"))
arg_pointers = [
ctypes.byref(ctypes.pointer(c_tensor_desc)),
ctypes.byref(c_filename)
]
# Invoke the execution engine to run the module and return the result.
engine.invoke(_ENTRY_NAME, *arg_pointers)
shape = runtime.ranked_memref_to_numpy(ctypes.pointer(c_tensor_desc.shape))
return c_tensor_desc.storage, shape
# TODO: With better support from MLIR, we may improve the current implementation
# by using Python code to generate the kernel instead of doing MLIR text code
# stitching.
def _get_output_sparse_tensor_kernel(
sparsity_codes: Sequence[sparse_tensor.DimLevelType],
type: str) -> str:
"""Creates an MLIR text kernel to output a sparse tensor to a file.
The kernel returns void.
"""
rank = len(sparsity_codes)
# Use ? to represent a dimension in the dynamic shape string representation.
shape = "x".join(map(lambda d: "?", range(rank)))
# Convert the encoded sparsity values to a string representation.
sparsity = ", ".join(
map(lambda s: '"compressed"'
if s.value else '"dense"', sparsity_codes))
# Return the MLIR text kernel.
return f"""
!Ptr = type !llvm.ptr<i8>
#enc = #sparse_tensor.encoding<{{
dimLevelType = [ {sparsity} ]
}}>
func.func @{_ENTRY_NAME}(%t: tensor<{shape}x{type}, #enc>, %filename: !Ptr)
attributes {{ llvm.emit_c_interface }} {{
sparse_tensor.out %t, %filename : tensor<{shape}x{type}, #enc>, !Ptr
func.return
@ -329,7 +331,7 @@ attributes {{ llvm.emit_c_interface }} {{
def output_sparse_tensor(tensor: ctypes.c_void_p, filename: str,
sparsity: Sequence[sparse_tensor.DimLevelType],
type: str) -> None:
"""Outputs an MLIR sparse tensor to the given file.
"""Outputs an MLIR sparse tensor to the given file.
Args:
tensor: A C pointer to the MLIR sparse tensor.
@ -343,18 +345,18 @@ def output_sparse_tensor(tensor: ctypes.c_void_p, filename: str,
OSError: If there is any problem in loading the supporting C shared library.
ValueError: If the shared library doesn't contain the needed routine.
"""
with ir.Context() as ctx, ir.Location.unknown():
module = _get_output_sparse_tensor_kernel(sparsity, type)
module = ir.Module.parse(module)
engine = compile_and_build_engine(module)
# Convert the filename to a byte stream.
c_filename = ctypes.c_char_p(bytes(filename, "utf-8"))
arg_pointers = [
ctypes.byref(ctypes.cast(tensor, ctypes.c_void_p)),
ctypes.byref(c_filename)
]
# Invoke the execution engine to run the module and return the result.
engine.invoke(_ENTRY_NAME, *arg_pointers)
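A rough end-to-end usage sketch of the two file-oriented helpers above (the file names are illustrative, `DimLevelType` comes from the sparse_tensor Python bindings, and the import path of this utility module is an assumption):

```python
# Hypothetical import path; the module name is an assumption for illustration.
import mlir_pytaco_utils as utils
from mlir.dialects import sparse_tensor as st

# Read a tensor with a dense outer dimension and a compressed inner dimension.
sparsity = [st.DimLevelType.dense, st.DimLevelType.compressed]
tensor, shape = utils.create_sparse_tensor("data.mtx", sparsity, "f64")
print("dims:", shape)

# Write the same tensor back out to a file.
utils.output_sparse_tensor(tensor, "data_out.tns", sparsity, "f64")
```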

View File

@ -17,7 +17,7 @@ def testSubViewAccessors():
ctx = Context()
module = Module.parse(
r"""
func.func @f1(%arg0: memref<?x?xf32>) {
%0 = arith.constant 0 : index
%1 = arith.constant 1 : index
%2 = arith.constant 2 : index
@ -59,7 +59,7 @@ def testSubViewAccessors():
def testCustomBuidlers():
with Context() as ctx, Location.unknown(ctx):
module = Module.parse(r"""
func @f1(%arg0: memref<?x?xf32>, %arg1: index, %arg2: index) {
func.func @f1(%arg0: memref<?x?xf32>, %arg1: index, %arg2: index) {
return
}
""")

View File

@ -49,7 +49,7 @@ def testInvalidModule():
with Context():
# Builtin function
module = Module.parse(r"""
func.func @foo() { return }
""")
# CHECK: Got RuntimeError: Failure while creating the ExecutionEngine.
try:
@ -74,7 +74,7 @@ def lowerToLLVM(module):
def testInvokeVoid():
with Context():
module = Module.parse(r"""
func.func @void() attributes { llvm.emit_c_interface } {
return
}
""")
@ -91,7 +91,7 @@ run(testInvokeVoid)
def testInvokeFloatAdd():
with Context():
module = Module.parse(r"""
func.func @add(%arg0: f32, %arg1: f32) -> f32 attributes { llvm.emit_c_interface } {
%add = arith.addf %arg0, %arg1 : f32
return %add : f32
}
@ -122,11 +122,11 @@ def testBasicCallback():
with Context():
# The module just forwards to a runtime function known as "some_callback_into_python".
module = Module.parse(r"""
func.func @add(%arg0: f32, %arg1: i32) -> f32 attributes { llvm.emit_c_interface } {
%resf = call @some_callback_into_python(%arg0, %arg1) : (f32, i32) -> (f32)
return %resf : f32
}
func.func private @some_callback_into_python(f32, i32) -> f32 attributes { llvm.emit_c_interface }
""")
execution_engine = ExecutionEngine(lowerToLLVM(module))
execution_engine.register_runtime("some_callback_into_python", callback)
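The `callback` registered here is an ordinary ctypes function pointer whose definition is elided from this hunk; a minimal sketch matching the `(f32, i32) -> f32` signature of `@some_callback_into_python` might look like:

```python
import ctypes

# Hedged sketch: a Python callback with the (f32, i32) -> f32 signature expected
# by the module above.
@ctypes.CFUNCTYPE(ctypes.c_float, ctypes.c_float, ctypes.c_int32)
def callback(a, b):
  return a * 2 + b
```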
@ -159,11 +159,11 @@ def testUnrankedMemRefCallback():
with Context():
# The module just forwards to a runtime function known as "some_callback_into_python".
module = Module.parse(r"""
func.func @callback_memref(%arg0: memref<*xf32>) attributes { llvm.emit_c_interface } {
call @some_callback_into_python(%arg0) : (memref<*xf32>) -> ()
return
}
func.func private @some_callback_into_python(memref<*xf32>) -> () attributes { llvm.emit_c_interface }
""")
execution_engine = ExecutionEngine(lowerToLLVM(module))
execution_engine.register_runtime("some_callback_into_python", callback)
@ -210,11 +210,11 @@ def testRankedMemRefCallback():
with Context():
# The module just forwards to a runtime function known as "some_callback_into_python".
module = Module.parse(r"""
func.func @callback_memref(%arg0: memref<2x2xf32>) attributes { llvm.emit_c_interface } {
call @some_callback_into_python(%arg0) : (memref<2x2xf32>) -> ()
return
}
func.func private @some_callback_into_python(memref<2x2xf32>) -> () attributes { llvm.emit_c_interface }
""")
execution_engine = ExecutionEngine(lowerToLLVM(module))
execution_engine.register_runtime("some_callback_into_python", callback)
@ -235,8 +235,8 @@ run(testRankedMemRefCallback)
def testMemrefAdd():
with Context():
module = Module.parse("""
module {
func.func @main(%arg0: memref<1xf32>, %arg1: memref<f32>, %arg2: memref<1xf32>) attributes { llvm.emit_c_interface } {
%0 = arith.constant 0 : index
%1 = memref.load %arg0[%0] : memref<1xf32>
%2 = memref.load %arg1[] : memref<f32>
@ -244,7 +244,7 @@ def testMemrefAdd():
memref.store %3, %arg2[%0] : memref<1xf32>
return
}
} """)
} """)
arg1 = np.array([32.5]).astype(np.float32)
arg2 = np.array(6).astype(np.float32)
res = np.array([0]).astype(np.float32)
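The invocation itself is elided from this hunk; roughly, each numpy array is wrapped in a ranked memref descriptor and passed as a pointer-to-pointer (a sketch, assuming `mlir.runtime` provides `get_ranked_memref_descriptor` as used elsewhere in these tests, and that `execution_engine` was built from the lowered module as in the earlier tests):

```python
import ctypes

from mlir.runtime import get_ranked_memref_descriptor

# Hedged sketch: build descriptor pointers for the three arrays defined above
# and invoke the compiled @main through its C interface.
arg1_ptr = ctypes.pointer(ctypes.pointer(get_ranked_memref_descriptor(arg1)))
arg2_ptr = ctypes.pointer(ctypes.pointer(get_ranked_memref_descriptor(arg2)))
res_ptr = ctypes.pointer(ctypes.pointer(get_ranked_memref_descriptor(res)))
execution_engine.invoke("main", arg1_ptr, arg2_ptr, res_ptr)
```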
@ -272,7 +272,7 @@ def testDynamicMemrefAdd2D():
with Context():
module = Module.parse("""
module {
func.func @memref_add_2d(%arg0: memref<2x2xf32>, %arg1: memref<?x?xf32>, %arg2: memref<2x2xf32>) attributes {llvm.emit_c_interface} {
%c0 = arith.constant 0 : index
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
@ -330,7 +330,7 @@ def testSharedLibLoad():
with Context():
module = Module.parse("""
module {
func.func @main(%arg0: memref<1xf32>) attributes { llvm.emit_c_interface } {
%c0 = arith.constant 0 : index
%cst42 = arith.constant 42.0 : f32
memref.store %cst42, %arg0[%c0] : memref<1xf32>
@ -338,7 +338,7 @@ def testSharedLibLoad():
call @print_memref_f32(%u_memref) : (memref<*xf32>) -> ()
return
}
func.func private @print_memref_f32(memref<*xf32>) attributes { llvm.emit_c_interface }
} """)
arg0 = np.array([0.0]).astype(np.float32)
@ -366,7 +366,7 @@ def testNanoTime():
with Context():
module = Module.parse("""
module {
func.func @main() attributes { llvm.emit_c_interface } {
%now = call @nano_time() : () -> i64
%memref = memref.alloca() : memref<1xi64>
%c0 = arith.constant 0 : index
@ -375,8 +375,8 @@ def testNanoTime():
call @print_memref_i64(%u_memref) : (memref<*xi64>) -> ()
return
}
func.func private @nano_time() -> i64 attributes { llvm.emit_c_interface }
func.func private @print_memref_i64(memref<*xi64>) attributes { llvm.emit_c_interface }
}""")
execution_engine = ExecutionEngine(

View File

@ -20,7 +20,7 @@ def log(*args):
elemwise_boiler = """
func.func @main() -> f32 attributes {llvm.emit_c_interface} {
%v0 = arith.constant 0.0 : f32
%v1 = arith.constant 1.0 : f32
%v2 = arith.constant 2.0 : f32
@ -51,7 +51,7 @@ func @main() -> f32 attributes {llvm.emit_c_interface} {
"""
matmul_boiler = """
func.func @main() -> f32 attributes {llvm.emit_c_interface} {
%v0 = arith.constant 0.0 : f32
%v1 = arith.constant -1 : i8
%v2 = arith.constant 2.0 : f32
@ -82,7 +82,7 @@ func @main() -> f32 attributes {llvm.emit_c_interface} {
"""
fill_boiler = """
func.func @main() -> i32 attributes {llvm.emit_c_interface} {
%O0 = memref.alloc() : memref<i32>
%O1 = memref.alloc() : memref<16xi32>
%O2 = memref.alloc() : memref<4x16xi32>
@ -111,7 +111,7 @@ func @main() -> i32 attributes {llvm.emit_c_interface} {
"""
fill_rng_boiler = """
func.func @main() -> i32 attributes {llvm.emit_c_interface} {
%O = memref.alloc() : memref<4x16xi32>
%min = arith.constant -1000.0 : f64
%max = arith.constant 1000.0 : f64
@ -129,7 +129,7 @@ func @main() -> i32 attributes {llvm.emit_c_interface} {
"""
conv_boiler = """
func.func @main() -> i32 attributes {llvm.emit_c_interface} {
%v0 = arith.constant 0 : i32
%v1 = arith.constant 1.0 : f64
%v2 = arith.constant 2.0 : f64
@ -153,7 +153,7 @@ func @main() -> i32 attributes {llvm.emit_c_interface} {
"""
pooling_boiler = """
func.func @main() -> i32 attributes {llvm.emit_c_interface} {
%v0 = arith.constant 0 : i32
%v42 = arith.constant 42.0 : f64
%v77 = arith.constant 77.0 : f64

View File

@ -18,7 +18,7 @@ def test_insert_at_block_end():
ctx.allow_unregistered_dialects = True
with Location.unknown(ctx):
module = Module.parse(r"""
func.func @foo() -> () {
"custom.op1"() : () -> ()
}
""")
@ -38,7 +38,7 @@ def test_insert_before_operation():
ctx.allow_unregistered_dialects = True
with Location.unknown(ctx):
module = Module.parse(r"""
func.func @foo() -> () {
"custom.op1"() : () -> ()
"custom.op2"() : () -> ()
}
@ -60,7 +60,7 @@ def test_insert_at_block_begin():
ctx.allow_unregistered_dialects = True
with Location.unknown(ctx):
module = Module.parse(r"""
func.func @foo() -> () {
"custom.op2"() : () -> ()
}
""")
@ -88,7 +88,7 @@ def test_insert_at_terminator():
ctx.allow_unregistered_dialects = True
with Location.unknown(ctx):
module = Module.parse(r"""
func.func @foo() -> () {
"custom.op1"() : () -> ()
return
}
@ -109,7 +109,7 @@ def test_insert_at_block_terminator_missing():
ctx.allow_unregistered_dialects = True
with ctx:
module = Module.parse(r"""
func.func @foo() -> () {
"custom.op1"() : () -> ()
}
""")
@ -130,7 +130,7 @@ def test_insert_at_end_with_terminator_errors():
with Context() as ctx, Location.unknown():
ctx.allow_unregistered_dialects = True
module = Module.parse(r"""
func.func @foo() -> () {
return
}
""")
@ -151,7 +151,7 @@ def test_insertion_point_context():
ctx.allow_unregistered_dialects = True
with Location.unknown(ctx):
module = Module.parse(r"""
func.func @foo() -> () {
"custom.op1"() : () -> ()
}
""")

View File

@ -64,7 +64,7 @@ def testCreateEmpty():
def testRoundtripUnicode():
ctx = Context()
module = Module.parse(r"""
func.func private @roundtripUnicode() attributes { foo = "😊" }
""", ctx)
print(str(module))
@ -79,7 +79,7 @@ def testRoundtripUnicode():
def testRoundtripBinary():
with Context():
module = Module.parse(r"""
func.func private @roundtripUnicode() attributes { foo = "😊" }
""")
binary_asm = module.operation.get_asm(binary=True)
assert isinstance(binary_asm, bytes)

View File

@ -30,7 +30,7 @@ def testTraverseOpRegionBlockIterators():
ctx.allow_unregistered_dialects = True
module = Module.parse(
r"""
func.func @f1(%arg0: i32) -> i32 {
%1 = "custom.addi"(%arg0, %arg0) : (i32, i32) -> i32
return %1 : i32
}
@ -87,7 +87,7 @@ def testTraverseOpRegionBlockIndices():
ctx.allow_unregistered_dialects = True
module = Module.parse(
r"""
func.func @f1(%arg0: i32) -> i32 {
%1 = "custom.addi"(%arg0, %arg0) : (i32, i32) -> i32
return %1 : i32
}
@ -147,7 +147,7 @@ def testBlockArgumentList():
with Context() as ctx:
module = Module.parse(
r"""
func.func @f1(%arg0: i32, %arg1: f64, %arg2: index) {
return
}
""", ctx)
@ -192,7 +192,7 @@ def testOperationOperands():
with Context() as ctx:
ctx.allow_unregistered_dialects = True
module = Module.parse(r"""
func.func @f1(%arg0: i32) {
%0 = "test.producer"() : () -> i64
"test.consumer"(%arg0, %0) : (i32, i64) -> ()
return
@ -215,7 +215,7 @@ def testOperationOperandsSlice():
with Context() as ctx:
ctx.allow_unregistered_dialects = True
module = Module.parse(r"""
func.func @f1() {
%0 = "test.producer0"() : () -> i64
%1 = "test.producer1"() : () -> i64
%2 = "test.producer2"() : () -> i64
@ -273,7 +273,7 @@ def testOperationOperandsSet():
with Context() as ctx, Location.unknown(ctx):
ctx.allow_unregistered_dialects = True
module = Module.parse(r"""
func.func @f1() {
%0 = "test.producer0"() : () -> i64
%1 = "test.producer1"() : () -> i64
%2 = "test.producer2"() : () -> i64
@ -328,7 +328,7 @@ def testOperationInsertionPoint():
ctx.allow_unregistered_dialects = True
module = Module.parse(
r"""
func.func @f1(%arg0: i32) -> i32 {
%1 = "custom.addi"(%arg0, %arg0) : (i32, i32) -> i32
return %1 : i32
}
@ -383,7 +383,7 @@ def testOperationWithRegion():
# TODO: Also verify accessing the terminator once both parents are nulled
# out.
module = Module.parse(r"""
func.func @f1(%arg0: i32) -> i32 {
%1 = "custom.addi"(%arg0, %arg0) : (i32, i32) -> i32
return %1 : i32
}
@ -405,11 +405,11 @@ def testOperationResultList():
ctx = Context()
module = Module.parse(
r"""
func.func @f1() {
%0:3 = call @f2() : () -> (i32, f64, index)
return
}
func.func private @f2() -> (i32, f64, index)
""", ctx)
caller = module.body.operations[0]
call = caller.regions[0].blocks[0].operations[0]
@ -437,7 +437,7 @@ def testOperationResultListSlice():
with Context() as ctx:
ctx.allow_unregistered_dialects = True
module = Module.parse(r"""
func.func @f1() {
"some.op"() : () -> (i1, i2, i3, i4, i5)
return
}
@ -534,7 +534,7 @@ def testOperationPrint():
ctx = Context()
module = Module.parse(
r"""
func.func @f1(%arg0: i32) -> i32 {
%0 = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>
return %arg0 : i32
}
@ -805,8 +805,8 @@ def testModuleMerge():
with Context():
m1 = Module.parse("func private @foo()")
m2 = Module.parse("""
func.func private @bar()
func.func private @qux()
""")
foo = m1.body.operations[0]
bar = m2.body.operations[0]

View File

@ -20,11 +20,11 @@ def testSymbolTableInsert():
with Context() as ctx:
ctx.allow_unregistered_dialects = True
m1 = Module.parse("""
func.func private @foo()
func.func private @bar()""")
m2 = Module.parse("""
func.func private @qux()
func.func private @foo()
"foo.bar"() : () -> ()""")
symbol_table = SymbolTable(m1.operation)
@ -92,11 +92,11 @@ def testSymbolTableInsert():
def testSymbolTableRAUW():
with Context() as ctx:
m = Module.parse("""
func.func private @foo() {
call @bar() : () -> ()
return
}
func.func private @bar()
""")
foo, bar = list(m.operation.regions[0].blocks[0].operations)[0:2]
SymbolTable.set_symbol_name(bar, "bam")
@ -117,7 +117,7 @@ def testSymbolTableRAUW():
def testSymbolTableVisibility():
with Context() as ctx:
m = Module.parse("""
func.func private @foo() {
return
}
""")

View File

@ -44,7 +44,7 @@ def testValueIsInstance():
ctx.allow_unregistered_dialects = True
module = Module.parse(
r"""
func.func @foo(%arg0: f32) {
%0 = "some_dialect.some_op"() : () -> f64
return
}""", ctx)
@ -64,7 +64,7 @@ def testValueHash():
ctx.allow_unregistered_dialects = True
module = Module.parse(
r"""
func.func @foo(%arg0: f32) -> f32 {
%0 = "some_dialect.some_op"(%arg0) : (f32) -> f32
return %0 : f32
}""", ctx)

View File

@ -42,7 +42,7 @@ class MlirOptKernel(Kernel):
```mlir
// configuration: --pass
func.func @foo(%tensor: tensor<2x3xf64>) -> tensor<3x2xf64> { ... }
```
```mlir