Add a new interface that allows setting a default dialect to be used for printing/parsing regions

Currently, the builtin dialect is the default namespace used for parsing
and printing. As such, `module` and `func` don't need to be prefixed.
In the case of some dialects that define new regions for their own
purposes (like SPIR-V modules, for example), it can be beneficial to
change the default dialect in order to improve readability.

Differential Revision: https://reviews.llvm.org/D107236
This commit is contained in:
Mehdi Amini 2021-08-28 03:03:49 +00:00
parent c41b16c26b
commit 387f95541b
35 changed files with 223 additions and 118 deletions

View File

@ -24,7 +24,8 @@ include "mlir/Interfaces/SideEffectInterfaces.td"
// -----
def SPV_AddressOfOp : SPV_Op<"mlir.addressof",
[DeclareOpInterfaceMethods<OpAsmOpInterface>, InFunctionScope, NoSideEffect]> {
[DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>,
InFunctionScope, NoSideEffect]> {
let summary = "Get the address of a global variable.";
let description = [{
@ -70,7 +71,9 @@ def SPV_AddressOfOp : SPV_Op<"mlir.addressof",
// -----
def SPV_ConstantOp : SPV_Op<"Constant",
[ConstantLike, DeclareOpInterfaceMethods<OpAsmOpInterface>, NoSideEffect]> {
[ConstantLike,
DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>,
NoSideEffect]> {
let summary = "The op that declares a SPIR-V normal constant";
let description = [{

View File

@ -135,7 +135,7 @@ def Shape_ConstShapeOp : Shape_Op<"const_shape",
def Shape_ConstSizeOp : Shape_Op<"const_size", [
ConstantLike,
NoSideEffect,
DeclareOpInterfaceMethods<OpAsmOpInterface>
DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>
]> {
let summary = "Creates a constant of type `shape.size`";
let description = [{

View File

@ -1037,7 +1037,8 @@ def CondBranchOp : Std_Op<"cond_br",
//===----------------------------------------------------------------------===//
def ConstantOp : Std_Op<"constant",
[ConstantLike, NoSideEffect, DeclareOpInterfaceMethods<OpAsmOpInterface>]> {
[ConstantLike, NoSideEffect,
DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>]> {
let summary = "constant";
let description = [{
Syntax:

View File

@ -14,6 +14,7 @@
#define MLIR_IR_BUILTINOPS_H_
#include "mlir/IR/FunctionSupport.h"
#include "mlir/IR/OpImplementation.h"
#include "mlir/IR/OwningOpRef.h"
#include "mlir/IR/RegionKindInterface.h"
#include "mlir/IR/SymbolTable.h"

View File

@ -15,6 +15,7 @@
#define BUILTIN_OPS
include "mlir/IR/BuiltinDialect.td"
include "mlir/IR/OpAsmInterface.td"
include "mlir/IR/RegionKindInterface.td"
include "mlir/IR/SymbolInterfaces.td"
include "mlir/Interfaces/CallInterfaces.td"
@ -160,8 +161,9 @@ def FuncOp : Builtin_Op<"func", [
//===----------------------------------------------------------------------===//
def ModuleOp : Builtin_Op<"module", [
AffineScope, IsolatedFromAbove, NoRegionArguments, SymbolTable, Symbol]
# GraphRegionNoTerminator.traits> {
AffineScope, IsolatedFromAbove, NoRegionArguments, SymbolTable, Symbol,
OpAsmOpInterface
] # GraphRegionNoTerminator.traits> {
let summary = "A top level container operation";
let description = [{
A `module` represents a top-level container operation. It contains a single
@ -206,6 +208,14 @@ def ModuleOp : Builtin_Op<"module", [
//===------------------------------------------------------------------===//
DataLayoutSpecInterface getDataLayoutSpec();
//===------------------------------------------------------------------===//
// OpAsmOpInterface Methods
//===------------------------------------------------------------------===//
static ::llvm::StringRef getDefaultDialect() {
return "builtin";
}
}];
let verifier = [{ return ::verify(*this); }];

View File

@ -47,7 +47,19 @@ def OpAsmOpInterface : OpInterface<"OpAsmOpInterface"> {
%first_result, %middle_results:2, %0 = "my.op" ...
```
}],
"void", "getAsmResultNames", (ins "::mlir::OpAsmSetValueNameFn":$setNameFn)
"void", "getAsmResultNames",
(ins "::mlir::OpAsmSetValueNameFn":$setNameFn),
"", ";"
>,
StaticInterfaceMethod<[{
Return the default dialect used when printing/parsing operations in
regions nested under this operation. This allows for eliding the dialect
prefix from the operation name, for example it would be possible to omit
the `spv.` prefix from all operations within a SpirV module if this method
returned `spv`. The default implementation returns an empty string which
is ignored.
}],
"StringRef", "getDefaultDialect", (ins), "", "return \"\";"
>,
];
}

View File

@ -180,7 +180,8 @@ protected:
// The fallback for the printer is to print it the generic assembly form.
static void print(Operation *op, OpAsmPrinter &p);
static void printOpName(Operation *op, OpAsmPrinter &p);
static void printOpName(Operation *op, OpAsmPrinter &p,
StringRef defaultDialect);
/// Mutability management is handled by the OpWrapper/OpConstWrapper classes,
/// so we can cast it away here.
@ -1777,7 +1778,7 @@ private:
static std::enable_if_t<!detect_has_print<ConcreteOpT>::value,
AbstractOperation::PrintAssemblyFn>
getPrintAssemblyFnImpl() {
return [](Operation *op, OpAsmPrinter &printer) {
return [](Operation *op, OpAsmPrinter &printer, StringRef defaultDialect) {
return OpState::print(op, printer);
};
}
@ -1789,8 +1790,9 @@ private:
getPrintAssemblyFnImpl() {
return &printAssembly;
}
static void printAssembly(Operation *op, OpAsmPrinter &p) {
OpState::printOpName(op, p);
static void printAssembly(Operation *op, OpAsmPrinter &p,
StringRef defaultDialect) {
OpState::printOpName(op, p, defaultDialect);
return cast<ConcreteType>(op).print(p);
}
/// Implementation of `VerifyInvariantsFn` AbstractOperation hook.

View File

@ -77,7 +77,7 @@ public:
using ParseAssemblyFn =
llvm::unique_function<ParseResult(OpAsmParser &, OperationState &) const>;
using PrintAssemblyFn =
llvm::unique_function<void(Operation *, OpAsmPrinter &) const>;
llvm::unique_function<void(Operation *, OpAsmPrinter &, StringRef) const>;
using VerifyInvariantsFn =
llvm::unique_function<LogicalResult(Operation *) const>;
@ -97,8 +97,9 @@ public:
const ParseAssemblyFn &getParseAssemblyFn() const { return parseAssemblyFn; }
/// This hook implements the AsmPrinter for this operation.
void printAssembly(Operation *op, OpAsmPrinter &p) const {
return printAssemblyFn(op, p);
void printAssembly(Operation *op, OpAsmPrinter &p,
StringRef defaultDialect) const {
return printAssemblyFn(op, p, defaultDialect);
}
/// This hook implements the verifier for this operation. It should emit an

View File

@ -27,6 +27,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallString.h"
@ -371,7 +372,7 @@ public:
// Check to see if this is a known operation. If so, use the registered
// custom printer hook.
if (auto *opInfo = op->getAbstractOperation()) {
opInfo->printAssembly(op, *this);
opInfo->printAssembly(op, *this, /*defaultDialect=*/"");
return;
}
}
@ -2424,6 +2425,13 @@ public:
}
private:
// Contains the stack of default dialects to use when printing regions.
// A new dialect is pushed to the stack before printing regions nested under
// an operation implementing `OpAsmOpInterface`, and popped when done. At the
// top-level we start with "builtin" as the default, so that the top-level
// `module` operation prints as-is.
SmallVector<StringRef> defaultDialectStack{"builtin"};
/// The number of spaces used for indenting nested operations.
const static unsigned indentWidth = 2;
@ -2503,7 +2511,7 @@ void OperationPrinter::printOperation(Operation *op) {
// Check to see if this is a known operation. If so, use the registered
// custom printer hook.
if (auto *opInfo = op->getAbstractOperation()) {
opInfo->printAssembly(op, *this);
opInfo->printAssembly(op, *this, defaultDialectStack.back());
return;
}
// Otherwise try to dispatch to the dialect, if available.
@ -2511,6 +2519,7 @@ void OperationPrinter::printOperation(Operation *op) {
if (auto opPrinter = dialect->getOperationPrinter(op)) {
// Print the op name first.
StringRef name = op->getName().getStringRef();
name.consume_front((defaultDialectStack.back() + ".").str());
printEscapedString(name, os);
// Print the rest of the op now.
opPrinter(op, *this);
@ -2657,6 +2666,13 @@ void OperationPrinter::printRegion(Region &region, bool printEntryBlockArgs,
bool printEmptyBlock) {
os << " {" << newLine;
if (!region.empty()) {
auto restoreDefaultDialect =
llvm::make_scope_exit([&]() { defaultDialectStack.pop_back(); });
if (auto iface = dyn_cast<OpAsmOpInterface>(region.getParentOp()))
defaultDialectStack.push_back(iface.getDefaultDialect());
else
defaultDialectStack.push_back("");
auto *entryBlock = &region.front();
// Force printing the block header if printEmptyBlock is set and the block
// is empty or if printEntryBlockArgs is set and there are arguments to

View File

@ -643,9 +643,13 @@ ParseResult OpState::parse(OpAsmParser &parser, OperationState &result) {
// The fallback for the printer is to print in the generic assembly form.
void OpState::print(Operation *op, OpAsmPrinter &p) { p.printGenericOp(op); }
// The fallback for the printer is to print in the generic assembly form.
void OpState::printOpName(Operation *op, OpAsmPrinter &p) {
void OpState::printOpName(Operation *op, OpAsmPrinter &p,
StringRef defaultDialect) {
StringRef name = op->getName().getStringRef();
if (name.startswith("std."))
if (name.startswith((defaultDialect + ".").str()))
name = name.drop_front(defaultDialect.size() + 1);
// TODO: remove this special case.
else if (name.startswith("std."))
name = name.drop_front(4);
p.getStream() << name;
}

View File

@ -18,6 +18,7 @@
#include "mlir/Parser.h"
#include "mlir/Parser/AsmParserState.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/bit.h"
#include "llvm/Support/PrettyStackTrace.h"
@ -1842,31 +1843,36 @@ private:
Operation *
OperationParser::parseCustomOperation(ArrayRef<ResultRecord> resultIDs) {
llvm::SMLoc opLoc = getToken().getLoc();
StringRef opName = getTokenSpelling();
std::string opName = getTokenSpelling().str();
auto *opDefinition = AbstractOperation::lookup(opName, getContext());
StringRef defaultDialect = getState().defaultDialectStack.back();
Dialect *dialect = nullptr;
if (opDefinition) {
dialect = &opDefinition->dialect;
} else {
if (opName.contains('.')) {
if (StringRef(opName).contains('.')) {
// This op has a dialect, we try to check if we can register it in the
// context on the fly.
StringRef dialectName = opName.split('.').first;
StringRef dialectName = StringRef(opName).split('.').first;
dialect = getContext()->getLoadedDialect(dialectName);
if (!dialect && (dialect = getContext()->getOrLoadDialect(dialectName)))
opDefinition = AbstractOperation::lookup(opName, getContext());
} else {
// If the operation name has no namespace prefix we treat it as a builtin
// or standard operation and prefix it with "builtin" or "std".
// TODO: Remove the special casing here.
opDefinition = AbstractOperation::lookup(Twine("builtin." + opName).str(),
getContext());
// If the operation name has no namespace prefix we lookup the current
// default dialect (set through OpAsmOpInterface).
opDefinition = AbstractOperation::lookup(
Twine(defaultDialect + "." + opName).str(), getContext());
if (!opDefinition && getContext()->getOrLoadDialect("std")) {
opDefinition = AbstractOperation::lookup(Twine("std." + opName).str(),
getContext());
}
if (opDefinition)
opName = opDefinition->name.strref();
if (opDefinition) {
dialect = &opDefinition->dialect;
opName = opDefinition->name.str();
} else if (!defaultDialect.empty()) {
dialect = getContext()->getOrLoadDialect(defaultDialect);
opName = (defaultDialect + "." + opName).str();
}
}
}
@ -1876,10 +1882,14 @@ OperationParser::parseCustomOperation(ArrayRef<ResultRecord> resultIDs) {
function_ref<ParseResult(OpAsmParser &, OperationState &)> parseAssemblyFn;
bool isIsolatedFromAbove = false;
defaultDialect = "";
if (opDefinition) {
parseAssemblyFn = opDefinition->getParseAssemblyFn();
isIsolatedFromAbove =
opDefinition->hasTrait<OpTrait::IsIsolatedFromAbove>();
auto *iface = opDefinition->getInterface<OpAsmOpInterface>();
if (iface && !iface->getDefaultDialect().empty())
defaultDialect = iface->getDefaultDialect();
} else {
Optional<Dialect::ParseOpHook> dialectHook;
if (dialect)
@ -1890,14 +1900,16 @@ OperationParser::parseCustomOperation(ArrayRef<ResultRecord> resultIDs) {
}
parseAssemblyFn = *dialectHook;
}
getState().defaultDialectStack.push_back(defaultDialect);
auto restoreDefaultDialect = llvm::make_scope_exit(
[&]() { getState().defaultDialectStack.pop_back(); });
consumeToken();
// If the custom op parser crashes, produce some indication to help
// debugging.
std::string opNameStr = opName.str();
llvm::PrettyStackTraceFormat fmt("MLIR Parser: custom op parser '%s'",
opNameStr.c_str());
opName.c_str());
// Get location information for the operation.
auto srcLocation = getEncodedSourceLocation(opLoc);

View File

@ -82,6 +82,13 @@ struct ParserState {
/// An optional pointer to a struct containing high level parser state to be
/// populated during parsing.
AsmParserState *asmState;
// Contains the stack of default dialects to use when parsing regions.
// A new dialect gets pushed to the stack before parsing regions nested
// under an operation implementing `OpAsmOpInterface`, and
// popped when done. At the top-level we start with "builtin" as the
// default, so that the top-level `module` operation parses as-is.
SmallVector<StringRef> defaultDialectStack{"builtin"};
};
} // end namespace detail

View File

@ -15,7 +15,7 @@ func @tanh(%arg: tensor<10x20xf32>) -> tensor<10x20xf32>
// The shape function library with some local functions.
shape.function_library @shape_lib {
// Test shape function that returns the shape of input arg as result shape.
func @same_result_shape(%arg: !shape.value_shape) -> !shape.shape {
builtin.func @same_result_shape(%arg: !shape.value_shape) -> !shape.shape {
%0 = shape.shape_of %arg : !shape.value_shape -> !shape.shape
return %0 : !shape.shape
}

View File

@ -4,7 +4,7 @@
gpu.module @test_module {
// CHECK-LABEL: func @gpu_index_ops()
// CHECK32-LABEL: func @gpu_index_ops()
func @gpu_index_ops()
builtin.func @gpu_index_ops()
-> (index, index, index, index, index, index,
index, index, index, index, index, index) {
// CHECK32-NOT: = llvm.sext %{{.*}} : i32 to i64
@ -61,7 +61,7 @@ gpu.module @test_module {
gpu.module @test_module {
// CHECK-LABEL: func @gpu_index_comp
// CHECK32-LABEL: func @gpu_index_comp
func @gpu_index_comp(%idx : index) -> index {
builtin.func @gpu_index_comp(%idx : index) -> index {
// CHECK: = llvm.add %{{.*}}, %{{.*}} : i64
// CHECK32: = llvm.add %{{.*}}, %{{.*}} : i32
%0 = addi %idx, %idx : index
@ -109,7 +109,7 @@ gpu.module @test_module {
gpu.module @test_module {
// CHECK-LABEL: func @gpu_shuffle()
func @gpu_shuffle() -> (f32) {
builtin.func @gpu_shuffle() -> (f32) {
// CHECK: %[[#VALUE:]] = llvm.mlir.constant(1.000000e+00 : f32) : f32
%arg0 = constant 1.0 : f32
// CHECK: %[[#OFFSET:]] = llvm.mlir.constant(4 : i32) : i32
@ -133,7 +133,7 @@ gpu.module @test_module {
gpu.module @test_module {
// CHECK-LABEL: func @gpu_sync()
func @gpu_sync() {
builtin.func @gpu_sync() {
// CHECK: nvvm.barrier0
gpu.barrier
std.return
@ -146,7 +146,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_fabsf(f32) -> f32
// CHECK: llvm.func @__nv_fabs(f64) -> f64
// CHECK-LABEL: func @gpu_fabs
func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = std.absf %arg_f32 : f32
// CHECK: llvm.call @__nv_fabsf(%{{.*}}) : (f32) -> f32
%result64 = std.absf %arg_f64 : f64
@ -161,7 +161,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_ceilf(f32) -> f32
// CHECK: llvm.func @__nv_ceil(f64) -> f64
// CHECK-LABEL: func @gpu_ceil
func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = std.ceilf %arg_f32 : f32
// CHECK: llvm.call @__nv_ceilf(%{{.*}}) : (f32) -> f32
%result64 = std.ceilf %arg_f64 : f64
@ -176,7 +176,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_floorf(f32) -> f32
// CHECK: llvm.func @__nv_floor(f64) -> f64
// CHECK-LABEL: func @gpu_floor
func @gpu_floor(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_floor(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = std.floorf %arg_f32 : f32
// CHECK: llvm.call @__nv_floorf(%{{.*}}) : (f32) -> f32
%result64 = std.floorf %arg_f64 : f64
@ -191,7 +191,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_cosf(f32) -> f32
// CHECK: llvm.func @__nv_cos(f64) -> f64
// CHECK-LABEL: func @gpu_cos
func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.cos %arg_f32 : f32
// CHECK: llvm.call @__nv_cosf(%{{.*}}) : (f32) -> f32
%result64 = math.cos %arg_f64 : f64
@ -205,7 +205,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_expf(f32) -> f32
// CHECK: llvm.func @__nv_exp(f64) -> f64
// CHECK-LABEL: func @gpu_exp
func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.exp %arg_f32 : f32
// CHECK: llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32
%result64 = math.exp %arg_f64 : f64
@ -219,7 +219,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_exp2f(f32) -> f32
// CHECK: llvm.func @__nv_exp2(f64) -> f64
// CHECK-LABEL: func @gpu_exp2
func @gpu_exp2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_exp2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.exp2 %arg_f32 : f32
// CHECK: llvm.call @__nv_exp2f(%{{.*}}) : (f32) -> f32
%result64 = math.exp2 %arg_f64 : f64
@ -234,7 +234,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_logf(f32) -> f32
// CHECK: llvm.func @__nv_log(f64) -> f64
// CHECK-LABEL: func @gpu_log
func @gpu_log(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_log(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.log %arg_f32 : f32
// CHECK: llvm.call @__nv_logf(%{{.*}}) : (f32) -> f32
%result64 = math.log %arg_f64 : f64
@ -249,7 +249,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_log10f(f32) -> f32
// CHECK: llvm.func @__nv_log10(f64) -> f64
// CHECK-LABEL: func @gpu_log10
func @gpu_log10(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_log10(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.log10 %arg_f32 : f32
// CHECK: llvm.call @__nv_log10f(%{{.*}}) : (f32) -> f32
%result64 = math.log10 %arg_f64 : f64
@ -264,7 +264,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_log1pf(f32) -> f32
// CHECK: llvm.func @__nv_log1p(f64) -> f64
// CHECK-LABEL: func @gpu_log1p
func @gpu_log1p(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_log1p(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.log1p %arg_f32 : f32
// CHECK: llvm.call @__nv_log1pf(%{{.*}}) : (f32) -> f32
%result64 = math.log1p %arg_f64 : f64
@ -279,7 +279,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_log2f(f32) -> f32
// CHECK: llvm.func @__nv_log2(f64) -> f64
// CHECK-LABEL: func @gpu_log2
func @gpu_log2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_log2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.log2 %arg_f32 : f32
// CHECK: llvm.call @__nv_log2f(%{{.*}}) : (f32) -> f32
%result64 = math.log2 %arg_f64 : f64
@ -294,7 +294,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_sinf(f32) -> f32
// CHECK: llvm.func @__nv_sin(f64) -> f64
// CHECK-LABEL: func @gpu_sin
func @gpu_sin(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_sin(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.sin %arg_f32 : f32
// CHECK: llvm.call @__nv_sinf(%{{.*}}) : (f32) -> f32
%result64 = math.sin %arg_f64 : f64
@ -309,7 +309,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_tanhf(f32) -> f32
// CHECK: llvm.func @__nv_tanh(f64) -> f64
// CHECK-LABEL: func @gpu_tanh
func @gpu_tanh(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) {
builtin.func @gpu_tanh(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) {
%result16 = math.tanh %arg_f16 : f16
// CHECK: llvm.fpext %{{.*}} : f16 to f32
// CHECK-NEXT: llvm.call @__nv_tanhf(%{{.*}}) : (f32) -> f32
@ -328,7 +328,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_rsqrtf(f32) -> f32
// CHECK: llvm.func @__nv_rsqrt(f64) -> f64
// CHECK-LABEL: func @gpu_rsqrt
func @gpu_rsqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
builtin.func @gpu_rsqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
-> (f16, f32, f64) {
%result16 = math.rsqrt %arg_f16 : f16
// CHECK: llvm.fpext %{{.*}} : f16 to f32
@ -348,7 +348,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_sqrtf(f32) -> f32
// CHECK: llvm.func @__nv_sqrt(f64) -> f64
// CHECK-LABEL: func @gpu_sqrt
func @gpu_sqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
builtin.func @gpu_sqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
-> (f16, f32, f64) {
%result16 = math.sqrt %arg_f16 : f16
// CHECK: llvm.fpext %{{.*}} : f16 to f32
@ -368,7 +368,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_atanf(f32) -> f32
// CHECK: llvm.func @__nv_atan(f64) -> f64
// CHECK-LABEL: func @gpu_atan
func @gpu_atan(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
builtin.func @gpu_atan(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
-> (f16, f32, f64) {
%result16 = math.atan %arg_f16 : f16
// CHECK: llvm.fpext %{{.*}} : f16 to f32
@ -388,7 +388,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_atan2f(f32, f32) -> f32
// CHECK: llvm.func @__nv_atan2(f64, f64) -> f64
// CHECK-LABEL: func @gpu_atan2
func @gpu_atan2(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
builtin.func @gpu_atan2(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
-> (f16, f32, f64) {
%result16 = math.atan2 %arg_f16, %arg_f16 : f16
// CHECK: llvm.fpext %{{.*}} : f16 to f32
@ -412,7 +412,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_expf(f32) -> f32
// CHECK: llvm.func @__nv_exp(f64) -> f64
// CHECK-LABEL: func @gpu_exp
func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.exp %arg_f32 : f32
// CHECK: llvm.call @__nv_expf(%{{.*}}) : (f32) -> f32
%result64 = math.exp %arg_f64 : f64
@ -429,7 +429,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_expm1f(f32) -> f32
// CHECK: llvm.func @__nv_expm1(f64) -> f64
// CHECK-LABEL: func @gpu_expm1
func @gpu_expm1(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_expm1(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.expm1 %arg_f32 : f32
// CHECK: llvm.call @__nv_expm1f(%{{.*}}) : (f32) -> f32
%result64 = math.expm1 %arg_f64 : f64
@ -444,7 +444,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__nv_powf(f32, f32) -> f32
// CHECK: llvm.func @__nv_pow(f64, f64) -> f64
// CHECK-LABEL: func @gpu_pow
func @gpu_pow(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_pow(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.powf %arg_f32, %arg_f32 : f32
// CHECK: llvm.call @__nv_powf(%{{.*}}, %{{.*}}) : (f32, f32) -> f32
%result64 = math.powf %arg_f64, %arg_f64 : f64

View File

@ -4,7 +4,7 @@ gpu.module @test_module {
// CHECK-LABEL: func @gpu_wmma_load_op() ->
// CHECK-SAME: !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> {
func @gpu_wmma_load_op() -> (!gpu.mma_matrix<16x16xf16, "AOp">) {
builtin.func @gpu_wmma_load_op() -> (!gpu.mma_matrix<16x16xf16, "AOp">) {
%wg = memref.alloca() {alignment = 32} : memref<32x32xf16, 3>
%i = constant 16 : index
%j = constant 16 : index
@ -31,7 +31,7 @@ gpu.module @test_module {
// CHECK-LABEL: func @gpu_wmma_store_op
// CHECK-SAME: (%[[D:.*]]: !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) {
func @gpu_wmma_store_op(%arg0 : !gpu.mma_matrix<16x16xf16, "COp">) -> () {
builtin.func @gpu_wmma_store_op(%arg0 : !gpu.mma_matrix<16x16xf16, "COp">) -> () {
%sg = memref.alloca(){alignment = 32} : memref<32x32xf16, 3>
%i = constant 16 : index
%j = constant 16 : index
@ -62,7 +62,7 @@ gpu.module @test_module {
// CHECK-LABEL: func @gpu_wmma_mma_op
// CHECK-SAME: (%[[A:.*]]: !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>, %[[B:.*]]: !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>, %[[C:.*]]: !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>)
func @gpu_wmma_mma_op(%A : !gpu.mma_matrix<16x16xf16, "AOp">, %B : !gpu.mma_matrix<16x16xf16, "BOp">, %C : !gpu.mma_matrix<16x16xf16, "COp">) -> (!gpu.mma_matrix<16x16xf16, "COp">) {
builtin.func @gpu_wmma_mma_op(%A : !gpu.mma_matrix<16x16xf16, "AOp">, %B : !gpu.mma_matrix<16x16xf16, "BOp">, %C : !gpu.mma_matrix<16x16xf16, "COp">) -> (!gpu.mma_matrix<16x16xf16, "COp">) {
%D = gpu.subgroup_mma_compute %A, %B, %C : !gpu.mma_matrix<16x16xf16, "AOp">, !gpu.mma_matrix<16x16xf16, "BOp"> -> !gpu.mma_matrix<16x16xf16, "COp">
// CHECK: %[[A1:.*]] = llvm.extractvalue %[[A]][0 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
// CHECK: %[[A2:.*]] = llvm.extractvalue %[[A]][1 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
@ -131,7 +131,7 @@ gpu.module @test_module {
// CHECK: %90 = llvm.extractvalue %[[ACC]][3 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
// CHECK: nvvm.wmma.m16n16k16.store.d.f16.row.stride %86, %87, %88, %89, %90, %79 : !llvm.ptr<i32>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, i32
func @gpu_wmma_mma_loop_op(%arg0: memref<128x128xf16>, %arg1: memref<128x128xf16>, %arg2: memref<128x128xf16>) {
builtin.func @gpu_wmma_mma_loop_op(%arg0: memref<128x128xf16>, %arg1: memref<128x128xf16>, %arg2: memref<128x128xf16>) {
%c0 = constant 0 : index
%c128 = constant 128 : index
%c32 = constant 32 : index
@ -170,7 +170,7 @@ gpu.module @test_module {
// CHECK: %[[M3:.+]] = llvm.insertvalue %[[V2]], %[[M2]][2 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
// CHECK: %[[M4:.+]] = llvm.insertvalue %[[V2]], %[[M3]][3 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
// CHECK: llvm.return %[[M4]] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
func @gpu_wmma_constant_op() ->(!gpu.mma_matrix<16x16xf16, "COp">) {
builtin.func @gpu_wmma_constant_op() ->(!gpu.mma_matrix<16x16xf16, "COp">) {
%cst = constant 1.0 : f16
%C = gpu.subgroup_mma_constant_matrix %cst : !gpu.mma_matrix<16x16xf16, "COp">
return %C : !gpu.mma_matrix<16x16xf16, "COp">

View File

@ -4,7 +4,7 @@
gpu.module @test_module {
// CHECK-LABEL: func @gpu_index_ops()
// CHECK32-LABEL: func @gpu_index_ops()
func @gpu_index_ops()
builtin.func @gpu_index_ops()
-> (index, index, index, index, index, index,
index, index, index, index, index, index) {
// CHECK32-NOT: = llvm.sext %{{.*}} : i32 to i64
@ -61,7 +61,7 @@ gpu.module @test_module {
gpu.module @test_module {
// CHECK-LABEL: func @gpu_index_comp
// CHECK32-LABEL: func @gpu_index_comp
func @gpu_index_comp(%idx : index) -> index {
builtin.func @gpu_index_comp(%idx : index) -> index {
// CHECK: = llvm.add %{{.*}}, %{{.*}} : i64
// CHECK32: = llvm.add %{{.*}}, %{{.*}} : i32
%0 = addi %idx, %idx : index
@ -75,7 +75,7 @@ gpu.module @test_module {
gpu.module @test_module {
// CHECK-LABEL: func @gpu_sync()
func @gpu_sync() {
builtin.func @gpu_sync() {
// CHECK: rocdl.barrier
gpu.barrier
std.return
@ -88,7 +88,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_fabs_f32(f32) -> f32
// CHECK: llvm.func @__ocml_fabs_f64(f64) -> f64
// CHECK-LABEL: func @gpu_fabs
func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = std.absf %arg_f32 : f32
// CHECK: llvm.call @__ocml_fabs_f32(%{{.*}}) : (f32) -> f32
%result64 = std.absf %arg_f64 : f64
@ -103,7 +103,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_ceil_f32(f32) -> f32
// CHECK: llvm.func @__ocml_ceil_f64(f64) -> f64
// CHECK-LABEL: func @gpu_ceil
func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = std.ceilf %arg_f32 : f32
// CHECK: llvm.call @__ocml_ceil_f32(%{{.*}}) : (f32) -> f32
%result64 = std.ceilf %arg_f64 : f64
@ -118,7 +118,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_floor_f32(f32) -> f32
// CHECK: llvm.func @__ocml_floor_f64(f64) -> f64
// CHECK-LABEL: func @gpu_floor
func @gpu_floor(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_floor(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = std.floorf %arg_f32 : f32
// CHECK: llvm.call @__ocml_floor_f32(%{{.*}}) : (f32) -> f32
%result64 = std.floorf %arg_f64 : f64
@ -133,7 +133,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_cos_f32(f32) -> f32
// CHECK: llvm.func @__ocml_cos_f64(f64) -> f64
// CHECK-LABEL: func @gpu_cos
func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.cos %arg_f32 : f32
// CHECK: llvm.call @__ocml_cos_f32(%{{.*}}) : (f32) -> f32
%result64 = math.cos %arg_f64 : f64
@ -148,7 +148,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_exp_f32(f32) -> f32
// CHECK: llvm.func @__ocml_exp_f64(f64) -> f64
// CHECK-LABEL: func @gpu_exp
func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%exp_f32 = math.exp %arg_f32 : f32
// CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32
%result32 = math.exp %exp_f32 : f32
@ -165,7 +165,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_exp2_f32(f32) -> f32
// CHECK: llvm.func @__ocml_exp2_f64(f64) -> f64
// CHECK-LABEL: func @gpu_exp2
func @gpu_exp2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_exp2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%exp2_f32 = math.exp2 %arg_f32 : f32
// CHECK: llvm.call @__ocml_exp2_f32(%{{.*}}) : (f32) -> f32
%result32 = math.exp2 %exp2_f32 : f32
@ -185,7 +185,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_exp_f32(f32) -> f32
// CHECK: llvm.func @__ocml_exp_f64(f64) -> f64
// CHECK-LABEL: func @gpu_exp
func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%exp_f32 = math.exp %arg_f32 : f32
// CHECK: llvm.call @__ocml_exp_f32(%{{.*}}) : (f32) -> f32
%result32 = math.exp %exp_f32 : f32
@ -204,7 +204,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_expm1_f32(f32) -> f32
// CHECK: llvm.func @__ocml_expm1_f64(f64) -> f64
// CHECK-LABEL: func @gpu_expm1
func @gpu_expm1(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_expm1(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%expm1_f32 = math.expm1 %arg_f32 : f32
// CHECK: llvm.call @__ocml_expm1_f32(%{{.*}}) : (f32) -> f32
%result32 = math.expm1 %expm1_f32 : f32
@ -221,7 +221,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_log_f32(f32) -> f32
// CHECK: llvm.func @__ocml_log_f64(f64) -> f64
// CHECK-LABEL: func @gpu_log
func @gpu_log(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_log(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.log %arg_f32 : f32
// CHECK: llvm.call @__ocml_log_f32(%{{.*}}) : (f32) -> f32
%result64 = math.log %arg_f64 : f64
@ -236,7 +236,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_log1p_f32(f32) -> f32
// CHECK: llvm.func @__ocml_log1p_f64(f64) -> f64
// CHECK-LABEL: func @gpu_log1p
func @gpu_log1p(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_log1p(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.log1p %arg_f32 : f32
// CHECK: llvm.call @__ocml_log1p_f32(%{{.*}}) : (f32) -> f32
%result64 = math.log1p %arg_f64 : f64
@ -251,7 +251,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_log10_f32(f32) -> f32
// CHECK: llvm.func @__ocml_log10_f64(f64) -> f64
// CHECK-LABEL: func @gpu_log10
func @gpu_log10(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_log10(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.log10 %arg_f32 : f32
// CHECK: llvm.call @__ocml_log10_f32(%{{.*}}) : (f32) -> f32
%result64 = math.log10 %arg_f64 : f64
@ -266,7 +266,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_log2_f32(f32) -> f32
// CHECK: llvm.func @__ocml_log2_f64(f64) -> f64
// CHECK-LABEL: func @gpu_log2
func @gpu_log2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_log2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.log2 %arg_f32 : f32
// CHECK: llvm.call @__ocml_log2_f32(%{{.*}}) : (f32) -> f32
%result64 = math.log2 %arg_f64 : f64
@ -281,7 +281,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_rsqrt_f32(f32) -> f32
// CHECK: llvm.func @__ocml_rsqrt_f64(f64) -> f64
// CHECK-LABEL: func @gpu_rsqrt
func @gpu_rsqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
builtin.func @gpu_rsqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
-> (f16, f32, f64) {
%result16 = math.rsqrt %arg_f16 : f16
// CHECK: llvm.fpext %{{.*}} : f16 to f32
@ -301,7 +301,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_sqrt_f32(f32) -> f32
// CHECK: llvm.func @__ocml_sqrt_f64(f64) -> f64
// CHECK-LABEL: func @gpu_sqrt
func @gpu_sqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
builtin.func @gpu_sqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64)
-> (f16, f32, f64) {
%result16 = math.sqrt %arg_f16 : f16
// CHECK: llvm.fpext %{{.*}} : f16 to f32
@ -321,7 +321,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_tanh_f32(f32) -> f32
// CHECK: llvm.func @__ocml_tanh_f64(f64) -> f64
// CHECK-LABEL: func @gpu_tanh
func @gpu_tanh(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_tanh(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.tanh %arg_f32 : f32
// CHECK: llvm.call @__ocml_tanh_f32(%{{.*}}) : (f32) -> f32
%result64 = math.tanh %arg_f64 : f64
@ -336,7 +336,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_atan_f32(f32) -> f32
// CHECK: llvm.func @__ocml_atan_f64(f64) -> f64
// CHECK-LABEL: func @gpu_atan
func @gpu_atan(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_atan(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.atan %arg_f32 : f32
// CHECK: llvm.call @__ocml_atan_f32(%{{.*}}) : (f32) -> f32
%result64 = math.atan %arg_f64 : f64
@ -351,7 +351,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_atan2_f32(f32, f32) -> f32
// CHECK: llvm.func @__ocml_atan2_f64(f64, f64) -> f64
// CHECK-LABEL: func @gpu_atan2
func @gpu_atan2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_atan2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.atan2 %arg_f32, %arg_f32 : f32
// CHECK: llvm.call @__ocml_atan2_f32(%{{.*}}) : (f32, f32) -> f32
%result64 = math.atan2 %arg_f64, %arg_f64 : f64
@ -366,7 +366,7 @@ gpu.module @test_module {
// CHECK: llvm.func @__ocml_pow_f32(f32, f32) -> f32
// CHECK: llvm.func @__ocml_pow_f64(f64, f64) -> f64
// CHECK-LABEL: func @gpu_pow
func @gpu_pow(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
builtin.func @gpu_pow(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
%result32 = math.powf %arg_f32, %arg_f32 : f32
// CHECK: llvm.call @__ocml_pow_f32(%{{.*}}, %{{.*}}) : (f32, f32) -> f32
%result64 = math.powf %arg_f64, %arg_f64 : f64

View File

@ -1,7 +1,7 @@
// RUN: mlir-opt %s -convert-vector-to-rocdl | FileCheck %s
gpu.module @test_read{
func @transfer_readx2(%A : memref<?xf32>, %base: index) -> vector<2xf32> {
builtin.func @transfer_readx2(%A : memref<?xf32>, %base: index) -> vector<2xf32> {
%f0 = constant 0.0: f32
%f = vector.transfer_read %A[%base], %f0
{permutation_map = affine_map<(d0) -> (d0)>} :
@ -11,7 +11,7 @@ func @transfer_readx2(%A : memref<?xf32>, %base: index) -> vector<2xf32> {
// CHECK-LABEL: @transfer_readx2
// CHECK: rocdl.buffer.load {{.*}} vector<2xf32>
func @transfer_readx4(%A : memref<?xf32>, %base: index) -> vector<4xf32> {
builtin.func @transfer_readx4(%A : memref<?xf32>, %base: index) -> vector<4xf32> {
%f0 = constant 0.0: f32
%f = vector.transfer_read %A[%base], %f0
{permutation_map = affine_map<(d0) -> (d0)>} :
@ -21,7 +21,7 @@ func @transfer_readx4(%A : memref<?xf32>, %base: index) -> vector<4xf32> {
// CHECK-LABEL: @transfer_readx4
// CHECK: rocdl.buffer.load {{.*}} vector<4xf32>
func @transfer_read_dwordConfig(%A : memref<?xf32>, %base: index) -> vector<4xf32> {
builtin.func @transfer_read_dwordConfig(%A : memref<?xf32>, %base: index) -> vector<4xf32> {
%f0 = constant 0.0: f32
%f = vector.transfer_read %A[%base], %f0
{permutation_map = affine_map<(d0) -> (d0)>} :
@ -36,7 +36,7 @@ func @transfer_read_dwordConfig(%A : memref<?xf32>, %base: index) -> vector<4xf3
}
gpu.module @test_write{
func @transfer_writex2(%A : memref<?xf32>, %B : vector<2xf32>, %base: index) {
builtin.func @transfer_writex2(%A : memref<?xf32>, %B : vector<2xf32>, %base: index) {
vector.transfer_write %B, %A[%base]
{permutation_map = affine_map<(d0) -> (d0)>} :
vector<2xf32>, memref<?xf32>
@ -45,7 +45,7 @@ func @transfer_writex2(%A : memref<?xf32>, %B : vector<2xf32>, %base: index) {
// CHECK-LABEL: @transfer_writex2
// CHECK: rocdl.buffer.store {{.*}} vector<2xf32>
func @transfer_writex4(%A : memref<?xf32>, %B : vector<4xf32>, %base: index) {
builtin.func @transfer_writex4(%A : memref<?xf32>, %B : vector<4xf32>, %base: index) {
vector.transfer_write %B, %A[%base]
{permutation_map = affine_map<(d0) -> (d0)>} :
vector<4xf32>, memref<?xf32>
@ -54,7 +54,7 @@ func @transfer_writex4(%A : memref<?xf32>, %B : vector<4xf32>, %base: index) {
// CHECK-LABEL: @transfer_writex4
// CHECK: rocdl.buffer.store {{.*}} vector<4xf32>
func @transfer_write_dwordConfig(%A : memref<?xf32>, %B : vector<2xf32>, %base: index) {
builtin.func @transfer_write_dwordConfig(%A : memref<?xf32>, %B : vector<2xf32>, %base: index) {
vector.transfer_write %B, %A[%base]
{permutation_map = affine_map<(d0) -> (d0)>} :
vector<2xf32>, memref<?xf32>

View File

@ -10,8 +10,8 @@
func @multiple_conversion_casts(%arg0: i32, %arg1: i32) -> (i32, i32) {
// CHECK-NOT: unrealized_conversion_cast
// CHECK: return %[[ARG0]], %[[ARG1]]
%inputs:2 = unrealized_conversion_cast %arg0, %arg1 : i32, i32 to i64, i64
%outputs:2 = unrealized_conversion_cast %inputs#0, %inputs#1 : i64, i64 to i32, i32
%inputs:2 = builtin.unrealized_conversion_cast %arg0, %arg1 : i32, i32 to i64, i64
%outputs:2 = builtin.unrealized_conversion_cast %inputs#0, %inputs#1 : i64, i64 to i32, i32
return %outputs#0, %outputs#1 : i32, i32
}
@ -19,7 +19,7 @@ func @multiple_conversion_casts(%arg0: i32, %arg1: i32) -> (i32, i32) {
func @multiple_conversion_casts_failure(%arg0: i32, %arg1: i32, %arg2: i64) -> (i32, i32) {
// CHECK: unrealized_conversion_cast
// CHECK: unrealized_conversion_cast
%inputs:2 = unrealized_conversion_cast %arg0, %arg1 : i32, i32 to i64, i64
%outputs:2 = unrealized_conversion_cast %arg2, %inputs#1 : i64, i64 to i32, i32
%inputs:2 = builtin.unrealized_conversion_cast %arg0, %arg1 : i32, i32 to i64, i64
%outputs:2 = builtin.unrealized_conversion_cast %arg2, %inputs#1 : i64, i64 to i32, i32
return %outputs#0, %outputs#1 : i32, i32
}

View File

@ -782,7 +782,7 @@ func @input_stays_same(%arg0 : memref<?x1x?xf32, #map0>, %arg1 : f32, %shape: me
// CHECK: #[[MAP1:.*]] = affine_map<(d0, d1, d2) -> (d0, 0, d2)>
// CHECK: #[[MAP2:.*]] = affine_map<(d0, d1, d2) -> ()>
// CHECK: #[[MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
// CHECK: builtin.func @input_stays_same(
// CHECK: func @input_stays_same(
// CHECK-SAME: %[[ARG0:.*]]: memref<?x1x?xf32, #[[MAP0]]>,
// CHECK-SAME: %[[ARG1:.*]]: f32, %[[ARG2:.*]]: memref<?x1x?x1x?xf32>)
// CHECK-SAME -> memref<?x1x?x1x?xf32> {

View File

@ -20,7 +20,7 @@ func @control_producer_reshape_fusion(%arg0 : tensor<?x?x?xf32>, %arg1 : tensor<
}
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1) -> (d1)>
// CHECK: builtin.func @control_producer_reshape_fusion
// CHECK: func @control_producer_reshape_fusion
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?xf32>
// CHECK-DAG: %[[C0:.+]] = constant 0 : index
@ -54,7 +54,7 @@ func @control_consumer_reshape_fusion(%arg0 : tensor<1x?x?xf32>, %arg1 : tensor<
return %1 : tensor<1x?x?xf32>
}
// CHECK-DAG: #[[MAP:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)
// CHECK: builtin.func @control_consumer_reshape_fusion
// CHECK: func @control_consumer_reshape_fusion
// CHECK: %[[FILL:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP]]]
// CHECK-SAME: outs(%{{.+}} : tensor<1x?x?xf32>)

View File

@ -172,7 +172,7 @@ module attributes {shape.lib = [@shape_lib, "shape_lib"]} {
shape.function_library @shape_lib {
// Test shape function that returns the shape of input arg as result shape.
func @same_result_shape(%arg: !shape.value_shape) -> !shape.shape {
builtin.func @same_result_shape(%arg: !shape.value_shape) -> !shape.shape {
%0 = shape.shape_of %arg : !shape.value_shape -> !shape.shape
return %0 : !shape.shape
}
@ -192,7 +192,7 @@ module attributes {shape.lib = [@shape_lib, @shape_lib]} {
shape.function_library @shape_lib {
// Test shape function that returns the shape of input arg as result shape.
func @same_result_shape(%arg: !shape.value_shape) -> !shape.shape {
builtin.func @same_result_shape(%arg: !shape.value_shape) -> !shape.shape {
%0 = shape.shape_of %arg : !shape.value_shape -> !shape.shape
return %0 : !shape.shape
}
@ -212,7 +212,7 @@ module attributes {shape.lib = [@shape_lib]} {
shape.function_library @shape_lib {
// Test shape function that returns the shape of input arg as result shape.
func @same_result_shape(%arg: !shape.value_shape) -> !shape.shape {
builtin.func @same_result_shape(%arg: !shape.value_shape) -> !shape.shape {
%0 = shape.shape_of %arg : !shape.value_shape -> !shape.shape
return %0 : !shape.shape
}

View File

@ -14,7 +14,7 @@
iterator_types = ["parallel", "parallel", "parallel"]
}
// CHECK-LABEL: builtin.func @sparse_static_dims(
// CHECK-LABEL: func @sparse_static_dims(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}}>>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<20x30x10xf32>) -> tensor<20x30x10xf32> {
// CHECK: %[[VAL_2:.*]] = constant 20 : index
@ -52,7 +52,7 @@ func @sparse_static_dims(%arga: tensor<10x20x30xf32, #X>,
return %0 : tensor<20x30x10xf32>
}
// CHECK-LABEL: builtin.func @sparse_dynamic_dims(
// CHECK-LABEL: func @sparse_dynamic_dims(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<?x?x?xf32>) -> tensor<?x?x?xf32> {
// CHECK: %[[VAL_2:.*]] = constant 2 : index

View File

@ -16,7 +16,7 @@
iterator_types = ["reduction", "reduction", "reduction"]
}
// CHECK-HIR-LABEL: builtin.func @sparse_dynamic_dims(
// CHECK-HIR-LABEL: func @sparse_dynamic_dims(
// CHECK-HIR-SAME: %[[VAL_0:.*]]: tensor<?x?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>,
// CHECK-HIR-SAME: %[[VAL_1:.*]]: tensor<f32>) -> tensor<f32> {
// CHECK-HIR-DAG: %[[C0:.*]] = constant 0 : index
@ -48,7 +48,7 @@
// CHECK-HIR: return %[[VAL_24]] : tensor<f32>
// CHECK-HIR: }
//
// CHECK-MIR-LABEL: builtin.func @sparse_dynamic_dims(
// CHECK-MIR-LABEL: func @sparse_dynamic_dims(
// CHECK-MIR-SAME: %[[VAL_0:.*]]: !llvm.ptr<i8>,
// CHECK-MIR-SAME: %[[VAL_1:.*]]: tensor<f32>) -> tensor<f32> {
// CHECK-MIR-DAG: %[[C0:.*]] = constant 0 : index

View File

@ -4,7 +4,7 @@
func @func_op() {
// expected-error@+1 {{expected valid '@'-identifier for symbol name}}
func missingsigil() -> (i1, index, f32)
builtin.func missingsigil() -> (i1, index, f32)
return
}
@ -12,7 +12,7 @@ func @func_op() {
func @func_op() {
// expected-error@+1 {{expected type instead of SSA identifier}}
func @mixed_named_arguments(f32, %a : i32) {
builtin.func @mixed_named_arguments(f32, %a : i32) {
return
}
return
@ -22,7 +22,7 @@ func @func_op() {
func @func_op() {
// expected-error@+1 {{expected SSA identifier}}
func @mixed_named_arguments(%a : i32, f32) -> () {
builtin.func @mixed_named_arguments(%a : i32, f32) -> () {
return
}
return
@ -32,7 +32,7 @@ func @func_op() {
func @func_op() {
// expected-error@+1 {{entry block must have 1 arguments to match function signature}}
func @mixed_named_arguments(f32) {
builtin.func @mixed_named_arguments(f32) {
^entry:
return
}
@ -43,7 +43,7 @@ func @func_op() {
func @func_op() {
// expected-error@+1 {{type of entry block argument #0('i32') must match the type of the corresponding argument in function signature('f32')}}
func @mixed_named_arguments(f32) {
builtin.func @mixed_named_arguments(f32) {
^entry(%arg : i32):
return
}

View File

@ -4,7 +4,7 @@
func @module_op() {
// expected-error@+1 {{Operations with a 'SymbolTable' must have exactly one block}}
module {
builtin.module {
^bb1:
"test.dummy"() : () -> ()
^bb2:
@ -17,7 +17,7 @@ func @module_op() {
func @module_op() {
// expected-error@+1 {{region should have no arguments}}
module {
builtin.module {
^bb1(%arg: i32):
}
return

View File

@ -70,7 +70,7 @@ func @affine_apply_wrong_result_count() {
func @unknown_custom_op() {
^bb0:
%i = crazyThing() {value = 0} : () -> index // expected-error {{custom op 'crazyThing' is unknown}}
%i = test.crazyThing() {value = 0} : () -> index // expected-error {{custom op 'test.crazyThing' is unknown}}
return
}

View File

@ -263,7 +263,7 @@ func @for_negative_stride() {
// -----
func @non_operation() {
asd // expected-error {{custom op 'asd' is unknown}}
test.asd // expected-error {{custom op 'test.asd' is unknown}}
}
// -----

View File

@ -1307,6 +1307,28 @@ func @pretty_names() {
return
}
// This tests the behavior of "default dialect":
// operations like `test.default_dialect` can define a default dialect
// used in nested region.
// CHECK-LABEL: func @default_dialect
func @default_dialect() {
test.default_dialect {
// The test dialect is the default in this region, the following two
// operations are parsed identically.
// CHECK-NOT: test.parse_integer_literal
parse_integer_literal : 5
// CHECK: parse_integer_literal : 6
test.parse_integer_literal : 6
// Verify that only an op prefix is stripped, not an attribute value for
// example.
// CHECK: "test.op_with_attr"() {test.attr = "test.value"} : () -> ()
"test.op_with_attr"() {test.attr = "test.value"} : () -> ()
"test.terminator"() : ()->()
}
return
}
// CHECK-LABEL: func @unreachable_dominance_violation_ok
func @unreachable_dominance_violation_ok() -> i1 {
// CHECK: [[VAL:%.*]] = constant false

View File

@ -506,7 +506,7 @@ func @failedHasDominanceScopeOutsideDominanceFreeScope() -> () {
// checked for dominance
func @illegalInsideDominanceFreeScope() -> () {
test.graph_region {
func @test() -> i1 {
builtin.func @test() -> i1 {
^bb1:
// expected-error @+1 {{operand #0 does not dominate this use}}
%2:3 = "bar"(%1) : (i64) -> (i1,i1,i1)
@ -525,7 +525,7 @@ func @illegalInsideDominanceFreeScope() -> () {
// checked for dominance
func @illegalCDFGInsideDominanceFreeScope() -> () {
test.graph_region {
func @test() -> i1 {
builtin.func @test() -> i1 {
^bb1:
// expected-error @+1 {{operand #0 does not dominate this use}}
%2:3 = "bar"(%1) : (i64) -> (i1,i1,i1)

View File

@ -82,7 +82,7 @@ func @f(%arg0: f32, %pred: i1) {
// CHECK-NEXT: return
func @f(%arg0: f32) {
func @g(%arg1: f32) {
builtin.func @g(%arg1: f32) {
%0 = "std.addf"(%arg1, %arg1) : (f32, f32) -> f32
return
}

View File

@ -412,7 +412,7 @@ func @write_only_alloca_fold(%v: f32) {
// CHECK-LABEL: func @dead_block_elim
func @dead_block_elim() {
// CHECK-NOT: ^bb
func @nested() {
builtin.func @nested() {
return
^bb1:

View File

@ -746,7 +746,7 @@ func @fold_rank_memref(%arg0 : memref<?x?xf32>) -> (index) {
func @nested_isolated_region() {
// CHECK-NEXT: func @isolated_op
// CHECK-NEXT: constant 2
func @isolated_op() {
builtin.func @isolated_op() {
%0 = constant 1 : i32
%2 = addi %0, %0 : i32
"foo.yield"(%2) : (i32) -> ()

View File

@ -229,7 +229,7 @@ func @nested_isolated() -> i32 {
%0 = constant 1 : i32
// CHECK-NEXT: @nested_func
func @nested_func() {
builtin.func @nested_func() {
// CHECK-NEXT: constant 1
%foo = constant 1 : i32
"foo.yield"(%foo) : (i32) -> ()

View File

@ -32,12 +32,12 @@ func @replace_non_root_illegal_op() {
// Test that children of recursively legal operations are ignored.
func @recursively_legal_invalid_op() {
/// Operation that is statically legal.
module attributes {test.recursively_legal} {
builtin.module attributes {test.recursively_legal} {
%ignored = "test.illegal_op_f"() : () -> (i32)
}
/// Operation that is dynamically legal, i.e. the function has a pattern
/// applied to legalize the argument type before it becomes recursively legal.
func @dynamic_func(%arg: i64) attributes {test.recursively_legal} {
builtin.func @dynamic_func(%arg: i64) attributes {test.recursively_legal} {
%ignored = "test.illegal_op_f"() : () -> (i32)
"test.return"() : () -> ()
}

View File

@ -626,7 +626,7 @@ def AttrSizedResultOp : TEST_Op<"attr_sized_results",
// pretty printed value name.
def StringAttrPrettyNameOp
: TEST_Op<"string_attr_pretty_name",
[DeclareOpInterfaceMethods<OpAsmOpInterface>]> {
[DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>]> {
let arguments = (ins StrArrayAttr:$names);
let results = (outs Variadic<I32>:$r);
@ -634,6 +634,20 @@ def StringAttrPrettyNameOp
let parser = [{ return ::parse$cppClass(parser, result); }];
}
// This is used to test the OpAsmOpInterface::getDefaultDialect() feature:
// operations nested in a region under this op will drop the "test." dialect
// prefix.
def DefaultDialectOp : TEST_Op<"default_dialect", [OpAsmOpInterface]> {
let regions = (region AnyRegion:$body);
let extraClassDeclaration = [{
static ::llvm::StringRef getDefaultDialect() {
return "test";
}
void getAsmResultNames(::llvm::function_ref<void(::mlir::Value, ::llvm::StringRef)> setNameFn) {}
}];
let assemblyFormat = "regions attr-dict-with-keyword";
}
//===----------------------------------------------------------------------===//
// Test Locations
//===----------------------------------------------------------------------===//