[mlir][spirv] Use spv.entry_point_abi in GPU to SPIR-V conversions

We have spv.entry_point_abi for specifying the local workgroup size.
It should be decorated onto input gpu.func ops to drive the SPIR-V
CodeGen to generate the proper SPIR-V module execution mode. Compared
to using command-line options for specifying the configuration, using
attributes also has the benefits that 1) we are now able to use
different local workgroup sizes for different entry points and 2) the
tests contain the configuration directly.

Differential Revision: https://reviews.llvm.org/D74012
This commit is contained in:
Lei Zhang 2020-02-04 20:58:10 -05:00
parent 9559834a5c
commit 50aeeed8a2
11 changed files with 105 additions and 98 deletions

View File

@ -17,13 +17,13 @@
namespace mlir {
class SPIRVTypeConverter;
/// Appends to a pattern list additional patterns for translating GPU Ops to
/// SPIR-V ops. Needs the workgroup size as input since SPIR-V/Vulkan requires
/// the workgroup size to be statically specified.
/// SPIR-V ops. For a gpu.func to be converted, it should have a
/// spv.entry_point_abi attribute.
void populateGPUToSPIRVPatterns(MLIRContext *context,
SPIRVTypeConverter &typeConverter,
OwningRewritePatternList &patterns,
ArrayRef<int64_t> workGroupSize);
OwningRewritePatternList &patterns);
} // namespace mlir
#endif // MLIR_CONVERSION_GPUTOSPIRV_CONVERTGPUTOSPIRV_H

View File

@ -22,10 +22,9 @@ namespace mlir {
class ModuleOp;
template <typename T> class OpPassBase;
/// Pass to convert GPU Ops to SPIR-V ops. Needs the workgroup size as input
/// since SPIR-V/Vulkan requires the workgroup size to be statically specified.
std::unique_ptr<OpPassBase<ModuleOp>>
createConvertGPUToSPIRVPass(ArrayRef<int64_t> workGroupSize);
/// Pass to convert GPU Ops to SPIR-V ops. For a gpu.func to be converted, it
/// should have a spv.entry_point_abi attribute.
std::unique_ptr<OpPassBase<ModuleOp>> createConvertGPUToSPIRVPass();
} // namespace mlir
#endif // MLIR_CONVERSION_GPUTOSPIRV_CONVERTGPUTOSPIRVPASS_H

View File

@ -112,6 +112,15 @@ StringRef getEntryPointABIAttrName();
EntryPointABIAttr getEntryPointABIAttr(ArrayRef<int32_t> localSize,
MLIRContext *context);
/// Queries the entry point ABI on the nearest function-like op containing the
/// given `op`. Returns null attribute if not found.
EntryPointABIAttr lookupEntryPointABI(Operation *op);
/// Queries the local workgroup size from entry point ABI on the nearest
/// function-like op containing the given `op`. Returns null attribute if not
/// found.
DenseIntElementsAttr lookupLocalWorkGroupSize(Operation *op);
/// Returns a default resource limits attribute that uses numbers from
/// "Table 46. Required Limits" of the Vulkan spec.
ResourceLimitsAttr getDefaultResourceLimits(MLIRContext *context);
@ -128,11 +137,6 @@ TargetEnvAttr getDefaultTargetEnv(MLIRContext *context);
/// extensions) if not provided.
TargetEnvAttr lookupTargetEnvOrDefault(Operation *op);
/// Queries the local workgroup size from entry point ABI on the nearest
/// function-like op containing the given `op`. Returns null attribute if not
/// found.
DenseIntElementsAttr lookupLocalWorkGroupSize(Operation *op);
} // namespace spirv
} // namespace mlir

View File

@ -82,16 +82,9 @@ public:
};
/// Pattern to convert a kernel function in GPU dialect within a spv.module.
class KernelFnConversion final : public SPIRVOpLowering<gpu::GPUFuncOp> {
class GPUFuncOpConversion final : public SPIRVOpLowering<gpu::GPUFuncOp> {
public:
KernelFnConversion(MLIRContext *context, SPIRVTypeConverter &converter,
ArrayRef<int64_t> workGroupSize,
PatternBenefit benefit = 1)
: SPIRVOpLowering<gpu::GPUFuncOp>(context, converter, benefit) {
auto config = workGroupSize.take_front(3);
workGroupSizeAsInt32.assign(config.begin(), config.end());
workGroupSizeAsInt32.resize(3, 1);
}
using SPIRVOpLowering<gpu::GPUFuncOp>::SPIRVOpLowering;
PatternMatchResult
matchAndRewrite(gpu::GPUFuncOp funcOp, ArrayRef<Value> operands,
@ -352,13 +345,11 @@ lowerAsEntryFunction(gpu::GPUFuncOp funcOp, SPIRVTypeConverter &typeConverter,
return newFuncOp;
}
PatternMatchResult
KernelFnConversion::matchAndRewrite(gpu::GPUFuncOp funcOp,
ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const {
if (!gpu::GPUDialect::isKernel(funcOp)) {
PatternMatchResult GPUFuncOpConversion::matchAndRewrite(
gpu::GPUFuncOp funcOp, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const {
if (!gpu::GPUDialect::isKernel(funcOp))
return matchFailure();
}
SmallVector<spirv::InterfaceVarABIAttr, 4> argABI;
for (auto argNum : llvm::seq<unsigned>(0, funcOp.getNumArguments())) {
@ -366,14 +357,15 @@ KernelFnConversion::matchAndRewrite(gpu::GPUFuncOp funcOp,
0, argNum, spirv::StorageClass::StorageBuffer, rewriter.getContext()));
}
auto context = rewriter.getContext();
auto entryPointAttr =
spirv::getEntryPointABIAttr(workGroupSizeAsInt32, context);
FuncOp newFuncOp = lowerAsEntryFunction(funcOp, typeConverter, rewriter,
entryPointAttr, argABI);
if (!newFuncOp) {
auto entryPointAttr = spirv::lookupEntryPointABI(funcOp);
if (!entryPointAttr) {
funcOp.emitRemark("match failure: missing 'spv.entry_point_abi' attribute");
return matchFailure();
}
FuncOp newFuncOp = lowerAsEntryFunction(funcOp, typeConverter, rewriter,
entryPointAttr, argABI);
if (!newFuncOp)
return matchFailure();
newFuncOp.removeAttr(Identifier::get(gpu::GPUDialect::getKernelFuncAttrName(),
rewriter.getContext()));
return matchSuccess();
@ -429,13 +421,11 @@ namespace {
void mlir::populateGPUToSPIRVPatterns(MLIRContext *context,
SPIRVTypeConverter &typeConverter,
OwningRewritePatternList &patterns,
ArrayRef<int64_t> workGroupSize) {
OwningRewritePatternList &patterns) {
populateWithGenerated(context, &patterns);
patterns.insert<KernelFnConversion>(context, typeConverter, workGroupSize);
patterns.insert<
ForOpConversion, GPUModuleConversion, GPUReturnOpConversion,
IfOpConversion,
ForOpConversion, GPUFuncOpConversion, GPUModuleConversion,
GPUReturnOpConversion, IfOpConversion,
LaunchConfigConversion<gpu::BlockIdOp, spirv::BuiltIn::WorkgroupId>,
LaunchConfigConversion<gpu::GridDimOp, spirv::BuiltIn::NumWorkgroups>,
LaunchConfigConversion<gpu::ThreadIdOp,

View File

@ -24,33 +24,17 @@
using namespace mlir;
namespace {
/// Pass to lower GPU Dialect to SPIR-V. The pass only converts those functions
/// that have the "gpu.kernel" attribute, i.e. those functions that are
/// referenced in gpu::LaunchKernelOp operations. For each such function
/// Pass to lower GPU Dialect to SPIR-V. The pass only converts the gpu.func ops
/// inside gpu.module ops, i.e., the functions that are referenced in
/// gpu.launch_func ops. For each such function
///
/// 1) Create a spirv::ModuleOp, and clone the function into spirv::ModuleOp
/// (the original function is still needed by the gpu::LaunchKernelOp, so cannot
/// replace it).
///
/// 2) Lower the body of the spirv::ModuleOp.
class GPUToSPIRVPass : public ModulePass<GPUToSPIRVPass> {
public:
GPUToSPIRVPass() = default;
GPUToSPIRVPass(const GPUToSPIRVPass &) {}
GPUToSPIRVPass(ArrayRef<int64_t> workGroupSize) {
this->workGroupSize = workGroupSize;
}
struct GPUToSPIRVPass : public ModulePass<GPUToSPIRVPass> {
void runOnModule() override;
private:
/// Command line option to specify the workgroup size.
ListOption<int64_t> workGroupSize{
*this, "workgroup-size",
llvm::cl::desc(
"Workgroup Sizes in the SPIR-V module for x, followed by y, followed "
"by z dimension of the dispatch (others will be ignored)"),
llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated};
};
} // namespace
@ -70,7 +54,7 @@ void GPUToSPIRVPass::runOnModule() {
SPIRVTypeConverter typeConverter;
OwningRewritePatternList patterns;
populateGPUToSPIRVPatterns(context, typeConverter, patterns, workGroupSize);
populateGPUToSPIRVPatterns(context, typeConverter, patterns);
populateStandardToSPIRVPatterns(context, typeConverter, patterns);
std::unique_ptr<ConversionTarget> target = spirv::SPIRVConversionTarget::get(
@ -84,9 +68,8 @@ void GPUToSPIRVPass::runOnModule() {
}
}
std::unique_ptr<OpPassBase<ModuleOp>>
mlir::createConvertGPUToSPIRVPass(ArrayRef<int64_t> workGroupSize) {
return std::make_unique<GPUToSPIRVPass>(workGroupSize);
std::unique_ptr<OpPassBase<ModuleOp>> mlir::createConvertGPUToSPIRVPass() {
return std::make_unique<GPUToSPIRVPass>();
}
static PassRegistration<GPUToSPIRVPass>

View File

@ -158,6 +158,26 @@ spirv::getEntryPointABIAttr(ArrayRef<int32_t> localSize, MLIRContext *context) {
context);
}
spirv::EntryPointABIAttr spirv::lookupEntryPointABI(Operation *op) {
while (op && !op->hasTrait<OpTrait::FunctionLike>())
op = op->getParentOp();
if (!op)
return {};
if (auto attr = op->getAttrOfType<spirv::EntryPointABIAttr>(
spirv::getEntryPointABIAttrName()))
return attr;
return {};
}
DenseIntElementsAttr spirv::lookupLocalWorkGroupSize(Operation *op) {
if (auto entryPoint = spirv::lookupEntryPointABI(op))
return entryPoint.local_size();
return {};
}
spirv::ResourceLimitsAttr
spirv::getDefaultResourceLimits(MLIRContext *context) {
auto i32Type = IntegerType::get(32, context);
@ -187,16 +207,3 @@ spirv::TargetEnvAttr spirv::lookupTargetEnvOrDefault(Operation *op) {
return attr;
return getDefaultTargetEnv(op->getContext());
}
DenseIntElementsAttr spirv::lookupLocalWorkGroupSize(Operation *op) {
while (op && !op->hasTrait<OpTrait::FunctionLike>())
op = op->getParentOp();
if (!op)
return {};
if (auto attr = op->getAttrOfType<spirv::EntryPointABIAttr>(
spirv::getEntryPointABIAttrName()))
return attr.local_size();
return {};
}

View File

@ -1,4 +1,4 @@
// RUN: mlir-opt -split-input-file -pass-pipeline='convert-gpu-to-spirv{workgroup-size=32,4}' %s -o - | FileCheck %s
// RUN: mlir-opt -split-input-file -convert-gpu-to-spirv %s -o - | FileCheck %s
module attributes {gpu.container_module} {
func @builtin() {
@ -11,7 +11,7 @@ module attributes {gpu.container_module} {
// CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId")
gpu.module @kernels {
gpu.func @builtin_workgroup_id_x()
attributes {gpu.kernel} {
attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
@ -34,7 +34,7 @@ module attributes {gpu.container_module} {
// CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId")
gpu.module @kernels {
gpu.func @builtin_workgroup_id_y()
attributes {gpu.kernel} {
attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}1 : i32{{\]}}
@ -57,7 +57,7 @@ module attributes {gpu.container_module} {
// CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId")
gpu.module @kernels {
gpu.func @builtin_workgroup_id_z()
attributes {gpu.kernel} {
attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}2 : i32{{\]}}
@ -79,8 +79,11 @@ module attributes {gpu.container_module} {
// CHECK-LABEL: spv.module "Logical" "GLSL450"
gpu.module @kernels {
gpu.func @builtin_workgroup_size_x()
attributes {gpu.kernel} {
// The constant value is obtained fomr the command line option above.
attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[32, 1, 1]>: vector<3xi32>}} {
// The constant value is obtained from the spv.entry_point_abi.
// Note that this ignores the workgroup size specification in gpu.launch.
// We may want to define gpu.workgroup_size and convert it to the entry
// point ABI we want here.
// CHECK: spv.constant 32 : i32
%0 = "gpu.block_dim"() {dimension = "x"} : () -> index
gpu.return
@ -100,8 +103,8 @@ module attributes {gpu.container_module} {
// CHECK-LABEL: spv.module "Logical" "GLSL450"
gpu.module @kernels {
gpu.func @builtin_workgroup_size_y()
attributes {gpu.kernel} {
// The constant value is obtained fomr the command line option above.
attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[32, 4, 1]>: vector<3xi32>}} {
// The constant value is obtained from the spv.entry_point_abi.
// CHECK: spv.constant 4 : i32
%0 = "gpu.block_dim"() {dimension = "y"} : () -> index
gpu.return
@ -121,8 +124,8 @@ module attributes {gpu.container_module} {
// CHECK-LABEL: spv.module "Logical" "GLSL450"
gpu.module @kernels {
gpu.func @builtin_workgroup_size_z()
attributes {gpu.kernel} {
// The constant value is obtained fomr the command line option above (1 is default).
attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[32, 4, 1]>: vector<3xi32>}} {
// The constant value is obtained from the spv.entry_point_abi.
// CHECK: spv.constant 1 : i32
%0 = "gpu.block_dim"() {dimension = "z"} : () -> index
gpu.return
@ -143,7 +146,7 @@ module attributes {gpu.container_module} {
// CHECK: spv.globalVariable [[LOCALINVOCATIONID:@.*]] built_in("LocalInvocationId")
gpu.module @kernels {
gpu.func @builtin_local_id_x()
attributes {gpu.kernel} {
attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[LOCALINVOCATIONID]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
@ -166,7 +169,7 @@ module attributes {gpu.container_module} {
// CHECK: spv.globalVariable [[NUMWORKGROUPS:@.*]] built_in("NumWorkgroups")
gpu.module @kernels {
gpu.func @builtin_num_workgroups_x()
attributes {gpu.kernel} {
attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[NUMWORKGROUPS]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}

View File

@ -10,7 +10,7 @@ module attributes {gpu.container_module} {
gpu.module @kernels {
// CHECK-LABEL: @kernel_simple_selection
gpu.func @kernel_simple_selection(%arg2 : memref<10xf32>, %arg3 : i1)
attributes {gpu.kernel} {
attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
%value = constant 0.0 : f32
%i = constant 0 : index
@ -31,7 +31,7 @@ module attributes {gpu.container_module} {
// CHECK-LABEL: @kernel_nested_selection
gpu.func @kernel_nested_selection(%arg3 : memref<10xf32>, %arg4 : memref<10xf32>, %arg5 : i1, %arg6 : i1)
attributes {gpu.kernel} {
attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
%i = constant 0 : index
%j = constant 9 : index

View File

@ -29,7 +29,7 @@ module attributes {gpu.container_module} {
// CHECK-SAME: [[ARG5:%.*]]: i32 {spv.interface_var_abi = {binding = 5 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}}
// CHECK-SAME: [[ARG6:%.*]]: i32 {spv.interface_var_abi = {binding = 6 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}}
gpu.func @load_store_kernel(%arg0: memref<12x4xf32>, %arg1: memref<12x4xf32>, %arg2: memref<12x4xf32>, %arg3: index, %arg4: index, %arg5: index, %arg6: index)
attributes {gpu.kernel} {
attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
// CHECK: [[ADDRESSWORKGROUPID:%.*]] = spv._address_of [[WORKGROUPIDVAR]]
// CHECK: [[WORKGROUPID:%.*]] = spv.Load "Input" [[ADDRESSWORKGROUPID]]
// CHECK: [[WORKGROUPIDX:%.*]] = spv.CompositeExtract [[WORKGROUPID]]{{\[}}0 : i32{{\]}}

View File

@ -9,7 +9,7 @@ module attributes {gpu.container_module} {
gpu.module @kernels {
gpu.func @loop_kernel(%arg2 : memref<10xf32>, %arg3 : memref<10xf32>)
attributes {gpu.kernel} {
attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} {
// CHECK: [[LB:%.*]] = spv.constant 4 : i32
%lb = constant 4 : index
// CHECK: [[UB:%.*]] = spv.constant 42 : i32

View File

@ -1,25 +1,46 @@
// RUN: mlir-opt -pass-pipeline='convert-gpu-to-spirv{workgroup-size=32,4}' %s -o - | FileCheck %s
// RUN: mlir-opt -split-input-file -convert-gpu-to-spirv -verify-diagnostics %s -o - | FileCheck %s
module attributes {gpu.container_module} {
gpu.module @kernels {
// CHECK: spv.module "Logical" "GLSL450" {
// CHECK-LABEL: func @kernel_1
// CHECK-LABEL: func @basic_module_structure
// CHECK-SAME: {{%.*}}: f32 {spv.interface_var_abi = {binding = 0 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}}
// CHECK-SAME: {{%.*}}: !spv.ptr<!spv.struct<!spv.array<12 x f32 [4]> [0]>, StorageBuffer> {spv.interface_var_abi = {binding = 1 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}}
// CHECK-SAME: spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}
gpu.func @kernel_1(%arg0 : f32, %arg1 : memref<12xf32>) attributes {gpu.kernel} {
gpu.func @basic_module_structure(%arg0 : f32, %arg1 : memref<12xf32>)
attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[32, 4, 1]>: vector<3xi32>}} {
// CHECK: spv.Return
gpu.return
}
// CHECK: attributes {capabilities = ["Shader"], extensions = ["SPV_KHR_storage_buffer_storage_class"]}
}
func @foo() {
func @main() {
%0 = "op"() : () -> (f32)
%1 = "op"() : () -> (memref<12xf32>)
%cst = constant 1 : index
"gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = "kernel_1", kernel_module = @kernels }
"gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = "basic_module_structure", kernel_module = @kernels }
: (index, index, index, index, index, index, f32, memref<12xf32>) -> ()
return
}
}
// -----
module attributes {gpu.container_module} {
gpu.module @kernels {
// expected-error @below {{failed to legalize operation 'gpu.func'}}
// expected-remark @below {{match failure: missing 'spv.entry_point_abi' attribute}}
gpu.func @missing_entry_point_abi(%arg0 : f32, %arg1 : memref<12xf32>) attributes {gpu.kernel} {
gpu.return
}
}
func @main() {
%0 = "op"() : () -> (f32)
%1 = "op"() : () -> (memref<12xf32>)
%cst = constant 1 : index
"gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = "missing_entry_point_abi", kernel_module = @kernels }
: (index, index, index, index, index, index, f32, memref<12xf32>) -> ()
return
}