From 50aeeed8a2dd68d2ead2a5337260e21e3d098764 Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Tue, 4 Feb 2020 20:58:10 -0500 Subject: [PATCH] [mlir][spirv] Use spv.entry_point_abi in GPU to SPIR-V conversions We have spv.entry_point_abi for specifying the local workgroup size. It should be decorated onto input gpu.func ops to drive the SPIR-V CodeGen to generate the proper SPIR-V module execution mode. Compared to using command-line options for specifying the configuration, using attributes also has the benefits that 1) we are now able to use different local workgroup for different entry points and 2) the tests contains the configuration directly. Differential Revision: https://reviews.llvm.org/D74012 --- .../Conversion/GPUToSPIRV/ConvertGPUToSPIRV.h | 8 ++-- .../GPUToSPIRV/ConvertGPUToSPIRVPass.h | 7 ++-- .../include/mlir/Dialect/SPIRV/TargetAndABI.h | 14 ++++--- .../GPUToSPIRV/ConvertGPUToSPIRV.cpp | 42 +++++++------------ .../GPUToSPIRV/ConvertGPUToSPIRVPass.cpp | 31 ++++---------- mlir/lib/Dialect/SPIRV/TargetAndABI.cpp | 33 +++++++++------ mlir/test/Conversion/GPUToSPIRV/builtins.mlir | 27 ++++++------ mlir/test/Conversion/GPUToSPIRV/if.mlir | 4 +- .../Conversion/GPUToSPIRV/load-store.mlir | 2 +- mlir/test/Conversion/GPUToSPIRV/loop.mlir | 2 +- mlir/test/Conversion/GPUToSPIRV/simple.mlir | 33 ++++++++++++--- 11 files changed, 105 insertions(+), 98 deletions(-) diff --git a/mlir/include/mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.h b/mlir/include/mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.h index 8b5a0de76962..8bdb228c9ccc 100644 --- a/mlir/include/mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.h +++ b/mlir/include/mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.h @@ -17,13 +17,13 @@ namespace mlir { class SPIRVTypeConverter; + /// Appends to a pattern list additional patterns for translating GPU Ops to -/// SPIR-V ops. Needs the workgroup size as input since SPIR-V/Vulkan requires -/// the workgroup size to be statically specified. +/// SPIR-V ops. For a gpu.func to be converted, it should have a +/// spv.entry_point_abi attribute. void populateGPUToSPIRVPatterns(MLIRContext *context, SPIRVTypeConverter &typeConverter, - OwningRewritePatternList &patterns, - ArrayRef workGroupSize); + OwningRewritePatternList &patterns); } // namespace mlir #endif // MLIR_CONVERSION_GPUTOSPIRV_CONVERTGPUTOSPIRV_H diff --git a/mlir/include/mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.h b/mlir/include/mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.h index c2a6bcf52b5c..cf3246a55114 100644 --- a/mlir/include/mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.h +++ b/mlir/include/mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.h @@ -22,10 +22,9 @@ namespace mlir { class ModuleOp; template class OpPassBase; -/// Pass to convert GPU Ops to SPIR-V ops. Needs the workgroup size as input -/// since SPIR-V/Vulkan requires the workgroup size to be statically specified. -std::unique_ptr> -createConvertGPUToSPIRVPass(ArrayRef workGroupSize); +/// Pass to convert GPU Ops to SPIR-V ops. For a gpu.func to be converted, it +/// should have a spv.entry_point_abi attribute. +std::unique_ptr> createConvertGPUToSPIRVPass(); } // namespace mlir #endif // MLIR_CONVERSION_GPUTOSPIRV_CONVERTGPUTOSPIRVPASS_H diff --git a/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.h b/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.h index 74fb834f1325..073e0f509cba 100644 --- a/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.h +++ b/mlir/include/mlir/Dialect/SPIRV/TargetAndABI.h @@ -112,6 +112,15 @@ StringRef getEntryPointABIAttrName(); EntryPointABIAttr getEntryPointABIAttr(ArrayRef localSize, MLIRContext *context); +/// Queries the entry point ABI on the nearest function-like op containing the +/// given `op`. Returns null attribute if not found. +EntryPointABIAttr lookupEntryPointABI(Operation *op); + +/// Queries the local workgroup size from entry point ABI on the nearest +/// function-like op containing the given `op`. Returns null attribute if not +/// found. +DenseIntElementsAttr lookupLocalWorkGroupSize(Operation *op); + /// Returns a default resource limits attribute that uses numbers from /// "Table 46. Required Limits" of the Vulkan spec. ResourceLimitsAttr getDefaultResourceLimits(MLIRContext *context); @@ -128,11 +137,6 @@ TargetEnvAttr getDefaultTargetEnv(MLIRContext *context); /// extensions) if not provided. TargetEnvAttr lookupTargetEnvOrDefault(Operation *op); -/// Queries the local workgroup size from entry point ABI on the nearest -/// function-like op containing the given `op`. Returns null attribute if not -/// found. -DenseIntElementsAttr lookupLocalWorkGroupSize(Operation *op); - } // namespace spirv } // namespace mlir diff --git a/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp index 6a5da3f4e38a..fd33e4cd85c3 100644 --- a/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp +++ b/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp @@ -82,16 +82,9 @@ public: }; /// Pattern to convert a kernel function in GPU dialect within a spv.module. -class KernelFnConversion final : public SPIRVOpLowering { +class GPUFuncOpConversion final : public SPIRVOpLowering { public: - KernelFnConversion(MLIRContext *context, SPIRVTypeConverter &converter, - ArrayRef workGroupSize, - PatternBenefit benefit = 1) - : SPIRVOpLowering(context, converter, benefit) { - auto config = workGroupSize.take_front(3); - workGroupSizeAsInt32.assign(config.begin(), config.end()); - workGroupSizeAsInt32.resize(3, 1); - } + using SPIRVOpLowering::SPIRVOpLowering; PatternMatchResult matchAndRewrite(gpu::GPUFuncOp funcOp, ArrayRef operands, @@ -352,13 +345,11 @@ lowerAsEntryFunction(gpu::GPUFuncOp funcOp, SPIRVTypeConverter &typeConverter, return newFuncOp; } -PatternMatchResult -KernelFnConversion::matchAndRewrite(gpu::GPUFuncOp funcOp, - ArrayRef operands, - ConversionPatternRewriter &rewriter) const { - if (!gpu::GPUDialect::isKernel(funcOp)) { +PatternMatchResult GPUFuncOpConversion::matchAndRewrite( + gpu::GPUFuncOp funcOp, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + if (!gpu::GPUDialect::isKernel(funcOp)) return matchFailure(); - } SmallVector argABI; for (auto argNum : llvm::seq(0, funcOp.getNumArguments())) { @@ -366,14 +357,15 @@ KernelFnConversion::matchAndRewrite(gpu::GPUFuncOp funcOp, 0, argNum, spirv::StorageClass::StorageBuffer, rewriter.getContext())); } - auto context = rewriter.getContext(); - auto entryPointAttr = - spirv::getEntryPointABIAttr(workGroupSizeAsInt32, context); - FuncOp newFuncOp = lowerAsEntryFunction(funcOp, typeConverter, rewriter, - entryPointAttr, argABI); - if (!newFuncOp) { + auto entryPointAttr = spirv::lookupEntryPointABI(funcOp); + if (!entryPointAttr) { + funcOp.emitRemark("match failure: missing 'spv.entry_point_abi' attribute"); return matchFailure(); } + FuncOp newFuncOp = lowerAsEntryFunction(funcOp, typeConverter, rewriter, + entryPointAttr, argABI); + if (!newFuncOp) + return matchFailure(); newFuncOp.removeAttr(Identifier::get(gpu::GPUDialect::getKernelFuncAttrName(), rewriter.getContext())); return matchSuccess(); @@ -429,13 +421,11 @@ namespace { void mlir::populateGPUToSPIRVPatterns(MLIRContext *context, SPIRVTypeConverter &typeConverter, - OwningRewritePatternList &patterns, - ArrayRef workGroupSize) { + OwningRewritePatternList &patterns) { populateWithGenerated(context, &patterns); - patterns.insert(context, typeConverter, workGroupSize); patterns.insert< - ForOpConversion, GPUModuleConversion, GPUReturnOpConversion, - IfOpConversion, + ForOpConversion, GPUFuncOpConversion, GPUModuleConversion, + GPUReturnOpConversion, IfOpConversion, LaunchConfigConversion, LaunchConfigConversion, LaunchConfigConversion { -public: - GPUToSPIRVPass() = default; - GPUToSPIRVPass(const GPUToSPIRVPass &) {} - GPUToSPIRVPass(ArrayRef workGroupSize) { - this->workGroupSize = workGroupSize; - } - +struct GPUToSPIRVPass : public ModulePass { void runOnModule() override; - -private: - /// Command line option to specify the workgroup size. - ListOption workGroupSize{ - *this, "workgroup-size", - llvm::cl::desc( - "Workgroup Sizes in the SPIR-V module for x, followed by y, followed " - "by z dimension of the dispatch (others will be ignored)"), - llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated}; }; } // namespace @@ -70,7 +54,7 @@ void GPUToSPIRVPass::runOnModule() { SPIRVTypeConverter typeConverter; OwningRewritePatternList patterns; - populateGPUToSPIRVPatterns(context, typeConverter, patterns, workGroupSize); + populateGPUToSPIRVPatterns(context, typeConverter, patterns); populateStandardToSPIRVPatterns(context, typeConverter, patterns); std::unique_ptr target = spirv::SPIRVConversionTarget::get( @@ -84,9 +68,8 @@ void GPUToSPIRVPass::runOnModule() { } } -std::unique_ptr> -mlir::createConvertGPUToSPIRVPass(ArrayRef workGroupSize) { - return std::make_unique(workGroupSize); +std::unique_ptr> mlir::createConvertGPUToSPIRVPass() { + return std::make_unique(); } static PassRegistration diff --git a/mlir/lib/Dialect/SPIRV/TargetAndABI.cpp b/mlir/lib/Dialect/SPIRV/TargetAndABI.cpp index 28e984128b44..fbb8a93956d1 100644 --- a/mlir/lib/Dialect/SPIRV/TargetAndABI.cpp +++ b/mlir/lib/Dialect/SPIRV/TargetAndABI.cpp @@ -158,6 +158,26 @@ spirv::getEntryPointABIAttr(ArrayRef localSize, MLIRContext *context) { context); } +spirv::EntryPointABIAttr spirv::lookupEntryPointABI(Operation *op) { + while (op && !op->hasTrait()) + op = op->getParentOp(); + if (!op) + return {}; + + if (auto attr = op->getAttrOfType( + spirv::getEntryPointABIAttrName())) + return attr; + + return {}; +} + +DenseIntElementsAttr spirv::lookupLocalWorkGroupSize(Operation *op) { + if (auto entryPoint = spirv::lookupEntryPointABI(op)) + return entryPoint.local_size(); + + return {}; +} + spirv::ResourceLimitsAttr spirv::getDefaultResourceLimits(MLIRContext *context) { auto i32Type = IntegerType::get(32, context); @@ -187,16 +207,3 @@ spirv::TargetEnvAttr spirv::lookupTargetEnvOrDefault(Operation *op) { return attr; return getDefaultTargetEnv(op->getContext()); } - -DenseIntElementsAttr spirv::lookupLocalWorkGroupSize(Operation *op) { - while (op && !op->hasTrait()) - op = op->getParentOp(); - if (!op) - return {}; - - if (auto attr = op->getAttrOfType( - spirv::getEntryPointABIAttrName())) - return attr.local_size(); - - return {}; -} diff --git a/mlir/test/Conversion/GPUToSPIRV/builtins.mlir b/mlir/test/Conversion/GPUToSPIRV/builtins.mlir index 6df86d2be56f..a3abd089d5af 100644 --- a/mlir/test/Conversion/GPUToSPIRV/builtins.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/builtins.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -split-input-file -pass-pipeline='convert-gpu-to-spirv{workgroup-size=32,4}' %s -o - | FileCheck %s +// RUN: mlir-opt -split-input-file -convert-gpu-to-spirv %s -o - | FileCheck %s module attributes {gpu.container_module} { func @builtin() { @@ -11,7 +11,7 @@ module attributes {gpu.container_module} { // CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId") gpu.module @kernels { gpu.func @builtin_workgroup_id_x() - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { // CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]] // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]] // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}} @@ -34,7 +34,7 @@ module attributes {gpu.container_module} { // CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId") gpu.module @kernels { gpu.func @builtin_workgroup_id_y() - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { // CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]] // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]] // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}1 : i32{{\]}} @@ -57,7 +57,7 @@ module attributes {gpu.container_module} { // CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId") gpu.module @kernels { gpu.func @builtin_workgroup_id_z() - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { // CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]] // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]] // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}2 : i32{{\]}} @@ -79,8 +79,11 @@ module attributes {gpu.container_module} { // CHECK-LABEL: spv.module "Logical" "GLSL450" gpu.module @kernels { gpu.func @builtin_workgroup_size_x() - attributes {gpu.kernel} { - // The constant value is obtained fomr the command line option above. + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[32, 1, 1]>: vector<3xi32>}} { + // The constant value is obtained from the spv.entry_point_abi. + // Note that this ignores the workgroup size specification in gpu.launch. + // We may want to define gpu.workgroup_size and convert it to the entry + // point ABI we want here. // CHECK: spv.constant 32 : i32 %0 = "gpu.block_dim"() {dimension = "x"} : () -> index gpu.return @@ -100,8 +103,8 @@ module attributes {gpu.container_module} { // CHECK-LABEL: spv.module "Logical" "GLSL450" gpu.module @kernels { gpu.func @builtin_workgroup_size_y() - attributes {gpu.kernel} { - // The constant value is obtained fomr the command line option above. + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[32, 4, 1]>: vector<3xi32>}} { + // The constant value is obtained from the spv.entry_point_abi. // CHECK: spv.constant 4 : i32 %0 = "gpu.block_dim"() {dimension = "y"} : () -> index gpu.return @@ -121,8 +124,8 @@ module attributes {gpu.container_module} { // CHECK-LABEL: spv.module "Logical" "GLSL450" gpu.module @kernels { gpu.func @builtin_workgroup_size_z() - attributes {gpu.kernel} { - // The constant value is obtained fomr the command line option above (1 is default). + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[32, 4, 1]>: vector<3xi32>}} { + // The constant value is obtained from the spv.entry_point_abi. // CHECK: spv.constant 1 : i32 %0 = "gpu.block_dim"() {dimension = "z"} : () -> index gpu.return @@ -143,7 +146,7 @@ module attributes {gpu.container_module} { // CHECK: spv.globalVariable [[LOCALINVOCATIONID:@.*]] built_in("LocalInvocationId") gpu.module @kernels { gpu.func @builtin_local_id_x() - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { // CHECK: [[ADDRESS:%.*]] = spv._address_of [[LOCALINVOCATIONID]] // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]] // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}} @@ -166,7 +169,7 @@ module attributes {gpu.container_module} { // CHECK: spv.globalVariable [[NUMWORKGROUPS:@.*]] built_in("NumWorkgroups") gpu.module @kernels { gpu.func @builtin_num_workgroups_x() - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { // CHECK: [[ADDRESS:%.*]] = spv._address_of [[NUMWORKGROUPS]] // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]] // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}} diff --git a/mlir/test/Conversion/GPUToSPIRV/if.mlir b/mlir/test/Conversion/GPUToSPIRV/if.mlir index 7919c13b4a50..1585c53116c5 100644 --- a/mlir/test/Conversion/GPUToSPIRV/if.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/if.mlir @@ -10,7 +10,7 @@ module attributes {gpu.container_module} { gpu.module @kernels { // CHECK-LABEL: @kernel_simple_selection gpu.func @kernel_simple_selection(%arg2 : memref<10xf32>, %arg3 : i1) - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { %value = constant 0.0 : f32 %i = constant 0 : index @@ -31,7 +31,7 @@ module attributes {gpu.container_module} { // CHECK-LABEL: @kernel_nested_selection gpu.func @kernel_nested_selection(%arg3 : memref<10xf32>, %arg4 : memref<10xf32>, %arg5 : i1, %arg6 : i1) - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { %i = constant 0 : index %j = constant 9 : index diff --git a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir index 919c90981573..7340001bd216 100644 --- a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir @@ -29,7 +29,7 @@ module attributes {gpu.container_module} { // CHECK-SAME: [[ARG5:%.*]]: i32 {spv.interface_var_abi = {binding = 5 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}} // CHECK-SAME: [[ARG6:%.*]]: i32 {spv.interface_var_abi = {binding = 6 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}} gpu.func @load_store_kernel(%arg0: memref<12x4xf32>, %arg1: memref<12x4xf32>, %arg2: memref<12x4xf32>, %arg3: index, %arg4: index, %arg5: index, %arg6: index) - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { // CHECK: [[ADDRESSWORKGROUPID:%.*]] = spv._address_of [[WORKGROUPIDVAR]] // CHECK: [[WORKGROUPID:%.*]] = spv.Load "Input" [[ADDRESSWORKGROUPID]] // CHECK: [[WORKGROUPIDX:%.*]] = spv.CompositeExtract [[WORKGROUPID]]{{\[}}0 : i32{{\]}} diff --git a/mlir/test/Conversion/GPUToSPIRV/loop.mlir b/mlir/test/Conversion/GPUToSPIRV/loop.mlir index bd97315a2ea4..7044d5474d3c 100644 --- a/mlir/test/Conversion/GPUToSPIRV/loop.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/loop.mlir @@ -9,7 +9,7 @@ module attributes {gpu.container_module} { gpu.module @kernels { gpu.func @loop_kernel(%arg2 : memref<10xf32>, %arg3 : memref<10xf32>) - attributes {gpu.kernel} { + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[16, 1, 1]>: vector<3xi32>}} { // CHECK: [[LB:%.*]] = spv.constant 4 : i32 %lb = constant 4 : index // CHECK: [[UB:%.*]] = spv.constant 42 : i32 diff --git a/mlir/test/Conversion/GPUToSPIRV/simple.mlir b/mlir/test/Conversion/GPUToSPIRV/simple.mlir index cca5eb9d0b49..400ab487f875 100644 --- a/mlir/test/Conversion/GPUToSPIRV/simple.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/simple.mlir @@ -1,25 +1,46 @@ -// RUN: mlir-opt -pass-pipeline='convert-gpu-to-spirv{workgroup-size=32,4}' %s -o - | FileCheck %s +// RUN: mlir-opt -split-input-file -convert-gpu-to-spirv -verify-diagnostics %s -o - | FileCheck %s module attributes {gpu.container_module} { - gpu.module @kernels { // CHECK: spv.module "Logical" "GLSL450" { - // CHECK-LABEL: func @kernel_1 + // CHECK-LABEL: func @basic_module_structure // CHECK-SAME: {{%.*}}: f32 {spv.interface_var_abi = {binding = 0 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}} // CHECK-SAME: {{%.*}}: !spv.ptr [0]>, StorageBuffer> {spv.interface_var_abi = {binding = 1 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}} // CHECK-SAME: spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>} - gpu.func @kernel_1(%arg0 : f32, %arg1 : memref<12xf32>) attributes {gpu.kernel} { + gpu.func @basic_module_structure(%arg0 : f32, %arg1 : memref<12xf32>) + attributes {gpu.kernel, spv.entry_point_abi = {local_size = dense<[32, 4, 1]>: vector<3xi32>}} { // CHECK: spv.Return gpu.return } // CHECK: attributes {capabilities = ["Shader"], extensions = ["SPV_KHR_storage_buffer_storage_class"]} } - func @foo() { + func @main() { %0 = "op"() : () -> (f32) %1 = "op"() : () -> (memref<12xf32>) %cst = constant 1 : index - "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = "kernel_1", kernel_module = @kernels } + "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = "basic_module_structure", kernel_module = @kernels } + : (index, index, index, index, index, index, f32, memref<12xf32>) -> () + return + } +} + +// ----- + +module attributes {gpu.container_module} { + gpu.module @kernels { + // expected-error @below {{failed to legalize operation 'gpu.func'}} + // expected-remark @below {{match failure: missing 'spv.entry_point_abi' attribute}} + gpu.func @missing_entry_point_abi(%arg0 : f32, %arg1 : memref<12xf32>) attributes {gpu.kernel} { + gpu.return + } + } + + func @main() { + %0 = "op"() : () -> (f32) + %1 = "op"() : () -> (memref<12xf32>) + %cst = constant 1 : index + "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = "missing_entry_point_abi", kernel_module = @kernels } : (index, index, index, index, index, index, f32, memref<12xf32>) -> () return }