[mlir][spirv] Make EntryPointABIAttr.local_size optional

* It is not required by OpenCL/Intel Level Zero and can be set programmatically instead.
* Add GPU-to-SPIR-V lowering for the case when the local_size attribute is not present (see the sketch below).
* Give the WorkGroupSizeConversion pattern a higher benefit so lowering from the attribute is always tried first.
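For illustration, a minimal sketch of the two forms the attribute can now take (the kernel names and the concrete local size are made up; the empty-attribute form matches the new tests added below):

gpu.module @kernels {
  // local_size given: gpu.block_dim lowers to constants read from the attribute.
  gpu.func @kernel_with_size() kernel
    attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}} {
    %0 = gpu.block_dim x
    gpu.return
  }
  // local_size omitted: gpu.block_dim lowers to the WorkgroupSize built-in; the actual
  // size is expected to be supplied by the runtime (e.g. through Level Zero).
  gpu.func @kernel_without_size() kernel
    attributes {spv.entry_point_abi = {}} {
    %0 = gpu.block_dim x
    gpu.return
  }
}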

Differential Revision: https://reviews.llvm.org/D120399
Ivan Butygin 2022-02-23 14:12:07 +03:00
parent bd0bddc1ea
commit 4df9544108
5 changed files with 89 additions and 6 deletions

View File

@@ -27,7 +27,7 @@ include "mlir/Dialect/SPIRV/IR/SPIRVBase.td"
// points in the generated SPIR-V module:
// 1) WorkGroup Size.
def SPV_EntryPointABIAttr : StructAttr<"EntryPointABIAttr", SPIRV_Dialect, [
StructFieldAttr<"local_size", I32ElementsAttr>
StructFieldAttr<"local_size", OptionalAttr<I32ElementsAttr>>
]>;
def SPV_ExtensionArrayAttr : TypedArrayAttrBase<

View File

@@ -55,7 +55,8 @@ public:
/// attribute on the surrounding FuncOp is used to replace the gpu::BlockDimOp.
class WorkGroupSizeConversion : public OpConversionPattern<gpu::BlockDimOp> {
public:
using OpConversionPattern<gpu::BlockDimOp>::OpConversionPattern;
WorkGroupSizeConversion(TypeConverter &typeConverter, MLIRContext *context)
: OpConversionPattern(typeConverter, context, /*benefit*/ 10) {}
LogicalResult
matchAndRewrite(gpu::BlockDimOp op, OpAdaptor adaptor,
@@ -159,6 +160,9 @@ LogicalResult WorkGroupSizeConversion::matchAndRewrite(
gpu::BlockDimOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const {
auto workGroupSizeAttr = spirv::lookupLocalWorkGroupSize(op);
if (!workGroupSizeAttr)
return failure();
auto val = workGroupSizeAttr
.getValues<int32_t>()[static_cast<int32_t>(op.dimension())];
auto convertedType =
@@ -366,6 +370,7 @@ void mlir::populateGPUToSPIRVPatterns(SPIRVTypeConverter &typeConverter,
GPUModuleEndConversion, GPUReturnOpConversion,
LaunchConfigConversion<gpu::BlockIdOp, spirv::BuiltIn::WorkgroupId>,
LaunchConfigConversion<gpu::GridDimOp, spirv::BuiltIn::NumWorkgroups>,
LaunchConfigConversion<gpu::BlockDimOp, spirv::BuiltIn::WorkgroupSize>,
LaunchConfigConversion<gpu::ThreadIdOp,
spirv::BuiltIn::LocalInvocationId>,
SingleDimLaunchConfigConversion<gpu::SubgroupIdOp,

View File

@@ -120,6 +120,9 @@ StringRef spirv::getEntryPointABIAttrName() { return "spv.entry_point_abi"; }
spirv::EntryPointABIAttr
spirv::getEntryPointABIAttr(ArrayRef<int32_t> localSize, MLIRContext *context) {
if (localSize.empty())
return spirv::EntryPointABIAttr::get(nullptr, context);
assert(localSize.size() == 3);
return spirv::EntryPointABIAttr::get(
DenseElementsAttr::get<int32_t>(

View File

@@ -136,10 +136,13 @@ static LogicalResult lowerEntryPointABIAttr(spirv::FuncOp funcOp,
// Specifies the spv.ExecutionModeOp.
auto localSizeAttr = entryPointAttr.local_size();
SmallVector<int32_t, 3> localSize(localSizeAttr.getValues<int32_t>());
builder.create<spirv::ExecutionModeOp>(
funcOp.getLoc(), funcOp, spirv::ExecutionMode::LocalSize, localSize);
funcOp->removeAttr(entryPointAttrName);
if (localSizeAttr) {
auto values = localSizeAttr.getValues<int32_t>();
SmallVector<int32_t, 3> localSize(values);
builder.create<spirv::ExecutionModeOp>(
funcOp.getLoc(), funcOp, spirv::ExecutionMode::LocalSize, localSize);
funcOp->removeAttr(entryPointAttrName);
}
return success();
}
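As a rough sketch of the resulting behaviour, assuming a hypothetical kernel @kernel_with_size with local_size = [32, 4, 1]: the pass still materializes the size as a LocalSize execution mode and drops the ABI attribute, while for a kernel without local_size nothing is emitted and the attribute is left in place.

// Produced only when local_size is present on the entry point:
spv.ExecutionMode @kernel_with_size "LocalSize", 32, 4, 1
// When local_size is absent, no "LocalSize" execution mode is created; choosing the
// work-group size is deferred to the runtime.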

View File

@@ -223,6 +223,78 @@ module attributes {gpu.container_module} {
// -----
module attributes {gpu.container_module} {
func @builtin() {
%c0 = arith.constant 1 : index
gpu.launch_func @kernels::@builtin_workgroup_size_x
blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
return
}
// CHECK-LABEL: spv.module @{{.*}}
// CHECK: spv.GlobalVariable [[WORKGROUPSIZE:@.*]] built_in("WorkgroupSize")
gpu.module @kernels {
gpu.func @builtin_workgroup_size_x() kernel
attributes {spv.entry_point_abi = {}} {
// CHECK: [[ADDRESS:%.*]] = spv.mlir.addressof [[WORKGROUPSIZE]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
%0 = gpu.block_dim x
gpu.return
}
}
}
// -----
module attributes {gpu.container_module} {
func @builtin() {
%c0 = arith.constant 1 : index
gpu.launch_func @kernels::@builtin_workgroup_size_y
blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
return
}
// CHECK-LABEL: spv.module @{{.*}}
// CHECK: spv.GlobalVariable [[WORKGROUPSIZE:@.*]] built_in("WorkgroupSize")
gpu.module @kernels {
gpu.func @builtin_workgroup_size_y() kernel
attributes {spv.entry_point_abi = {}} {
// CHECK: [[ADDRESS:%.*]] = spv.mlir.addressof [[WORKGROUPSIZE]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}1 : i32{{\]}}
%0 = gpu.block_dim y
gpu.return
}
}
}
// -----
module attributes {gpu.container_module} {
func @builtin() {
%c0 = arith.constant 1 : index
gpu.launch_func @kernels::@builtin_workgroup_size_z
blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
return
}
// CHECK-LABEL: spv.module @{{.*}}
// CHECK: spv.GlobalVariable [[WORKGROUPSIZE:@.*]] built_in("WorkgroupSize")
gpu.module @kernels {
gpu.func @builtin_workgroup_size_z() kernel
attributes {spv.entry_point_abi = {}} {
// CHECK: [[ADDRESS:%.*]] = spv.mlir.addressof [[WORKGROUPSIZE]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}2 : i32{{\]}}
%0 = gpu.block_dim z
gpu.return
}
}
}
// -----
module attributes {gpu.container_module} {
// CHECK-LABEL: spv.module @{{.*}} Logical GLSL450
// CHECK: spv.GlobalVariable [[SUBGROUPSIZE:@.*]] built_in("SubgroupSize")