[mlir][spirv] Make EntryPointABIAttr.local_size optional

* It is not required by OpenCL/Intel Level Zero and can be set programmatically instead.
* Add GPU-to-SPIR-V lowering for the case when the local_size attribute is not present (see the sketch below).
* Give the WorkGroupSizeConversion pattern a higher benefit so lowering from the attribute is always tried first.
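For illustration, a minimal sketch of the two forms the attribute can now take (the kernel names and the concrete local size are made up; the empty-attribute form matches the new tests added below):

gpu.module @kernels {
  // local_size given: gpu.block_dim lowers to constants read from the attribute.
  gpu.func @kernel_with_size() kernel
    attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]> : vector<3xi32>}} {
    %0 = gpu.block_dim x
    gpu.return
  }
  // local_size omitted: gpu.block_dim lowers to the WorkgroupSize built-in; the actual
  // size is expected to be supplied by the runtime (e.g. through Level Zero).
  gpu.func @kernel_without_size() kernel
    attributes {spv.entry_point_abi = {}} {
    %0 = gpu.block_dim x
    gpu.return
  }
}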

Differential Revision: https://reviews.llvm.org/D120399
Ivan Butygin 2022-02-23 14:12:07 +03:00
parent bd0bddc1ea
commit 4df9544108
5 changed files with 89 additions and 6 deletions

View File

@@ -27,7 +27,7 @@ include "mlir/Dialect/SPIRV/IR/SPIRVBase.td"
// points in the generated SPIR-V module:
// 1) WorkGroup Size.
def SPV_EntryPointABIAttr : StructAttr<"EntryPointABIAttr", SPIRV_Dialect, [
StructFieldAttr<"local_size", I32ElementsAttr>
StructFieldAttr<"local_size", OptionalAttr<I32ElementsAttr>>
]>;
def SPV_ExtensionArrayAttr : TypedArrayAttrBase<

View File

@@ -55,7 +55,8 @@ public:
/// attribute on the surrounding FuncOp is used to replace the gpu::BlockDimOp.
class WorkGroupSizeConversion : public OpConversionPattern<gpu::BlockDimOp> {
public:
using OpConversionPattern<gpu::BlockDimOp>::OpConversionPattern;
WorkGroupSizeConversion(TypeConverter &typeConverter, MLIRContext *context)
: OpConversionPattern(typeConverter, context, /*benefit*/ 10) {}
LogicalResult
matchAndRewrite(gpu::BlockDimOp op, OpAdaptor adaptor,
@@ -159,6 +160,9 @@ LogicalResult WorkGroupSizeConversion::matchAndRewrite(
gpu::BlockDimOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const {
auto workGroupSizeAttr = spirv::lookupLocalWorkGroupSize(op);
if (!workGroupSizeAttr)
return failure();
auto val = workGroupSizeAttr
.getValues<int32_t>()[static_cast<int32_t>(op.dimension())];
auto convertedType =
@@ -366,6 +370,7 @@ void mlir::populateGPUToSPIRVPatterns(SPIRVTypeConverter &typeConverter,
GPUModuleEndConversion, GPUReturnOpConversion,
LaunchConfigConversion<gpu::BlockIdOp, spirv::BuiltIn::WorkgroupId>,
LaunchConfigConversion<gpu::GridDimOp, spirv::BuiltIn::NumWorkgroups>,
LaunchConfigConversion<gpu::BlockDimOp, spirv::BuiltIn::WorkgroupSize>,
LaunchConfigConversion<gpu::ThreadIdOp,
spirv::BuiltIn::LocalInvocationId>,
SingleDimLaunchConfigConversion<gpu::SubgroupIdOp,

View File

@@ -120,6 +120,9 @@ StringRef spirv::getEntryPointABIAttrName() { return "spv.entry_point_abi"; }
spirv::EntryPointABIAttr
spirv::getEntryPointABIAttr(ArrayRef<int32_t> localSize, MLIRContext *context) {
if (localSize.empty())
return spirv::EntryPointABIAttr::get(nullptr, context);
assert(localSize.size() == 3);
return spirv::EntryPointABIAttr::get(
DenseElementsAttr::get<int32_t>(

View File

@@ -136,10 +136,13 @@ static LogicalResult lowerEntryPointABIAttr(spirv::FuncOp funcOp,
// Specifies the spv.ExecutionModeOp.
auto localSizeAttr = entryPointAttr.local_size();
SmallVector<int32_t, 3> localSize(localSizeAttr.getValues<int32_t>());
builder.create<spirv::ExecutionModeOp>(
funcOp.getLoc(), funcOp, spirv::ExecutionMode::LocalSize, localSize);
funcOp->removeAttr(entryPointAttrName);
if (localSizeAttr) {
auto values = localSizeAttr.getValues<int32_t>();
SmallVector<int32_t, 3> localSize(values);
builder.create<spirv::ExecutionModeOp>(
funcOp.getLoc(), funcOp, spirv::ExecutionMode::LocalSize, localSize);
funcOp->removeAttr(entryPointAttrName);
}
return success();
}
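As a rough sketch of the resulting behaviour, assuming a hypothetical kernel @kernel_with_size with local_size = [32, 4, 1]: the pass still materializes the size as a LocalSize execution mode and drops the ABI attribute, while for a kernel without local_size nothing is emitted and the attribute is left in place.

// Produced only when local_size is present on the entry point:
spv.ExecutionMode @kernel_with_size "LocalSize", 32, 4, 1
// When local_size is absent, no "LocalSize" execution mode is created; choosing the
// work-group size is deferred to the runtime.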

View File

@@ -223,6 +223,78 @@ module attributes {gpu.container_module} {
// -----
module attributes {gpu.container_module} {
func @builtin() {
%c0 = arith.constant 1 : index
gpu.launch_func @kernels::@builtin_workgroup_size_x
blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
return
}
// CHECK-LABEL: spv.module @{{.*}}
// CHECK: spv.GlobalVariable [[WORKGROUPSIZE:@.*]] built_in("WorkgroupSize")
gpu.module @kernels {
gpu.func @builtin_workgroup_size_x() kernel
attributes {spv.entry_point_abi = {}} {
// CHECK: [[ADDRESS:%.*]] = spv.mlir.addressof [[WORKGROUPSIZE]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
%0 = gpu.block_dim x
gpu.return
}
}
}
// -----
module attributes {gpu.container_module} {
func @builtin() {
%c0 = arith.constant 1 : index
gpu.launch_func @kernels::@builtin_workgroup_size_y
blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
return
}
// CHECK-LABEL: spv.module @{{.*}}
// CHECK: spv.GlobalVariable [[WORKGROUPSIZE:@.*]] built_in("WorkgroupSize")
gpu.module @kernels {
gpu.func @builtin_workgroup_size_y() kernel
attributes {spv.entry_point_abi = {}} {
// CHECK: [[ADDRESS:%.*]] = spv.mlir.addressof [[WORKGROUPSIZE]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}1 : i32{{\]}}
%0 = gpu.block_dim y
gpu.return
}
}
}
// -----
module attributes {gpu.container_module} {
func @builtin() {
%c0 = arith.constant 1 : index
gpu.launch_func @kernels::@builtin_workgroup_size_z
blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
return
}
// CHECK-LABEL: spv.module @{{.*}}
// CHECK: spv.GlobalVariable [[WORKGROUPSIZE:@.*]] built_in("WorkgroupSize")
gpu.module @kernels {
gpu.func @builtin_workgroup_size_z() kernel
attributes {spv.entry_point_abi = {}} {
// CHECK: [[ADDRESS:%.*]] = spv.mlir.addressof [[WORKGROUPSIZE]]
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}2 : i32{{\]}}
%0 = gpu.block_dim z
gpu.return
}
}
}
// -----
module attributes {gpu.container_module} {
// CHECK-LABEL: spv.module @{{.*}} Logical GLSL450
// CHECK: spv.GlobalVariable [[SUBGROUPSIZE:@.*]] built_in("SubgroupSize")