forked from OSchip/llvm-project
[mlir][spirv] Make EntryPointABIAttr.local_size optional
* It doesn't required by OpenCL/Intel Level Zero and can be set programmatically. * Add GPU to spirv lowering in case when attribute is not present. * Set higher benefit to WorkGroupSizeConversion pattern so it will always try to lower first from the attribute. Differential Revision: https://reviews.llvm.org/D120399
This commit is contained in:
parent
bd0bddc1ea
commit
4df9544108
|
@ -27,7 +27,7 @@ include "mlir/Dialect/SPIRV/IR/SPIRVBase.td"
|
|||
// points in the generated SPIR-V module:
|
||||
// 1) WorkGroup Size.
|
||||
def SPV_EntryPointABIAttr : StructAttr<"EntryPointABIAttr", SPIRV_Dialect, [
|
||||
StructFieldAttr<"local_size", I32ElementsAttr>
|
||||
StructFieldAttr<"local_size", OptionalAttr<I32ElementsAttr>>
|
||||
]>;
|
||||
|
||||
def SPV_ExtensionArrayAttr : TypedArrayAttrBase<
|
||||
|
|
|
@ -55,7 +55,8 @@ public:
|
|||
/// attribute on the surrounding FuncOp is used to replace the gpu::BlockDimOp.
|
||||
class WorkGroupSizeConversion : public OpConversionPattern<gpu::BlockDimOp> {
|
||||
public:
|
||||
using OpConversionPattern<gpu::BlockDimOp>::OpConversionPattern;
|
||||
WorkGroupSizeConversion(TypeConverter &typeConverter, MLIRContext *context)
|
||||
: OpConversionPattern(typeConverter, context, /*benefit*/ 10) {}
|
||||
|
||||
LogicalResult
|
||||
matchAndRewrite(gpu::BlockDimOp op, OpAdaptor adaptor,
|
||||
|
@ -159,6 +160,9 @@ LogicalResult WorkGroupSizeConversion::matchAndRewrite(
|
|||
gpu::BlockDimOp op, OpAdaptor adaptor,
|
||||
ConversionPatternRewriter &rewriter) const {
|
||||
auto workGroupSizeAttr = spirv::lookupLocalWorkGroupSize(op);
|
||||
if (!workGroupSizeAttr)
|
||||
return failure();
|
||||
|
||||
auto val = workGroupSizeAttr
|
||||
.getValues<int32_t>()[static_cast<int32_t>(op.dimension())];
|
||||
auto convertedType =
|
||||
|
@ -366,6 +370,7 @@ void mlir::populateGPUToSPIRVPatterns(SPIRVTypeConverter &typeConverter,
|
|||
GPUModuleEndConversion, GPUReturnOpConversion,
|
||||
LaunchConfigConversion<gpu::BlockIdOp, spirv::BuiltIn::WorkgroupId>,
|
||||
LaunchConfigConversion<gpu::GridDimOp, spirv::BuiltIn::NumWorkgroups>,
|
||||
LaunchConfigConversion<gpu::BlockDimOp, spirv::BuiltIn::WorkgroupSize>,
|
||||
LaunchConfigConversion<gpu::ThreadIdOp,
|
||||
spirv::BuiltIn::LocalInvocationId>,
|
||||
SingleDimLaunchConfigConversion<gpu::SubgroupIdOp,
|
||||
|
|
|
@ -120,6 +120,9 @@ StringRef spirv::getEntryPointABIAttrName() { return "spv.entry_point_abi"; }
|
|||
|
||||
spirv::EntryPointABIAttr
|
||||
spirv::getEntryPointABIAttr(ArrayRef<int32_t> localSize, MLIRContext *context) {
|
||||
if (localSize.empty())
|
||||
return spirv::EntryPointABIAttr::get(nullptr, context);
|
||||
|
||||
assert(localSize.size() == 3);
|
||||
return spirv::EntryPointABIAttr::get(
|
||||
DenseElementsAttr::get<int32_t>(
|
||||
|
|
|
@ -136,10 +136,13 @@ static LogicalResult lowerEntryPointABIAttr(spirv::FuncOp funcOp,
|
|||
|
||||
// Specifies the spv.ExecutionModeOp.
|
||||
auto localSizeAttr = entryPointAttr.local_size();
|
||||
SmallVector<int32_t, 3> localSize(localSizeAttr.getValues<int32_t>());
|
||||
builder.create<spirv::ExecutionModeOp>(
|
||||
funcOp.getLoc(), funcOp, spirv::ExecutionMode::LocalSize, localSize);
|
||||
funcOp->removeAttr(entryPointAttrName);
|
||||
if (localSizeAttr) {
|
||||
auto values = localSizeAttr.getValues<int32_t>();
|
||||
SmallVector<int32_t, 3> localSize(values);
|
||||
builder.create<spirv::ExecutionModeOp>(
|
||||
funcOp.getLoc(), funcOp, spirv::ExecutionMode::LocalSize, localSize);
|
||||
funcOp->removeAttr(entryPointAttrName);
|
||||
}
|
||||
return success();
|
||||
}
|
||||
|
||||
|
|
|
@ -223,6 +223,78 @@ module attributes {gpu.container_module} {
|
|||
|
||||
// -----
|
||||
|
||||
module attributes {gpu.container_module} {
|
||||
func @builtin() {
|
||||
%c0 = arith.constant 1 : index
|
||||
gpu.launch_func @kernels::@builtin_workgroup_size_x
|
||||
blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: spv.module @{{.*}}
|
||||
// CHECK: spv.GlobalVariable [[WORKGROUPSIZE:@.*]] built_in("WorkgroupSize")
|
||||
gpu.module @kernels {
|
||||
gpu.func @builtin_workgroup_size_x() kernel
|
||||
attributes {spv.entry_point_abi = {}} {
|
||||
// CHECK: [[ADDRESS:%.*]] = spv.mlir.addressof [[WORKGROUPSIZE]]
|
||||
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
|
||||
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}}
|
||||
%0 = gpu.block_dim x
|
||||
gpu.return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
module attributes {gpu.container_module} {
|
||||
func @builtin() {
|
||||
%c0 = arith.constant 1 : index
|
||||
gpu.launch_func @kernels::@builtin_workgroup_size_y
|
||||
blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: spv.module @{{.*}}
|
||||
// CHECK: spv.GlobalVariable [[WORKGROUPSIZE:@.*]] built_in("WorkgroupSize")
|
||||
gpu.module @kernels {
|
||||
gpu.func @builtin_workgroup_size_y() kernel
|
||||
attributes {spv.entry_point_abi = {}} {
|
||||
// CHECK: [[ADDRESS:%.*]] = spv.mlir.addressof [[WORKGROUPSIZE]]
|
||||
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
|
||||
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}1 : i32{{\]}}
|
||||
%0 = gpu.block_dim y
|
||||
gpu.return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
module attributes {gpu.container_module} {
|
||||
func @builtin() {
|
||||
%c0 = arith.constant 1 : index
|
||||
gpu.launch_func @kernels::@builtin_workgroup_size_z
|
||||
blocks in (%c0, %c0, %c0) threads in (%c0, %c0, %c0)
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: spv.module @{{.*}}
|
||||
// CHECK: spv.GlobalVariable [[WORKGROUPSIZE:@.*]] built_in("WorkgroupSize")
|
||||
gpu.module @kernels {
|
||||
gpu.func @builtin_workgroup_size_z() kernel
|
||||
attributes {spv.entry_point_abi = {}} {
|
||||
// CHECK: [[ADDRESS:%.*]] = spv.mlir.addressof [[WORKGROUPSIZE]]
|
||||
// CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]]
|
||||
// CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}2 : i32{{\]}}
|
||||
%0 = gpu.block_dim z
|
||||
gpu.return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
module attributes {gpu.container_module} {
|
||||
// CHECK-LABEL: spv.module @{{.*}} Logical GLSL450
|
||||
// CHECK: spv.GlobalVariable [[SUBGROUPSIZE:@.*]] built_in("SubgroupSize")
|
||||
|
|
Loading…
Reference in New Issue