forked from OSchip/llvm-project
Tighten the requirements on memory attribution types in gpu.func
When memory attributions are present in `gpu.func`, require that they are of memref type and live in memory spaces 3 and 5 for workgroup and private memory attributions, respectively. Adapt the conversion from the GPU dialect to the NVVM dialect to drop the private memory space from attributions, as NVVM is able to model them as local `llvm.alloca`s in the default memory space. PiperOrigin-RevId: 286161763
This commit is contained in:
parent
c6c6a74d55
commit
40ef46fba4
|
@ -63,7 +63,11 @@ public:
|
|||
|
||||
/// Returns the numeric value used to identify the workgroup memory address
|
||||
/// space.
|
||||
static int getWorkgroupAddressSpace() { return 3; }
|
||||
static unsigned getWorkgroupAddressSpace() { return 3; }
|
||||
|
||||
/// Returns the numeric value used to identify the private memory address
|
||||
/// space. GPUFuncOp::verifyBody compares the memory space of every private
/// attribution of a `gpu.func` against this value and rejects mismatches.
|
||||
static unsigned getPrivateAddressSpace() { return 5; }
|
||||
|
||||
LogicalResult verifyOperationAttribute(Operation *op,
|
||||
NamedAttribute attr) override;
|
||||
|
|
|
@ -38,6 +38,26 @@ using namespace mlir;
|
|||
|
||||
namespace {
|
||||
|
||||
/// Derived type converter for GPU to NVVM lowering. The GPU dialect uses memory
|
||||
/// space 5 for private memory attributions, but NVVM represents private
|
||||
/// memory allocations as local `alloca`s in the default address space. This
|
||||
/// converter drops the private memory space to support the use case above.
|
||||
class NVVMTypeConverter : public LLVMTypeConverter {
|
||||
public:
|
||||
using LLVMTypeConverter::LLVMTypeConverter;
|
||||
|
||||
Type convertType(Type type) override {
|
||||
auto memref = type.dyn_cast<MemRefType>();
|
||||
if (memref &&
|
||||
memref.getMemorySpace() == gpu::GPUDialect::getPrivateAddressSpace()) {
|
||||
type = MemRefType::get(memref.getShape(), memref.getElementType(),
|
||||
memref.getAffineMaps());
|
||||
}
|
||||
|
||||
return LLVMTypeConverter::convertType(type);
|
||||
}
|
||||
};
|
||||
|
||||
/// Converts all_reduce op to LLVM/NVVM ops.
|
||||
struct GPUAllReduceOpLowering : public LLVMOpLowering {
|
||||
using AccumulatorFactory = std::function<Value *(
|
||||
|
@ -559,9 +579,12 @@ struct GPUFuncOpLowering : LLVMOpLowering {
|
|||
assert(type && type.hasStaticShape() &&
|
||||
"unexpected type in attribution");
|
||||
|
||||
// Explicitly drop memory space when lowering private memory
|
||||
// attributions since NVVM models it as `alloca`s in the default
|
||||
// memory space and does not support `alloca`s with addrspace(5).
|
||||
auto ptrType = lowering.convertType(type.getElementType())
|
||||
.cast<LLVM::LLVMType>()
|
||||
.getPointerTo(type.getMemorySpace());
|
||||
.getPointerTo();
|
||||
Value *numElements = rewriter.create<LLVM::ConstantOp>(
|
||||
gpuFuncOp.getLoc(), int64Ty,
|
||||
rewriter.getI64IntegerAttr(type.getNumElements()));
|
||||
|
@ -635,7 +658,7 @@ public:
|
|||
return;
|
||||
|
||||
OwningRewritePatternList patterns;
|
||||
LLVMTypeConverter converter(m.getContext());
|
||||
NVVMTypeConverter converter(m.getContext());
|
||||
populateStdToLLVMConversionPatterns(converter, patterns);
|
||||
populateGpuToNVVMConversionPatterns(converter, patterns);
|
||||
ConversionTarget target(getContext());
|
||||
|
|
|
@ -739,6 +739,22 @@ LogicalResult GPUFuncOp::verifyType() {
|
|||
return success();
|
||||
}
|
||||
|
||||
/// Checks that every value in `attributions` is a memref residing in
/// `memorySpace`.
///
/// Emits an error on `op` and returns failure for the first attribution that
/// is either not of memref type or lives in a different memory space; returns
/// success when all attributions pass (including when the list is empty).
static LogicalResult verifyAttributions(Operation *op,
                                        ArrayRef<BlockArgument *> attributions,
                                        unsigned memorySpace) {
  for (Value *attribution : attributions) {
    if (auto memrefType = attribution->getType().dyn_cast<MemRefType>()) {
      // A memref in the expected space is fine; move on to the next one.
      if (memrefType.getMemorySpace() == memorySpace)
        continue;

      return op->emitOpError()
             << "expected memory space " << memorySpace << " in attribution";
    }

    return op->emitOpError() << "expected memref type in attribution";
  }

  return success();
}
|
||||
|
||||
/// Verifies the body of the function.
|
||||
LogicalResult GPUFuncOp::verifyBody() {
|
||||
unsigned numFuncArguments = getNumArguments();
|
||||
|
@ -758,6 +774,12 @@ LogicalResult GPUFuncOp::verifyBody() {
|
|||
<< blockArgType;
|
||||
}
|
||||
|
||||
if (failed(verifyAttributions(getOperation(), getWorkgroupAttributions(),
|
||||
GPUDialect::getWorkgroupAddressSpace())) ||
|
||||
failed(verifyAttributions(getOperation(), getPrivateAttributions(),
|
||||
GPUDialect::getPrivateAddressSpace())))
|
||||
return failure();
|
||||
|
||||
return success();
|
||||
}
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
module attributes {gpu.kernel_module} {
|
||||
// CHECK-LABEL: llvm.func @private
|
||||
gpu.func @private(%arg0: f32) private(%arg1: memref<4xf32>) {
|
||||
gpu.func @private(%arg0: f32) private(%arg1: memref<4xf32, 5>) {
|
||||
// Allocate private memory inside the function.
|
||||
// CHECK: %[[size:.*]] = llvm.mlir.constant(4 : i64) : !llvm.i64
|
||||
// CHECK: %[[raw:.*]] = llvm.alloca %[[size]] x !llvm.float : (!llvm.i64) -> !llvm<"float*">
|
||||
|
@ -24,7 +24,7 @@ module attributes {gpu.kernel_module} {
|
|||
// CHECK: llvm.getelementptr
|
||||
// CHECK: llvm.store
|
||||
%c0 = constant 0 : index
|
||||
store %arg0, %arg1[%c0] : memref<4xf32>
|
||||
store %arg0, %arg1[%c0] : memref<4xf32, 5>
|
||||
|
||||
"terminator"() : () -> ()
|
||||
}
|
||||
|
@ -123,7 +123,7 @@ module attributes {gpu.kernel_module} {
|
|||
// CHECK-LABEL: llvm.func @multiple
|
||||
gpu.func @multiple(%arg0: f32)
|
||||
workgroup(%arg1: memref<1xf32, 3>, %arg2: memref<2xf32, 3>)
|
||||
private(%arg3: memref<3xf32>, %arg4: memref<4xf32>) {
|
||||
private(%arg3: memref<3xf32, 5>, %arg4: memref<4xf32, 5>) {
|
||||
|
||||
// Workgroup buffers.
|
||||
// CHECK: llvm.mlir.addressof @[[buffer1]]
|
||||
|
@ -138,8 +138,8 @@ module attributes {gpu.kernel_module} {
|
|||
%c0 = constant 0 : index
|
||||
store %arg0, %arg1[%c0] : memref<1xf32, 3>
|
||||
store %arg0, %arg2[%c0] : memref<2xf32, 3>
|
||||
store %arg0, %arg3[%c0] : memref<3xf32>
|
||||
store %arg0, %arg4[%c0] : memref<4xf32>
|
||||
store %arg0, %arg3[%c0] : memref<3xf32, 5>
|
||||
store %arg0, %arg4[%c0] : memref<4xf32, 5>
|
||||
"terminator"() : () -> ()
|
||||
}
|
||||
}
|
||||
|
|
|
@ -382,3 +382,36 @@ module {
|
|||
}) {sym_name="kernel_1", type=f32} : () -> ()
|
||||
}
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
module {
|
||||
module @gpu_funcs attributes {gpu.kernel_module} {
|
||||
// expected-error @+1 {{expected memref type in attribution}}
|
||||
gpu.func @kernel() workgroup(%0: i32) {
|
||||
gpu.return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
module {
|
||||
module @gpu_funcs attributes {gpu.kernel_module} {
|
||||
// expected-error @+1 {{expected memory space 3 in attribution}}
|
||||
gpu.func @kernel() workgroup(%0: memref<4xf32>) {
|
||||
gpu.return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
module {
|
||||
module @gpu_funcs attributes {gpu.kernel_module} {
|
||||
// expected-error @+1 {{expected memory space 5 in attribution}}
|
||||
gpu.func @kernel() private(%0: memref<4xf32>) {
|
||||
gpu.return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue