Harden the requirements on memory attribution types in gpu.func

When memory attributions are present in `gpu.func`, require them to be of
memref type and to live in memory spaces 3 and 5 for workgroup and private
memory attributions, respectively. Adapt the conversion from the GPU dialect to
the NVVM dialect to drop the private memory space from attributions, since NVVM
models them as local `llvm.alloca`s in the default memory space.

PiperOrigin-RevId: 286161763
Author:    Alex Zinenko
Date:      2019-12-18 03:38:18 -08:00
Committer: A. Unique TensorFlower
Commit:    40ef46fba4 (parent c6c6a74d55)
5 changed files with 90 additions and 8 deletions
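For reference, a `gpu.func` that satisfies the hardened requirements would look roughly like the sketch below, modeled on the updated tests further down; the function and value names are illustrative only.

module attributes {gpu.kernel_module} {
  // Workgroup attributions live in memory space 3, private ones in space 5.
  gpu.func @kernel(%arg0: f32)
      workgroup(%wg: memref<32xf32, 3>)
      private(%priv: memref<4xf32, 5>) {
    %c0 = constant 0 : index
    store %arg0, %wg[%c0] : memref<32xf32, 3>
    store %arg0, %priv[%c0] : memref<4xf32, 5>
    gpu.return
  }
}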


@@ -63,7 +63,11 @@ public:
   /// Returns the numeric value used to identify the workgroup memory address
   /// space.
-  static int getWorkgroupAddressSpace() { return 3; }
+  static unsigned getWorkgroupAddressSpace() { return 3; }
+
+  /// Returns the numeric value used to identify the private memory address
+  /// space.
+  static unsigned getPrivateAddressSpace() { return 5; }
 
   LogicalResult verifyOperationAttribute(Operation *op,
                                          NamedAttribute attr) override;


@@ -38,6 +38,26 @@ using namespace mlir;
 namespace {
 
+/// Derived type converter for GPU to NVVM lowering. The GPU dialect uses memory
+/// space 5 for private memory attributions, but NVVM represents private
+/// memory allocations as local `alloca`s in the default address space. This
+/// converter drops the private memory space to support the use case above.
+class NVVMTypeConverter : public LLVMTypeConverter {
+public:
+  using LLVMTypeConverter::LLVMTypeConverter;
+
+  Type convertType(Type type) override {
+    auto memref = type.dyn_cast<MemRefType>();
+    if (memref &&
+        memref.getMemorySpace() == gpu::GPUDialect::getPrivateAddressSpace()) {
+      type = MemRefType::get(memref.getShape(), memref.getElementType(),
+                             memref.getAffineMaps());
+    }
+
+    return LLVMTypeConverter::convertType(type);
+  }
+};
+
 /// Converts all_reduce op to LLVM/NVVM ops.
 struct GPUAllReduceOpLowering : public LLVMOpLowering {
   using AccumulatorFactory = std::function<Value *(
@@ -559,9 +579,12 @@ struct GPUFuncOpLowering : LLVMOpLowering {
       assert(type && type.hasStaticShape() &&
              "unexpected type in attribution");
 
+      // Explicitly drop memory space when lowering private memory
+      // attributions since NVVM models it as `alloca`s in the default
+      // memory space and does not support `alloca`s with addrspace(5).
       auto ptrType = lowering.convertType(type.getElementType())
                          .cast<LLVM::LLVMType>()
-                         .getPointerTo(type.getMemorySpace());
+                         .getPointerTo();
       Value *numElements = rewriter.create<LLVM::ConstantOp>(
           gpuFuncOp.getLoc(), int64Ty,
           rewriter.getI64IntegerAttr(type.getNumElements()));
@@ -635,7 +658,7 @@ public:
       return;
 
     OwningRewritePatternList patterns;
-    LLVMTypeConverter converter(m.getContext());
+    NVVMTypeConverter converter(m.getContext());
     populateStdToLLVMConversionPatterns(converter, patterns);
    populateGpuToNVVMConversionPatterns(converter, patterns);
     ConversionTarget target(getContext());
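At the type level, swapping in `NVVMTypeConverter` means a private-memory memref is converted as if it carried no memory space, so the element pointer it produces lives in the default address space. A rough illustration, assuming an `f32` element type:

// GPU dialect type of a private attribution.
memref<4xf32, 5>
// Element pointer type produced during the NVVM lowering, with the memory
// space dropped by this change ...
!llvm<"float*">
// ... rather than an addrspace(5) pointer, which NVVM cannot use for allocas.
!llvm<"float addrspace(5)*">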


@@ -739,6 +739,22 @@ LogicalResult GPUFuncOp::verifyType() {
   return success();
 }
 
+static LogicalResult verifyAttributions(Operation *op,
+                                        ArrayRef<BlockArgument *> attributions,
+                                        unsigned memorySpace) {
+  for (Value *v : attributions) {
+    auto type = v->getType().dyn_cast<MemRefType>();
+    if (!type)
+      return op->emitOpError() << "expected memref type in attribution";
+
+    if (type.getMemorySpace() != memorySpace) {
+      return op->emitOpError()
+             << "expected memory space " << memorySpace << " in attribution";
+    }
+  }
+  return success();
+}
+
 /// Verifies the body of the function.
 LogicalResult GPUFuncOp::verifyBody() {
   unsigned numFuncArguments = getNumArguments();
@@ -758,6 +774,12 @@ LogicalResult GPUFuncOp::verifyBody() {
            << blockArgType;
   }
 
+  if (failed(verifyAttributions(getOperation(), getWorkgroupAttributions(),
+                                GPUDialect::getWorkgroupAddressSpace())) ||
+      failed(verifyAttributions(getOperation(), getPrivateAttributions(),
+                                GPUDialect::getPrivateAddressSpace())))
+    return failure();
+
   return success();
 }


@@ -2,7 +2,7 @@
 module attributes {gpu.kernel_module} {
   // CHECK-LABEL: llvm.func @private
-  gpu.func @private(%arg0: f32) private(%arg1: memref<4xf32>) {
+  gpu.func @private(%arg0: f32) private(%arg1: memref<4xf32, 5>) {
     // Allocate private memory inside the function.
     // CHECK: %[[size:.*]] = llvm.mlir.constant(4 : i64) : !llvm.i64
     // CHECK: %[[raw:.*]] = llvm.alloca %[[size]] x !llvm.float : (!llvm.i64) -> !llvm<"float*">
@@ -24,7 +24,7 @@ module attributes {gpu.kernel_module} {
     // CHECK: llvm.getelementptr
     // CHECK: llvm.store
     %c0 = constant 0 : index
-    store %arg0, %arg1[%c0] : memref<4xf32>
+    store %arg0, %arg1[%c0] : memref<4xf32, 5>
 
     "terminator"() : () -> ()
   }
@@ -123,7 +123,7 @@ module attributes {gpu.kernel_module} {
   // CHECK-LABEL: llvm.func @multiple
   gpu.func @multiple(%arg0: f32)
       workgroup(%arg1: memref<1xf32, 3>, %arg2: memref<2xf32, 3>)
-      private(%arg3: memref<3xf32>, %arg4: memref<4xf32>) {
+      private(%arg3: memref<3xf32, 5>, %arg4: memref<4xf32, 5>) {
 
     // Workgroup buffers.
     // CHECK: llvm.mlir.addressof @[[buffer1]]
@@ -138,8 +138,8 @@ module attributes {gpu.kernel_module} {
     %c0 = constant 0 : index
     store %arg0, %arg1[%c0] : memref<1xf32, 3>
     store %arg0, %arg2[%c0] : memref<2xf32, 3>
-    store %arg0, %arg3[%c0] : memref<3xf32>
-    store %arg0, %arg4[%c0] : memref<4xf32>
+    store %arg0, %arg3[%c0] : memref<3xf32, 5>
+    store %arg0, %arg4[%c0] : memref<4xf32, 5>
     "terminator"() : () -> ()
   }
 }


@@ -382,3 +382,36 @@ module {
 }) {sym_name="kernel_1", type=f32} : () -> ()
 }
 }
+
+// -----
+
+module {
+  module @gpu_funcs attributes {gpu.kernel_module} {
+    // expected-error @+1 {{expected memref type in attribution}}
+    gpu.func @kernel() workgroup(%0: i32) {
+      gpu.return
+    }
+  }
+}
+
+// -----
+
+module {
+  module @gpu_funcs attributes {gpu.kernel_module} {
+    // expected-error @+1 {{expected memory space 3 in attribution}}
+    gpu.func @kernel() workgroup(%0: memref<4xf32>) {
+      gpu.return
+    }
+  }
+}
+
+// -----
+
+module {
+  module @gpu_funcs attributes {gpu.kernel_module} {
+    // expected-error @+1 {{expected memory space 5 in attribution}}
+    gpu.func @kernel() private(%0: memref<4xf32>) {
+      gpu.return
+    }
+  }
+}