[mlir][rocdl] add rocdl.barrier op.

- Add rocdl.barrier op.
- Lower gpu.barrier to rocdl.barrier in -convert-gpu-to-rocdl.

Differential Revision: https://reviews.llvm.org/D79126
This commit is contained in:
Wen-Heng (Jack) Chung 2020-05-04 10:32:16 +02:00 committed by Alex Zinenko
parent a581c6f8cd
commit bc23c1d85e
7 changed files with 70 additions and 0 deletions

View File

@ -87,5 +87,19 @@ def ROCDL_GridDimYOp : ROCDL_DeviceFunctionOp<"grid.dim.y",
// ROCDL op with mnemonic "grid.dim.z", declared through ROCDL_DeviceFunctionOp
// with the device function name "__ockl_get_global_size" and the constant 2.
// NOTE(review): the 2 is presumably the dimension index for z; confirm
// against the ROCDL_DeviceFunctionOp class definition.
def ROCDL_GridDimZOp : ROCDL_DeviceFunctionOp<"grid.dim.z",
"__ockl_get_global_size", 2>;
//===----------------------------------------------------------------------===//
// Synchronization primitives
// Barrier op ("rocdl.barrier"). When translated to LLVM IR it expands to three
// instructions, in this order:
//   1. a release fence at the "workgroup" sync scope,
//   2. a call to the llvm.amdgcn.s.barrier intrinsic,
//   3. an acquire fence at the "workgroup" sync scope.
def ROCDL_BarrierOp : ROCDL_Op<"barrier"> {
// C++ snippet run by the LLVM IR translation for this op; `builder` is the
// IR builder provided by the surrounding translation code.
string llvmBuilder = [{
llvm::LLVMContext &llvmContext = builder.getContext();
builder.CreateFence(llvm::AtomicOrdering::Release,
llvmContext.getOrInsertSyncScopeID("workgroup"));
createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_s_barrier);
builder.CreateFence(llvm::AtomicOrdering::Acquire,
llvmContext.getOrInsertSyncScopeID("workgroup"));
}];
// Textual form is just the op name followed by an optional attribute
// dictionary.
let assemblyFormat = "attr-dict";
}
#endif // ROCDLIR_OPS

View File

@ -1,9 +1,15 @@
# Run mlir-tablegen on GPUToROCDL.td with -gen-rewriters to produce
# GPUToROCDL.cpp.inc, and expose the generation step as the
# MLIRGPUToROCDLIncGen target.
set(LLVM_TARGET_DEFINITIONS GPUToROCDL.td)
mlir_tablegen(GPUToROCDL.cpp.inc -gen-rewriters)
add_public_tablegen_target(MLIRGPUToROCDLIncGen)
# Conversion library built from LowerGpuOpsToROCDLOps.cpp. It depends on
# MLIRGPUToROCDLIncGen so the generated .inc file exists before the source
# that includes it is compiled.
add_mlir_conversion_library(MLIRGPUtoROCDLTransforms
LowerGpuOpsToROCDLOps.cpp
DEPENDS
MLIRConversionPassIncGen
MLIRGPUToROCDLIncGen
)
target_link_libraries(MLIRGPUtoROCDLTransforms
PUBLIC
LLVMSupport

View File

@ -0,0 +1,21 @@
//==-- GPUToROCDL.td - GPU Ops to ROCDL Patterns -------------*- tablegen -*==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Defines Patterns to lower GPU ops to ROCDL.
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_CONVERSION_GPUTOROCDL_TD
#define MLIR_CONVERSION_GPUTOROCDL_TD
include "mlir/Dialect/GPU/GPUOps.td"
include "mlir/Dialect/LLVMIR/ROCDLOps.td"
// Rewrite pattern: replace a GPU_BarrierOp with a ROCDL_BarrierOp. Neither
// side of the pattern lists operands or results, so there is nothing to map.
def : Pat<(GPU_BarrierOp), (ROCDL_BarrierOp)>;
#endif // MLIR_CONVERSION_GPUTOROCDL_TD

View File

@ -32,6 +32,9 @@ using namespace mlir;
namespace {
/// Import the GPU Ops to ROCDL Patterns.
#include "GPUToROCDL.cpp.inc"
// A pass that replaces all occurrences of GPU device operations with their
// corresponding ROCDL equivalent.
//
@ -71,6 +74,7 @@ public:
void mlir::populateGpuToROCDLConversionPatterns(
LLVMTypeConverter &converter, OwningRewritePatternList &patterns) {
populateWithGenerated(converter.getDialect()->getContext(), &patterns);
patterns.insert<
GPUIndexIntrinsicOpLowering<gpu::ThreadIdOp, ROCDL::ThreadIdXOp,
ROCDL::ThreadIdYOp, ROCDL::ThreadIdZOp>,

View File

@ -42,6 +42,17 @@ gpu.module @test_module {
// -----
// Test case: a function containing a single gpu.barrier; the output of the
// conversion under test is expected to contain rocdl.barrier.
gpu.module @test_module {
// CHECK-LABEL: func @gpu_sync()
func @gpu_sync() {
// CHECK: rocdl.barrier
gpu.barrier
std.return
}
}
// -----
gpu.module @test_module {
// CHECK: llvm.func @__ocml_fabs_f32(!llvm.float) -> !llvm.float
// CHECK: llvm.func @__ocml_fabs_f64(!llvm.double) -> !llvm.double

View File

@ -28,3 +28,9 @@ func @rocdl_special_regs() -> !llvm.i32 {
%11 = rocdl.grid.dim.z : !llvm.i32
llvm.return %0 : !llvm.i32
}
// Test case: rocdl.barrier written in its textual form must appear as
// rocdl.barrier in the tool output.
func @rocdl.barrier() {
// CHECK: rocdl.barrier
rocdl.barrier
llvm.return
}

View File

@ -33,3 +33,11 @@ llvm.func @kernel_func() attributes {gpu.kernel} {
// CHECK-LABEL: amdgpu_kernel void @kernel_func
llvm.return
}
// Test case: rocdl.barrier must translate to exactly three consecutive LLVM IR
// instructions: a workgroup-scope release fence, a call to
// @llvm.amdgcn.s.barrier, and a workgroup-scope acquire fence. The CHECK-NEXT
// directives require that nothing is emitted between them.
llvm.func @rocdl.barrier() {
// CHECK: fence syncscope("workgroup") release
// CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
// CHECK-NEXT: fence syncscope("workgroup") acquire
rocdl.barrier
llvm.return
}