forked from OSchip/llvm-project
[mlir][rocdl] Add rocdl.barrier op.
- Add rocdl.barrier op. - Lower gpu.barrier to rocdl.barrier in -convert-gpu-to-rocdl. Differential Revision: https://reviews.llvm.org/D79126
This commit is contained in:
parent
a581c6f8cd
commit
bc23c1d85e
|
@ -87,5 +87,19 @@ def ROCDL_GridDimYOp : ROCDL_DeviceFunctionOp<"grid.dim.y",
|
|||
def ROCDL_GridDimZOp : ROCDL_DeviceFunctionOp<"grid.dim.z",
|
||||
"__ockl_get_global_size", 2>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Synchronization primitives
|
||||
|
||||
// Barrier op with mnemonic "barrier". Its llvmBuilder emits three things, in
// this order:
//   1. a fence with Release ordering in the "workgroup" sync scope,
//   2. a call to the llvm::Intrinsic::amdgcn_s_barrier intrinsic,
//   3. a fence with Acquire ordering in the "workgroup" sync scope.
// The assembly format consists only of the attribute dictionary, so the op
// declares no operands or results in its textual form.
def ROCDL_BarrierOp : ROCDL_Op<"barrier"> {
|
||||
string llvmBuilder = [{
|
||||
llvm::LLVMContext &llvmContext = builder.getContext();
|
||||
builder.CreateFence(llvm::AtomicOrdering::Release,
|
||||
llvmContext.getOrInsertSyncScopeID("workgroup"));
|
||||
createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_s_barrier);
|
||||
builder.CreateFence(llvm::AtomicOrdering::Acquire,
|
||||
llvmContext.getOrInsertSyncScopeID("workgroup"));
|
||||
}];
|
||||
let assemblyFormat = "attr-dict";
|
||||
}
|
||||
|
||||
#endif // ROCDLIR_OPS
|
||||
|
|
|
@ -1,9 +1,15 @@
|
|||
# Run tablegen over GPUToROCDL.td with -gen-rewriters to produce
# GPUToROCDL.cpp.inc, and expose that step as the MLIRGPUToROCDLIncGen target.
set(LLVM_TARGET_DEFINITIONS GPUToROCDL.td)
|
||||
mlir_tablegen(GPUToROCDL.cpp.inc -gen-rewriters)
|
||||
add_public_tablegen_target(MLIRGPUToROCDLIncGen)
|
||||
|
||||
# Conversion library built from LowerGpuOpsToROCDLOps.cpp. It lists
# MLIRGPUToROCDLIncGen under DEPENDS, presumably so the generated .inc file
# exists before the source that includes it is compiled.
add_mlir_conversion_library(MLIRGPUtoROCDLTransforms
|
||||
LowerGpuOpsToROCDLOps.cpp
|
||||
|
||||
DEPENDS
|
||||
MLIRConversionPassIncGen
|
||||
MLIRGPUToROCDLIncGen
|
||||
)
|
||||
|
||||
target_link_libraries(MLIRGPUtoROCDLTransforms
|
||||
PUBLIC
|
||||
LLVMSupport
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
//==-- GPUToROCDL.td - GPU Ops to ROCDL Patterns -------------*- tablegen -*==//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Defines Patterns to lower GPU ops to ROCDL.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef MLIR_CONVERSION_GPUTOROCDL_TD
|
||||
#define MLIR_CONVERSION_GPUTOROCDL_TD
|
||||
|
||||
include "mlir/Dialect/GPU/GPUOps.td"
|
||||
include "mlir/Dialect/LLVMIR/ROCDLOps.td"
|
||||
|
||||
// Rewrite rule: a GPU_BarrierOp (matched without operands) is replaced by a
// ROCDL_BarrierOp.
def : Pat<(GPU_BarrierOp), (ROCDL_BarrierOp)>;
|
||||
|
||||
#endif // MLIR_CONVERSION_GPUTOROCDL_TD
|
|
@ -32,6 +32,9 @@ using namespace mlir;
|
|||
|
||||
namespace {
|
||||
|
||||
/// Import the GPU Ops to ROCDL Patterns.
|
||||
#include "GPUToROCDL.cpp.inc"
|
||||
|
||||
// A pass that replaces all occurrences of GPU device operations with their
|
||||
// corresponding ROCDL equivalent.
|
||||
//
|
||||
|
@ -71,6 +74,7 @@ public:
|
|||
|
||||
void mlir::populateGpuToROCDLConversionPatterns(
|
||||
LLVMTypeConverter &converter, OwningRewritePatternList &patterns) {
|
||||
populateWithGenerated(converter.getDialect()->getContext(), &patterns);
|
||||
patterns.insert<
|
||||
GPUIndexIntrinsicOpLowering<gpu::ThreadIdOp, ROCDL::ThreadIdXOp,
|
||||
ROCDL::ThreadIdYOp, ROCDL::ThreadIdZOp>,
|
||||
|
|
|
@ -42,6 +42,17 @@ gpu.module @test_module {
|
|||
|
||||
// -----
|
||||
|
||||
// Conversion test: a function containing a single gpu.barrier is expected to
// produce rocdl.barrier in the output.
gpu.module @test_module {
|
||||
// CHECK-LABEL: func @gpu_sync()
|
||||
func @gpu_sync() {
|
||||
// CHECK: rocdl.barrier
|
||||
gpu.barrier
|
||||
std.return
|
||||
}
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
gpu.module @test_module {
|
||||
// CHECK: llvm.func @__ocml_fabs_f32(!llvm.float) -> !llvm.float
|
||||
// CHECK: llvm.func @__ocml_fabs_f64(!llvm.double) -> !llvm.double
|
||||
|
|
|
@ -28,3 +28,9 @@ func @rocdl_special_regs() -> !llvm.i32 {
|
|||
%11 = rocdl.grid.dim.z : !llvm.i32
|
||||
llvm.return %0 : !llvm.i32
|
||||
}
|
||||
|
||||
// Dialect test: a function whose body is just rocdl.barrier; the output is
// expected to contain rocdl.barrier. (The RUN line is outside this hunk.)
func @rocdl.barrier() {
|
||||
// CHECK: rocdl.barrier
|
||||
rocdl.barrier
|
||||
llvm.return
|
||||
}
|
||||
|
|
|
@ -33,3 +33,11 @@ llvm.func @kernel_func() attributes {gpu.kernel} {
|
|||
// CHECK-LABEL: amdgpu_kernel void @kernel_func
|
||||
llvm.return
|
||||
}
|
||||
|
||||
// Translation test: rocdl.barrier is expected to yield three consecutive
// output lines: a workgroup-scope release fence, a call to
// @llvm.amdgcn.s.barrier, and a workgroup-scope acquire fence.
llvm.func @rocdl.barrier() {
|
||||
// CHECK: fence syncscope("workgroup") release
|
||||
// CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
|
||||
// CHECK-NEXT: fence syncscope("workgroup") acquire
|
||||
rocdl.barrier
|
||||
llvm.return
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue