[OpenMP] Emit calls to int64_t functions for amdgcn

[OpenMP] Emit calls to int64_t functions for amdgcn

Two functions use lanemask_t: syncwarp takes it as a parameter and
active_thread_mask returns it. Currently this type is assumed to be int32,
which is true for nvptx. This patch makes the type depend on the target
architecture.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D89746
This commit is contained in:
Jon Chesterfield 2020-10-22 15:02:44 +01:00 committed by JonChesterfield
parent b2faf75568
commit 09bc755dea
5 changed files with 60 additions and 2 deletions

View File

@ -226,6 +226,9 @@ public:
omp::IdentFlag Flags = omp::IdentFlag(0), omp::IdentFlag Flags = omp::IdentFlag(0),
unsigned Reserve2Flags = 0); unsigned Reserve2Flags = 0);
/// Get the type corresponding to __kmpc_impl_lanemask_t from the deviceRTL:
/// a 64-bit integer when the module's target triple is AMDGCN, a 32-bit
/// integer otherwise.
Type *getLanemaskType();
/// Generate control flow and cleanup for cancellation. /// Generate control flow and cleanup for cancellation.
/// ///
/// \param CancelFlag Flag indicating if the cancellation is performed. /// \param CancelFlag Flag indicating if the cancellation is performed.

View File

@ -153,6 +153,7 @@ __OMP_TYPE(Int32Ptr)
__OMP_TYPE(Int64Ptr) __OMP_TYPE(Int64Ptr)
OMP_TYPE(SizeTy, M.getDataLayout().getIntPtrType(Ctx)) OMP_TYPE(SizeTy, M.getDataLayout().getIntPtrType(Ctx))
OMP_TYPE(LanemaskTy, getLanemaskType())
#define __OMP_PTR_TYPE(NAME, BASE) OMP_TYPE(NAME, BASE->getPointerTo()) #define __OMP_PTR_TYPE(NAME, BASE) OMP_TYPE(NAME, BASE->getPointerTo())
@ -553,8 +554,9 @@ __OMP_RTL(__kmpc_get_team_static_memory, false, Void, Int16, VoidPtr, SizeTy,
Int16, VoidPtrPtr) Int16, VoidPtrPtr)
__OMP_RTL(__kmpc_restore_team_static_memory, false, Void, Int16, Int16) __OMP_RTL(__kmpc_restore_team_static_memory, false, Void, Int16, Int16)
__OMP_RTL(__kmpc_barrier_simple_spmd, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_barrier_simple_spmd, false, Void, IdentPtr, Int32)
__OMP_RTL(__kmpc_warp_active_thread_mask, false, Int32, )
__OMP_RTL(__kmpc_syncwarp, false, Void, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, LanemaskTy,)
__OMP_RTL(__kmpc_syncwarp, false, Void, LanemaskTy)
__OMP_RTL(__last, false, Void, ) __OMP_RTL(__last, false, Void, )

View File

@ -16,6 +16,7 @@
#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/CFG.h" #include "llvm/IR/CFG.h"
#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IRBuilder.h" #include "llvm/IR/IRBuilder.h"
@ -217,6 +218,14 @@ Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
return Ident; return Ident;
} }
/// Return the integer type used for lane masks on the module's target:
/// i64 when the target triple is AMDGCN, i32 for every other target.
Type *OpenMPIRBuilder::getLanemaskType() {
  // This test is adequate until deviceRTL has finer grained lane widths
  const Triple TargetTriple(M.getTargetTriple());
  LLVMContext &Context = M.getContext();
  if (TargetTriple.isAMDGCN())
    return Type::getInt64Ty(Context);
  return Type::getInt32Ty(Context);
}
Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) { Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) {
Constant *&SrcLocStr = SrcLocStrMap[LocStr]; Constant *&SrcLocStr = SrcLocStrMap[LocStr];
if (!SrcLocStr) { if (!SrcLocStr) {

View File

@ -629,6 +629,10 @@ declare void @__kmpc_destroy_allocator(i32, i8*)
declare void @__kmpc_push_target_tripcount(i64, i64) declare void @__kmpc_push_target_tripcount(i64, i64)
declare i32 @__kmpc_warp_active_thread_mask()
declare void @__kmpc_syncwarp(i32)
declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**)
declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**)
@ -1142,6 +1146,12 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*)
; CHECK: ; Function Attrs: nounwind ; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64) ; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64)
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare i32 @__kmpc_warp_active_thread_mask()
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare void @__kmpc_syncwarp(i32)
; CHECK: ; Function Attrs: nounwind ; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) ; CHECK-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**)
@ -1661,6 +1671,12 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*)
; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly ; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly
; OPTIMISTIC-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64) ; OPTIMISTIC-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64)
; OPTIMISTIC: ; Function Attrs: convergent nounwind
; OPTIMISTIC-NEXT: declare i32 @__kmpc_warp_active_thread_mask()
; OPTIMISTIC: ; Function Attrs: convergent nounwind
; OPTIMISTIC-NEXT: declare void @__kmpc_syncwarp(i32)
; OPTIMISTIC: ; Function Attrs: nounwind ; OPTIMISTIC: ; Function Attrs: nounwind
; OPTIMISTIC-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) ; OPTIMISTIC-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**)

View File

@ -0,0 +1,28 @@
; RUN: opt < %s -S -openmpopt | FileCheck %s
; RUN: opt < %s -S -passes=openmpopt | FileCheck %s
; RUN: opt < %s -S -openmpopt -openmp-ir-builder-optimistic-attributes | FileCheck %s --check-prefix=OPTIMISTIC
; RUN: opt < %s -S -passes=openmpopt -openmp-ir-builder-optimistic-attributes | FileCheck %s --check-prefix=OPTIMISTIC
target triple = "amdgcn-amd-amdhsa"
; Calls both lane-mask runtime entry points with i64 operands, matching the
; i64 declarations that follow for the amdgcn target triple set above.
define void @call_all(i64 %arg) {
; Pass the incoming 64-bit mask straight through to the runtime.
call void @__kmpc_syncwarp(i64 %arg)
; The returned mask is intentionally unused; only the call itself matters here.
call i64 @__kmpc_warp_active_thread_mask()
ret void
}
declare i64 @__kmpc_warp_active_thread_mask()
declare void @__kmpc_syncwarp(i64)
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare i64 @__kmpc_warp_active_thread_mask()
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare void @__kmpc_syncwarp(i64)
; OPTIMISTIC: ; Function Attrs: convergent nounwind
; OPTIMISTIC-NEXT: declare i64 @__kmpc_warp_active_thread_mask()
; OPTIMISTIC: ; Function Attrs: convergent nounwind
; OPTIMISTIC-NEXT: declare void @__kmpc_syncwarp(i64)