forked from OSchip/llvm-project
[OpenMP] Emit calls to int64_t functions for amdgcn
[OpenMP] Emit calls to int64_t functions for amdgcn. Two functions, __kmpc_syncwarp and __kmpc_warp_active_thread_mask, use lanemask_t (as a parameter type and as a return type, respectively). Currently this type is assumed to be int32, which is true for nvptx but not for amdgcn. This patch makes the type target-architecture dependent. Reviewed By: jdoerfert. Differential Revision: https://reviews.llvm.org/D89746
This commit is contained in:
parent
b2faf75568
commit
09bc755dea
|
@ -226,6 +226,9 @@ public:
|
||||||
omp::IdentFlag Flags = omp::IdentFlag(0),
|
omp::IdentFlag Flags = omp::IdentFlag(0),
|
||||||
unsigned Reserve2Flags = 0);
|
unsigned Reserve2Flags = 0);
|
||||||
|
|
||||||
|
/// Get the LLVM type corresponding to __kmpc_impl_lanemask_t from the
/// deviceRTL. The type depends on the module's target triple: a 64-bit
/// integer for AMDGCN, a 32-bit integer for every other target.
|
||||||
|
Type *getLanemaskType();
|
||||||
|
|
||||||
/// Generate control flow and cleanup for cancellation.
|
/// Generate control flow and cleanup for cancellation.
|
||||||
///
|
///
|
||||||
/// \param CancelFlag Flag indicating if the cancellation is performed.
|
/// \param CancelFlag Flag indicating if the cancellation is performed.
|
||||||
|
|
|
@ -153,6 +153,7 @@ __OMP_TYPE(Int32Ptr)
|
||||||
__OMP_TYPE(Int64Ptr)
|
__OMP_TYPE(Int64Ptr)
|
||||||
|
|
||||||
OMP_TYPE(SizeTy, M.getDataLayout().getIntPtrType(Ctx))
|
OMP_TYPE(SizeTy, M.getDataLayout().getIntPtrType(Ctx))
|
||||||
|
OMP_TYPE(LanemaskTy, getLanemaskType())
|
||||||
|
|
||||||
#define __OMP_PTR_TYPE(NAME, BASE) OMP_TYPE(NAME, BASE->getPointerTo())
|
#define __OMP_PTR_TYPE(NAME, BASE) OMP_TYPE(NAME, BASE->getPointerTo())
|
||||||
|
|
||||||
|
@ -553,8 +554,9 @@ __OMP_RTL(__kmpc_get_team_static_memory, false, Void, Int16, VoidPtr, SizeTy,
|
||||||
Int16, VoidPtrPtr)
|
Int16, VoidPtrPtr)
|
||||||
__OMP_RTL(__kmpc_restore_team_static_memory, false, Void, Int16, Int16)
|
__OMP_RTL(__kmpc_restore_team_static_memory, false, Void, Int16, Int16)
|
||||||
__OMP_RTL(__kmpc_barrier_simple_spmd, false, Void, IdentPtr, Int32)
|
__OMP_RTL(__kmpc_barrier_simple_spmd, false, Void, IdentPtr, Int32)
|
||||||
__OMP_RTL(__kmpc_warp_active_thread_mask, false, Int32, )
|
|
||||||
__OMP_RTL(__kmpc_syncwarp, false, Void, Int32)
|
__OMP_RTL(__kmpc_warp_active_thread_mask, false, LanemaskTy,)
|
||||||
|
__OMP_RTL(__kmpc_syncwarp, false, Void, LanemaskTy)
|
||||||
|
|
||||||
__OMP_RTL(__last, false, Void, )
|
__OMP_RTL(__last, false, Void, )
|
||||||
|
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
|
|
||||||
#include "llvm/ADT/StringRef.h"
|
#include "llvm/ADT/StringRef.h"
|
||||||
#include "llvm/ADT/StringSwitch.h"
|
#include "llvm/ADT/StringSwitch.h"
|
||||||
|
#include "llvm/ADT/Triple.h"
|
||||||
#include "llvm/IR/CFG.h"
|
#include "llvm/IR/CFG.h"
|
||||||
#include "llvm/IR/DebugInfo.h"
|
#include "llvm/IR/DebugInfo.h"
|
||||||
#include "llvm/IR/IRBuilder.h"
|
#include "llvm/IR/IRBuilder.h"
|
||||||
|
@ -217,6 +218,14 @@ Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
|
||||||
return Ident;
|
return Ident;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return the integer type used for the deviceRTL lane mask
/// (__kmpc_impl_lanemask_t) on the target of module M: i64 when the target
/// triple is AMDGCN, i32 otherwise.
Type *OpenMPIRBuilder::getLanemaskType() {
|
||||||
|
LLVMContext &Ctx = M.getContext();
|
||||||
|
Triple triple(M.getTargetTriple());
|
||||||
|
|
||||||
|
// Selecting the width purely on "is the target AMDGCN?" is adequate until
// the deviceRTL has finer-grained lane widths.
|
||||||
|
return triple.isAMDGCN() ? Type::getInt64Ty(Ctx) : Type::getInt32Ty(Ctx);
|
||||||
|
}
|
||||||
|
|
||||||
Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) {
|
Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) {
|
||||||
Constant *&SrcLocStr = SrcLocStrMap[LocStr];
|
Constant *&SrcLocStr = SrcLocStrMap[LocStr];
|
||||||
if (!SrcLocStr) {
|
if (!SrcLocStr) {
|
||||||
|
|
|
@ -629,6 +629,10 @@ declare void @__kmpc_destroy_allocator(i32, i8*)
|
||||||
|
|
||||||
declare void @__kmpc_push_target_tripcount(i64, i64)
|
declare void @__kmpc_push_target_tripcount(i64, i64)
|
||||||
|
|
||||||
|
declare i32 @__kmpc_warp_active_thread_mask()
|
||||||
|
|
||||||
|
declare void @__kmpc_syncwarp(i32)
|
||||||
|
|
||||||
declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**)
|
declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**)
|
||||||
|
|
||||||
declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**)
|
declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**)
|
||||||
|
@ -1142,6 +1146,12 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*)
|
||||||
; CHECK: ; Function Attrs: nounwind
|
; CHECK: ; Function Attrs: nounwind
|
||||||
; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64)
|
; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64)
|
||||||
|
|
||||||
|
; CHECK: ; Function Attrs: convergent nounwind
|
||||||
|
; CHECK-NEXT: declare i32 @__kmpc_warp_active_thread_mask()
|
||||||
|
|
||||||
|
; CHECK: ; Function Attrs: convergent nounwind
|
||||||
|
; CHECK-NEXT: declare void @__kmpc_syncwarp(i32)
|
||||||
|
|
||||||
; CHECK: ; Function Attrs: nounwind
|
; CHECK: ; Function Attrs: nounwind
|
||||||
; CHECK-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**)
|
; CHECK-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**)
|
||||||
|
|
||||||
|
@ -1661,6 +1671,12 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*)
|
||||||
; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly
|
; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly
|
||||||
; OPTIMISTIC-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64)
|
; OPTIMISTIC-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64)
|
||||||
|
|
||||||
|
; OPTIMISTIC: ; Function Attrs: convergent nounwind
|
||||||
|
; OPTIMISTIC-NEXT: declare i32 @__kmpc_warp_active_thread_mask()
|
||||||
|
|
||||||
|
; OPTIMISTIC: ; Function Attrs: convergent nounwind
|
||||||
|
; OPTIMISTIC-NEXT: declare void @__kmpc_syncwarp(i32)
|
||||||
|
|
||||||
; OPTIMISTIC: ; Function Attrs: nounwind
|
; OPTIMISTIC: ; Function Attrs: nounwind
|
||||||
; OPTIMISTIC-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**)
|
; OPTIMISTIC-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,28 @@
|
||||||
|
; RUN: opt < %s -S -openmpopt | FileCheck %s
|
||||||
|
; RUN: opt < %s -S -passes=openmpopt | FileCheck %s
|
||||||
|
; RUN: opt < %s -S -openmpopt -openmp-ir-builder-optimistic-attributes | FileCheck %s --check-prefix=OPTIMISTIC
|
||||||
|
; RUN: opt < %s -S -passes=openmpopt -openmp-ir-builder-optimistic-attributes | FileCheck %s --check-prefix=OPTIMISTIC
|
||||||
|
|
||||||
|
target triple = "amdgcn-amd-amdhsa"
|
||||||
|
|
||||||
|
define void @call_all(i64 %arg) {
|
||||||
|
call void @__kmpc_syncwarp(i64 %arg)
|
||||||
|
call i64 @__kmpc_warp_active_thread_mask()
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
declare i64 @__kmpc_warp_active_thread_mask()
|
||||||
|
|
||||||
|
declare void @__kmpc_syncwarp(i64)
|
||||||
|
|
||||||
|
; CHECK: ; Function Attrs: convergent nounwind
|
||||||
|
; CHECK-NEXT: declare i64 @__kmpc_warp_active_thread_mask()
|
||||||
|
|
||||||
|
; CHECK: ; Function Attrs: convergent nounwind
|
||||||
|
; CHECK-NEXT: declare void @__kmpc_syncwarp(i64)
|
||||||
|
|
||||||
|
; OPTIMISTIC: ; Function Attrs: convergent nounwind
|
||||||
|
; OPTIMISTIC-NEXT: declare i64 @__kmpc_warp_active_thread_mask()
|
||||||
|
|
||||||
|
; OPTIMISTIC: ; Function Attrs: convergent nounwind
|
||||||
|
; OPTIMISTIC-NEXT: declare void @__kmpc_syncwarp(i64)
|
Loading…
Reference in New Issue