Revert "[OpenMP] Replace OpenMP RTL Functions With OMPIRBuilder and OMPKinds.def"

Tests fail on Arm because they are automatically populated for
architectures with an incompatible pointer width. Reverting until the
tests are updated. Failing tests:

OpenMP/distribute_parallel_for_num_threads_codegen.cpp
OpenMP/distribute_parallel_for_if_codegen.cpp
OpenMP/distribute_parallel_for_simd_if_codegen.cpp
OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp
OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp
OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp
OpenMP/teams_distribute_parallel_for_if_codegen.cpp
OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp

This reverts commit 90eaedda9b.
This commit is contained in:
Joseph Huber 2020-09-30 15:11:51 -04:00
parent 81921ebc43
commit 1b60f63e4f
6 changed files with 687 additions and 319 deletions

View File

@ -306,9 +306,6 @@ protected:
CodeGenModule &CGM;
StringRef FirstSeparator, Separator;
/// An OpenMP-IR-Builder instance.
llvm::OpenMPIRBuilder OMPBuilder;
/// Constructor allowing to redefine the name separator for the variables.
explicit CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
StringRef Separator);
@ -389,6 +386,8 @@ protected:
llvm::Value *getCriticalRegionLock(StringRef CriticalName);
private:
/// An OpenMP-IR-Builder instance.
llvm::OpenMPIRBuilder OMPBuilder;
/// Map for SourceLocation and OpenMP runtime library debug locations.
typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDebugLocMapTy;

View File

@ -28,6 +28,96 @@ using namespace CodeGen;
using namespace llvm::omp;
namespace {
/// Selectors for the device runtime library entry points that
/// CGOpenMPRuntimeGPU::createNVPTXRuntimeFunction knows how to declare.
/// Each enumerator is documented with the C prototype of the runtime function
/// it stands for. The numeric values are implicit (declaration order), so
/// enumerators must only be referred to by name.
enum OpenMPRTLFunctionNVPTX {
/// Call to void __kmpc_kernel_init(kmp_int32 thread_limit,
/// int16_t RequiresOMPRuntime);
OMPRTL_NVPTX__kmpc_kernel_init,
/// Call to void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
OMPRTL_NVPTX__kmpc_kernel_deinit,
/// Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
/// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing);
OMPRTL_NVPTX__kmpc_spmd_kernel_init,
/// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2,
/// Call to void __kmpc_kernel_prepare_parallel(void
/// *outlined_function);
OMPRTL_NVPTX__kmpc_kernel_prepare_parallel,
/// Call to bool __kmpc_kernel_parallel(void **outlined_function);
OMPRTL_NVPTX__kmpc_kernel_parallel,
/// Call to void __kmpc_kernel_end_parallel();
OMPRTL_NVPTX__kmpc_kernel_end_parallel,
/// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
/// global_tid);
OMPRTL_NVPTX__kmpc_serialized_parallel,
/// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
/// global_tid);
OMPRTL_NVPTX__kmpc_end_serialized_parallel,
/// Call to int32_t __kmpc_shuffle_int32(int32_t element,
/// int16_t lane_offset, int16_t warp_size);
OMPRTL_NVPTX__kmpc_shuffle_int32,
/// Call to int64_t __kmpc_shuffle_int64(int64_t element,
/// int16_t lane_offset, int16_t warp_size);
OMPRTL_NVPTX__kmpc_shuffle_int64,
/// Call to int32_t __kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc,
/// kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
/// void *reduce_data,
/// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
/// lane_offset, int16_t shortCircuit),
/// void (*kmp_InterWarpCopyFctPtr)(void *src, int32_t warp_num));
OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2,
/// Call to int32_t __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc,
/// kmp_int32 global_tid, void *global_buffer, int32_t num_of_records,
/// void *reduce_data,
/// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
/// lane_offset, int16_t shortCircuit),
/// void (*kmp_InterWarpCopyFctPtr)(void *src, int32_t warp_num),
/// void (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx,
/// void *reduce_data),
/// void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx,
/// void *reduce_data),
/// void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer, int idx,
/// void *reduce_data),
/// void (*kmp_GlobalToListRedFctPtr)(void *buffer, int idx,
/// void *reduce_data));
OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2,
/// Call to void __kmpc_nvptx_end_reduce_nowait(int32_t global_tid);
/// Note: the enumerator name omits the "nvptx_" part of the runtime symbol.
OMPRTL_NVPTX__kmpc_end_reduce_nowait,
/// Call to void __kmpc_data_sharing_init_stack();
OMPRTL_NVPTX__kmpc_data_sharing_init_stack,
/// Call to void __kmpc_data_sharing_init_stack_spmd();
OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd,
/// Call to void* __kmpc_data_sharing_coalesced_push_stack(size_t size,
/// int16_t UseSharedMemory);
OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack,
/// Call to void* __kmpc_data_sharing_push_stack(size_t size, int16_t
/// UseSharedMemory);
OMPRTL_NVPTX__kmpc_data_sharing_push_stack,
/// Call to void __kmpc_data_sharing_pop_stack(void *a);
OMPRTL_NVPTX__kmpc_data_sharing_pop_stack,
/// Call to void __kmpc_begin_sharing_variables(void ***args,
/// size_t n_args);
OMPRTL_NVPTX__kmpc_begin_sharing_variables,
/// Call to void __kmpc_end_sharing_variables();
OMPRTL_NVPTX__kmpc_end_sharing_variables,
/// Call to void __kmpc_get_shared_variables(void ***GlobalArgs);
OMPRTL_NVPTX__kmpc_get_shared_variables,
/// Call to uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32
/// global_tid);
OMPRTL_NVPTX__kmpc_parallel_level,
/// Call to int8_t __kmpc_is_spmd_exec_mode();
OMPRTL_NVPTX__kmpc_is_spmd_exec_mode,
/// Call to void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
/// const void *buf, size_t size, int16_t is_shared, const void **res);
OMPRTL_NVPTX__kmpc_get_team_static_memory,
/// Call to void __kmpc_restore_team_static_memory(int16_t
/// isSPMDExecutionMode, int16_t is_shared);
OMPRTL_NVPTX__kmpc_restore_team_static_memory,
/// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
/// Note: this enumerator (and the next one) has no "NVPTX" in its name.
OMPRTL__kmpc_barrier,
/// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
/// global_tid);
OMPRTL__kmpc_barrier_simple_spmd,
/// Call to int32_t __kmpc_warp_active_thread_mask(void);
OMPRTL_NVPTX__kmpc_warp_active_thread_mask,
/// Call to void __kmpc_syncwarp(int32_t Mask);
OMPRTL_NVPTX__kmpc_syncwarp,
};
/// Pre(post)-action for different OpenMP constructs specialized for NVPTX.
class NVPTXActionTy final : public PrePostActionTy {
@ -1153,13 +1243,13 @@ void CGOpenMPRuntimeGPU::emitNonSPMDEntryHeader(CodeGenFunction &CGF,
// TODO: Optimize runtime initialization and pass in correct value.
llvm::Value *Args[] = {getThreadLimit(CGF),
Bld.getInt16(/*RequiresOMPRuntime=*/1)};
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_kernel_init),
Args);
CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args);
// For data sharing, we need to initialize the stack.
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_data_sharing_init_stack));
CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(
OMPRTL_NVPTX__kmpc_data_sharing_init_stack));
emitGenericVarsProlog(CGF, WST.Loc);
}
@ -1182,9 +1272,8 @@ void CGOpenMPRuntimeGPU::emitNonSPMDEntryFooter(CodeGenFunction &CGF,
// Signal termination condition.
// TODO: Optimize runtime initialization and pass in correct value.
llvm::Value *Args[] = {CGF.Builder.getInt16(/*IsOMPRuntimeInitialized=*/1)};
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_kernel_deinit),
Args);
CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), Args);
// Barrier to terminate worker threads.
syncCTAThreads(CGF);
// Master thread jumps to exit point.
@ -1258,14 +1347,13 @@ void CGOpenMPRuntimeGPU::emitSPMDEntryHeader(
/*RequiresOMPRuntime=*/
Bld.getInt16(RequiresFullRuntime ? 1 : 0),
/*RequiresDataSharing=*/Bld.getInt16(0)};
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_spmd_kernel_init),
Args);
CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
if (RequiresFullRuntime) {
// For data sharing, we need to initialize the stack.
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_data_sharing_init_stack_spmd));
CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd));
}
CGF.EmitBranch(ExecuteBB);
@ -1291,9 +1379,9 @@ void CGOpenMPRuntimeGPU::emitSPMDEntryFooter(CodeGenFunction &CGF,
// DeInitialize the OMP state in the runtime; called by all active threads.
llvm::Value *Args[] = {/*RequiresOMPRuntime=*/
CGF.Builder.getInt16(RequiresFullRuntime ? 1 : 0)};
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_spmd_kernel_deinit_v2),
Args);
CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(
OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2), Args);
CGF.EmitBranch(EST.ExitBB);
CGF.EmitBlock(EST.ExitBB);
@ -1327,7 +1415,7 @@ void CGOpenMPRuntimeGPU::emitWorkerFunction(WorkerFunctionState &WST) {
}
void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF,
WorkerFunctionState &WST) {
WorkerFunctionState &WST) {
//
// The workers enter this loop and wait for parallel work from the master.
// When the master encounters a parallel region it sets up the work + variable
@ -1362,10 +1450,8 @@ void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF,
// TODO: Optimize runtime initialization and pass in correct value.
llvm::Value *Args[] = {WorkFn.getPointer()};
llvm::Value *Ret =
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_kernel_parallel),
Args);
llvm::Value *Ret = CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args);
Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus);
// On termination condition (workid == 0), exit loop.
@ -1430,9 +1516,9 @@ void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF,
// Signal end of parallel region.
CGF.EmitBlock(TerminateBB);
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_kernel_end_parallel),
llvm::None);
CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_end_parallel),
llvm::None);
CGF.EmitBranch(BarrierBB);
// All active and inactive workers wait at a barrier after parallel region.
@ -1447,6 +1533,328 @@ void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF,
clearLocThreadIdInsertPt(CGF);
}
/// Returns the callee for the requested OpenMP device runtime function.
/// Specialized for the NVPTX device.
///
/// Every supported entry point is a non-variadic function; its LLVM function
/// type is assembled here and handed to CodeGenModule::CreateRuntimeFunction
/// (or CodeGenModule::CreateConvergentRuntimeFunction for the barrier and
/// warp-synchronization entry points).
///
/// \param Function Selector; interpreted as an OpenMPRTLFunctionNVPTX value.
/// \return The callee for the selected runtime function. A selector that does
/// not name an enumerator trips an assertion in asserts-enabled builds and
/// yields a null callee otherwise.
llvm::FunctionCallee
CGOpenMPRuntimeGPU::createNVPTXRuntimeFunction(unsigned Function) {
  // Declares the non-variadic runtime function `RetTy Name(Params...)`.
  auto DeclareFn = [this](llvm::Type *RetTy,
                          llvm::ArrayRef<llvm::Type *> Params,
                          llvm::StringRef Name) -> llvm::FunctionCallee {
    auto *FnTy = llvm::FunctionType::get(RetTy, Params, /*isVarArg=*/false);
    return CGM.CreateRuntimeFunction(FnTy, Name);
  };
  // Same as DeclareFn, but uses the convergent variant of the CodeGenModule
  // helper.
  auto DeclareConvergentFn =
      [this](llvm::Type *RetTy, llvm::ArrayRef<llvm::Type *> Params,
             llvm::StringRef Name) -> llvm::FunctionCallee {
    auto *FnTy = llvm::FunctionType::get(RetTy, Params, /*isVarArg=*/false);
    return CGM.CreateConvergentRuntimeFunction(FnTy, Name);
  };

  // Callback pointer types shared by the two reduction entry points.
  // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id,
  //                                 int16_t lane_offset, int16_t shortCircuit)
  auto ShuffleReducePtrTy = [this]() -> llvm::Type * {
    llvm::Type *Params[] = {CGM.VoidPtrTy, CGM.Int16Ty, CGM.Int16Ty,
                            CGM.Int16Ty};
    return llvm::FunctionType::get(CGM.VoidTy, Params, /*isVarArg=*/false)
        ->getPointerTo();
  };
  // void (*kmp_InterWarpCopyFctPtr)(void *src, int32_t warp_num)
  auto InterWarpCopyPtrTy = [this]() -> llvm::Type * {
    llvm::Type *Params[] = {CGM.VoidPtrTy, CGM.Int32Ty};
    return llvm::FunctionType::get(CGM.VoidTy, Params, /*isVarArg=*/false)
        ->getPointerTo();
  };
  // void (*)(void *buffer, int idx, void *reduce_data)
  auto GlobalListPtrTy = [this]() -> llvm::Type * {
    llvm::Type *Params[] = {CGM.VoidPtrTy, CGM.IntTy, CGM.VoidPtrTy};
    return llvm::FunctionType::get(CGM.VoidTy, Params, /*isVarArg=*/false)
        ->getPointerTo();
  };

  llvm::FunctionCallee RTLFn = nullptr;
  switch (static_cast<OpenMPRTLFunctionNVPTX>(Function)) {
  case OMPRTL_NVPTX__kmpc_kernel_init:
    // void __kmpc_kernel_init(kmp_int32 thread_limit,
    //                         int16_t RequiresOMPRuntime);
    RTLFn = DeclareFn(CGM.VoidTy, {CGM.Int32Ty, CGM.Int16Ty},
                      "__kmpc_kernel_init");
    break;
  case OMPRTL_NVPTX__kmpc_kernel_deinit:
    // void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
    RTLFn = DeclareFn(CGM.VoidTy, {CGM.Int16Ty}, "__kmpc_kernel_deinit");
    break;
  case OMPRTL_NVPTX__kmpc_spmd_kernel_init:
    // void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
    //     int16_t RequiresOMPRuntime, int16_t RequiresDataSharing);
    RTLFn = DeclareFn(CGM.VoidTy, {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty},
                      "__kmpc_spmd_kernel_init");
    break;
  case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2:
    // void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
    RTLFn =
        DeclareFn(CGM.VoidTy, {CGM.Int16Ty}, "__kmpc_spmd_kernel_deinit_v2");
    break;
  case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel:
    // void __kmpc_kernel_prepare_parallel(void *outlined_function);
    RTLFn = DeclareFn(CGM.VoidTy, {CGM.Int8PtrTy},
                      "__kmpc_kernel_prepare_parallel");
    break;
  case OMPRTL_NVPTX__kmpc_kernel_parallel: {
    // bool __kmpc_kernel_parallel(void **outlined_function);
    // The return type is whatever the front end lowers C++ `bool` to.
    llvm::Type *BoolRetTy =
        CGM.getTypes().ConvertType(CGM.getContext().BoolTy);
    RTLFn = DeclareFn(BoolRetTy, {CGM.Int8PtrPtrTy}, "__kmpc_kernel_parallel");
    break;
  }
  case OMPRTL_NVPTX__kmpc_kernel_end_parallel:
    // void __kmpc_kernel_end_parallel();
    RTLFn = DeclareFn(CGM.VoidTy, llvm::None, "__kmpc_kernel_end_parallel");
    break;
  case OMPRTL_NVPTX__kmpc_serialized_parallel:
    // void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid);
    RTLFn = DeclareFn(CGM.VoidTy, {getIdentTyPointerTy(), CGM.Int32Ty},
                      "__kmpc_serialized_parallel");
    break;
  case OMPRTL_NVPTX__kmpc_end_serialized_parallel:
    // void __kmpc_end_serialized_parallel(ident_t *loc,
    //                                     kmp_int32 global_tid);
    RTLFn = DeclareFn(CGM.VoidTy, {getIdentTyPointerTy(), CGM.Int32Ty},
                      "__kmpc_end_serialized_parallel");
    break;
  case OMPRTL_NVPTX__kmpc_shuffle_int32:
    // int32_t __kmpc_shuffle_int32(int32_t element, int16_t lane_offset,
    //                              int16_t warp_size);
    RTLFn = DeclareFn(CGM.Int32Ty, {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty},
                      "__kmpc_shuffle_int32");
    break;
  case OMPRTL_NVPTX__kmpc_shuffle_int64:
    // int64_t __kmpc_shuffle_int64(int64_t element, int16_t lane_offset,
    //                              int16_t warp_size);
    RTLFn = DeclareFn(CGM.Int64Ty, {CGM.Int64Ty, CGM.Int16Ty, CGM.Int16Ty},
                      "__kmpc_shuffle_int64");
    break;
  case OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2:
    // int32_t __kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc,
    //     kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    //     void *reduce_data, kmp_ShuffleReductFctPtr, kmp_InterWarpCopyFctPtr);
    RTLFn = DeclareFn(CGM.Int32Ty,
                      {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
                       CGM.SizeTy, CGM.VoidPtrTy, ShuffleReducePtrTy(),
                       InterWarpCopyPtrTy()},
                      "__kmpc_nvptx_parallel_reduce_nowait_v2");
    break;
  case OMPRTL_NVPTX__kmpc_end_reduce_nowait:
    // void __kmpc_nvptx_end_reduce_nowait(kmp_int32 global_tid);
    // Note: the runtime symbol carries an "nvptx_" part that the enumerator
    // name does not.
    RTLFn =
        DeclareFn(CGM.VoidTy, {CGM.Int32Ty}, "__kmpc_nvptx_end_reduce_nowait");
    break;
  case OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2: {
    // int32_t __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc,
    //     kmp_int32 global_tid, void *global_buffer, int32_t num_of_records,
    //     void *reduce_data, kmp_ShuffleReductFctPtr, kmp_InterWarpCopyFctPtr,
    //     kmp_ListToGlobalCpyFctPtr, kmp_GlobalToListCpyFctPtr,
    //     kmp_GlobalToListCpyPtrsFctPtr, kmp_GlobalToListRedFctPtr);
    // The last four callbacks all share the (void *, int, void *) signature.
    llvm::Type *GlobalListTy = GlobalListPtrTy();
    RTLFn = DeclareFn(CGM.Int32Ty,
                      {getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy,
                       CGM.Int32Ty, CGM.VoidPtrTy, ShuffleReducePtrTy(),
                       InterWarpCopyPtrTy(), GlobalListTy, GlobalListTy,
                       GlobalListTy, GlobalListTy},
                      "__kmpc_nvptx_teams_reduce_nowait_v2");
    break;
  }
  case OMPRTL_NVPTX__kmpc_data_sharing_init_stack:
    // void __kmpc_data_sharing_init_stack();
    RTLFn =
        DeclareFn(CGM.VoidTy, llvm::None, "__kmpc_data_sharing_init_stack");
    break;
  case OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd:
    // void __kmpc_data_sharing_init_stack_spmd();
    RTLFn = DeclareFn(CGM.VoidTy, llvm::None,
                      "__kmpc_data_sharing_init_stack_spmd");
    break;
  case OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack:
    // void *__kmpc_data_sharing_coalesced_push_stack(size_t size,
    //                                                int16_t UseSharedMemory);
    RTLFn = DeclareFn(CGM.VoidPtrTy, {CGM.SizeTy, CGM.Int16Ty},
                      "__kmpc_data_sharing_coalesced_push_stack");
    break;
  case OMPRTL_NVPTX__kmpc_data_sharing_push_stack:
    // void *__kmpc_data_sharing_push_stack(size_t size,
    //                                      int16_t UseSharedMemory);
    RTLFn = DeclareFn(CGM.VoidPtrTy, {CGM.SizeTy, CGM.Int16Ty},
                      "__kmpc_data_sharing_push_stack");
    break;
  case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack:
    // void __kmpc_data_sharing_pop_stack(void *a);
    RTLFn = DeclareFn(CGM.VoidTy, {CGM.VoidPtrTy},
                      "__kmpc_data_sharing_pop_stack");
    break;
  case OMPRTL_NVPTX__kmpc_begin_sharing_variables:
    // void __kmpc_begin_sharing_variables(void ***args, size_t n_args);
    RTLFn = DeclareFn(CGM.VoidTy,
                      {CGM.Int8PtrPtrTy->getPointerTo(), CGM.SizeTy},
                      "__kmpc_begin_sharing_variables");
    break;
  case OMPRTL_NVPTX__kmpc_end_sharing_variables:
    // void __kmpc_end_sharing_variables();
    RTLFn = DeclareFn(CGM.VoidTy, llvm::None, "__kmpc_end_sharing_variables");
    break;
  case OMPRTL_NVPTX__kmpc_get_shared_variables:
    // void __kmpc_get_shared_variables(void ***GlobalArgs);
    RTLFn = DeclareFn(CGM.VoidTy, {CGM.Int8PtrPtrTy->getPointerTo()},
                      "__kmpc_get_shared_variables");
    break;
  case OMPRTL_NVPTX__kmpc_parallel_level:
    // uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32 global_tid);
    RTLFn = DeclareFn(CGM.Int16Ty, {getIdentTyPointerTy(), CGM.Int32Ty},
                      "__kmpc_parallel_level");
    break;
  case OMPRTL_NVPTX__kmpc_is_spmd_exec_mode:
    // int8_t __kmpc_is_spmd_exec_mode();
    RTLFn = DeclareFn(CGM.Int8Ty, llvm::None, "__kmpc_is_spmd_exec_mode");
    break;
  case OMPRTL_NVPTX__kmpc_get_team_static_memory:
    // void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
    //     const void *buf, size_t size, int16_t is_shared, const void **res);
    RTLFn = DeclareFn(CGM.VoidTy,
                      {CGM.Int16Ty, CGM.VoidPtrTy, CGM.SizeTy, CGM.Int16Ty,
                       CGM.VoidPtrPtrTy},
                      "__kmpc_get_team_static_memory");
    break;
  case OMPRTL_NVPTX__kmpc_restore_team_static_memory:
    // void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode,
    //                                        int16_t is_shared);
    RTLFn = DeclareFn(CGM.VoidTy, {CGM.Int16Ty, CGM.Int16Ty},
                      "__kmpc_restore_team_static_memory");
    break;
  case OMPRTL__kmpc_barrier:
    // void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
    RTLFn = DeclareConvergentFn(
        CGM.VoidTy, {getIdentTyPointerTy(), CGM.Int32Ty}, "__kmpc_barrier");
    break;
  case OMPRTL__kmpc_barrier_simple_spmd:
    // void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32 global_tid);
    RTLFn = DeclareConvergentFn(CGM.VoidTy,
                                {getIdentTyPointerTy(), CGM.Int32Ty},
                                "__kmpc_barrier_simple_spmd");
    break;
  case OMPRTL_NVPTX__kmpc_warp_active_thread_mask:
    // int32_t __kmpc_warp_active_thread_mask(void);
    RTLFn = DeclareConvergentFn(CGM.Int32Ty, llvm::None,
                                "__kmpc_warp_active_thread_mask");
    break;
  case OMPRTL_NVPTX__kmpc_syncwarp:
    // void __kmpc_syncwarp(kmp_int32 Mask);
    RTLFn = DeclareConvergentFn(CGM.VoidTy, {CGM.Int32Ty}, "__kmpc_syncwarp");
    break;
  }
  // The switch covers every enumerator, so a null callee here means the caller
  // passed a value that is not an OpenMPRTLFunctionNVPTX.
  assert(RTLFn && "Unable to find OpenMP NVPTX runtime function");
  return RTLFn;
}
void CGOpenMPRuntimeGPU::createOffloadEntry(llvm::Constant *ID,
llvm::Constant *Addr,
uint64_t Size, int32_t,
@ -1749,14 +2157,12 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF,
llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
llvm::Value *ThreadID = getThreadID(CGF, Loc);
llvm::Value *PL = CGF.EmitRuntimeCall(
OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
OMPRTL___kmpc_parallel_level),
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level),
{RTLoc, ThreadID});
IsTTD = Bld.CreateIsNull(PL);
}
llvm::Value *IsSPMD = Bld.CreateIsNotNull(
CGF.EmitNounwindRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_is_spmd_exec_mode)));
llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB);
// There is no need to emit line number for unconditional branch.
(void)ApplyDebugLocation::CreateEmpty(CGF);
@ -1790,8 +2196,8 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF,
llvm::Value *GlobalRecordSizeArg[] = {
Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_data_sharing_coalesced_push_stack),
createNVPTXRuntimeFunction(
OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
GlobalRecordSizeArg);
GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
GlobalRecValue, GlobalRecPtrTy);
@ -1853,10 +2259,9 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF,
CGM.Int16Ty,
getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD ? 1 : 0),
StaticGlobalized, Ld, IsInSharedMemory, ResAddr};
CGF.EmitRuntimeCall(
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_get_team_static_memory),
GlobalRecordSizeArg);
CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
OMPRTL_NVPTX__kmpc_get_team_static_memory),
GlobalRecordSizeArg);
GlobalizedRecords.back().Buffer = StaticGlobalized;
GlobalizedRecords.back().RecSize = RecSize;
GlobalizedRecords.back().UseSharedMemory = UseSharedMemory;
@ -1883,10 +2288,10 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF,
llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
CGF.Builder.getInt16(UseSharedMemory ? 1 : 0)};
llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(),
IsInTTDRegion ? OMPRTL___kmpc_data_sharing_push_stack
: OMPRTL___kmpc_data_sharing_coalesced_push_stack),
createNVPTXRuntimeFunction(
IsInTTDRegion
? OMPRTL_NVPTX__kmpc_data_sharing_push_stack
: OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
GlobalRecordSizeArg);
GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
GlobalRecValue, GlobalRecPtrTy);
@ -1985,8 +2390,8 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF,
llvm::Value *GlobalRecordSizeArg[] = {
Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_data_sharing_coalesced_push_stack),
createNVPTXRuntimeFunction(
OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
GlobalRecordSizeArg);
llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
GlobalRecValue, CGF.ConvertTypeForMem(VD->getType())->getPointerTo());
@ -2014,8 +2419,7 @@ void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF,
for (llvm::Value *Addr :
llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) {
CGF.EmitRuntimeCall(
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack),
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
Addr);
}
if (I->getSecond().GlobalRecordAddr) {
@ -2030,8 +2434,8 @@ void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF,
(void)ApplyDebugLocation::CreateEmpty(CGF);
CGF.EmitBlock(NonSPMDBB);
CGF.EmitRuntimeCall(
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack),
createNVPTXRuntimeFunction(
OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr));
CGF.EmitBlock(ExitBB);
} else if (!CGM.getLangOpts().OpenMPCUDATargetParallel && IsInTTDRegion) {
@ -2052,15 +2456,14 @@ void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF,
getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD ? 1 : 0),
IsInSharedMemory};
CGF.EmitRuntimeCall(
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_restore_team_static_memory),
createNVPTXRuntimeFunction(
OMPRTL_NVPTX__kmpc_restore_team_static_memory),
Args);
}
} else {
CGF.EmitRuntimeCall(
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack),
I->getSecond().GlobalRecordAddr);
CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
I->getSecond().GlobalRecordAddr);
}
}
}
@ -2132,11 +2535,9 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall(
llvm::Value *Args[] = {RTLoc, ThreadID};
NVPTXActionTy Action(
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_serialized_parallel),
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel),
Args,
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_end_serialized_parallel),
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel),
Args);
RCG.setAction(Action);
RCG(CGF);
@ -2152,8 +2553,7 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall(
// Prepare for parallel region. Indicate the outlined function.
llvm::Value *Args[] = {ID};
CGF.EmitRuntimeCall(
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_kernel_prepare_parallel),
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel),
Args);
// Create a private scope that will globalize the arguments
@ -2170,10 +2570,9 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall(
llvm::Value *DataSharingArgs[] = {
SharedArgsPtr,
llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())};
CGF.EmitRuntimeCall(
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_begin_sharing_variables),
DataSharingArgs);
CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
OMPRTL_NVPTX__kmpc_begin_sharing_variables),
DataSharingArgs);
// Store variable address in a list of references to pass to workers.
unsigned Idx = 0;
@ -2207,8 +2606,8 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall(
syncCTAThreads(CGF);
if (!CapturedVars.empty())
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_end_sharing_variables));
CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_sharing_variables));
// Remember for post-processing in worker loop.
Work.emplace_back(WFn);
@ -2232,9 +2631,8 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall(
llvm::BasicBlock *SeqBB = CGF.createBasicBlock(".sequential");
llvm::BasicBlock *ParallelCheckBB = CGF.createBasicBlock(".parcheck");
llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master");
llvm::Value *IsSPMD = Bld.CreateIsNotNull(
CGF.EmitNounwindRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_is_spmd_exec_mode)));
llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
Bld.CreateCondBr(IsSPMD, SeqBB, ParallelCheckBB);
// There is no need to emit line number for unconditional branch.
(void)ApplyDebugLocation::CreateEmpty(CGF);
@ -2242,8 +2640,7 @@ void CGOpenMPRuntimeGPU::emitNonSPMDParallelCall(
llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
llvm::Value *ThreadID = getThreadID(CGF, Loc);
llvm::Value *PL = CGF.EmitRuntimeCall(
OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
OMPRTL___kmpc_parallel_level),
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level),
{RTLoc, ThreadID});
llvm::Value *Res = Bld.CreateIsNotNull(PL);
Bld.CreateCondBr(Res, SeqBB, MasterBB);
@ -2307,11 +2704,9 @@ void CGOpenMPRuntimeGPU::emitSPMDParallelCall(
llvm::Value *Args[] = {RTLoc, ThreadID};
NVPTXActionTy Action(
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_serialized_parallel),
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel),
Args,
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_end_serialized_parallel),
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel),
Args);
RCG.setAction(Action);
RCG(CGF);
@ -2341,9 +2736,9 @@ void CGOpenMPRuntimeGPU::syncCTAThreads(CodeGenFunction &CGF) {
llvm::ConstantPointerNull::get(
cast<llvm::PointerType>(getIdentTyPointerTy())),
llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/0, /*isSigned=*/true)};
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_barrier_simple_spmd),
Args);
llvm::CallInst *Call = CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier_simple_spmd), Args);
Call->setConvergent();
}
void CGOpenMPRuntimeGPU::emitBarrierCall(CodeGenFunction &CGF,
@ -2357,10 +2752,9 @@ void CGOpenMPRuntimeGPU::emitBarrierCall(CodeGenFunction &CGF,
unsigned Flags = getDefaultFlagsForBarriers(Kind);
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
getThreadID(CGF, Loc)};
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_barrier),
Args);
llvm::CallInst *Call = CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args);
Call->setConvergent();
}
void CGOpenMPRuntimeGPU::emitCriticalRegion(
@ -2376,8 +2770,8 @@ void CGOpenMPRuntimeGPU::emitCriticalRegion(
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
// Get the mask of active threads in the warp.
llvm::Value *Mask = CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_warp_active_thread_mask));
llvm::Value *Mask = CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_warp_active_thread_mask));
// Fetch team-local id of the thread.
llvm::Value *ThreadID = RT.getGPUThreadID(CGF);
@ -2419,9 +2813,8 @@ void CGOpenMPRuntimeGPU::emitCriticalRegion(
// counter variable and returns to the loop.
CGF.EmitBlock(SyncBB);
// Reconverge active threads in the warp.
(void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_syncwarp),
Mask);
(void)CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_syncwarp), Mask);
llvm::Value *IncCounterVal =
CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1));
@ -2471,15 +2864,14 @@ static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF,
CGBuilderTy &Bld = CGF.Builder;
CGOpenMPRuntimeGPU &RT =
*(static_cast<CGOpenMPRuntimeGPU *>(&CGM.getOpenMPRuntime()));
llvm::OpenMPIRBuilder &OMPBuilder = RT.getOMPBuilder();
CharUnits Size = CGF.getContext().getTypeSizeInChars(ElemType);
assert(Size.getQuantity() <= 8 &&
"Unsupported bitwidth in shuffle instruction.");
RuntimeFunction ShuffleFn = Size.getQuantity() <= 4
? OMPRTL___kmpc_shuffle_int32
: OMPRTL___kmpc_shuffle_int64;
OpenMPRTLFunctionNVPTX ShuffleFn = Size.getQuantity() <= 4
? OMPRTL_NVPTX__kmpc_shuffle_int32
: OMPRTL_NVPTX__kmpc_shuffle_int64;
// Cast all types to 32- or 64-bit values before calling shuffle routines.
QualType CastTy = CGF.getContext().getIntTypeForBitwidth(
@ -2489,8 +2881,7 @@ static llvm::Value *createRuntimeShuffleFunction(CodeGenFunction &CGF,
Bld.CreateIntCast(RT.getGPUWarpSize(CGF), CGM.Int16Ty, /*isSigned=*/true);
llvm::Value *ShuffledVal = CGF.EmitRuntimeCall(
OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), ShuffleFn),
{ElemCast, Offset, WarpSize});
RT.createNVPTXRuntimeFunction(ShuffleFn), {ElemCast, Offset, WarpSize});
return castValueToType(CGF, ShuffledVal, CastTy, ElemType, Loc);
}
@ -4000,8 +4391,8 @@ void CGOpenMPRuntimeGPU::emitReduction(
InterWarpCopyFn};
Res = CGF.EmitRuntimeCall(
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2),
createNVPTXRuntimeFunction(
OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2),
Args);
} else {
assert(TeamsReduction && "expected teams reduction.");
@ -4050,8 +4441,8 @@ void CGOpenMPRuntimeGPU::emitReduction(
BufferToGlobalRedFn};
Res = CGF.EmitRuntimeCall(
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2),
createNVPTXRuntimeFunction(
OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2),
Args);
}
@ -4086,8 +4477,7 @@ void CGOpenMPRuntimeGPU::emitReduction(
RegionCodeGenTy RCG(CodeGen);
NVPTXActionTy Action(
nullptr, llvm::None,
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_nvptx_end_reduce_nowait),
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait),
EndArgs);
RCG.setAction(Action);
RCG(CGF);
@ -4098,7 +4488,7 @@ void CGOpenMPRuntimeGPU::emitReduction(
const VarDecl *
CGOpenMPRuntimeGPU::translateParameter(const FieldDecl *FD,
const VarDecl *NativeParam) const {
const VarDecl *NativeParam) const {
if (!NativeParam->getType()->isReferenceType())
return NativeParam;
QualType ArgType = NativeParam->getType();
@ -4248,9 +4638,9 @@ llvm::Function *CGOpenMPRuntimeGPU::createParallelDataSharingWrapper(
CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "global_args");
llvm::Value *GlobalArgsPtr = GlobalArgs.getPointer();
llvm::Value *DataSharingArgs[] = {GlobalArgsPtr};
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_get_shared_variables),
DataSharingArgs);
CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_get_shared_variables),
DataSharingArgs);
// Retrieve the shared variables from the list of references returned
// by the runtime. Pass the variables to the outlined function.

View File

@ -1068,6 +1068,16 @@ public:
llvm::AttributeList ExtraAttrs = llvm::AttributeList(),
bool Local = false, bool AssumeConvergent = false);
/// Convenience wrapper that creates or returns the declaration of the runtime
/// function \p Name with type \p Ty and automatically marks that declaration
/// as convergent.
///
/// \p ExtraAttrs and \p Local are forwarded unchanged to
/// CreateRuntimeFunction; only the trailing flag is fixed to true here.
llvm::FunctionCallee CreateConvergentRuntimeFunction(
    llvm::FunctionType *Ty, StringRef Name,
    llvm::AttributeList ExtraAttrs = llvm::AttributeList(),
    bool Local = false) {
  return CreateRuntimeFunction(Ty, Name, ExtraAttrs, Local,
                               /*AssumeConvergent=*/true);
}
/// Create a new runtime global variable with the specified type and name.
llvm::Constant *CreateRuntimeVariable(llvm::Type *Ty,
StringRef Name);

View File

@ -91,7 +91,7 @@ int bar(int n){
// CHECK: br label {{%?}}[[AWAIT_WORK:.+]]
//
// CHECK: [[AWAIT_WORK]]
// CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
// CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) #[[#CONVERGENT:]]
// CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]])
// CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8
// store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1
@ -321,10 +321,10 @@ int bar(int n){
// CHECK: define internal void [[PARALLEL_FN4]](
// CHECK: [[A:%.+]] = alloca i[[SZ:32|64]],
// CHECK: store i[[SZ]] 45, i[[SZ]]* %a,
// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}})
// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}}) #[[#CONVERGENT:]]
// CHECK: ret void
// CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32) #[[#CONVERGENT:]]
// CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32) #[[#CONVERGENT]]
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l58}}_worker()
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l58}}(
@ -377,6 +377,6 @@ int bar(int n){
// CHECK: declare i32 @__kmpc_warp_active_thread_mask() #[[#CONVERGENT:]]
// CHECK: declare void @__kmpc_syncwarp(i32) #[[#CONVERGENT:]]
// CHECK: attributes #[[#CONVERGENT:]] = {{.*}} convergent {{.*}}
// CHECK: attributes #[[#CONVERGENT]] = {{.*}} convergent {{.*}}
#endif

View File

@ -220,9 +220,6 @@ __OMP_FUNCTION_TYPE(KmpcDtor, false, Void, VoidPtr)
__OMP_FUNCTION_TYPE(KmpcCopyCtor, false, VoidPtr, VoidPtr, VoidPtr)
__OMP_FUNCTION_TYPE(TaskRoutineEntry, false, Int32, Int32,
/* kmp_task_t */ VoidPtr)
__OMP_FUNCTION_TYPE(ShuffleReduce, false, Void, VoidPtr, Int16, Int16, Int16)
__OMP_FUNCTION_TYPE(InterWarpCopy, false, Void, VoidPtr, Int32)
__OMP_FUNCTION_TYPE(GlobalList, false, Void, VoidPtr, Int32, VoidPtr)
#undef __OMP_FUNCTION_TYPE
#undef OMP_FUNCTION_TYPE
@ -314,6 +311,8 @@ __OMP_RTL(__kmpc_omp_taskyield, false, Int32, IdentPtr, Int32, /* Int */ Int32)
__OMP_RTL(__kmpc_push_num_threads, false, Void, IdentPtr, Int32,
/* Int */ Int32)
__OMP_RTL(__kmpc_push_proc_bind, false, Void, IdentPtr, Int32, /* Int */ Int32)
__OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32)
__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32)
__OMP_RTL(__kmpc_omp_reg_task_with_affinity, false, Int32, IdentPtr, Int32,
/* kmp_task_t */ VoidPtr, Int32,
/* kmp_task_affinity_info_t */ VoidPtr)
@ -519,42 +518,17 @@ __OMP_RTL(__tgt_push_mapper_component, false, Void, VoidPtr, VoidPtr, VoidPtr,
__OMP_RTL(__kmpc_task_allow_completion_event, false, VoidPtr, IdentPtr,
/* Int */ Int32, /* kmp_task_t */ VoidPtr)
/// OpenMP Device runtime functions
__OMP_RTL(__kmpc_kernel_init, false, Void, Int32, Int16)
__OMP_RTL(__kmpc_kernel_deinit, false, Void, Int16)
__OMP_RTL(__kmpc_spmd_kernel_init, false, Void, Int32, Int16, Int16)
__OMP_RTL(__kmpc_spmd_kernel_deinit_v2, false, Void, Int16)
__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr)
__OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr)
__OMP_RTL(__kmpc_kernel_end_parallel, false, Void, )
__OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32)
__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32)
__OMP_RTL(__kmpc_shuffle_int32, false, Int32, Int32, Int16, Int16)
__OMP_RTL(__kmpc_nvptx_parallel_reduce_nowait_v2, false, Int32, IdentPtr, Int32,
Int32, SizeTy, VoidPtr, ShuffleReducePtr, InterWarpCopyPtr)
__OMP_RTL(__kmpc_nvptx_end_reduce_nowait, false, Void, Int32)
__OMP_RTL(__kmpc_nvptx_teams_reduce_nowait_v2, false, Int32, IdentPtr, Int32,
VoidPtr, Int32, VoidPtr, ShuffleReducePtr, InterWarpCopyPtr,
GlobalListPtr, GlobalListPtr, GlobalListPtr, GlobalListPtr)
__OMP_RTL(__kmpc_shuffle_int64, false, Int64, Int64, Int16, Int16)
__OMP_RTL(__kmpc_data_sharing_init_stack, false, Void, )
__OMP_RTL(__kmpc_data_sharing_init_stack_spmd, false, Void, )
__OMP_RTL(__kmpc_data_sharing_coalesced_push_stack, false, VoidPtr, SizeTy, Int16)
__OMP_RTL(__kmpc_data_sharing_init_stack_spmd, false, Void, )
__OMP_RTL(__kmpc_data_sharing_coalesced_push_stack, false, VoidPtr, SizeTy,
Int16)
__OMP_RTL(__kmpc_data_sharing_push_stack, false, VoidPtr, SizeTy, Int16)
__OMP_RTL(__kmpc_data_sharing_pop_stack, false, Void, VoidPtr)
__OMP_RTL(__kmpc_begin_sharing_variables, false, Void, VoidPtrPtrPtr, SizeTy)
__OMP_RTL(__kmpc_end_sharing_variables, false, Void, )
__OMP_RTL(__kmpc_get_shared_variables, false, Void, VoidPtrPtrPtr)
__OMP_RTL(__kmpc_parallel_level, false, Int16, IdentPtr, Int32)
__OMP_RTL(__kmpc_is_spmd_exec_mode, false, Int8, )
__OMP_RTL(__kmpc_get_team_static_memory, false, Void, Int16, VoidPtr, SizeTy,
Int16, VoidPtrPtr)
__OMP_RTL(__kmpc_restore_team_static_memory, false, Void, Int16, Int16)
__OMP_RTL(__kmpc_barrier_simple_spmd, false, Void, IdentPtr, Int32)
__OMP_RTL(__kmpc_warp_active_thread_mask, false, Int32, )
__OMP_RTL(__kmpc_syncwarp, false, Void, Int32)
/// Note that device runtime functions (in the following) do not necessarily
/// need attributes as we expect to see the definitions.
__OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr)
__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr)
__OMP_RTL(__last, false, Void, )
@ -603,8 +577,8 @@ __OMP_ATTRS_SET(DefaultAttrs,
__OMP_ATTRS_SET(BarrierAttrs,
OptimisticAttributes
? AttributeSet(EnumAttr(NoUnwind), EnumAttr(Convergent))
: AttributeSet(EnumAttr(NoUnwind), EnumAttr(Convergent)))
? AttributeSet(EnumAttr(NoUnwind))
: AttributeSet(EnumAttr(NoUnwind)))
__OMP_ATTRS_SET(InaccessibleArgOnlyAttrs,
OptimisticAttributes
@ -676,11 +650,6 @@ __OMP_ATTRS_SET(ReturnAlignedPtrAttrs,
__OMP_RTL_ATTRS(__kmpc_barrier, BarrierAttrs, AttributeSet(),
ParamAttrs(ReadOnlyPtrAttrs))
__OMP_RTL_ATTRS(__kmpc_barrier_simple_spmd, BarrierAttrs, AttributeSet(),
ParamAttrs(ReadOnlyPtrAttrs))
__OMP_RTL_ATTRS(__kmpc_warp_active_thread_mask, BarrierAttrs, AttributeSet(),
ParamAttrs())
__OMP_RTL_ATTRS(__kmpc_syncwarp, BarrierAttrs, AttributeSet(), ParamAttrs())
__OMP_RTL_ATTRS(__kmpc_cancel, InaccessibleArgOnlyAttrs, AttributeSet(),
ParamAttrs(ReadOnlyPtrAttrs))
__OMP_RTL_ATTRS(__kmpc_cancel_barrier, BarrierAttrs, AttributeSet(),

View File

@ -888,313 +888,313 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*)
; CHECK: declare dso_local i32 @omp_pause_resource_all(i32)
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare dso_local i32 @omp_get_supported_active_levels()
; CHECK-NEXT: declare dso_local i32 @omp_get_supported_active_levels() #0
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare void @__kmpc_barrier(%struct.ident_t*, i32)
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_barrier(%struct.ident_t*, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32)
; CHECK-NEXT: declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32) #0
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare i32 @__kmpc_cancel_barrier(%struct.ident_t*, i32)
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i32 @__kmpc_cancel_barrier(%struct.ident_t*, i32) #0
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare void @__kmpc_flush(%struct.ident_t*)
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_flush(%struct.ident_t*) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*)
; CHECK-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
; CHECK-NEXT: declare void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) #0
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t*, i32)
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t*, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t*, i32, i32)
; CHECK-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t*, i32, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32)
; CHECK-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t*, i32, i32)
; CHECK-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t*, i32, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t*, i32)
; CHECK-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t*, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t*, i32)
; CHECK-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t*, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i32 @__kmpc_master(%struct.ident_t*, i32)
; CHECK-NEXT: declare i32 @__kmpc_master(%struct.ident_t*, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_end_master(%struct.ident_t*, i32)
; CHECK-NEXT: declare void @__kmpc_end_master(%struct.ident_t*, i32) #0
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare void @__kmpc_critical(%struct.ident_t*, i32, [8 x i32]*)
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_critical(%struct.ident_t*, i32, [8 x i32]*) #0
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t*, i32, [8 x i32]*, i32)
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t*, i32, [8 x i32]*, i32) #0
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare void @__kmpc_end_critical(%struct.ident_t*, i32, [8 x i32]*)
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_end_critical(%struct.ident_t*, i32, [8 x i32]*) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_begin(%struct.ident_t*, i32)
; CHECK-NEXT: declare void @__kmpc_begin(%struct.ident_t*, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_end(%struct.ident_t*)
; CHECK-NEXT: declare void @__kmpc_end(%struct.ident_t*) #0
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*)
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) #0
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*)
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) #0
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t*, i32, [8 x i32]*)
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t*, i32, [8 x i32]*) #0
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t*, i32, [8 x i32]*)
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t*, i32, [8 x i32]*) #0
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare void @__kmpc_ordered(%struct.ident_t*, i32)
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_ordered(%struct.ident_t*, i32) #0
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t*, i32)
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t*, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32)
; CHECK-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32)
; CHECK-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64)
; CHECK-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64)
; CHECK-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_for_static_fini(%struct.ident_t*, i32)
; CHECK-NEXT: declare void @__kmpc_for_static_fini(%struct.ident_t*, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_team_static_init_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32)
; CHECK-NEXT: declare void @__kmpc_team_static_init_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32)
; CHECK-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64)
; CHECK-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_team_static_init_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64)
; CHECK-NEXT: declare void @__kmpc_team_static_init_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32)
; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32)
; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64)
; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64)
; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) #0
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare i32 @__kmpc_single(%struct.ident_t*, i32)
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i32 @__kmpc_single(%struct.ident_t*, i32) #0
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare void @__kmpc_end_single(%struct.ident_t*, i32)
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_end_single(%struct.ident_t*, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*)
; CHECK-NEXT: declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*)
; CHECK-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*) #0
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t*, i32)
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t*, i32) #0
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t*, i32)
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t*, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32)
; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32)
; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64)
; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64)
; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t*, i32, i32, i32, i32, i32, i32)
; CHECK-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t*, i32, i32, i32, i32, i32, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t*, i32, i32, i32, i32, i32, i32)
; CHECK-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t*, i32, i32, i32, i32, i32, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_dispatch_init_8(%struct.ident_t*, i32, i32, i64, i64, i64, i64)
; CHECK-NEXT: declare void @__kmpc_dispatch_init_8(%struct.ident_t*, i32, i32, i64, i64, i64, i64) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t*, i32, i32, i64, i64, i64, i64)
; CHECK-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t*, i32, i32, i64, i64, i64, i64) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*)
; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*)
; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*)
; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*)
; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t*, i32)
; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t*, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t*, i32)
; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t*, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t*, i32)
; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t*, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t*, i32)
; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t*, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_omp_task_begin_if0(%struct.ident_t*, i32, i8*)
; CHECK-NEXT: declare void @__kmpc_omp_task_begin_if0(%struct.ident_t*, i32, i8*) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_omp_task_complete_if0(%struct.ident_t*, i32, i8*)
; CHECK-NEXT: declare void @__kmpc_omp_task_complete_if0(%struct.ident_t*, i32, i8*) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t*, i32, i8*, i32, i8*, i32, i8*)
; CHECK-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t*, i32, i8*, i32, i8*, i32, i8*) #0
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t*, i32, i32, i8*, i32, i8*)
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t*, i32, i32, i8*, i32, i8*) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t*, i32, i32)
; CHECK-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t*, i32, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t*, i32, i32, i32)
; CHECK-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t*, i32, i32, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_fork_teams(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
; CHECK-NEXT: declare void @__kmpc_fork_teams(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_taskloop(%struct.ident_t*, i32, i8*, i32, i64*, i64*, i64, i32, i32, i64, i8*)
; CHECK-NEXT: declare void @__kmpc_taskloop(%struct.ident_t*, i32, i8*, i32, i64*, i64*, i64, i32, i32, i64, i8*) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i8* @__kmpc_omp_target_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*, i64)
; CHECK-NEXT: declare i8* @__kmpc_omp_target_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*, i64) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i8* @__kmpc_taskred_modifier_init(%struct.ident_t*, i32, i32, i32, i8*)
; CHECK-NEXT: declare i8* @__kmpc_taskred_modifier_init(%struct.ident_t*, i32, i32, i32, i8*) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*)
; CHECK-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*) #0
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t*, i32, i32)
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t*, i32, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_copyprivate(%struct.ident_t*, i32, i64, i8*, void (i8*, i8*)*, i32)
; CHECK-NEXT: declare void @__kmpc_copyprivate(%struct.ident_t*, i32, i64, i8*, void (i8*, i8*)*, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i8* @__kmpc_threadprivate_cached(%struct.ident_t*, i32, i8*, i64, i8***)
; CHECK-NEXT: declare i8* @__kmpc_threadprivate_cached(%struct.ident_t*, i32, i8*, i64, i8***) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t*, i8*, i8* (i8*)*, i8* (i8*, i8*)*, void (i8*)*)
; CHECK-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t*, i8*, i8* (i8*)*, i8* (i8*, i8*)*, void (i8*)*) #0
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t*, i32, i32, i8*)
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t*, i32, i32, i8*) #0
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t*, i32, i64*)
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t*, i32, i64*) #0
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t*, i32, i64*)
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t*, i32, i64*) #0
; CHECK: ; Function Attrs: convergent nounwind
; CHECK-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t*, i32)
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t*, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i8* @__kmpc_alloc(i32, i64, i8*)
; CHECK-NEXT: declare i8* @__kmpc_alloc(i32, i64, i8*) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_free(i32, i8*, i8*)
; CHECK-NEXT: declare void @__kmpc_free(i32, i8*, i8*) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i8* @__kmpc_init_allocator(i32, i8*, i32, i8*)
; CHECK-NEXT: declare i8* @__kmpc_init_allocator(i32, i8*, i32, i8*) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*)
; CHECK-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64)
; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**)
; CHECK-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**)
; CHECK-NEXT: declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i32 @__tgt_target_teams_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32)
; CHECK-NEXT: declare i32 @__tgt_target_teams_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32)
; CHECK-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__tgt_register_requires(i64)
; CHECK-NEXT: declare void @__tgt_register_requires(i64) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__tgt_target_data_begin_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**)
; CHECK-NEXT: declare void @__tgt_target_data_begin_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**)
; CHECK-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__tgt_target_data_end_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**)
; CHECK-NEXT: declare void @__tgt_target_data_end_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__tgt_target_data_end_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**)
; CHECK-NEXT: declare void @__tgt_target_data_end_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__tgt_target_data_update_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**)
; CHECK-NEXT: declare void @__tgt_target_data_update_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__tgt_target_data_update_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**)
; CHECK-NEXT: declare void @__tgt_target_data_update_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i64 @__tgt_mapper_num_components(i8*)
; CHECK-NEXT: declare i64 @__tgt_mapper_num_components(i8*) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64)
; CHECK-NEXT: declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i8* @__kmpc_task_allow_completion_event(%struct.ident_t*, i32, i8*)
; CHECK-NEXT: declare i8* @__kmpc_task_allow_completion_event(%struct.ident_t*, i32, i8*) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*)
; CHECK-NEXT: declare i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i8* @__kmpc_task_reduction_init(i32, i32, i8*)
; CHECK-NEXT: declare i8* @__kmpc_task_reduction_init(i32, i32, i8*) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*)
; CHECK-NEXT: declare i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*) #0
; CHECK: ; Function Attrs: nounwind
; CHECK-NEXT: declare void @__kmpc_proxy_task_completed_ooo(i8*)
; CHECK-NEXT: declare void @__kmpc_proxy_task_completed_ooo(i8*) #0
; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly
; OPTIMISTIC-NEXT: declare dso_local void @omp_set_num_threads(i32)
@ -1212,52 +1212,52 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*)
; OPTIMISTIC-NEXT: declare dso_local void @omp_set_schedule(i32, i32)
; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_threads()
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_threads() #1
; OPTIMISTIC-NOT: Function Attrs
; OPTIMISTIC: declare dso_local void @use_int(i32)
; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_dynamic()
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_dynamic() #1
; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_nested()
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_nested() #1
; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_threads()
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_threads() #1
; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_num()
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_num() #1
; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_procs()
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_procs() #1
; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_parallel()
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_parallel() #1
; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_final()
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_final() #1
; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_active_level()
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_active_level() #1
; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_level()
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_level() #1
; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_ancestor_thread_num(i32)
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_ancestor_thread_num(i32) #1
; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_team_size(i32)
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_team_size(i32) #1
; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_limit()
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_limit() #1
; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_active_levels()
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_active_levels() #1
; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn
; OPTIMISTIC-NEXT: declare dso_local void @omp_get_schedule(i32* nocapture writeonly, i32* nocapture writeonly)
; OPTIMISTIC-NEXT: declare dso_local void @omp_get_schedule(i32* nocapture writeonly, i32* nocapture writeonly) #2
; OPTIMISTIC-NOT: Function Attrs
; OPTIMISTIC: declare dso_local i32 @omp_get_max_task_priority()
@ -1326,7 +1326,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*)
; OPTIMISTIC: declare dso_local i32 @omp_get_team_num()
; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_cancellation()
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_cancellation() #1
; OPTIMISTIC-NOT: Function Attrs
; OPTIMISTIC: declare dso_local i32 @omp_get_initial_device()
@ -1356,25 +1356,25 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*)
; OPTIMISTIC: declare dso_local i32 @omp_get_device_num()
; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_proc_bind()
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_proc_bind() #1
; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_places()
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_places() #1
; OPTIMISTIC-NOT: Function Attrs
; OPTIMISTIC: declare dso_local i32 @omp_get_place_num_procs(i32)
; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind
; OPTIMISTIC-NEXT: declare dso_local void @omp_get_place_proc_ids(i32, i32* nocapture writeonly)
; OPTIMISTIC-NEXT: declare dso_local void @omp_get_place_proc_ids(i32, i32* nocapture writeonly) #2
; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_place_num()
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_place_num() #1
; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_partition_num_places()
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_partition_num_places() #1
; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly
; OPTIMISTIC-NEXT: declare dso_local void @omp_get_partition_place_nums(i32*)
; OPTIMISTIC-NEXT: declare dso_local void @omp_get_partition_place_nums(i32*) #1
; OPTIMISTIC-NOT: Function Attrs
; OPTIMISTIC: declare dso_local i32 @omp_control_tool(i32, i32, i8*)
@ -1419,7 +1419,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*)
; OPTIMISTIC: declare dso_local i32 @omp_pause_resource_all(i32)
; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_supported_active_levels()
; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_supported_active_levels() #1
; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn
; OPTIMISTIC-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t* nocapture nofree readonly)
@ -1427,7 +1427,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*)
; OPTIMISTIC: ; Function Attrs: nounwind
; OPTIMISTIC-NEXT: declare void @__kmpc_fork_call(%struct.ident_t* nocapture nofree readonly, i32, void (i32*, i32*, ...)* nocapture nofree readonly, ...)
; OPTIMISTIC: ; Function Attrs: convergent nounwind
; OPTIMISTIC: ; Function Attrs: nounwind
; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t* nocapture nofree readonly, i32)
; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn
@ -1451,13 +1451,13 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*)
; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn
; OPTIMISTIC-NEXT: declare void @__kmpc_end_master(%struct.ident_t* nocapture nofree readonly, i32)
; OPTIMISTIC: ; Function Attrs: convergent nounwind
; OPTIMISTIC: ; Function Attrs: nounwind
; OPTIMISTIC-NEXT: declare void @__kmpc_critical(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*)
; OPTIMISTIC: ; Function Attrs: convergent nounwind
; OPTIMISTIC: ; Function Attrs: nounwind
; OPTIMISTIC-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*, i32)
; OPTIMISTIC: ; Function Attrs: convergent nounwind
; OPTIMISTIC: ; Function Attrs: nounwind
; OPTIMISTIC-NEXT: declare void @__kmpc_end_critical(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*)
; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
@ -1466,22 +1466,22 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*)
; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
; OPTIMISTIC-NEXT: declare void @__kmpc_end(%struct.ident_t* nocapture nofree readonly)
; OPTIMISTIC: ; Function Attrs: convergent nounwind
; OPTIMISTIC: ; Function Attrs: nounwind
; OPTIMISTIC-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, [8 x i32]*)
; OPTIMISTIC: ; Function Attrs: convergent nounwind
; OPTIMISTIC: ; Function Attrs: nounwind
; OPTIMISTIC-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, [8 x i32]*)
; OPTIMISTIC: ; Function Attrs: convergent nounwind
; OPTIMISTIC: ; Function Attrs: nounwind
; OPTIMISTIC-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*)
; OPTIMISTIC: ; Function Attrs: convergent nounwind
; OPTIMISTIC: ; Function Attrs: nounwind
; OPTIMISTIC-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*)
; OPTIMISTIC: ; Function Attrs: convergent nounwind
; OPTIMISTIC: ; Function Attrs: nounwind
; OPTIMISTIC-NEXT: declare void @__kmpc_ordered(%struct.ident_t* nocapture nofree readonly, i32)
; OPTIMISTIC: ; Function Attrs: convergent nounwind
; OPTIMISTIC: ; Function Attrs: nounwind
; OPTIMISTIC-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t* nocapture nofree readonly, i32)
; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn
@ -1523,10 +1523,10 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*)
; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn
; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64)
; OPTIMISTIC: ; Function Attrs: convergent nounwind
; OPTIMISTIC: ; Function Attrs: nounwind
; OPTIMISTIC-NEXT: declare i32 @__kmpc_single(%struct.ident_t* nocapture nofree readonly, i32)
; OPTIMISTIC: ; Function Attrs: convergent nounwind
; OPTIMISTIC: ; Function Attrs: nounwind
; OPTIMISTIC-NEXT: declare void @__kmpc_end_single(%struct.ident_t* nocapture nofree readonly, i32)
; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
@ -1535,10 +1535,10 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*)
; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t* nocapture nofree readonly, i32, i8*)
; OPTIMISTIC: ; Function Attrs: convergent nounwind
; OPTIMISTIC: ; Function Attrs: nounwind
; OPTIMISTIC-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t* nocapture nofree readonly, i32)
; OPTIMISTIC: ; Function Attrs: convergent nounwind
; OPTIMISTIC: ; Function Attrs: nounwind
; OPTIMISTIC-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t* nocapture nofree readonly, i32)
; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn
@ -1598,7 +1598,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*)
; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t* nocapture nofree readonly, i32, i8*, i32, i8* nocapture nofree readonly, i32, i8* nocapture nofree readonly)
; OPTIMISTIC: ; Function Attrs: convergent nounwind
; OPTIMISTIC: ; Function Attrs: nounwind
; OPTIMISTIC-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t* nocapture nofree readonly, i32, i32, i8* nocapture nofree readonly, i32, i8*)
; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
@ -1622,7 +1622,7 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*)
; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
; OPTIMISTIC-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*)
; OPTIMISTIC: ; Function Attrs: convergent nounwind
; OPTIMISTIC: ; Function Attrs: nounwind
; OPTIMISTIC-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* nocapture nofree readonly, i32, i32)
; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
@ -1634,16 +1634,16 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*)
; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn
; OPTIMISTIC-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t* nocapture nofree readonly, i8*, i8* (i8*)* nocapture nofree readonly, i8* (i8*, i8*)* nocapture nofree readonly, void (i8*)* nocapture nofree readonly)
; OPTIMISTIC: ; Function Attrs: convergent nounwind
; OPTIMISTIC: ; Function Attrs: nounwind
; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t* nocapture nofree readonly, i32, i32, i8*)
; OPTIMISTIC: ; Function Attrs: convergent nounwind
; OPTIMISTIC: ; Function Attrs: nounwind
; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t* nocapture nofree readonly, i32, i64* nocapture nofree readonly)
; OPTIMISTIC: ; Function Attrs: convergent nounwind
; OPTIMISTIC: ; Function Attrs: nounwind
; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t* nocapture nofree readonly, i32, i64* nocapture nofree readonly)
; OPTIMISTIC: ; Function Attrs: convergent nounwind
; OPTIMISTIC: ; Function Attrs: nounwind
; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t* nocapture nofree readonly, i32)
; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn