[libomptarget][nvptx] Fix build, second symbol reordering

This commit is contained in:
JonChesterfield 2019-12-19 02:00:26 +00:00
parent f0ced2ddb4
commit b40822fc14
2 changed files with 11 additions and 11 deletions

View File

@ -124,8 +124,6 @@ INLINE __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
return __ballot64(1);
}
EXTERN bool __kmpc_impl_is_first_active_thread();
EXTERN int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t Var,
int32_t SrcLane);
@ -157,6 +155,8 @@ INLINE int GetBlockIdInKernel() { return __builtin_amdgcn_workgroup_id_x(); }
INLINE int GetNumberOfBlocksInKernel() { return __ockl_get_num_groups(0); }
INLINE int GetNumberOfThreadsInBlock() { return __ockl_get_local_size(0); }
EXTERN bool __kmpc_impl_is_first_active_thread();
// Locks
EXTERN void __kmpc_impl_init_lock(omp_lock_t *lock);
EXTERN void __kmpc_impl_destroy_lock(omp_lock_t *lock);

View File

@ -133,15 +133,6 @@ INLINE __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
#endif
}
// Return true if this is the first active thread in the warp.
INLINE bool __kmpc_impl_is_first_active_thread() {
unsigned long long Mask = __kmpc_impl_activemask();
unsigned long long ShNum = WARPSIZE - (GetThreadIdInBlock() % WARPSIZE);
unsigned long long Sh = Mask << ShNum;
// Truncate Sh to the 32 lower bits
return (unsigned)Sh == 0;
}
// In Cuda 9.0, the *_sync() version takes an extra argument 'mask'.
INLINE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t Mask, int32_t Var,
@ -197,6 +188,15 @@ INLINE int GetBlockIdInKernel() { return blockIdx.x; }
INLINE int GetNumberOfBlocksInKernel() { return gridDim.x; }
INLINE int GetNumberOfThreadsInBlock() { return blockDim.x; }
// Return true if this is the first active thread in the warp.
INLINE bool __kmpc_impl_is_first_active_thread() {
unsigned long long Mask = __kmpc_impl_activemask();
unsigned long long ShNum = WARPSIZE - (GetThreadIdInBlock() % WARPSIZE);
unsigned long long Sh = Mask << ShNum;
// Truncate Sh to the 32 lower bits
return (unsigned)Sh == 0;
}
// Locks
EXTERN void __kmpc_impl_init_lock(omp_lock_t *lock);
EXTERN void __kmpc_impl_destroy_lock(omp_lock_t *lock);