forked from OSchip/llvm-project
[libomptarget][NVPTX] Drop dead code and data structures, NFCI.
* cg and HasCancel in WorkDescr were never read and can be removed. * This eliminates the last use of priv in ThreadPrivateContext. * CounterGroup is unused afterwards. * Remove duplicate external declares in omptarget-nvptx.cu that are already in the header omptarget-nvptx.h. Differential Revision: https://reviews.llvm.org/D51622 llvm-svn: 341370
This commit is contained in:
parent
d5bc65444c
commit
82d20201d0
|
@ -1,51 +0,0 @@
|
|||
//===------ counter_group.h - NVPTX OpenMP loop scheduling ------- CUDA -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is dual licensed under the MIT and the University of Illinois Open
|
||||
// Source Licenses. See LICENSE.txt for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Interface to implement OpenMP loop scheduling
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef _OMPTARGET_NVPTX_COUNTER_GROUP_H_
|
||||
#define _OMPTARGET_NVPTX_COUNTER_GROUP_H_
|
||||
|
||||
#include "option.h"
|
||||
|
||||
// counter group type for synchronizations
|
||||
class omptarget_nvptx_CounterGroup {
|
||||
public:
|
||||
// getters and setters
|
||||
INLINE Counter &Event() { return v_event; }
|
||||
INLINE volatile Counter &Start() { return v_start; }
|
||||
INLINE Counter &Init() { return v_init; }
|
||||
|
||||
// Synchronization Interface
|
||||
|
||||
INLINE void Clear(); // first time start=event
|
||||
INLINE void Reset(); // init = first
|
||||
INLINE void Init(Counter &priv); // priv = init
|
||||
INLINE Counter Next(); // just counts number of events
|
||||
|
||||
// set priv to n, to be used in later waitOrRelease
|
||||
INLINE void Complete(Counter &priv, Counter n);
|
||||
|
||||
// check priv and decide if we have to wait or can free the other warps
|
||||
INLINE void Release(Counter priv, Counter current_event_value);
|
||||
INLINE void WaitOrRelease(Counter priv, Counter current_event_value);
|
||||
|
||||
private:
|
||||
Counter v_event; // counter of events (atomic)
|
||||
|
||||
// volatile is needed to force loads to read from global
|
||||
// memory or L2 cache and see the write by the last master
|
||||
volatile Counter v_start; // signal when events registered are finished
|
||||
|
||||
Counter v_init; // used to initialize local thread variables
|
||||
};
|
||||
|
||||
#endif /* SRC_COUNTER_GROUP_H_ */
|
|
@ -1,82 +0,0 @@
|
|||
//===----- counter_groupi.h - NVPTX OpenMP loop scheduling ------- CUDA -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is dual licensed under the MIT and the University of Illinois Open
|
||||
// Source Licenses. See LICENSE.txt for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Interface implementation for OpenMP loop scheduling
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "option.h"
|
||||
|
||||
INLINE void omptarget_nvptx_CounterGroup::Clear() {
|
||||
PRINT0(LD_SYNCD, "clear counters\n")
|
||||
v_event = 0;
|
||||
v_start = 0;
|
||||
// v_init does not need to be reset (its value is dead)
|
||||
}
|
||||
|
||||
INLINE void omptarget_nvptx_CounterGroup::Reset() {
|
||||
// done by master before entering parallel
|
||||
ASSERT(LT_FUSSY, v_event == v_start,
|
||||
"error, entry %lld !=start %lld at reset\n", P64(v_event),
|
||||
P64(v_start));
|
||||
v_init = v_start;
|
||||
}
|
||||
|
||||
INLINE void omptarget_nvptx_CounterGroup::Init(Counter &priv) {
|
||||
PRINT(LD_SYNCD, "init priv counter 0x%llx with val %lld\n", P64(&priv),
|
||||
P64(v_start));
|
||||
priv = v_start;
|
||||
}
|
||||
|
||||
// just counts number of events
|
||||
INLINE Counter omptarget_nvptx_CounterGroup::Next() {
|
||||
Counter oldVal = atomicAdd(&v_event, (Counter)1);
|
||||
PRINT(LD_SYNCD, "next event counter 0x%llx with val %lld->%lld\n",
|
||||
P64(&v_event), P64(oldVal), P64(oldVal + 1));
|
||||
|
||||
return oldVal;
|
||||
}
|
||||
|
||||
// set priv to n, to be used in later waitOrRelease
|
||||
INLINE void omptarget_nvptx_CounterGroup::Complete(Counter &priv, Counter n) {
|
||||
PRINT(LD_SYNCD, "complete priv counter 0x%llx with val %llu->%llu (+%llu)\n",
|
||||
P64(&priv), P64(priv), P64(priv + n), n);
|
||||
priv += n;
|
||||
}
|
||||
|
||||
INLINE void omptarget_nvptx_CounterGroup::Release(Counter priv,
|
||||
Counter current_event_value) {
|
||||
if (priv - 1 == current_event_value) {
|
||||
PRINT(LD_SYNCD, "Release start counter 0x%llx with val %lld->%lld\n",
|
||||
P64(&v_start), P64(v_start), P64(priv));
|
||||
v_start = priv;
|
||||
}
|
||||
}
|
||||
|
||||
// check priv and decide if we have to wait or can free the other warps
|
||||
INLINE void
|
||||
omptarget_nvptx_CounterGroup::WaitOrRelease(Counter priv,
|
||||
Counter current_event_value) {
|
||||
if (priv - 1 == current_event_value) {
|
||||
PRINT(LD_SYNCD, "Release start counter 0x%llx with val %lld->%lld\n",
|
||||
P64(&v_start), P64(v_start), P64(priv));
|
||||
v_start = priv;
|
||||
} else {
|
||||
PRINT(LD_SYNCD,
|
||||
"Start waiting while start counter 0x%llx with val %lld < %lld\n",
|
||||
P64(&v_start), P64(v_start), P64(priv));
|
||||
while (priv > v_start) {
|
||||
// IDLE LOOP
|
||||
// start is volatile: it will be re-loaded at each while loop
|
||||
}
|
||||
PRINT(LD_SYNCD,
|
||||
"Done waiting as start counter 0x%llx with val %lld >= %lld\n",
|
||||
P64(&v_start), P64(v_start), P64(priv));
|
||||
}
|
||||
}
|
|
@ -20,6 +20,8 @@
|
|||
#ifndef _INTERFACES_H_
|
||||
#define _INTERFACES_H_
|
||||
|
||||
#include "option.h"
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// OpenMP interface
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -21,25 +21,10 @@ extern __device__
|
|||
omptarget_nvptx_Queue<omptarget_nvptx_ThreadPrivateContext, OMP_STATE_COUNT>
|
||||
omptarget_nvptx_device_State[MAX_SM];
|
||||
|
||||
extern __device__ __shared__
|
||||
omptarget_nvptx_ThreadPrivateContext *omptarget_nvptx_threadPrivateContext;
|
||||
|
||||
extern __device__ omptarget_nvptx_Queue<
|
||||
omptarget_nvptx_SimpleThreadPrivateContext, OMP_STATE_COUNT>
|
||||
omptarget_nvptx_device_simpleState[MAX_SM];
|
||||
|
||||
extern __device__ __shared__ omptarget_nvptx_SimpleThreadPrivateContext
|
||||
*omptarget_nvptx_simpleThreadPrivateContext;
|
||||
|
||||
//
|
||||
// The team master sets the outlined function and its arguments in these
|
||||
// variables to communicate with the workers. Since they are in shared memory,
|
||||
// there is one copy of these variables for each kernel, instance, and team.
|
||||
//
|
||||
extern volatile __device__ __shared__ omptarget_nvptx_WorkFn
|
||||
omptarget_nvptx_workFn;
|
||||
extern __device__ __shared__ uint32_t execution_param;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// init entry points
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -146,8 +131,6 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
|
|||
omptarget_nvptx_WorkDescr &workDescr = getMyWorkDescriptor();
|
||||
// init team context
|
||||
currTeamDescr.InitTeamDescr();
|
||||
// init counters (copy start to init)
|
||||
workDescr.CounterGroup().Reset();
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
|
@ -168,8 +151,6 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
|
|||
newTaskDescr);
|
||||
|
||||
// init thread private from init value
|
||||
workDescr.CounterGroup().Init(
|
||||
omptarget_nvptx_threadPrivateContext->Priv(threadId));
|
||||
PRINT(LD_PAR,
|
||||
"thread will execute parallel region with id %d in a team of "
|
||||
"%d threads\n",
|
||||
|
|
|
@ -26,7 +26,6 @@
|
|||
#include <math.h>
|
||||
|
||||
// local includes
|
||||
#include "counter_group.h"
|
||||
#include "debug.h" // debug
|
||||
#include "interface.h" // interfaces with omp, compiler, and user
|
||||
#include "option.h" // choices we have
|
||||
|
@ -242,15 +241,10 @@ class omptarget_nvptx_WorkDescr {
|
|||
|
||||
public:
|
||||
// access to data
|
||||
INLINE omptarget_nvptx_CounterGroup &CounterGroup() { return cg; }
|
||||
INLINE omptarget_nvptx_TaskDescr *WorkTaskDescr() { return &masterTaskICV; }
|
||||
// init
|
||||
INLINE void InitWorkDescr();
|
||||
|
||||
private:
|
||||
omptarget_nvptx_CounterGroup cg; // for barrier (no other needed)
|
||||
omptarget_nvptx_TaskDescr masterTaskICV;
|
||||
bool hasCancel;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -347,9 +341,6 @@ public:
|
|||
INLINE uint16_t &SimdLimitForNextSimd(int tid) {
|
||||
return nextRegion.slim[tid];
|
||||
}
|
||||
// sync
|
||||
INLINE Counter &Priv(int tid) { return priv[tid]; }
|
||||
INLINE void IncrementPriv(int tid, Counter val) { priv[tid] += val; }
|
||||
// schedule (for dispatch)
|
||||
INLINE kmp_sched_t &ScheduleType(int tid) { return schedule[tid]; }
|
||||
INLINE int64_t &Chunk(int tid) { return chunk[tid]; }
|
||||
|
@ -377,8 +368,6 @@ private:
|
|||
// simd limit
|
||||
uint16_t slim[MAX_THREADS_PER_TEAM];
|
||||
} nextRegion;
|
||||
// sync
|
||||
Counter priv[MAX_THREADS_PER_TEAM];
|
||||
// schedule (for dispatch)
|
||||
kmp_sched_t schedule[MAX_THREADS_PER_TEAM]; // remember schedule type for #for
|
||||
int64_t chunk[MAX_THREADS_PER_TEAM];
|
||||
|
@ -469,7 +458,6 @@ INLINE omptarget_nvptx_TaskDescr *getMyTopTaskDescriptor(int globalThreadId);
|
|||
// inlined implementation
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "counter_groupi.h"
|
||||
#include "omptarget-nvptxi.h"
|
||||
#include "supporti.h"
|
||||
|
||||
|
|
|
@ -168,31 +168,17 @@ omptarget_nvptx_ThreadPrivateContext::InitThreadPrivateContext(int tid) {
|
|||
topTaskDescr[tid] = NULL;
|
||||
// no num threads value has been pushed
|
||||
nextRegion.tnum[tid] = 0;
|
||||
// priv counter init to zero
|
||||
priv[tid] = 0;
|
||||
// the following don't need to be init here; they are init when using dyn
|
||||
// sched
|
||||
// current_Event, events_Number, chunk, num_Iterations, schedule
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Work Descriptor
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
INLINE void omptarget_nvptx_WorkDescr::InitWorkDescr() {
|
||||
cg.Clear(); // start and stop to zero too
|
||||
// threadsInParallelTeam does not need to be init (done in start parallel)
|
||||
hasCancel = FALSE;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Team Descriptor
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
INLINE void omptarget_nvptx_TeamDescr::InitTeamDescr() {
|
||||
levelZeroTaskDescr.InitLevelZeroTaskDescr();
|
||||
workDescrForActiveParallel.InitWorkDescr();
|
||||
// omp_init_lock(criticalLock);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -46,13 +46,6 @@
|
|||
// algo options
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// data options
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// decide if counters are 32 or 64 bit
|
||||
#define Counter unsigned long long
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// misc options (by def everythig here is device)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -306,8 +306,6 @@ EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn,
|
|||
omptarget_nvptx_WorkDescr &workDescr = getMyWorkDescriptor();
|
||||
workDescr.WorkTaskDescr()->CopyToWorkDescr(currTaskDescr,
|
||||
CudaThreadsForParallel / NumLanes);
|
||||
// init counters (copy start to init)
|
||||
workDescr.CounterGroup().Reset();
|
||||
}
|
||||
|
||||
// All workers call this function. Deactivate those not needed.
|
||||
|
@ -345,8 +343,6 @@ EXTERN bool __kmpc_kernel_parallel(void **WorkFn,
|
|||
omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(threadId,
|
||||
newTaskDescr);
|
||||
// init private from int value
|
||||
workDescr.CounterGroup().Init(
|
||||
omptarget_nvptx_threadPrivateContext->Priv(threadId));
|
||||
PRINT(LD_PAR,
|
||||
"thread will execute parallel region with id %d in a team of "
|
||||
"%d threads\n",
|
||||
|
|
Loading…
Reference in New Issue