[libomptarget][NVPTX] Drop dead code and data structures, NFCI.

* cg and HasCancel in WorkDescr were never read and can be removed.
* This eliminates the last use of priv in ThreadPrivateContext.
 * CounterGroup is unused afterwards.
 * Remove duplicate external declares in omptarget-nvptx.cu that are
   already in the header omptarget-nvptx.h.

Differential Revision: https://reviews.llvm.org/D51622

llvm-svn: 341370
This commit is contained in:
Jonas Hahnfeld 2018-09-04 15:13:17 +00:00
parent d5bc65444c
commit 82d20201d0
8 changed files with 2 additions and 189 deletions

View File

@ -1,51 +0,0 @@
//===------ counter_group.h - NVPTX OpenMP loop scheduling ------- CUDA -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//
//
// Interface to implement OpenMP loop scheduling
//
//===----------------------------------------------------------------------===//
#ifndef _OMPTARGET_NVPTX_COUNTER_GROUP_H_
#define _OMPTARGET_NVPTX_COUNTER_GROUP_H_
#include "option.h"
// counter group type for synchronizations
// Bundles the three counters used for cross-warp synchronization: an event
// counter bumped with atomicAdd (Next), a volatile start counter that waiters
// spin on (Release/WaitOrRelease), and an init value snapshotted by the
// master (Reset). Method bodies live in counter_groupi.h.
class omptarget_nvptx_CounterGroup {
public:
// getters and setters
INLINE Counter &Event() { return v_event; }
INLINE volatile Counter &Start() { return v_start; }
INLINE Counter &Init() { return v_init; }
// Synchronization Interface
INLINE void Clear(); // first time start=event
INLINE void Reset(); // init = first
INLINE void Init(Counter &priv); // priv = init
INLINE Counter Next(); // just counts number of events
// set priv to n, to be used in later waitOrRelease
INLINE void Complete(Counter &priv, Counter n);
// check priv and decide if we have to wait or can free the other warps
INLINE void Release(Counter priv, Counter current_event_value);
INLINE void WaitOrRelease(Counter priv, Counter current_event_value);
private:
Counter v_event; // counter of events (atomic)
// volatile is needed to force loads to read from global
// memory or L2 cache and see the write by the last master
volatile Counter v_start; // signal when events registered are finished
Counter v_init; // used to initialize local thread variables
};
#endif /* _OMPTARGET_NVPTX_COUNTER_GROUP_H_ */

View File

@ -1,82 +0,0 @@
//===----- counter_groupi.h - NVPTX OpenMP loop scheduling ------- CUDA -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//
//
// Interface implementation for OpenMP loop scheduling
//
//===----------------------------------------------------------------------===//
#include "option.h"
// Zero both the event and the (volatile) start counter so that, initially,
// start equals event (see "first time start=event" in counter_group.h).
INLINE void omptarget_nvptx_CounterGroup::Clear() {
PRINT0(LD_SYNCD, "clear counters\n")
v_event = 0;
v_start = 0;
// v_init does not need to be reset (its value is dead)
}
INLINE void omptarget_nvptx_CounterGroup::Reset() {
// done by master before entering parallel
// All registered events must already be released (event == start) before
// the group is reused; snapshot the current start as the value from which
// thread-private counters will be seeded.
ASSERT(LT_FUSSY, v_event == v_start,
"error, entry %lld !=start %lld at reset\n", P64(v_event),
P64(v_start));
v_init = v_start;
}
// Seed a thread-private counter from the group's current start value.
// NOTE(review): the declaration in counter_group.h says "priv = init", but
// this body copies v_start, not v_init — confirm which is intended (after
// the master's Reset() the two are equal, which may be why this works).
INLINE void omptarget_nvptx_CounterGroup::Init(Counter &priv) {
PRINT(LD_SYNCD, "init priv counter 0x%llx with val %lld\n", P64(&priv),
P64(v_start));
priv = v_start;
}
// just counts number of events
INLINE Counter omptarget_nvptx_CounterGroup::Next() {
  // Atomically bump the shared event counter; the pre-increment value
  // uniquely identifies this caller's event.
  const Counter previous = atomicAdd(&v_event, (Counter)1);
  PRINT(LD_SYNCD, "next event counter 0x%llx with val %lld->%lld\n",
        P64(&v_event), P64(previous), P64(previous + 1));
  return previous;
}
// set priv to n, to be used in later waitOrRelease
INLINE void omptarget_nvptx_CounterGroup::Complete(Counter &priv, Counter n) {
  // Advance the caller's private counter by n completed events.
  const Counter advanced = priv + n;
  PRINT(LD_SYNCD, "complete priv counter 0x%llx with val %llu->%llu (+%llu)\n",
        P64(&priv), P64(priv), P64(advanced), n);
  priv = advanced;
}
// Publish priv into the volatile start counter when priv is exactly one
// past current_event_value, i.e. this caller registered the most recent
// event; threads spinning in WaitOrRelease() then observe the write.
INLINE void omptarget_nvptx_CounterGroup::Release(Counter priv,
Counter current_event_value) {
if (priv - 1 == current_event_value) {
PRINT(LD_SYNCD, "Release start counter 0x%llx with val %lld->%lld\n",
P64(&v_start), P64(v_start), P64(priv));
v_start = priv;
}
}
// check priv and decide if we have to wait or can free the other warps
// Same release condition as Release(); otherwise busy-wait until another
// thread's release advances v_start up to priv.
INLINE void
omptarget_nvptx_CounterGroup::WaitOrRelease(Counter priv,
Counter current_event_value) {
if (priv - 1 == current_event_value) {
// This caller holds the latest event: publish and free the waiters.
PRINT(LD_SYNCD, "Release start counter 0x%llx with val %lld->%lld\n",
P64(&v_start), P64(v_start), P64(priv));
v_start = priv;
} else {
PRINT(LD_SYNCD,
"Start waiting while start counter 0x%llx with val %lld < %lld\n",
P64(&v_start), P64(v_start), P64(priv));
while (priv > v_start) {
// IDLE LOOP
// start is volatile: it will be re-loaded at each while loop
}
PRINT(LD_SYNCD,
"Done waiting as start counter 0x%llx with val %lld >= %lld\n",
P64(&v_start), P64(v_start), P64(priv));
}
}

View File

@ -20,6 +20,8 @@
#ifndef _INTERFACES_H_
#define _INTERFACES_H_
#include "option.h"
////////////////////////////////////////////////////////////////////////////////
// OpenMP interface
////////////////////////////////////////////////////////////////////////////////

View File

@ -21,25 +21,10 @@ extern __device__
omptarget_nvptx_Queue<omptarget_nvptx_ThreadPrivateContext, OMP_STATE_COUNT>
omptarget_nvptx_device_State[MAX_SM];
extern __device__ __shared__
omptarget_nvptx_ThreadPrivateContext *omptarget_nvptx_threadPrivateContext;
extern __device__ omptarget_nvptx_Queue<
omptarget_nvptx_SimpleThreadPrivateContext, OMP_STATE_COUNT>
omptarget_nvptx_device_simpleState[MAX_SM];
extern __device__ __shared__ omptarget_nvptx_SimpleThreadPrivateContext
*omptarget_nvptx_simpleThreadPrivateContext;
//
// The team master sets the outlined function and its arguments in these
// variables to communicate with the workers. Since they are in shared memory,
// there is one copy of these variables for each kernel, instance, and team.
//
extern volatile __device__ __shared__ omptarget_nvptx_WorkFn
omptarget_nvptx_workFn;
extern __device__ __shared__ uint32_t execution_param;
////////////////////////////////////////////////////////////////////////////////
// init entry points
////////////////////////////////////////////////////////////////////////////////
@ -146,8 +131,6 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
omptarget_nvptx_WorkDescr &workDescr = getMyWorkDescriptor();
// init team context
currTeamDescr.InitTeamDescr();
// init counters (copy start to init)
workDescr.CounterGroup().Reset();
}
__syncthreads();
@ -168,8 +151,6 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
newTaskDescr);
// init thread private from init value
workDescr.CounterGroup().Init(
omptarget_nvptx_threadPrivateContext->Priv(threadId));
PRINT(LD_PAR,
"thread will execute parallel region with id %d in a team of "
"%d threads\n",

View File

@ -26,7 +26,6 @@
#include <math.h>
// local includes
#include "counter_group.h"
#include "debug.h" // debug
#include "interface.h" // interfaces with omp, compiler, and user
#include "option.h" // choices we have
@ -242,15 +241,10 @@ class omptarget_nvptx_WorkDescr {
public:
// access to data
INLINE omptarget_nvptx_CounterGroup &CounterGroup() { return cg; }
INLINE omptarget_nvptx_TaskDescr *WorkTaskDescr() { return &masterTaskICV; }
// init
INLINE void InitWorkDescr();
private:
omptarget_nvptx_CounterGroup cg; // for barrier (no other needed)
omptarget_nvptx_TaskDescr masterTaskICV;
bool hasCancel;
};
////////////////////////////////////////////////////////////////////////////////
@ -347,9 +341,6 @@ public:
INLINE uint16_t &SimdLimitForNextSimd(int tid) {
return nextRegion.slim[tid];
}
// sync
INLINE Counter &Priv(int tid) { return priv[tid]; }
INLINE void IncrementPriv(int tid, Counter val) { priv[tid] += val; }
// schedule (for dispatch)
INLINE kmp_sched_t &ScheduleType(int tid) { return schedule[tid]; }
INLINE int64_t &Chunk(int tid) { return chunk[tid]; }
@ -377,8 +368,6 @@ private:
// simd limit
uint16_t slim[MAX_THREADS_PER_TEAM];
} nextRegion;
// sync
Counter priv[MAX_THREADS_PER_TEAM];
// schedule (for dispatch)
kmp_sched_t schedule[MAX_THREADS_PER_TEAM]; // remember schedule type for #for
int64_t chunk[MAX_THREADS_PER_TEAM];
@ -469,7 +458,6 @@ INLINE omptarget_nvptx_TaskDescr *getMyTopTaskDescriptor(int globalThreadId);
// inlined implementation
////////////////////////////////////////////////////////////////////////////////
#include "counter_groupi.h"
#include "omptarget-nvptxi.h"
#include "supporti.h"

View File

@ -168,31 +168,17 @@ omptarget_nvptx_ThreadPrivateContext::InitThreadPrivateContext(int tid) {
topTaskDescr[tid] = NULL;
// no num threads value has been pushed
nextRegion.tnum[tid] = 0;
// priv counter init to zero
priv[tid] = 0;
// the following don't need to be init here; they are init when using dyn
// sched
// current_Event, events_Number, chunk, num_Iterations, schedule
}
////////////////////////////////////////////////////////////////////////////////
// Work Descriptor
////////////////////////////////////////////////////////////////////////////////
// Prepare the per-team work descriptor: zero the counter group and clear
// the cancellation flag.
INLINE void omptarget_nvptx_WorkDescr::InitWorkDescr() {
cg.Clear(); // start and stop to zero too
// threadsInParallelTeam does not need to be init (done in start parallel)
hasCancel = FALSE;
}
////////////////////////////////////////////////////////////////////////////////
// Team Descriptor
////////////////////////////////////////////////////////////////////////////////
// Initialize the team descriptor: level-zero task ICVs plus the work
// descriptor used for the active parallel region.
INLINE void omptarget_nvptx_TeamDescr::InitTeamDescr() {
levelZeroTaskDescr.InitLevelZeroTaskDescr();
workDescrForActiveParallel.InitWorkDescr();
// omp_init_lock(criticalLock);
}
////////////////////////////////////////////////////////////////////////////////

View File

@ -46,13 +46,6 @@
// algo options
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// data options
////////////////////////////////////////////////////////////////////////////////
// decide if counters are 32 or 64 bit
#define Counter unsigned long long
////////////////////////////////////////////////////////////////////////////////
// misc options (by def everything here is device)
////////////////////////////////////////////////////////////////////////////////

View File

@ -306,8 +306,6 @@ EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn,
omptarget_nvptx_WorkDescr &workDescr = getMyWorkDescriptor();
workDescr.WorkTaskDescr()->CopyToWorkDescr(currTaskDescr,
CudaThreadsForParallel / NumLanes);
// init counters (copy start to init)
workDescr.CounterGroup().Reset();
}
// All workers call this function. Deactivate those not needed.
@ -345,8 +343,6 @@ EXTERN bool __kmpc_kernel_parallel(void **WorkFn,
omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(threadId,
newTaskDescr);
// init private from init value
workDescr.CounterGroup().Init(
omptarget_nvptx_threadPrivateContext->Priv(threadId));
PRINT(LD_PAR,
"thread will execute parallel region with id %d in a team of "
"%d threads\n",