[libomptarget][NVPTX] Drop dead code and data structures, NFCI.

* cg and HasCancel in WorkDescr were never read and can be removed.
* This eliminates the last use of priv in ThreadPrivateContext.
 * CounterGroup is unused afterwards.
 * Remove duplicate external declares in omptarget-nvptx.cu that are
   already in the header omptarget-nvptx.h.

Differential Revision: https://reviews.llvm.org/D51622

llvm-svn: 341370
This commit is contained in:
Jonas Hahnfeld 2018-09-04 15:13:17 +00:00
parent d5bc65444c
commit 82d20201d0
8 changed files with 2 additions and 189 deletions

View File

@ -1,51 +0,0 @@
//===------ counter_group.h - NVPTX OpenMP loop scheduling ------- CUDA -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//
//
// Interface to implement OpenMP loop scheduling
//
//===----------------------------------------------------------------------===//
#ifndef _OMPTARGET_NVPTX_COUNTER_GROUP_H_
#define _OMPTARGET_NVPTX_COUNTER_GROUP_H_
#include "option.h"
// counter group type for synchronizations
// Bundles the three counters used for cross-warp synchronization: an event
// counter bumped with atomicAdd (Next), a volatile start counter that waiters
// spin on (Release/WaitOrRelease), and an init value snapshotted by the
// master (Reset). Method bodies live in counter_groupi.h.
class omptarget_nvptx_CounterGroup {
public:
// getters and setters
INLINE Counter &Event() { return v_event; }
INLINE volatile Counter &Start() { return v_start; }
INLINE Counter &Init() { return v_init; }
// Synchronization Interface
INLINE void Clear(); // first time start=event
INLINE void Reset(); // init = first
INLINE void Init(Counter &priv); // priv = init
INLINE Counter Next(); // just counts number of events
// set priv to n, to be used in later waitOrRelease
INLINE void Complete(Counter &priv, Counter n);
// check priv and decide if we have to wait or can free the other warps
INLINE void Release(Counter priv, Counter current_event_value);
INLINE void WaitOrRelease(Counter priv, Counter current_event_value);
private:
Counter v_event; // counter of events (atomic)
// volatile is needed to force loads to read from global
// memory or L2 cache and see the write by the last master
volatile Counter v_start; // signal when events registered are finished
Counter v_init; // used to initialize local thread variables
};
#endif /* _OMPTARGET_NVPTX_COUNTER_GROUP_H_ */

View File

@ -1,82 +0,0 @@
//===----- counter_groupi.h - NVPTX OpenMP loop scheduling ------- CUDA -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//
//
// Interface implementation for OpenMP loop scheduling
//
//===----------------------------------------------------------------------===//
#include "option.h"
// Zero both the event and the (volatile) start counter so that, initially,
// start equals event (see "first time start=event" in counter_group.h).
INLINE void omptarget_nvptx_CounterGroup::Clear() {
PRINT0(LD_SYNCD, "clear counters\n")
v_event = 0;
v_start = 0;
// v_init does not need to be reset (its value is dead)
}
INLINE void omptarget_nvptx_CounterGroup::Reset() {
// done by master before entering parallel
// All registered events must already be released (event == start) before
// the group is reused; snapshot the current start as the value from which
// thread-private counters will be seeded.
ASSERT(LT_FUSSY, v_event == v_start,
"error, entry %lld !=start %lld at reset\n", P64(v_event),
P64(v_start));
v_init = v_start;
}
// Seed a thread-private counter from the group's current start value.
// NOTE(review): the declaration in counter_group.h says "priv = init", but
// this body copies v_start, not v_init — confirm which is intended (after
// the master's Reset() the two are equal, which may be why this works).
INLINE void omptarget_nvptx_CounterGroup::Init(Counter &priv) {
PRINT(LD_SYNCD, "init priv counter 0x%llx with val %lld\n", P64(&priv),
P64(v_start));
priv = v_start;
}
// just counts number of events
INLINE Counter omptarget_nvptx_CounterGroup::Next() {
  // Atomically bump the shared event counter; the pre-increment value
  // uniquely identifies this caller's event.
  const Counter previous = atomicAdd(&v_event, (Counter)1);
  PRINT(LD_SYNCD, "next event counter 0x%llx with val %lld->%lld\n",
        P64(&v_event), P64(previous), P64(previous + 1));
  return previous;
}
// set priv to n, to be used in later waitOrRelease
INLINE void omptarget_nvptx_CounterGroup::Complete(Counter &priv, Counter n) {
  // Advance the caller's private counter by n completed events.
  const Counter advanced = priv + n;
  PRINT(LD_SYNCD, "complete priv counter 0x%llx with val %llu->%llu (+%llu)\n",
        P64(&priv), P64(priv), P64(advanced), n);
  priv = advanced;
}
// Publish priv into the volatile start counter when priv is exactly one
// past current_event_value, i.e. this caller registered the most recent
// event; threads spinning in WaitOrRelease() then observe the write.
INLINE void omptarget_nvptx_CounterGroup::Release(Counter priv,
Counter current_event_value) {
if (priv - 1 == current_event_value) {
PRINT(LD_SYNCD, "Release start counter 0x%llx with val %lld->%lld\n",
P64(&v_start), P64(v_start), P64(priv));
v_start = priv;
}
}
// check priv and decide if we have to wait or can free the other warps
// Same release condition as Release(); otherwise busy-wait until another
// thread's release advances v_start up to priv.
INLINE void
omptarget_nvptx_CounterGroup::WaitOrRelease(Counter priv,
Counter current_event_value) {
if (priv - 1 == current_event_value) {
// This caller holds the latest event: publish and free the waiters.
PRINT(LD_SYNCD, "Release start counter 0x%llx with val %lld->%lld\n",
P64(&v_start), P64(v_start), P64(priv));
v_start = priv;
} else {
PRINT(LD_SYNCD,
"Start waiting while start counter 0x%llx with val %lld < %lld\n",
P64(&v_start), P64(v_start), P64(priv));
while (priv > v_start) {
// IDLE LOOP
// start is volatile: it will be re-loaded at each while loop
}
PRINT(LD_SYNCD,
"Done waiting as start counter 0x%llx with val %lld >= %lld\n",
P64(&v_start), P64(v_start), P64(priv));
}
}

View File

@ -20,6 +20,8 @@
#ifndef _INTERFACES_H_
#define _INTERFACES_H_
#include "option.h"
////////////////////////////////////////////////////////////////////////////////
// OpenMP interface
////////////////////////////////////////////////////////////////////////////////

View File

@ -21,25 +21,10 @@ extern __device__
omptarget_nvptx_Queue<omptarget_nvptx_ThreadPrivateContext, OMP_STATE_COUNT>
omptarget_nvptx_device_State[MAX_SM];
extern __device__ __shared__
omptarget_nvptx_ThreadPrivateContext *omptarget_nvptx_threadPrivateContext;
extern __device__ omptarget_nvptx_Queue<
omptarget_nvptx_SimpleThreadPrivateContext, OMP_STATE_COUNT>
omptarget_nvptx_device_simpleState[MAX_SM];
extern __device__ __shared__ omptarget_nvptx_SimpleThreadPrivateContext
*omptarget_nvptx_simpleThreadPrivateContext;
//
// The team master sets the outlined function and its arguments in these
// variables to communicate with the workers. Since they are in shared memory,
// there is one copy of these variables for each kernel, instance, and team.
//
extern volatile __device__ __shared__ omptarget_nvptx_WorkFn
omptarget_nvptx_workFn;
extern __device__ __shared__ uint32_t execution_param;
////////////////////////////////////////////////////////////////////////////////
// init entry points
////////////////////////////////////////////////////////////////////////////////
@ -146,8 +131,6 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
omptarget_nvptx_WorkDescr &workDescr = getMyWorkDescriptor();
// init team context
currTeamDescr.InitTeamDescr();
// init counters (copy start to init)
workDescr.CounterGroup().Reset();
}
__syncthreads();
@ -168,8 +151,6 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
newTaskDescr);
// init thread private from init value
workDescr.CounterGroup().Init(
omptarget_nvptx_threadPrivateContext->Priv(threadId));
PRINT(LD_PAR,
"thread will execute parallel region with id %d in a team of "
"%d threads\n",

View File

@ -26,7 +26,6 @@
#include <math.h>
// local includes
#include "counter_group.h"
#include "debug.h" // debug
#include "interface.h" // interfaces with omp, compiler, and user
#include "option.h" // choices we have
@ -242,15 +241,10 @@ class omptarget_nvptx_WorkDescr {
public:
// access to data
INLINE omptarget_nvptx_CounterGroup &CounterGroup() { return cg; }
INLINE omptarget_nvptx_TaskDescr *WorkTaskDescr() { return &masterTaskICV; }
// init
INLINE void InitWorkDescr();
private:
omptarget_nvptx_CounterGroup cg; // for barrier (no other needed)
omptarget_nvptx_TaskDescr masterTaskICV;
bool hasCancel;
};
////////////////////////////////////////////////////////////////////////////////
@ -347,9 +341,6 @@ public:
INLINE uint16_t &SimdLimitForNextSimd(int tid) {
return nextRegion.slim[tid];
}
// sync
INLINE Counter &Priv(int tid) { return priv[tid]; }
INLINE void IncrementPriv(int tid, Counter val) { priv[tid] += val; }
// schedule (for dispatch)
INLINE kmp_sched_t &ScheduleType(int tid) { return schedule[tid]; }
INLINE int64_t &Chunk(int tid) { return chunk[tid]; }
@ -377,8 +368,6 @@ private:
// simd limit
uint16_t slim[MAX_THREADS_PER_TEAM];
} nextRegion;
// sync
Counter priv[MAX_THREADS_PER_TEAM];
// schedule (for dispatch)
kmp_sched_t schedule[MAX_THREADS_PER_TEAM]; // remember schedule type for #for
int64_t chunk[MAX_THREADS_PER_TEAM];
@ -469,7 +458,6 @@ INLINE omptarget_nvptx_TaskDescr *getMyTopTaskDescriptor(int globalThreadId);
// inlined implementation
////////////////////////////////////////////////////////////////////////////////
#include "counter_groupi.h"
#include "omptarget-nvptxi.h"
#include "supporti.h"

View File

@ -168,31 +168,17 @@ omptarget_nvptx_ThreadPrivateContext::InitThreadPrivateContext(int tid) {
topTaskDescr[tid] = NULL;
// no num threads value has been pushed
nextRegion.tnum[tid] = 0;
// priv counter init to zero
priv[tid] = 0;
// the following don't need to be init here; they are init when using dyn
// sched
// current_Event, events_Number, chunk, num_Iterations, schedule
}
////////////////////////////////////////////////////////////////////////////////
// Work Descriptor
////////////////////////////////////////////////////////////////////////////////
// Prepare the per-team work descriptor: zero the counter group and clear
// the cancellation flag.
INLINE void omptarget_nvptx_WorkDescr::InitWorkDescr() {
cg.Clear(); // start and stop to zero too
// threadsInParallelTeam does not need to be init (done in start parallel)
hasCancel = FALSE;
}
////////////////////////////////////////////////////////////////////////////////
// Team Descriptor
////////////////////////////////////////////////////////////////////////////////
// Initialize the team descriptor: level-zero task ICVs plus the work
// descriptor used for the active parallel region.
INLINE void omptarget_nvptx_TeamDescr::InitTeamDescr() {
levelZeroTaskDescr.InitLevelZeroTaskDescr();
workDescrForActiveParallel.InitWorkDescr();
// omp_init_lock(criticalLock);
}
////////////////////////////////////////////////////////////////////////////////

View File

@ -46,13 +46,6 @@
// algo options
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// data options
////////////////////////////////////////////////////////////////////////////////
// decide if counters are 32 or 64 bit
#define Counter unsigned long long
////////////////////////////////////////////////////////////////////////////////
// misc options (by def everything here is device)
////////////////////////////////////////////////////////////////////////////////

View File

@ -306,8 +306,6 @@ EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn,
omptarget_nvptx_WorkDescr &workDescr = getMyWorkDescriptor();
workDescr.WorkTaskDescr()->CopyToWorkDescr(currTaskDescr,
CudaThreadsForParallel / NumLanes);
// init counters (copy start to init)
workDescr.CounterGroup().Reset();
}
// All workers call this function. Deactivate those not needed.
@ -345,8 +343,6 @@ EXTERN bool __kmpc_kernel_parallel(void **WorkFn,
omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(threadId,
newTaskDescr);
// init private from init value
workDescr.CounterGroup().Init(
omptarget_nvptx_threadPrivateContext->Priv(threadId));
PRINT(LD_PAR,
"thread will execute parallel region with id %d in a team of "
"%d threads\n",