git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12127 f3b2605a-c512-4ea7-a41b-209d697bcdaa

2014-06-13 22:03:10 +00:00 · 2014-06-13 22:03:10 +00:00 · 3f98c4d076
parent a4b4422712
commit 3f98c4d076
6 changed files with 49 additions and 34 deletions
--- a/src/KOKKOS/kokkos.cpp
+++ b/src/KOKKOS/kokkos.cpp
@ -35,8 +35,8 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
  // process any command-line args that invoke Kokkos settings

  int device = 0;
-  int num_threads = 1;
-  int numa = 1;
+  num_threads = 1;
+  numa = 1;

  int iarg = 0;
  while (iarg < narg) {
--- a/src/KOKKOS/kokkos.h
+++ b/src/KOKKOS/kokkos.h
@ -27,6 +27,8 @@ class KokkosLMP : protected Pointers {
  int forward_comm_classic;
  int exchange_comm_on_host;
  int forward_comm_on_host;
+  int num_threads;
+  int numa;

  KokkosLMP(class LAMMPS *, int, char **);
  ~KokkosLMP();
--- a/src/USER-CUDA/comm_cuda.cpp
+++ b/src/USER-CUDA/comm_cuda.cpp
@ -75,7 +75,7 @@ CommCuda::CommCuda(LAMMPS *lmp) : CommBrick(lmp)
  buf_send = NULL;
  buf_recv = NULL;

-  Comm::free_swap();
+  CommBrick::free_swap();
  allocate_swap(maxswap);
 }

@ -132,7 +132,7 @@ void CommCuda::init()
  cuda->shared_data.comm.slablo.dev_data=cu_slablo->dev_data();
  cuda->shared_data.comm.slabhi.dev_data=cu_slabhi->dev_data();

-  Comm::init();
+  CommBrick::init();
 }

 /* ----------------------------------------------------------------------
@ -145,7 +145,7 @@ void CommCuda::init()
 void CommCuda::setup()
 {
  if(cuda->shared_data.pair.neighall) cutghostuser = MAX(2.0*neighbor->cutneighmax,cutghostuser);
-  Comm::setup();
+  CommBrick::setup();

  //upload changed geometry to device
    if(style == SINGLE)
@ -197,7 +197,7 @@ void CommCuda::forward_comm_cuda()
  if(not comm_x_only && not avec->cudable)
  {
          cuda->downloadAll();
-    Comm::forward_comm();
+    CommBrick::forward_comm();
    cuda->uploadAll();
    return;
  }
@ -630,7 +630,7 @@ void CommCuda::forward_comm_pair(Pair *pair)
 {
  if(not cuda->shared_data.pair.cudable_force)
  {
-          return Comm::forward_comm_pair(pair);
+          return CommBrick::forward_comm_pair(pair);
  }

  int iswap,n;
@ -753,7 +753,7 @@ void CommCuda::exchange()

  if(not cuda->oncpu) cuda->downloadAll();

-  Comm::exchange();
+  CommBrick::exchange();
 }


@ -887,7 +887,7 @@ void CommCuda::borders()
           return;
  }

-  Comm::borders();
+  CommBrick::borders();

  cuda->setSystemParams();
  if(cuda->finished_setup) {cuda->checkResize(); cuda->uploadAll();}
@ -1313,7 +1313,7 @@ void CommCuda::grow_list(int iswap, int n)
 void CommCuda::grow_swap(int n)
 {
  int oldmaxswap=maxswap;
-  Comm::grow_swap(n);
+  CommBrick::grow_swap(n);
  if(n>cu_sendlist->get_dim()[0])
  {
   MYDBG(printf(" # CUDA CommCuda::grow_swap\n");)
@ -1357,7 +1357,7 @@ void CommCuda::grow_swap(int n)

 void CommCuda::allocate_swap(int n)
 {
-   Comm::allocate_swap(n);
+   CommBrick::allocate_swap(n);

          delete cu_pbc;
          delete cu_slablo;
@ -1392,7 +1392,7 @@ void CommCuda::allocate_swap(int n)

 void CommCuda::allocate_multi(int n)
 {
-  Comm::allocate_multi(n);
+  CommBrick::allocate_multi(n);

          delete cu_multilo;
          delete cu_multihi;
@ -1410,7 +1410,7 @@ void CommCuda::allocate_multi(int n)
 void CommCuda::free_swap()
 {

-  Comm::free_swap();
+  CommBrick::free_swap();

  delete cuda->shared_data.comm.nsend_swap; cuda->shared_data.comm.nsend_swap=NULL;
  delete cu_pbc; cu_pbc = NULL;
@ -1431,7 +1431,7 @@ void CommCuda::free_swap()

 void CommCuda::free_multi()
 {
-  Comm::free_multi();
+  CommBrick::free_multi();
  delete cu_multilo; cu_multilo = NULL;
  delete cu_multihi; cu_multihi = NULL;
 }
--- a/src/USER-CUDA/pair_gran_hooke_cuda.cpp
+++ b/src/USER-CUDA/pair_gran_hooke_cuda.cpp
@ -168,8 +168,6 @@ void PairGranHookeCuda::init_style()

  dt = update->dt;

-
-
  // check for Fix freeze and set freeze_group_bit

  for (i = 0; i < modify->nfix; i++)
@ -178,42 +176,52 @@ void PairGranHookeCuda::init_style()
  else freeze_group_bit = 0;

  cuda->shared_data.pair.freeze_group_bit=freeze_group_bit;
-  // check for Fix pour and set pour_type and pour_maxdiam

-  int pour_type = 0;
-  double pour_maxrad = 0.0;
-  for (i = 0; i < modify->nfix; i++)
-    if (strcmp(modify->fix[i]->style,"pour") == 0) break;
-  if (i < modify->nfix) {
-    pour_type = ((FixPour *) modify->fix[i])->ntype;
-    pour_maxrad = ((FixPour *) modify->fix[i])->radius_max;
-  }
+  // check for FixPour and FixDeposit so can extract particle radii
+
+  int ipour;
+  for (ipour = 0; ipour < modify->nfix; ipour++)
+    if (strcmp(modify->fix[ipour]->style,"pour") == 0) break;
+  if (ipour == modify->nfix) ipour = -1;
+
+  int idep;
+  for (idep = 0; idep < modify->nfix; idep++)
+    if (strcmp(modify->fix[idep]->style,"deposit") == 0) break;
+  if (idep == modify->nfix) idep = -1;

  // set maxrad_dynamic and maxrad_frozen for each type
-  // include future Fix pour particles as dynamic
+  // include future FixPour and FixDeposit particles as dynamic

-  for (i = 1; i <= atom->ntypes; i++)
+  int itype;
+  for (i = 1; i <= atom->ntypes; i++) {
    onerad_dynamic[i] = onerad_frozen[i] = 0.0;
-  if (pour_type) onerad_dynamic[pour_type] = pour_maxrad;
+    if (ipour >= 0) {
+      itype = i;
+      onerad_dynamic[i] = 
+        *((double *) modify->fix[ipour]->extract("radius",itype));
+    }
+    if (idep >= 0) {
+      itype = i;
+      onerad_dynamic[i] = 
+        *((double *) modify->fix[idep]->extract("radius",itype));
+    }
+  }

  double *radius = atom->radius;
  int *mask = atom->mask;
  int *type = atom->type;
  int nlocal = atom->nlocal;

-  for (i = 0; i < nlocal; i++){
+  for (i = 0; i < nlocal; i++)
    if (mask[i] & freeze_group_bit)
      onerad_frozen[type[i]] = MAX(onerad_frozen[type[i]],radius[i]);
    else
      onerad_dynamic[type[i]] = MAX(onerad_dynamic[type[i]],radius[i]);
-  }

  MPI_Allreduce(&onerad_dynamic[1],&maxrad_dynamic[1],atom->ntypes,
                MPI_DOUBLE,MPI_MAX,world);
  MPI_Allreduce(&onerad_frozen[1],&maxrad_frozen[1],atom->ntypes,
                MPI_DOUBLE,MPI_MAX,world);
-
-  MYDBG(printf("# CUDA PairGranHookeCuda::init_style end\n"); )
 }

 void PairGranHookeCuda::init_list(int id, NeighList *ptr)
--- a/src/USER-OMP/fix_omp.cpp
+++ b/src/USER-OMP/fix_omp.cpp
@ -134,6 +134,7 @@ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg)
  // allocate list for per thread accumulator manager class instances
  // and then have each thread create an instance of this class to
  // encourage the OS to use storage that is "close" to each thread's CPU.
+
  thr = new ThrData *[nthreads];
  _nthr = nthreads;
 #if defined(_OPENMP)
@ -207,6 +208,7 @@ void FixOMP::init()
  // kspace_split == 0 : regular processing
  // kspace_split < 0  : master partition, does not do kspace
  // kspace_split > 0  : slave partition, only does kspace
+
  if (strstr(update->integrate_style,"verlet/split") != NULL) {
    if (universe->iworld == 0) kspace_split = -1;
    else kspace_split = 1;
--- a/src/comm_brick.cpp
+++ b/src/comm_brick.cpp
@ -35,6 +35,7 @@
 #include "compute.h"
 #include "output.h"
 #include "dump.h"
+#include "accelerator_kokkos.h"
 #include "math_extra.h"
 #include "error.h"
 #include "memory.h"
@ -76,7 +77,9 @@ CommBrick::CommBrick(LAMMPS *lmp) : Comm(lmp)

  nthreads = 1;
 #ifdef _OPENMP
-  if (getenv("OMP_NUM_THREADS") == NULL) {
+  if (lmp->kokkos) {
+    nthreads = lmp->kokkos->num_threads * lmp->kokkos->numa;
+  } else if (getenv("OMP_NUM_THREADS") == NULL) {
    nthreads = 1;
    if (me == 0)
      error->warning(FLERR,"OMP_NUM_THREADS environment is not set.");
@ -87,7 +90,7 @@ CommBrick::CommBrick(LAMMPS *lmp) : Comm(lmp)
  // enforce consistent number of threads across all MPI tasks

  MPI_Bcast(&nthreads,1,MPI_INT,0,world);
-  omp_set_num_threads(nthreads);
+  if (!lmp->kokkos) omp_set_num_threads(nthreads);

  if (me == 0) {
    if (screen)