diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index 4f6031f229..b73e25dbc6 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -35,8 +35,8 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) // process any command-line args that invoke Kokkos settings int device = 0; - int num_threads = 1; - int numa = 1; + num_threads = 1; + numa = 1; int iarg = 0; while (iarg < narg) { diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h index 512c76a489..84a87279fc 100644 --- a/src/KOKKOS/kokkos.h +++ b/src/KOKKOS/kokkos.h @@ -27,6 +27,8 @@ class KokkosLMP : protected Pointers { int forward_comm_classic; int exchange_comm_on_host; int forward_comm_on_host; + int num_threads; + int numa; KokkosLMP(class LAMMPS *, int, char **); ~KokkosLMP(); diff --git a/src/USER-CUDA/comm_cuda.cpp b/src/USER-CUDA/comm_cuda.cpp index 4c130e67b5..1644fca80a 100644 --- a/src/USER-CUDA/comm_cuda.cpp +++ b/src/USER-CUDA/comm_cuda.cpp @@ -75,7 +75,7 @@ CommCuda::CommCuda(LAMMPS *lmp) : CommBrick(lmp) buf_send = NULL; buf_recv = NULL; - Comm::free_swap(); + CommBrick::free_swap(); allocate_swap(maxswap); } @@ -132,7 +132,7 @@ void CommCuda::init() cuda->shared_data.comm.slablo.dev_data=cu_slablo->dev_data(); cuda->shared_data.comm.slabhi.dev_data=cu_slabhi->dev_data(); - Comm::init(); + CommBrick::init(); } /* ---------------------------------------------------------------------- @@ -145,7 +145,7 @@ void CommCuda::init() void CommCuda::setup() { if(cuda->shared_data.pair.neighall) cutghostuser = MAX(2.0*neighbor->cutneighmax,cutghostuser); - Comm::setup(); + CommBrick::setup(); //upload changed geometry to device if(style == SINGLE) @@ -197,7 +197,7 @@ void CommCuda::forward_comm_cuda() if(not comm_x_only && not avec->cudable) { cuda->downloadAll(); - Comm::forward_comm(); + CommBrick::forward_comm(); cuda->uploadAll(); return; } @@ -630,7 +630,7 @@ void CommCuda::forward_comm_pair(Pair *pair) { if(not cuda->shared_data.pair.cudable_force) { - return Comm::forward_comm_pair(pair); + return CommBrick::forward_comm_pair(pair); } int iswap,n; @@ -753,7 +753,7 @@ void CommCuda::exchange() if(not cuda->oncpu) cuda->downloadAll(); - Comm::exchange(); + CommBrick::exchange(); } @@ -887,7 +887,7 @@ void CommCuda::borders() return; } - Comm::borders(); + CommBrick::borders(); cuda->setSystemParams(); if(cuda->finished_setup) {cuda->checkResize(); cuda->uploadAll();} @@ -1313,7 +1313,7 @@ void CommCuda::grow_list(int iswap, int n) void CommCuda::grow_swap(int n) { int oldmaxswap=maxswap; - Comm::grow_swap(n); + CommBrick::grow_swap(n); if(n>cu_sendlist->get_dim()[0]) { MYDBG(printf(" # CUDA CommCuda::grow_swap\n");) @@ -1357,7 +1357,7 @@ void CommCuda::grow_swap(int n) void CommCuda::allocate_swap(int n) { - Comm::allocate_swap(n); + CommBrick::allocate_swap(n); delete cu_pbc; delete cu_slablo; @@ -1392,7 +1392,7 @@ void CommCuda::allocate_swap(int n) void CommCuda::allocate_multi(int n) { - Comm::allocate_multi(n); + CommBrick::allocate_multi(n); delete cu_multilo; delete cu_multihi; @@ -1410,7 +1410,7 @@ void CommCuda::allocate_multi(int n) void CommCuda::free_swap() { - Comm::free_swap(); + CommBrick::free_swap(); delete cuda->shared_data.comm.nsend_swap; cuda->shared_data.comm.nsend_swap=NULL; delete cu_pbc; cu_pbc = NULL; @@ -1431,7 +1431,7 @@ void CommCuda::free_swap() void CommCuda::free_multi() { - Comm::free_multi(); + CommBrick::free_multi(); delete cu_multilo; cu_multilo = NULL; delete cu_multihi; cu_multihi = NULL; } diff --git a/src/USER-CUDA/pair_gran_hooke_cuda.cpp b/src/USER-CUDA/pair_gran_hooke_cuda.cpp index 08360a996b..4b31d0d869 100644 --- a/src/USER-CUDA/pair_gran_hooke_cuda.cpp +++ b/src/USER-CUDA/pair_gran_hooke_cuda.cpp @@ -168,8 +168,6 @@ void PairGranHookeCuda::init_style() dt = update->dt; - - // check for Fix freeze and set freeze_group_bit for (i = 0; i < modify->nfix; i++) @@ -178,42 +176,52 @@ void PairGranHookeCuda::init_style() else freeze_group_bit = 0; cuda->shared_data.pair.freeze_group_bit=freeze_group_bit; - // check for Fix pour and set pour_type and pour_maxdiam - int pour_type = 0; - double pour_maxrad = 0.0; - for (i = 0; i < modify->nfix; i++) - if (strcmp(modify->fix[i]->style,"pour") == 0) break; - if (i < modify->nfix) { - pour_type = ((FixPour *) modify->fix[i])->ntype; - pour_maxrad = ((FixPour *) modify->fix[i])->radius_max; - } + // check for FixPour and FixDeposit so can extract particle radii + + int ipour; + for (ipour = 0; ipour < modify->nfix; ipour++) + if (strcmp(modify->fix[ipour]->style,"pour") == 0) break; + if (ipour == modify->nfix) ipour = -1; + + int idep; + for (idep = 0; idep < modify->nfix; idep++) + if (strcmp(modify->fix[idep]->style,"deposit") == 0) break; + if (idep == modify->nfix) idep = -1; // set maxrad_dynamic and maxrad_frozen for each type - // include future Fix pour particles as dynamic + // include future FixPour and FixDeposit particles as dynamic - for (i = 1; i <= atom->ntypes; i++) + int itype; + for (i = 1; i <= atom->ntypes; i++) { onerad_dynamic[i] = onerad_frozen[i] = 0.0; - if (pour_type) onerad_dynamic[pour_type] = pour_maxrad; + if (ipour >= 0) { + itype = i; + onerad_dynamic[i] = + *((double *) modify->fix[ipour]->extract("radius",itype)); + } + if (idep >= 0) { + itype = i; + onerad_dynamic[i] = + *((double *) modify->fix[idep]->extract("radius",itype)); + } + } double *radius = atom->radius; int *mask = atom->mask; int *type = atom->type; int nlocal = atom->nlocal; - for (i = 0; i < nlocal; i++){ + for (i = 0; i < nlocal; i++) if (mask[i] & freeze_group_bit) onerad_frozen[type[i]] = MAX(onerad_frozen[type[i]],radius[i]); else onerad_dynamic[type[i]] = MAX(onerad_dynamic[type[i]],radius[i]); - } MPI_Allreduce(&onerad_dynamic[1],&maxrad_dynamic[1],atom->ntypes, MPI_DOUBLE,MPI_MAX,world); MPI_Allreduce(&onerad_frozen[1],&maxrad_frozen[1],atom->ntypes, MPI_DOUBLE,MPI_MAX,world); - - MYDBG(printf("# CUDA PairGranHookeCuda::init_style end\n"); ) } void PairGranHookeCuda::init_list(int id, NeighList *ptr) diff --git a/src/USER-OMP/fix_omp.cpp b/src/USER-OMP/fix_omp.cpp index f5808a3457..9b0771ceef 100644 --- a/src/USER-OMP/fix_omp.cpp +++ b/src/USER-OMP/fix_omp.cpp @@ -134,6 +134,7 @@ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg) // allocate list for per thread accumulator manager class instances // and then have each thread create an instance of this class to // encourage the OS to use storage that is "close" to each thread's CPU. + thr = new ThrData *[nthreads]; _nthr = nthreads; #if defined(_OPENMP) @@ -207,6 +208,7 @@ void FixOMP::init() // kspace_split == 0 : regular processing // kspace_split < 0 : master partition, does not do kspace // kspace_split > 0 : slave partition, only does kspace + if (strstr(update->integrate_style,"verlet/split") != NULL) { if (universe->iworld == 0) kspace_split = -1; else kspace_split = 1; diff --git a/src/comm_brick.cpp b/src/comm_brick.cpp index 2be56389a3..93a81c3ad2 100644 --- a/src/comm_brick.cpp +++ b/src/comm_brick.cpp @@ -35,6 +35,7 @@ #include "compute.h" #include "output.h" #include "dump.h" +#include "accelerator_kokkos.h" #include "math_extra.h" #include "error.h" #include "memory.h" @@ -76,7 +77,9 @@ CommBrick::CommBrick(LAMMPS *lmp) : Comm(lmp) nthreads = 1; #ifdef _OPENMP - if (getenv("OMP_NUM_THREADS") == NULL) { + if (lmp->kokkos) { + nthreads = lmp->kokkos->num_threads * lmp->kokkos->numa; + } else if (getenv("OMP_NUM_THREADS") == NULL) { nthreads = 1; if (me == 0) error->warning(FLERR,"OMP_NUM_THREADS environment is not set."); @@ -87,7 +90,7 @@ CommBrick::CommBrick(LAMMPS *lmp) : Comm(lmp) // enforce consistent number of threads across all MPI tasks MPI_Bcast(&nthreads,1,MPI_INT,0,world); - omp_set_num_threads(nthreads); + if (!lmp->kokkos) omp_set_num_threads(nthreads); if (me == 0) { if (screen)