forked from lijiext/lammps
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12127 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
parent
a4b4422712
commit
3f98c4d076
|
@ -35,8 +35,8 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
|||
// process any command-line args that invoke Kokkos settings
|
||||
|
||||
int device = 0;
|
||||
int num_threads = 1;
|
||||
int numa = 1;
|
||||
num_threads = 1;
|
||||
numa = 1;
|
||||
|
||||
int iarg = 0;
|
||||
while (iarg < narg) {
|
||||
|
|
|
@ -27,6 +27,8 @@ class KokkosLMP : protected Pointers {
|
|||
int forward_comm_classic;
|
||||
int exchange_comm_on_host;
|
||||
int forward_comm_on_host;
|
||||
int num_threads;
|
||||
int numa;
|
||||
|
||||
KokkosLMP(class LAMMPS *, int, char **);
|
||||
~KokkosLMP();
|
||||
|
|
|
@ -75,7 +75,7 @@ CommCuda::CommCuda(LAMMPS *lmp) : CommBrick(lmp)
|
|||
buf_send = NULL;
|
||||
buf_recv = NULL;
|
||||
|
||||
Comm::free_swap();
|
||||
CommBrick::free_swap();
|
||||
allocate_swap(maxswap);
|
||||
}
|
||||
|
||||
|
@ -132,7 +132,7 @@ void CommCuda::init()
|
|||
cuda->shared_data.comm.slablo.dev_data=cu_slablo->dev_data();
|
||||
cuda->shared_data.comm.slabhi.dev_data=cu_slabhi->dev_data();
|
||||
|
||||
Comm::init();
|
||||
CommBrick::init();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
@ -145,7 +145,7 @@ void CommCuda::init()
|
|||
void CommCuda::setup()
|
||||
{
|
||||
if(cuda->shared_data.pair.neighall) cutghostuser = MAX(2.0*neighbor->cutneighmax,cutghostuser);
|
||||
Comm::setup();
|
||||
CommBrick::setup();
|
||||
|
||||
//upload changed geometry to device
|
||||
if(style == SINGLE)
|
||||
|
@ -197,7 +197,7 @@ void CommCuda::forward_comm_cuda()
|
|||
if(not comm_x_only && not avec->cudable)
|
||||
{
|
||||
cuda->downloadAll();
|
||||
Comm::forward_comm();
|
||||
CommBrick::forward_comm();
|
||||
cuda->uploadAll();
|
||||
return;
|
||||
}
|
||||
|
@ -630,7 +630,7 @@ void CommCuda::forward_comm_pair(Pair *pair)
|
|||
{
|
||||
if(not cuda->shared_data.pair.cudable_force)
|
||||
{
|
||||
return Comm::forward_comm_pair(pair);
|
||||
return CommBrick::forward_comm_pair(pair);
|
||||
}
|
||||
|
||||
int iswap,n;
|
||||
|
@ -753,7 +753,7 @@ void CommCuda::exchange()
|
|||
|
||||
if(not cuda->oncpu) cuda->downloadAll();
|
||||
|
||||
Comm::exchange();
|
||||
CommBrick::exchange();
|
||||
}
|
||||
|
||||
|
||||
|
@ -887,7 +887,7 @@ void CommCuda::borders()
|
|||
return;
|
||||
}
|
||||
|
||||
Comm::borders();
|
||||
CommBrick::borders();
|
||||
|
||||
cuda->setSystemParams();
|
||||
if(cuda->finished_setup) {cuda->checkResize(); cuda->uploadAll();}
|
||||
|
@ -1313,7 +1313,7 @@ void CommCuda::grow_list(int iswap, int n)
|
|||
void CommCuda::grow_swap(int n)
|
||||
{
|
||||
int oldmaxswap=maxswap;
|
||||
Comm::grow_swap(n);
|
||||
CommBrick::grow_swap(n);
|
||||
if(n>cu_sendlist->get_dim()[0])
|
||||
{
|
||||
MYDBG(printf(" # CUDA CommCuda::grow_swap\n");)
|
||||
|
@ -1357,7 +1357,7 @@ void CommCuda::grow_swap(int n)
|
|||
|
||||
void CommCuda::allocate_swap(int n)
|
||||
{
|
||||
Comm::allocate_swap(n);
|
||||
CommBrick::allocate_swap(n);
|
||||
|
||||
delete cu_pbc;
|
||||
delete cu_slablo;
|
||||
|
@ -1392,7 +1392,7 @@ void CommCuda::allocate_swap(int n)
|
|||
|
||||
void CommCuda::allocate_multi(int n)
|
||||
{
|
||||
Comm::allocate_multi(n);
|
||||
CommBrick::allocate_multi(n);
|
||||
|
||||
delete cu_multilo;
|
||||
delete cu_multihi;
|
||||
|
@ -1410,7 +1410,7 @@ void CommCuda::allocate_multi(int n)
|
|||
void CommCuda::free_swap()
|
||||
{
|
||||
|
||||
Comm::free_swap();
|
||||
CommBrick::free_swap();
|
||||
|
||||
delete cuda->shared_data.comm.nsend_swap; cuda->shared_data.comm.nsend_swap=NULL;
|
||||
delete cu_pbc; cu_pbc = NULL;
|
||||
|
@ -1431,7 +1431,7 @@ void CommCuda::free_swap()
|
|||
|
||||
void CommCuda::free_multi()
|
||||
{
|
||||
Comm::free_multi();
|
||||
CommBrick::free_multi();
|
||||
delete cu_multilo; cu_multilo = NULL;
|
||||
delete cu_multihi; cu_multihi = NULL;
|
||||
}
|
||||
|
|
|
@ -168,8 +168,6 @@ void PairGranHookeCuda::init_style()
|
|||
|
||||
dt = update->dt;
|
||||
|
||||
|
||||
|
||||
// check for Fix freeze and set freeze_group_bit
|
||||
|
||||
for (i = 0; i < modify->nfix; i++)
|
||||
|
@ -178,42 +176,52 @@ void PairGranHookeCuda::init_style()
|
|||
else freeze_group_bit = 0;
|
||||
|
||||
cuda->shared_data.pair.freeze_group_bit=freeze_group_bit;
|
||||
// check for Fix pour and set pour_type and pour_maxdiam
|
||||
|
||||
int pour_type = 0;
|
||||
double pour_maxrad = 0.0;
|
||||
for (i = 0; i < modify->nfix; i++)
|
||||
if (strcmp(modify->fix[i]->style,"pour") == 0) break;
|
||||
if (i < modify->nfix) {
|
||||
pour_type = ((FixPour *) modify->fix[i])->ntype;
|
||||
pour_maxrad = ((FixPour *) modify->fix[i])->radius_max;
|
||||
}
|
||||
// check for FixPour and FixDeposit so can extract particle radii
|
||||
|
||||
int ipour;
|
||||
for (ipour = 0; ipour < modify->nfix; ipour++)
|
||||
if (strcmp(modify->fix[ipour]->style,"pour") == 0) break;
|
||||
if (ipour == modify->nfix) ipour = -1;
|
||||
|
||||
int idep;
|
||||
for (idep = 0; idep < modify->nfix; idep++)
|
||||
if (strcmp(modify->fix[idep]->style,"deposit") == 0) break;
|
||||
if (idep == modify->nfix) idep = -1;
|
||||
|
||||
// set maxrad_dynamic and maxrad_frozen for each type
|
||||
// include future Fix pour particles as dynamic
|
||||
// include future FixPour and FixDeposit particles as dynamic
|
||||
|
||||
for (i = 1; i <= atom->ntypes; i++)
|
||||
int itype;
|
||||
for (i = 1; i <= atom->ntypes; i++) {
|
||||
onerad_dynamic[i] = onerad_frozen[i] = 0.0;
|
||||
if (pour_type) onerad_dynamic[pour_type] = pour_maxrad;
|
||||
if (ipour >= 0) {
|
||||
itype = i;
|
||||
onerad_dynamic[i] =
|
||||
*((double *) modify->fix[ipour]->extract("radius",itype));
|
||||
}
|
||||
if (idep >= 0) {
|
||||
itype = i;
|
||||
onerad_dynamic[i] =
|
||||
*((double *) modify->fix[idep]->extract("radius",itype));
|
||||
}
|
||||
}
|
||||
|
||||
double *radius = atom->radius;
|
||||
int *mask = atom->mask;
|
||||
int *type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
for (i = 0; i < nlocal; i++){
|
||||
for (i = 0; i < nlocal; i++)
|
||||
if (mask[i] & freeze_group_bit)
|
||||
onerad_frozen[type[i]] = MAX(onerad_frozen[type[i]],radius[i]);
|
||||
else
|
||||
onerad_dynamic[type[i]] = MAX(onerad_dynamic[type[i]],radius[i]);
|
||||
}
|
||||
|
||||
MPI_Allreduce(&onerad_dynamic[1],&maxrad_dynamic[1],atom->ntypes,
|
||||
MPI_DOUBLE,MPI_MAX,world);
|
||||
MPI_Allreduce(&onerad_frozen[1],&maxrad_frozen[1],atom->ntypes,
|
||||
MPI_DOUBLE,MPI_MAX,world);
|
||||
|
||||
MYDBG(printf("# CUDA PairGranHookeCuda::init_style end\n"); )
|
||||
}
|
||||
|
||||
void PairGranHookeCuda::init_list(int id, NeighList *ptr)
|
||||
|
|
|
@ -134,6 +134,7 @@ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg)
|
|||
// allocate list for per thread accumulator manager class instances
|
||||
// and then have each thread create an instance of this class to
|
||||
// encourage the OS to use storage that is "close" to each thread's CPU.
|
||||
|
||||
thr = new ThrData *[nthreads];
|
||||
_nthr = nthreads;
|
||||
#if defined(_OPENMP)
|
||||
|
@ -207,6 +208,7 @@ void FixOMP::init()
|
|||
// kspace_split == 0 : regular processing
|
||||
// kspace_split < 0 : master partition, does not do kspace
|
||||
// kspace_split > 0 : slave partition, only does kspace
|
||||
|
||||
if (strstr(update->integrate_style,"verlet/split") != NULL) {
|
||||
if (universe->iworld == 0) kspace_split = -1;
|
||||
else kspace_split = 1;
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
#include "compute.h"
|
||||
#include "output.h"
|
||||
#include "dump.h"
|
||||
#include "accelerator_kokkos.h"
|
||||
#include "math_extra.h"
|
||||
#include "error.h"
|
||||
#include "memory.h"
|
||||
|
@ -76,7 +77,9 @@ CommBrick::CommBrick(LAMMPS *lmp) : Comm(lmp)
|
|||
|
||||
nthreads = 1;
|
||||
#ifdef _OPENMP
|
||||
if (getenv("OMP_NUM_THREADS") == NULL) {
|
||||
if (lmp->kokkos) {
|
||||
nthreads = lmp->kokkos->num_threads * lmp->kokkos->numa;
|
||||
} else if (getenv("OMP_NUM_THREADS") == NULL) {
|
||||
nthreads = 1;
|
||||
if (me == 0)
|
||||
error->warning(FLERR,"OMP_NUM_THREADS environment is not set.");
|
||||
|
@ -87,7 +90,7 @@ CommBrick::CommBrick(LAMMPS *lmp) : Comm(lmp)
|
|||
// enforce consistent number of threads across all MPI tasks
|
||||
|
||||
MPI_Bcast(&nthreads,1,MPI_INT,0,world);
|
||||
omp_set_num_threads(nthreads);
|
||||
if (!lmp->kokkos) omp_set_num_threads(nthreads);
|
||||
|
||||
if (me == 0) {
|
||||
if (screen)
|
||||
|
|
Loading…
Reference in New Issue