git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12127 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp 2014-06-13 22:03:10 +00:00
parent a4b4422712
commit 3f98c4d076
6 changed files with 49 additions and 34 deletions

View File

@ -35,8 +35,8 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
// process any command-line args that invoke Kokkos settings
int device = 0;
int num_threads = 1;
int numa = 1;
num_threads = 1;
numa = 1;
int iarg = 0;
while (iarg < narg) {

View File

@ -27,6 +27,8 @@ class KokkosLMP : protected Pointers {
int forward_comm_classic;
int exchange_comm_on_host;
int forward_comm_on_host;
int num_threads;
int numa;
KokkosLMP(class LAMMPS *, int, char **);
~KokkosLMP();

View File

@ -75,7 +75,7 @@ CommCuda::CommCuda(LAMMPS *lmp) : CommBrick(lmp)
buf_send = NULL;
buf_recv = NULL;
Comm::free_swap();
CommBrick::free_swap();
allocate_swap(maxswap);
}
@ -132,7 +132,7 @@ void CommCuda::init()
cuda->shared_data.comm.slablo.dev_data=cu_slablo->dev_data();
cuda->shared_data.comm.slabhi.dev_data=cu_slabhi->dev_data();
Comm::init();
CommBrick::init();
}
/* ----------------------------------------------------------------------
@ -145,7 +145,7 @@ void CommCuda::init()
void CommCuda::setup()
{
if(cuda->shared_data.pair.neighall) cutghostuser = MAX(2.0*neighbor->cutneighmax,cutghostuser);
Comm::setup();
CommBrick::setup();
//upload changed geometry to device
if(style == SINGLE)
@ -197,7 +197,7 @@ void CommCuda::forward_comm_cuda()
if(not comm_x_only && not avec->cudable)
{
cuda->downloadAll();
Comm::forward_comm();
CommBrick::forward_comm();
cuda->uploadAll();
return;
}
@ -630,7 +630,7 @@ void CommCuda::forward_comm_pair(Pair *pair)
{
if(not cuda->shared_data.pair.cudable_force)
{
return Comm::forward_comm_pair(pair);
return CommBrick::forward_comm_pair(pair);
}
int iswap,n;
@ -753,7 +753,7 @@ void CommCuda::exchange()
if(not cuda->oncpu) cuda->downloadAll();
Comm::exchange();
CommBrick::exchange();
}
@ -887,7 +887,7 @@ void CommCuda::borders()
return;
}
Comm::borders();
CommBrick::borders();
cuda->setSystemParams();
if(cuda->finished_setup) {cuda->checkResize(); cuda->uploadAll();}
@ -1313,7 +1313,7 @@ void CommCuda::grow_list(int iswap, int n)
void CommCuda::grow_swap(int n)
{
int oldmaxswap=maxswap;
Comm::grow_swap(n);
CommBrick::grow_swap(n);
if(n>cu_sendlist->get_dim()[0])
{
MYDBG(printf(" # CUDA CommCuda::grow_swap\n");)
@ -1357,7 +1357,7 @@ void CommCuda::grow_swap(int n)
void CommCuda::allocate_swap(int n)
{
Comm::allocate_swap(n);
CommBrick::allocate_swap(n);
delete cu_pbc;
delete cu_slablo;
@ -1392,7 +1392,7 @@ void CommCuda::allocate_swap(int n)
void CommCuda::allocate_multi(int n)
{
Comm::allocate_multi(n);
CommBrick::allocate_multi(n);
delete cu_multilo;
delete cu_multihi;
@ -1410,7 +1410,7 @@ void CommCuda::allocate_multi(int n)
void CommCuda::free_swap()
{
Comm::free_swap();
CommBrick::free_swap();
delete cuda->shared_data.comm.nsend_swap; cuda->shared_data.comm.nsend_swap=NULL;
delete cu_pbc; cu_pbc = NULL;
@ -1431,7 +1431,7 @@ void CommCuda::free_swap()
// Calls the inherited free_multi(), then deletes the cu_multilo and
// cu_multihi objects and resets both pointers to NULL. Resetting matters
// because allocate_multi() deletes these same pointers unconditionally.
void CommCuda::free_multi()
{
// NOTE(review): this listing looks like a diff hunk with its +/- markers
// stripped, so the two calls below are presumably the removed (Comm::) and
// added (CommBrick::) forms of a single line -- confirm against the real file.
Comm::free_multi();
CommBrick::free_multi();
delete cu_multilo; cu_multilo = NULL;
delete cu_multihi; cu_multihi = NULL;
}

View File

@ -168,8 +168,6 @@ void PairGranHookeCuda::init_style()
dt = update->dt;
// check for Fix freeze and set freeze_group_bit
for (i = 0; i < modify->nfix; i++)
@ -178,42 +176,52 @@ void PairGranHookeCuda::init_style()
else freeze_group_bit = 0;
cuda->shared_data.pair.freeze_group_bit=freeze_group_bit;
// check for Fix pour and set pour_type and pour_maxrad
int pour_type = 0;
double pour_maxrad = 0.0;
for (i = 0; i < modify->nfix; i++)
if (strcmp(modify->fix[i]->style,"pour") == 0) break;
if (i < modify->nfix) {
pour_type = ((FixPour *) modify->fix[i])->ntype;
pour_maxrad = ((FixPour *) modify->fix[i])->radius_max;
}
// check for FixPour and FixDeposit so can extract particle radii
int ipour;
for (ipour = 0; ipour < modify->nfix; ipour++)
if (strcmp(modify->fix[ipour]->style,"pour") == 0) break;
if (ipour == modify->nfix) ipour = -1;
int idep;
for (idep = 0; idep < modify->nfix; idep++)
if (strcmp(modify->fix[idep]->style,"deposit") == 0) break;
if (idep == modify->nfix) idep = -1;
// set maxrad_dynamic and maxrad_frozen for each type
// include future Fix pour particles as dynamic
// include future FixPour and FixDeposit particles as dynamic
for (i = 1; i <= atom->ntypes; i++)
int itype;
for (i = 1; i <= atom->ntypes; i++) {
onerad_dynamic[i] = onerad_frozen[i] = 0.0;
if (pour_type) onerad_dynamic[pour_type] = pour_maxrad;
if (ipour >= 0) {
itype = i;
onerad_dynamic[i] =
*((double *) modify->fix[ipour]->extract("radius",itype));
}
if (idep >= 0) {
itype = i;
onerad_dynamic[i] =
*((double *) modify->fix[idep]->extract("radius",itype));
}
}
double *radius = atom->radius;
int *mask = atom->mask;
int *type = atom->type;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++){
for (i = 0; i < nlocal; i++)
if (mask[i] & freeze_group_bit)
onerad_frozen[type[i]] = MAX(onerad_frozen[type[i]],radius[i]);
else
onerad_dynamic[type[i]] = MAX(onerad_dynamic[type[i]],radius[i]);
}
MPI_Allreduce(&onerad_dynamic[1],&maxrad_dynamic[1],atom->ntypes,
MPI_DOUBLE,MPI_MAX,world);
MPI_Allreduce(&onerad_frozen[1],&maxrad_frozen[1],atom->ntypes,
MPI_DOUBLE,MPI_MAX,world);
MYDBG(printf("# CUDA PairGranHookeCuda::init_style end\n"); )
}
void PairGranHookeCuda::init_list(int id, NeighList *ptr)

View File

@ -134,6 +134,7 @@ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg)
// allocate list for per thread accumulator manager class instances
// and then have each thread create an instance of this class to
// encourage the OS to use storage that is "close" to each thread's CPU.
thr = new ThrData *[nthreads];
_nthr = nthreads;
#if defined(_OPENMP)
@ -207,6 +208,7 @@ void FixOMP::init()
// kspace_split == 0 : regular processing
// kspace_split < 0 : master partition, does not do kspace
// kspace_split > 0 : slave partition, only does kspace
if (strstr(update->integrate_style,"verlet/split") != NULL) {
if (universe->iworld == 0) kspace_split = -1;
else kspace_split = 1;

View File

@ -35,6 +35,7 @@
#include "compute.h"
#include "output.h"
#include "dump.h"
#include "accelerator_kokkos.h"
#include "math_extra.h"
#include "error.h"
#include "memory.h"
@ -76,7 +77,9 @@ CommBrick::CommBrick(LAMMPS *lmp) : Comm(lmp)
nthreads = 1;
#ifdef _OPENMP
if (getenv("OMP_NUM_THREADS") == NULL) {
if (lmp->kokkos) {
nthreads = lmp->kokkos->num_threads * lmp->kokkos->numa;
} else if (getenv("OMP_NUM_THREADS") == NULL) {
nthreads = 1;
if (me == 0)
error->warning(FLERR,"OMP_NUM_THREADS environment is not set.");
@ -87,7 +90,7 @@ CommBrick::CommBrick(LAMMPS *lmp) : Comm(lmp)
// enforce consistent number of threads across all MPI tasks
MPI_Bcast(&nthreads,1,MPI_INT,0,world);
omp_set_num_threads(nthreads);
if (!lmp->kokkos) omp_set_num_threads(nthreads);
if (me == 0) {
if (screen)