git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11633 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp 2014-03-14 22:28:09 +00:00
parent 6f3d33368c
commit 7b291636cf
14 changed files with 384 additions and 143 deletions

View File

@ -16,6 +16,7 @@
Axel Kohlmeyer (Temple U) added support for
FFTW3, KISSFFT, Dfti/MKL, and ACML.
Phil Blood (PSC) added single precision FFT.
Paul Coffman (IBM) added MPI collectives remap
------------------------------------------------------------------------- */
#include "mpi.h"
@ -347,6 +348,7 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)
1 = permute once = mid->fast, slow->mid, fast->slow
2 = permute twice = slow->fast, fast->mid, mid->slow
nbuf returns size of internal storage buffers used by FFT
usecollective use collective MPI operations for remapping data
------------------------------------------------------------------------- */
struct fft_plan_3d *fft_3d_create_plan(
@ -355,7 +357,7 @@ struct fft_plan_3d *fft_3d_create_plan(
int in_klo, int in_khi,
int out_ilo, int out_ihi, int out_jlo, int out_jhi,
int out_klo, int out_khi,
int scaled, int permute, int *nbuf)
int scaled, int permute, int *nbuf, int usecollective)
{
struct fft_plan_3d *plan;
int me,nprocs;
@ -430,7 +432,7 @@ struct fft_plan_3d *fft_3d_create_plan(
plan->pre_plan =
remap_3d_create_plan(comm,in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi,
first_ilo,first_ihi,first_jlo,first_jhi,
first_klo,first_khi,2,0,0,FFT_PRECISION);
first_klo,first_khi,2,0,0,FFT_PRECISION,0);
if (plan->pre_plan == NULL) return NULL;
}
@ -454,7 +456,7 @@ struct fft_plan_3d *fft_3d_create_plan(
first_ilo,first_ihi,first_jlo,first_jhi,
first_klo,first_khi,
second_ilo,second_ihi,second_jlo,second_jhi,
second_klo,second_khi,2,1,0,FFT_PRECISION);
second_klo,second_khi,2,1,0,FFT_PRECISION,usecollective);
if (plan->mid1_plan == NULL) return NULL;
// 1d FFTs along mid axis
@ -496,7 +498,7 @@ struct fft_plan_3d *fft_3d_create_plan(
second_jlo,second_jhi,second_klo,second_khi,
second_ilo,second_ihi,
third_jlo,third_jhi,third_klo,third_khi,
third_ilo,third_ihi,2,1,0,FFT_PRECISION);
third_ilo,third_ihi,2,1,0,FFT_PRECISION,usecollective);
if (plan->mid2_plan == NULL) return NULL;
// 1d FFTs along slow axis
@ -525,7 +527,7 @@ struct fft_plan_3d *fft_3d_create_plan(
third_klo,third_khi,third_ilo,third_ihi,
third_jlo,third_jhi,
out_klo,out_khi,out_ilo,out_ihi,
out_jlo,out_jhi,2,(permute+1)%3,0,FFT_PRECISION);
out_jlo,out_jhi,2,(permute+1)%3,0,FFT_PRECISION,0);
if (plan->post_plan == NULL) return NULL;
}

View File

@ -319,7 +319,7 @@ extern "C" {
struct fft_plan_3d *fft_3d_create_plan(MPI_Comm, int, int, int,
int, int, int, int, int,
int, int, int, int, int, int, int,
int, int, int *);
int, int, int *, int);
void fft_3d_destroy_plan(struct fft_plan_3d *);
void factor(int, int *, int *);
void bifactor(int, int *, int *);

View File

@ -24,12 +24,12 @@ FFT3d::FFT3d(LAMMPS *lmp, MPI_Comm comm, int nfast, int nmid, int nslow,
int in_klo, int in_khi,
int out_ilo, int out_ihi, int out_jlo, int out_jhi,
int out_klo, int out_khi,
int scaled, int permute, int *nbuf) : Pointers(lmp)
int scaled, int permute, int *nbuf, int usecollective) : Pointers(lmp)
{
plan = fft_3d_create_plan(comm,nfast,nmid,nslow,
in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi,
out_ilo,out_ihi,out_jlo,out_jhi,out_klo,out_khi,
scaled,permute,nbuf);
scaled,permute,nbuf,usecollective);
if (plan == NULL) error->one(FLERR,"Could not create 3d FFT plan");
}

View File

@ -22,7 +22,7 @@ namespace LAMMPS_NS {
class FFT3d : protected Pointers {
public:
FFT3d(class LAMMPS *, MPI_Comm,int,int,int,int,int,int,int,int,int,
int,int,int,int,int,int,int,int,int *);
int,int,int,int,int,int,int,int,int *,int);
~FFT3d();
void compute(FFT_SCALAR *, FFT_SCALAR *, int);
void timing1d(FFT_SCALAR *, int, int);

View File

@ -800,17 +800,17 @@ void PPPM::allocate()
fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
0,0,&tmp);
0,0,&tmp,collective_flag);
fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
0,0,&tmp);
0,0,&tmp,collective_flag);
remap = new Remap(lmp,world,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
1,0,0,FFT_PRECISION);
1,0,0,FFT_PRECISION,collective_flag);
// create ghost grid object for rho and electric field communication
@ -1268,8 +1268,8 @@ void PPPM::adjust_gewald()
}
/* ----------------------------------------------------------------------
Calculate f(x) using Newton-Raphson solver
------------------------------------------------------------------------- */
calculate f(x) using Newton-Raphson solver
------------------------------------------------------------------------- */
double PPPM::newton_raphson_f()
{
@ -1287,9 +1287,9 @@ double PPPM::newton_raphson_f()
}
/* ----------------------------------------------------------------------
Calculate numerical derivative f'(x) using forward difference
calculate numerical derivative f'(x) using forward difference
[f(x + h) - f(x)] / h
------------------------------------------------------------------------- */
------------------------------------------------------------------------- */
double PPPM::derivf()
{
@ -1307,7 +1307,7 @@ double PPPM::derivf()
}
/* ----------------------------------------------------------------------
Calculate the final estimate of the accuracy
calculate the final estimate of the accuracy
------------------------------------------------------------------------- */
double PPPM::final_accuracy()
@ -1315,7 +1315,6 @@ double PPPM::final_accuracy()
double xprd = domain->xprd;
double yprd = domain->yprd;
double zprd = domain->zprd;
double zprd_slab = zprd*slab_volfactor;
bigint natoms = atom->natoms;
double df_kspace = compute_df_kspace();
@ -3385,7 +3384,7 @@ void PPPM::poisson_groups(int AA_flag)
void PPPM::poisson_groups_triclinic()
{
int i,j,k,n;
int i,n;
// reuse memory (already declared)

View File

@ -1652,17 +1652,17 @@ void PPPMDisp::allocate()
fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
0,0,&tmp);
0,0,&tmp,collective_flag);
fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
0,0,&tmp);
0,0,&tmp,collective_flag);
remap = new Remap(lmp,world,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
1,0,0,FFT_PRECISION);
1,0,0,FFT_PRECISION,collective_flag);
// create ghost grid object for rho and electric field communication
@ -1731,17 +1731,17 @@ void PPPMDisp::allocate()
fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
0,0,&tmp);
0,0,&tmp,collective_flag);
fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
0,0,&tmp);
0,0,&tmp,collective_flag);
remap_6 = new Remap(lmp,world,
nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
1,0,0,FFT_PRECISION);
1,0,0,FFT_PRECISION,collective_flag);
// create ghost grid object for rho and electric field communication
@ -1887,17 +1887,17 @@ void PPPMDisp::allocate()
fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
0,0,&tmp);
0,0,&tmp,collective_flag);
fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
0,0,&tmp);
0,0,&tmp,collective_flag);
remap_6 = new Remap(lmp,world,
nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
1,0,0,FFT_PRECISION);
1,0,0,FFT_PRECISION,collective_flag);
// create ghost grid object for rho and electric field communication
@ -1967,17 +1967,17 @@ void PPPMDisp::allocate()
fft1_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
0,0,&tmp);
0,0,&tmp,collective_flag);
fft2_6 = new FFT3d(lmp,world,nx_pppm_6,ny_pppm_6,nz_pppm_6,
nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
0,0,&tmp);
0,0,&tmp,collective_flag);
remap_6 = new Remap(lmp,world,
nxlo_in_6,nxhi_in_6,nylo_in_6,nyhi_in_6,nzlo_in_6,nzhi_in_6,
nxlo_fft_6,nxhi_fft_6,nylo_fft_6,nyhi_fft_6,nzlo_fft_6,nzhi_fft_6,
1,0,0,FFT_PRECISION);
1,0,0,FFT_PRECISION,collective_flag);
// create ghost grid object for rho and electric field communication

View File

@ -839,17 +839,17 @@ void PPPMOld::allocate()
fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
0,0,&tmp);
0,0,&tmp,collective_flag);
fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
0,0,&tmp);
0,0,&tmp,collective_flag);
remap = new Remap(lmp,world,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
1,0,0,FFT_PRECISION);
1,0,0,FFT_PRECISION,collective_flag);
}
/* ----------------------------------------------------------------------

View File

@ -61,8 +61,10 @@
void remap_3d(FFT_SCALAR *in, FFT_SCALAR *out, FFT_SCALAR *buf,
struct remap_plan_3d *plan)
{
// use point-to-point communication
if (!plan->usecollective) {
MPI_Status status;
int i,isend,irecv;
FFT_SCALAR *scratch;
@ -107,6 +109,104 @@ void remap_3d(FFT_SCALAR *in, FFT_SCALAR *out, FFT_SCALAR *buf,
plan->unpack(&scratch[plan->recv_bufloc[irecv]],
&out[plan->recv_offset[irecv]],&plan->unpackplan[irecv]);
}
// use All2Allv collective for remap communication
} else {
if (plan->commringlen > 0) {
MPI_Status status;
int i,isend,irecv;
FFT_SCALAR *scratch;
if (plan->memory == 0) scratch = buf;
else scratch = plan->scratch;
// create send and recv buffers for alltoallv collective
int sendBufferSize = 0;
int recvBufferSize = 0;
for (int i=0;i<plan->nsend;i++)
sendBufferSize += plan->send_size[i];
for (int i=0;i<plan->nrecv;i++)
recvBufferSize += plan->recv_size[i];
FFT_SCALAR *packedSendBuffer
= (FFT_SCALAR *) malloc(sizeof(FFT_SCALAR) * sendBufferSize);
FFT_SCALAR *packedRecvBuffer
= (FFT_SCALAR *) malloc(sizeof(FFT_SCALAR) * recvBufferSize);
int *sendcnts = (int *) malloc(sizeof(int) * plan->commringlen);
int *rcvcnts = (int *) malloc(sizeof(int) * plan->commringlen);
int *sdispls = (int *) malloc(sizeof(int) * plan->commringlen);
int *rdispls = (int *) malloc(sizeof(int) * plan->commringlen);
int *nrecvmap = (int *) malloc(sizeof(int) * plan->commringlen);
// create and populate send data, count and displacement buffers
int currentSendBufferOffset = 0;
for (isend = 0; isend < plan->commringlen; isend++) {
sendcnts[isend] = 0;
sdispls[isend] = 0;
int foundentry = 0;
for (int i=0;(i<plan->nsend && !foundentry); i++) {
if (plan->send_proc[i] == plan->commringlist[isend]) {
foundentry = 1;
sendcnts[isend] = plan->send_size[i];
sdispls[isend] = currentSendBufferOffset;
plan->pack(&in[plan->send_offset[i]],
&packedSendBuffer[currentSendBufferOffset],
&plan->packplan[i]);
currentSendBufferOffset += plan->send_size[i];
}
}
}
// create and populate recv count and displacement buffers
int currentRecvBufferOffset = 0;
for (irecv = 0; irecv < plan->commringlen; irecv++) {
rcvcnts[irecv] = 0;
rdispls[irecv] = 0;
nrecvmap[irecv] = -1;
int foundentry = 0;
for (int i=0;(i<plan->nrecv && !foundentry); i++) {
if (plan->recv_proc[i] == plan->commringlist[irecv]) {
foundentry = 1;
rcvcnts[irecv] = plan->recv_size[i];
rdispls[irecv] = currentRecvBufferOffset;
currentRecvBufferOffset += plan->recv_size[i];
nrecvmap[irecv] = i;
}
}
}
int mpirc = MPI_Alltoallv(packedSendBuffer, sendcnts, sdispls,
MPI_FFT_SCALAR, packedRecvBuffer, rcvcnts,
rdispls, MPI_FFT_SCALAR, plan->comm);
// unpack the data from the recv buffer into out
currentRecvBufferOffset = 0;
for (irecv = 0; irecv < plan->commringlen; irecv++) {
if (nrecvmap[irecv] > -1) {
plan->unpack(&packedRecvBuffer[currentRecvBufferOffset],
&out[plan->recv_offset[nrecvmap[irecv]]],
&plan->unpackplan[nrecvmap[irecv]]);
currentRecvBufferOffset += plan->recv_size[nrecvmap[irecv]];
}
}
// free temporary data structures
free(sendcnts);
free(rcvcnts);
free(sdispls);
free(rdispls);
free(nrecvmap);
free(packedSendBuffer);
free(packedRecvBuffer);
}
}
}
/* ----------------------------------------------------------------------
@ -131,6 +231,7 @@ void remap_3d(FFT_SCALAR *in, FFT_SCALAR *out, FFT_SCALAR *buf,
precision precision of data
1 = single precision (4 bytes per datum)
2 = double precision (8 bytes per datum)
usecollective whether to use collective MPI or point-to-point
------------------------------------------------------------------------- */
struct remap_plan_3d *remap_3d_create_plan(
@ -139,11 +240,12 @@ struct remap_plan_3d *remap_3d_create_plan(
int in_klo, int in_khi,
int out_ilo, int out_ihi, int out_jlo, int out_jhi,
int out_klo, int out_khi,
int nqty, int permute, int memory, int precision)
int nqty, int permute, int memory, int precision, int usecollective)
{
struct remap_plan_3d *plan;
struct extent_3d *array;
struct extent_3d *inarray, *outarray;
struct extent_3d in,out,overlap;
int i,iproc,nsend,nrecv,ibuf,size,me,nprocs;
@ -156,6 +258,7 @@ struct remap_plan_3d *remap_3d_create_plan(
plan = (struct remap_plan_3d *) malloc(sizeof(struct remap_plan_3d));
if (plan == NULL) return NULL;
plan->usecollective = usecollective;
// store parameters in local data structs
@ -185,11 +288,14 @@ struct remap_plan_3d *remap_3d_create_plan(
// combine output extents across all procs
array = (struct extent_3d *) malloc(nprocs*sizeof(struct extent_3d));
if (array == NULL) return NULL;
inarray = (struct extent_3d *) malloc(nprocs*sizeof(struct extent_3d));
if (inarray == NULL) return NULL;
outarray = (struct extent_3d *) malloc(nprocs*sizeof(struct extent_3d));
if (outarray == NULL) return NULL;
MPI_Allgather(&out,sizeof(struct extent_3d),MPI_BYTE,
array,sizeof(struct extent_3d),MPI_BYTE,comm);
outarray,sizeof(struct extent_3d),MPI_BYTE,comm);
// count send collides, including self
@ -198,7 +304,7 @@ struct remap_plan_3d *remap_3d_create_plan(
for (i = 0; i < nprocs; i++) {
iproc++;
if (iproc == nprocs) iproc = 0;
nsend += remap_3d_collide(&in,&array[iproc],&overlap);
nsend += remap_3d_collide(&in,&outarray[iproc],&overlap);
}
// malloc space for send info
@ -223,7 +329,7 @@ struct remap_plan_3d *remap_3d_create_plan(
for (i = 0; i < nprocs; i++) {
iproc++;
if (iproc == nprocs) iproc = 0;
if (remap_3d_collide(&in,&array[iproc],&overlap)) {
if (remap_3d_collide(&in,&outarray[iproc],&overlap)) {
plan->send_proc[nsend] = iproc;
plan->send_offset[nsend] = nqty *
((overlap.klo-in.klo)*in.jsize*in.isize +
@ -241,15 +347,18 @@ struct remap_plan_3d *remap_3d_create_plan(
// plan->nsend = # of sends not including self
if (nsend && plan->send_proc[nsend-1] == me)
plan->nsend = nsend - 1;
if (nsend && plan->send_proc[nsend-1] == me) {
if (plan->usecollective) // for collectives include self in nsend list
plan->nsend = nsend;
else
plan->nsend = nsend - 1;
} else
plan->nsend = nsend;
// combine input extents across all procs
MPI_Allgather(&in,sizeof(struct extent_3d),MPI_BYTE,
array,sizeof(struct extent_3d),MPI_BYTE,comm);
inarray,sizeof(struct extent_3d),MPI_BYTE,comm);
// count recv collides, including self
@ -258,7 +367,7 @@ struct remap_plan_3d *remap_3d_create_plan(
for (i = 0; i < nprocs; i++) {
iproc++;
if (iproc == nprocs) iproc = 0;
nrecv += remap_3d_collide(&out,&array[iproc],&overlap);
nrecv += remap_3d_collide(&out,&inarray[iproc],&overlap);
}
// malloc space for recv info
@ -305,7 +414,7 @@ struct remap_plan_3d *remap_3d_create_plan(
for (i = 0; i < nprocs; i++) {
iproc++;
if (iproc == nprocs) iproc = 0;
if (remap_3d_collide(&out,&array[iproc],&overlap)) {
if (remap_3d_collide(&out,&inarray[iproc],&overlap)) {
plan->recv_proc[nrecv] = iproc;
plan->recv_bufloc[nrecv] = ibuf;
@ -349,25 +458,128 @@ struct remap_plan_3d *remap_3d_create_plan(
}
}
// plan->nrecv = # of recvs not including self
// create sub-comm rank list
if (nrecv && plan->recv_proc[nrecv-1] == me)
plan->nrecv = nrecv - 1;
else
plan->nrecv = nrecv;
if (plan->usecollective) {
plan->commringlist = NULL;
// merge recv and send rank lists
// ask Steve Plimpton about method to more accurately determine
// maximum number of procs contributing to pencil
int maxcommsize = nprocs;
int *commringlist = (int *) malloc(maxcommsize*sizeof(int));
int commringlen = 0;
for (int i = 0; i < nrecv; i++) {
commringlist[i] = plan->recv_proc[i];
commringlen++;
}
for (int i = 0; i < nsend; i++) {
int foundentry = 0;
for (int j=0;j<commringlen;j++)
if (commringlist[j] == plan->send_proc[i]) foundentry = 1;
if (!foundentry) {
commringlist[commringlen] = plan->send_proc[i];
commringlen++;
}
}
// sort initial commringlist
int swap = 0;
for (int c = 0 ; c < (commringlen - 1); c++) {
for (int d = 0 ; d < commringlen - c - 1; d++) {
if (commringlist[d] > commringlist[d+1]) {
swap = commringlist[d];
commringlist[d] = commringlist[d+1];
commringlist[d+1] = swap;
}
}
}
// collide all inarray extents for the comm ring with all output
// extents and all outarray extents for the comm ring with all input
// extents - if there is a collison add the rank to the comm ring,
// keep iterating until nothing is added to commring
int commringappend = 1;
while (commringappend) {
int newcommringlen = commringlen;
commringappend = 0;
for (int i=0;i<commringlen;i++) {
for (int j=0;j<nprocs;j++) {
if (remap_3d_collide(&inarray[commringlist[i]],
&outarray[j],&overlap)) {
int alreadyinlist = 0;
for (int k=0;k<newcommringlen;k++) {
if (commringlist[k] == j) {
alreadyinlist = 1;
}
}
if (!alreadyinlist) {
commringlist[newcommringlen++] = j;
commringappend = 1;
}
}
if (remap_3d_collide(&outarray[commringlist[i]],
&inarray[j],&overlap)) {
int alreadyinlist = 0;
for (int k=0;k<newcommringlen;k++) {
if (commringlist[k] == j) alreadyinlist = 1;
}
if (!alreadyinlist) {
commringlist[newcommringlen++] = j;
commringappend = 1;
}
}
}
}
commringlen = newcommringlen;
}
// sort the final commringlist
for (int c = 0 ; c < ( commringlen - 1 ); c++) {
for (int d = 0 ; d < commringlen - c - 1; d++) {
if (commringlist[d] > commringlist[d+1]) {
swap = commringlist[d];
commringlist[d] = commringlist[d+1];
commringlist[d+1] = swap;
}
}
}
// resize commringlist to final size
commringlist = (int *) realloc(commringlist, commringlen*sizeof(int));
// set the plan->commringlist
plan->commringlen = commringlen;
plan->commringlist = commringlist;
}
// plan->nrecv = # of recvs not including self
// for collectives include self in the nsend list
if (nrecv && plan->recv_proc[nrecv-1] == me) {
if (plan->usecollective) plan->nrecv = nrecv;
else plan->nrecv = nrecv - 1;
} else plan->nrecv = nrecv;
// init remaining fields in remap plan
plan->memory = memory;
if (nrecv == plan->nrecv)
plan->self = 0;
else
plan->self = 1;
if (nrecv == plan->nrecv) plan->self = 0;
else plan->self = 1;
// free locally malloced space
free(array);
free(inarray);
free(outarray);
// find biggest send message (not including self) and malloc space for it
@ -390,14 +602,34 @@ struct remap_plan_3d *remap_3d_create_plan(
if (memory == 1) {
if (nrecv > 0) {
plan->scratch =
(FFT_SCALAR *) malloc(nqty*out.isize*out.jsize*out.ksize*sizeof(FFT_SCALAR));
(FFT_SCALAR *) malloc(nqty*out.isize*out.jsize*out.ksize *
sizeof(FFT_SCALAR));
if (plan->scratch == NULL) return NULL;
}
}
// create new MPI communicator for remap
// if using collective and the commringlist is NOT empty create a
// communicator for the plan based off an MPI_Group created with
// ranks from the commringlist
MPI_Comm_dup(comm,&plan->comm);
if ((plan->usecollective && (plan->commringlen > 0))) {
MPI_Group orig_group, new_group;
MPI_Comm_group(comm, &orig_group);
MPI_Group_incl(orig_group, plan->commringlen,
plan->commringlist, &new_group);
MPI_Comm_create(comm, new_group, &plan->comm);
}
// if using collective and the comm ring list is empty create
// a communicator for the plan with an empty group
else if ((plan->usecollective) && (plan->commringlen == 0)) {
MPI_Comm_create(comm, MPI_GROUP_EMPTY, &plan->comm);
}
// not using collective - dup comm
else MPI_Comm_dup(comm,&plan->comm);
// return pointer to plan
@ -409,12 +641,17 @@ struct remap_plan_3d *remap_3d_create_plan(
------------------------------------------------------------------------- */
void remap_3d_destroy_plan(struct remap_plan_3d *plan)
{
// free MPI communicator
if (!((plan->usecollective) && (plan->commringlen == 0)))
MPI_Comm_free(&plan->comm);
if (plan->usecollective) {
if (plan->commringlist != NULL)
free(plan->commringlist);
}
// free internal arrays
if (plan->nsend || plan->self) {

View File

@ -45,6 +45,9 @@ struct remap_plan_3d {
int self; // whether I send/recv with myself
int memory; // user provides scratch space or not
MPI_Comm comm; // group of procs performing remap
int usecollective; // use collective or point-to-point MPI
int commringlen; // length of commringlist
int *commringlist; // ranks on communication ring of this plan
};
// collision between 2 regions
@ -59,8 +62,9 @@ struct extent_3d {
void remap_3d(FFT_SCALAR *, FFT_SCALAR *, FFT_SCALAR *, struct remap_plan_3d *);
struct remap_plan_3d *remap_3d_create_plan(MPI_Comm,
int, int, int, int, int, int, int, int, int, int, int, int,
int, int, int, int);
int, int, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int);
void remap_3d_destroy_plan(struct remap_plan_3d *);
int remap_3d_collide(struct extent_3d *,
struct extent_3d *, struct extent_3d *);

View File

@ -24,12 +24,13 @@ Remap::Remap(LAMMPS *lmp, MPI_Comm comm,
int in_klo, int in_khi,
int out_ilo, int out_ihi, int out_jlo, int out_jhi,
int out_klo, int out_khi,
int nqty, int permute, int memory, int precision) : Pointers(lmp)
int nqty, int permute, int memory,
int precision, int usecollective) : Pointers(lmp)
{
plan = remap_3d_create_plan(comm,
in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi,
out_ilo,out_ihi,out_jlo,out_jhi,out_klo,out_khi,
nqty,permute,memory,precision);
nqty,permute,memory,precision,usecollective);
if (plan == NULL) error->one(FLERR,"Could not create 3d remap plan");
}

View File

@ -22,7 +22,7 @@ namespace LAMMPS_NS {
class Remap : protected Pointers {
public:
Remap(class LAMMPS *, MPI_Comm,int,int,int,int,int,int,
int,int,int,int,int,int,int,int,int,int);
int,int,int,int,int,int,int,int,int,int,int);
~Remap();
void perform(FFT_SCALAR *, FFT_SCALAR *, FFT_SCALAR *);

View File

@ -158,7 +158,7 @@ FixPhonon::FixPhonon(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
for (int i = 1; i < nprocs; ++i) fft_disp[i] = fft_disp[i-1]+fft_cnts[i-1];
delete []nx_loc;
fft = new FFT3d(lmp,world,nz,ny,nx,0,nz-1,0,ny-1,nxlo,nxhi,0,nz-1,0,ny-1,nxlo,nxhi,0,0,&mysize);
fft = new FFT3d(lmp,world,nz,ny,nx,0,nz-1,0,ny-1,nxlo,nxhi,0,nz-1,0,ny-1,nxlo,nxhi,0,0,&mysize,0);
memory->create(fft_data, MAX(1,mynq)*2, "fix_phonon:fft_data");
// allocate variables; MAX(1,... is used because NULL buffer will result in error for MPI
@ -225,7 +225,6 @@ FixPhonon::FixPhonon(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
temperature = modify->compute[icompute];
inv_nTemp = 1.0/group->count(temperature->igroup);
return;
} // end of constructor
/* ---------------------------------------------------------------------- */
@ -294,8 +293,6 @@ void FixPhonon::init()
int count = 0;
for (int i = 0; i < modify->nfix; ++i) if (strcmp(modify->fix[i]->style,"gfc") == 0) ++count;
if (count > 1 && me == 0) error->warning(FLERR,"More than one fix phonon defined"); // just warn, but allowed.
return;
}
/* ---------------------------------------------------------------------- */
@ -318,8 +315,6 @@ void FixPhonon::setup(int flag)
prev_nstep = update->ntimestep;
neval = ifreq = 0;
return;
}
/* ---------------------------------------------------------------------- */
@ -422,7 +417,6 @@ void FixPhonon::end_of_step()
// compute and output Phi_q after every nfreq evaluations
if (++ifreq == nfreq) postprocess();
return;
} // end of end_of_step()
/* ---------------------------------------------------------------------- */
@ -459,8 +453,7 @@ int FixPhonon::modify_param(int narg, char **arg)
return 2;
}
return 0;
return 0;
}
/* ----------------------------------------------------------------------
@ -522,8 +515,6 @@ void FixPhonon::getmass()
delete []mass_all;
delete []type_one;
delete []type_all;
return;
}
@ -629,8 +620,6 @@ void FixPhonon::readmap()
error->one(FLERR,"The mapping info read is incorrect!");
}
}
return;
}
/* ----------------------------------------------------------------------
@ -780,7 +769,6 @@ void FixPhonon::postprocess( )
fflush(flog);
}
return;
} // end of postprocess
/* ----------------------------------------------------------------------
@ -864,8 +852,6 @@ void FixPhonon::GaussJordan(int n, std::complex<double> *Mat)
delete []indxr;
delete []indxc;
delete []ipiv;
return;
}
/* ----------------------------------------------------------------------
@ -926,7 +912,5 @@ void FixPhonon::EnforceASR()
}
}
}
return;
}
/* --------------------------------------------------------------------*/

View File

@ -44,7 +44,15 @@ KSpace::KSpace(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
gewaldflag = 0;
minorder = 2;
overlap_allowed = 1;
fftbench = 1;
fftbench = 0;
// default to using MPI collectives for FFT/remap only on IBM BlueGene
#ifdef __bg__
collective_flag = 1;
#else
collective_flag = 0;
#endif
kewaldflag = 0;
@ -356,31 +364,31 @@ void KSpace::modify_params(int narg, char **arg)
while (iarg < narg) {
if (strcmp(arg[iarg],"mesh") == 0) {
if (iarg+4 > narg) error->all(FLERR,"Illegal kspace_modify command");
nx_pppm = nx_msm_max = atoi(arg[iarg+1]);
ny_pppm = ny_msm_max = atoi(arg[iarg+2]);
nz_pppm = nz_msm_max = atoi(arg[iarg+3]);
nx_pppm = nx_msm_max = force->inumeric(FLERR,arg[iarg+1]);
ny_pppm = ny_msm_max = force->inumeric(FLERR,arg[iarg+2]);
nz_pppm = nz_msm_max = force->inumeric(FLERR,arg[iarg+3]);
if (nx_pppm == 0 && ny_pppm == 0 && nz_pppm == 0) gridflag = 0;
else gridflag = 1;
iarg += 4;
} else if (strcmp(arg[iarg],"mesh/disp") == 0) {
if (iarg+4 > narg) error->all(FLERR,"Illegal kspace_modify command");
nx_pppm_6 = atoi(arg[iarg+1]);
ny_pppm_6 = atoi(arg[iarg+2]);
nz_pppm_6 = atoi(arg[iarg+3]);
nx_pppm_6 = force->inumeric(FLERR,arg[iarg+1]);
ny_pppm_6 = force->inumeric(FLERR,arg[iarg+2]);
nz_pppm_6 = force->inumeric(FLERR,arg[iarg+3]);
if (nx_pppm_6 == 0 || ny_pppm_6 == 0 || nz_pppm_6 == 0) gridflag_6 = 0;
else gridflag_6 = 1;
iarg += 4;
} else if (strcmp(arg[iarg],"order") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal kspace_modify command");
order = atoi(arg[iarg+1]);
order = force->inumeric(FLERR,arg[iarg+1]);
iarg += 2;
} else if (strcmp(arg[iarg],"order/disp") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal kspace_modify command");
order_6 = atoi(arg[iarg+1]);
order_6 = force->inumeric(FLERR,arg[iarg+1]);
iarg += 2;
} else if (strcmp(arg[iarg],"minorder") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal kspace_modify command");
minorder = atoi(arg[iarg+1]);
minorder = force->inumeric(FLERR,arg[iarg+1]);
if (minorder < 2) error->all(FLERR,"Illegal kspace_modify command");
iarg += 2;
} else if (strcmp(arg[iarg],"overlap") == 0) {
@ -391,17 +399,17 @@ void KSpace::modify_params(int narg, char **arg)
iarg += 2;
} else if (strcmp(arg[iarg],"force") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal kspace_modify command");
accuracy_absolute = atof(arg[iarg+1]);
accuracy_absolute = force->numeric(FLERR,arg[iarg+1]);
iarg += 2;
} else if (strcmp(arg[iarg],"gewald") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal kspace_modify command");
g_ewald = atof(arg[iarg+1]);
g_ewald = force->numeric(FLERR,arg[iarg+1]);
if (g_ewald == 0.0) gewaldflag = 0;
else gewaldflag = 1;
iarg += 2;
} else if (strcmp(arg[iarg],"gewald/disp") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal kspace_modify command");
g_ewald_6 = atof(arg[iarg+1]);
g_ewald_6 = force->numeric(FLERR,arg[iarg+1]);
if (g_ewald_6 == 0.0) gewaldflag_6 = 0;
else gewaldflag_6 = 1;
iarg += 2;
@ -411,7 +419,7 @@ void KSpace::modify_params(int narg, char **arg)
slabflag = 2;
} else {
slabflag = 1;
slab_volfactor = atof(arg[iarg+1]);
slab_volfactor = force->numeric(FLERR,arg[iarg+1]);
if (slab_volfactor <= 1.0)
error->all(FLERR,"Bad kspace_modify slab parameter");
if (slab_volfactor < 2.0 && comm->me == 0)
@ -431,6 +439,12 @@ void KSpace::modify_params(int narg, char **arg)
else if (strcmp(arg[iarg+1],"no") == 0) fftbench = 0;
else error->all(FLERR,"Illegal kspace_modify command");
iarg += 2;
} else if (strcmp(arg[iarg],"collective") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal kspace_modify command");
if (strcmp(arg[iarg+1],"yes") == 0) collective_flag = 1;
else if (strcmp(arg[iarg+1],"no") == 0) collective_flag = 0;
else error->all(FLERR,"Illegal kspace_modify command");
iarg += 2;
} else if (strcmp(arg[iarg],"diff") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal kspace_modify command");
if (strcmp(arg[iarg+1],"ad") == 0) differentiation_flag = 1;

View File

@ -73,7 +73,7 @@ class KSpace : protected Pointers {
int compute_flag; // 0 if skip compute()
int fftbench; // 0 if skip FFT timing
int collective_flag; // 1 if use MPI collectives for FFT/remap
int stagger_flag; // 1 if using staggered PPPM grids
double splittol; // tolerance for when to truncate the splitting