Merge pull request #2280 from lammps/gridcomm-tiled

Support for tiled decompositions in PPPM
Commit 4c46119a48, authored by Axel Kohlmeyer on 2020-08-21 00:16:20 -04:00, committed by GitHub.
50 changed files with 3818 additions and 2289 deletions
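The change that recurs throughout the hunks below: each KSpace style's per-purpose grid-communication objects (cg, cg_peratom, cg_all, ...) and their ghost_notify()/setup() handshake are replaced by a single GridComm object, and every communication call now states how many values per grid point travel, how large one value is, and which caller-owned buffers to use, so one object serves both the regular (brick) and the new tiled decompositions. A minimal before/after sketch of the calling pattern, pieced together from the PPPM hunks below (illustrative only, not a compilable excerpt):

// before this PR: one GridComm per communication purpose,
// ghost handshake and message buffers handled inside the class
cg->ghost_notify();
cg->setup();
cg->forward_comm(this,FORWARD_IK);                  // E-field ghost comm
cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);  // per-atom ghost comm

// after this PR: one GridComm object; the caller passes the number of
// values per grid point, their size, and buffers sized via gc->setup()
gc->forward_comm_kspace(this,3,sizeof(FFT_SCALAR),FORWARD_IK,
                        gc_buf1,gc_buf2,MPI_FFT_SCALAR);
gc->forward_comm_kspace(this,7,sizeof(FFT_SCALAR),FORWARD_IK_PERATOM,
                        gc_buf1,gc_buf2,MPI_FFT_SCALAR);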


@ -203,11 +203,7 @@ void PPPMGPU::compute(int eflag, int vflag)
// If need per-atom energies/virials, allocate per-atom arrays here
// so that particle map on host can be done concurrently with GPU calculations
if (evflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom->ghost_notify();
cg_peratom->setup();
}
if (evflag_atom && !peratom_allocate_flag) allocate_peratom();
if (triclinic == 0) {
bool success = true;
@ -258,10 +254,12 @@ void PPPMGPU::compute(int eflag, int vflag)
// remap from 3d decomposition to FFT decomposition
if (triclinic == 0) {
cg->reverse_comm(this,REVERSE_RHO_GPU);
gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO_GPU,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
brick2fft_gpu();
} else {
cg->reverse_comm(this,REVERSE_RHO);
gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
PPPM::brick2fft();
}
@ -274,16 +272,22 @@ void PPPMGPU::compute(int eflag, int vflag)
// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks
if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
else cg->forward_comm(this,FORWARD_IK);
if (differentiation_flag == 1)
gc->forward_comm_kspace(this,1,sizeof(FFT_SCALAR),FORWARD_AD,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
else
gc->forward_comm_kspace(this,3,sizeof(FFT_SCALAR),FORWARD_IK,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
// extra per-atom energy/virial communication
if (evflag_atom) {
if (differentiation_flag == 1 && vflag_atom)
cg_peratom->forward_comm(this,FORWARD_AD_PERATOM);
gc->forward_comm_kspace(this,6,sizeof(FFT_SCALAR),FORWARD_AD_PERATOM,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
else if (differentiation_flag == 0)
cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
gc->forward_comm_kspace(this,7,sizeof(FFT_SCALAR),FORWARD_IK_PERATOM,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
}
poisson_time += MPI_Wtime()-t3;
@ -510,8 +514,10 @@ void PPPMGPU::poisson_ik()
pack own values to buf to send to another proc
------------------------------------------------------------------------- */
void PPPMGPU::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPMGPU::pack_forward_grid(int flag, void *vbuf, int nlist, int *list)
{
FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;
int n = 0;
if (flag == FORWARD_IK) {
@ -568,8 +574,10 @@ void PPPMGPU::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
unpack another proc's own values from buf and set own ghost values
------------------------------------------------------------------------- */
void PPPMGPU::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPMGPU::unpack_forward_grid(int flag, void *vbuf, int nlist, int *list)
{
FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;
int n = 0;
if (flag == FORWARD_IK) {
@ -626,8 +634,10 @@ void PPPMGPU::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
pack ghost values into buf to send to another proc
------------------------------------------------------------------------- */
void PPPMGPU::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPMGPU::pack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
{
FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;
if (flag == REVERSE_RHO_GPU) {
FFT_SCALAR *src = &density_brick_gpu[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++)
@ -643,8 +653,10 @@ void PPPMGPU::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
unpack another proc's ghost values from buf and add to own values
------------------------------------------------------------------------- */
void PPPMGPU::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPMGPU::unpack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
{
FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;
if (flag == REVERSE_RHO_GPU) {
FFT_SCALAR *dest = &density_brick_gpu[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++)
@ -818,7 +830,8 @@ void PPPMGPU::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag)
density_brick = density_A_brick;
density_fft = density_A_fft;
cg->reverse_comm(this,REVERSE_RHO);
gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
brick2fft();
// group B
@ -826,7 +839,8 @@ void PPPMGPU::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag)
density_brick = density_B_brick;
density_fft = density_B_fft;
cg->reverse_comm(this,REVERSE_RHO);
gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
brick2fft();
// switch back pointers


@ -46,10 +46,10 @@ class PPPMGPU : public PPPM {
void brick2fft_gpu();
virtual void poisson_ik();
void pack_forward(int, FFT_SCALAR *, int, int *);
void unpack_forward(int, FFT_SCALAR *, int, int *);
void pack_reverse(int, FFT_SCALAR *, int, int *);
void unpack_reverse(int, FFT_SCALAR *, int, int *);
void pack_forward_grid(int, void *, int, int *);
void unpack_forward_grid(int, void *, int, int *);
void pack_reverse_grid(int, void *, int, int *);
void unpack_reverse_grid(int, void *, int, int *);
FFT_SCALAR ***create_3d_offset(int, int, int, int, int, int, const char *,
FFT_SCALAR *, int);
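The renamed pack/unpack hooks above also change their buffer argument from FFT_SCALAR * to void *, so the same GridComm machinery can drive KSpace styles whose grids use different scalar types; each override simply casts the buffer back, as the pppm_gpu.cpp hunks above and the msm.cpp hunks further below both do. Schematically (following the diff, not a compilable excerpt):

void PPPMGPU::pack_forward_grid(int flag, void *vbuf, int nlist, int *list)
{
  FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;   // PPPM grids hold FFT_SCALAR values
  // ... copy the nlist grid values selected by list[] into buf, as before ...
}

void MSM::pack_forward_grid(int flag, void *vbuf, int nlist, int *list)
{
  double *buf = (double *) vbuf;           // MSM grids hold plain doubles
  // ...
}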

File diff suppressed because it is too large.


@ -14,81 +14,59 @@
#ifndef LMP_GRIDCOMM_KOKKOS_H
#define LMP_GRIDCOMM_KOKKOS_H
#include "pointers.h"
#include "gridcomm.h"
#include "kokkos_type.h"
#include "fftdata_kokkos.h"
#ifdef FFT_SINGLE
typedef float FFT_SCALAR;
#define MPI_FFT_SCALAR MPI_FLOAT
#else
typedef double FFT_SCALAR;
#define MPI_FFT_SCALAR MPI_DOUBLE
#endif
namespace LAMMPS_NS {
template<class DeviceType>
class GridCommKokkos : protected Pointers {
class GridCommKokkos : public GridComm {
public:
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
typedef FFTArrayTypes<DeviceType> FFT_AT;
GridCommKokkos(class LAMMPS *, MPI_Comm, int, int,
GridCommKokkos(class LAMMPS *, MPI_Comm, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int);
GridCommKokkos(class LAMMPS *, MPI_Comm, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int);
GridCommKokkos(class LAMMPS *, MPI_Comm, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int);
~GridCommKokkos();
void ghost_notify();
int ghost_overlap();
void setup();
void forward_comm(class KSpace *, int);
void reverse_comm(class KSpace *, int);
double memory_usage();
virtual ~GridCommKokkos();
void forward_comm_kspace(class KSpace *, int, int,
FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype);
void reverse_comm_kspace(class KSpace *, int, int,
FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype);
private:
int me;
int nforward,nreverse;
MPI_Comm gridcomm;
MPI_Request request;
DAT::tdual_int_2d k_swap_packlist;
DAT::tdual_int_2d k_swap_unpacklist;
// in = inclusive indices of 3d grid chunk that I own
// out = inclusive indices of 3d grid chunk I own plus ghosts I use
// proc = 6 neighbor procs that surround me
// ghost = # of my owned grid planes needed from me
// by each of 6 neighbor procs to become their ghost planes
DAT::tdual_int_2d k_send_packlist;
int inxlo,inxhi,inylo,inyhi,inzlo,inzhi;
int outxlo,outxhi,outylo,outyhi,outzlo,outzhi;
int outxlo_max,outxhi_max,outylo_max,outyhi_max,outzlo_max,outzhi_max;
int procxlo,procxhi,procylo,procyhi,proczlo,proczhi;
int ghostxlo,ghostxhi,ghostylo,ghostyhi,ghostzlo,ghostzhi;
DAT::tdual_int_2d k_recv_unpacklist;
int nbuf;
//FFT_SCALAR *buf1,*buf2;
FFT_DAT::tdual_FFT_SCALAR_1d k_buf1;
FFT_DAT::tdual_FFT_SCALAR_1d k_buf2;
DAT::tdual_int_2d k_copy_packlist;
DAT::tdual_int_2d k_copy_unpacklist;
struct Swap {
int sendproc; // proc to send to for forward comm
int recvproc; // proc to recv from for forward comm
int npack; // # of datums to pack
int nunpack; // # of datums to unpack
//int *packlist; // 3d array offsets to pack
//int *unpacklist; // 3d array offsets to unpack
};
// -------------------------------------------
// internal methods
// -------------------------------------------
DAT::tdual_int_2d k_packlist;
DAT::tdual_int_2d k_unpacklist;
void setup_regular(int &, int &);
void setup_tiled(int &, int &);
int nswap;
Swap *swap;
void forward_comm_kspace_regular(class KSpace *, int, int,
FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype);
void forward_comm_kspace_tiled(class KSpace *, int, int,
FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype);
void reverse_comm_kspace_regular(class KSpace *, int, int,
FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype);
void reverse_comm_kspace_tiled(class KSpace *, int, int,
FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype);
void grow_swap();
int indices(DAT::tdual_int_2d &, int, int, int, int, int, int, int);
};


@ -23,10 +23,10 @@ class KokkosBaseFFT {
KokkosBaseFFT() {}
//Kspace
virtual void pack_forward_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
virtual void unpack_forward_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
virtual void pack_reverse_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
virtual void unpack_reverse_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
virtual void pack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
virtual void unpack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {};
virtual void pack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
virtual void unpack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {};
};
}


@ -96,8 +96,7 @@ PPPMKokkos<DeviceType>::PPPMKokkos(LAMMPS *lmp) : PPPM(lmp)
fft1 = fft2 = NULL;
remap = NULL;
cg = NULL;
cg_peratom = NULL;
gc = NULL;
nmax = 0;
//part2grid = NULL;
@ -192,9 +191,6 @@ void PPPMKokkos<DeviceType>::init()
"slab correction");
if (domain->dimension == 2) error->all(FLERR,
"Cannot use PPPM with 2d simulation");
if (comm->style != 0)
error->universe_all(FLERR,"PPPM can only currently be used with "
"comm_style brick");
if (!atomKK->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
@ -255,9 +251,7 @@ void PPPMKokkos<DeviceType>::init()
// or overlap is allowed, then done
// else reduce order and try again
int (*procneigh)[2] = comm->procneigh;
GridCommKokkos<DeviceType> *cgtmp = NULL;
GridCommKokkos<DeviceType> *gctmp = NULL;
int iteration = 0;
while (order >= minorder) {
@ -269,24 +263,23 @@ void PPPMKokkos<DeviceType>::init()
set_grid_local();
if (overlap_allowed) break;
cgtmp = new GridCommKokkos<DeviceType>(lmp,world,1,1,
gctmp = new GridCommKokkos<DeviceType>(lmp,world,nx_pppm,ny_pppm,nz_pppm,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
cgtmp->ghost_notify();
if (!cgtmp->ghost_overlap()) break;
delete cgtmp;
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out);
int tmp1,tmp2;
gctmp->setup(tmp1,tmp2);
if (!gctmp->ghost_adjacent()) break;
delete gctmp;
order--;
iteration++;
}
if (order < minorder) error->all(FLERR,"PPPM order < minimum allowed order");
if (!overlap_allowed && cgtmp->ghost_overlap())
if (!overlap_allowed && gctmp->ghost_adjacent())
error->all(FLERR,"PPPM grid stencil extends "
"beyond nearest neighbor processor");
if (cgtmp) delete cgtmp;
if (gctmp) delete gctmp;
// adjust g_ewald
@ -320,8 +313,6 @@ void PPPMKokkos<DeviceType>::init()
// don't invoke allocate peratom(), will be allocated when needed
allocate();
cg->ghost_notify();
cg->setup();
// pre-compute Green's function denominator expansion
// pre-compute 1d charge distribution coefficients
@ -564,11 +555,9 @@ void PPPMKokkos<DeviceType>::setup_grid()
allocate();
cg->ghost_notify();
if (overlap_allowed == 0 && cg->ghost_overlap())
if (!overlap_allowed && !gc->ghost_adjacent())
error->all(FLERR,"PPPM grid stencil extends "
"beyond nearest neighbor processor");
cg->setup();
// pre-compute Green's function denominator expansion
// pre-compute 1d charge distribution coefficients
@ -576,7 +565,7 @@ void PPPMKokkos<DeviceType>::setup_grid()
compute_gf_denom();
compute_rho_coeff();
// pre-compute volume-dependent coeffs
// pre-compute volume-dependent coeffs for portion of grid I now own
setup();
}
@ -609,11 +598,8 @@ void PPPMKokkos<DeviceType>::compute(int eflag, int vflag)
d_vatom = k_vatom.view<DeviceType>();
}
if (evflag_atom && !peratom_allocate_flag) {
if (evflag_atom && !peratom_allocate_flag)
allocate_peratom();
cg_peratom->ghost_notify();
cg_peratom->setup();
}
x = atomKK->k_x.view<DeviceType>();
f = atomKK->k_f.view<DeviceType>();
@ -667,7 +653,8 @@ void PPPMKokkos<DeviceType>::compute(int eflag, int vflag)
// to fully sum contribution in their 3d bricks
// remap from 3d decomposition to FFT decomposition
cg->reverse_comm(this,REVERSE_RHO);
gc->reverse_comm_kspace(this,1,REVERSE_RHO,
k_gc_buf1,k_gc_buf2,MPI_FFT_SCALAR);
brick2fft();
// compute potential gradient on my FFT grid and
@ -680,12 +667,14 @@ void PPPMKokkos<DeviceType>::compute(int eflag, int vflag)
// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks
cg->forward_comm(this,FORWARD_IK);
gc->forward_comm_kspace(this,3,FORWARD_IK,
k_gc_buf1,k_gc_buf2,MPI_FFT_SCALAR);
// extra per-atom energy/virial communication
if (evflag_atom)
cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
gc->forward_comm_kspace(this,7,FORWARD_IK_PERATOM,
k_gc_buf1,k_gc_buf2,MPI_FFT_SCALAR);
// calculate the force on my particles
@ -844,14 +833,18 @@ void PPPMKokkos<DeviceType>::allocate()
1,0,0,FFT_PRECISION,collective_flag,cuda_aware_flag);
// create ghost grid object for rho and electric field communication
// also create 2 bufs for ghost grid cell comm, passed to GridComm methods
int (*procneigh)[2] = comm->procneigh;
cg = new GridCommKokkos<DeviceType>(lmp,world,3,1,
gc = new GridCommKokkos<DeviceType>(lmp,world,nx_pppm,ny_pppm,nz_pppm,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out);
gc->setup(ngc_buf1,ngc_buf2);
npergrid = 3;
k_gc_buf1 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1);
k_gc_buf2 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2);
}
/* ----------------------------------------------------------------------
@ -876,8 +869,8 @@ void PPPMKokkos<DeviceType>::deallocate()
fft2 = NULL;
delete remap;
remap = NULL;
delete cg;
cg = NULL;
delete gc;
gc = NULL;
}
/* ----------------------------------------------------------------------
@ -899,16 +892,13 @@ void PPPMKokkos<DeviceType>::allocate_peratom()
d_v5_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v5_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1);
// create ghost grid object for rho and electric field communication
// use same GC ghost grid object for peratom grid communication
// but need to reallocate a larger gc_buf1 and gc_buf2
int (*procneigh)[2] = comm->procneigh;
npergrid = 7;
cg_peratom =
new GridCommKokkos<DeviceType>(lmp,world,7,1,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
k_gc_buf1 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1);
k_gc_buf2 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2);
}
/* ----------------------------------------------------------------------
@ -919,9 +909,6 @@ template<class DeviceType>
void PPPMKokkos<DeviceType>::deallocate_peratom()
{
peratom_allocate_flag = 0;
delete cg_peratom;
cg_peratom = NULL;
}
/* ----------------------------------------------------------------------
@ -1185,153 +1172,11 @@ double PPPMKokkos<DeviceType>::final_accuracy()
template<class DeviceType>
void PPPMKokkos<DeviceType>::set_grid_local()
{
// global indices of PPPM grid range from 0 to N-1
// nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
// global PPPM grid that I own without ghost cells
// for slab PPPM, assign z grid as if it were not extended
PPPM::set_grid_local();
nxlo_in = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_pppm);
nxhi_in = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1;
nylo_in = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_pppm);
nyhi_in = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1;
nzlo_in = static_cast<int>
(comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor);
nzhi_in = static_cast<int>
(comm->zsplit[comm->myloc[2]+1] * nz_pppm/slab_volfactor) - 1;
// nlower,nupper = stencil size for mapping particles to PPPM grid
nlower = -(order-1)/2;
nupper = order/2;
// shift values for particle <-> grid mapping
// add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
if (order % 2) shift = OFFSET + 0.5;
else shift = OFFSET;
if (order % 2) shiftone = 0.0;
else shiftone = 0.5;
// nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
// global PPPM grid that my particles can contribute charge to
// effectively nlo_in,nhi_in + ghost cells
// nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
// position a particle in my box can be at
// dist[3] = particle position bound = subbox + skin/2.0 + qdist
// qdist = offset due to TIP4P fictitious charge
// convert to triclinic if necessary
// nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
// for slab PPPM, assign z grid as if it were not extended
double *prd,*sublo,*subhi;
if (triclinic == 0) {
prd = domain->prd;
boxlo[0] = domain->boxlo[0];
boxlo[1] = domain->boxlo[1];
boxlo[2] = domain->boxlo[2];
sublo = domain->sublo;
subhi = domain->subhi;
} else {
prd = domain->prd_lamda;
boxlo[0] = domain->boxlo_lamda[0];
boxlo[1] = domain->boxlo_lamda[1];
boxlo[2] = domain->boxlo_lamda[2];
domain->x2lamda(atomKK->nlocal);
sublo = domain->sublo_lamda;
subhi = domain->subhi_lamda;
}
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;
double dist[3];
double cuthalf = 0.5*neighbor->skin + qdist;
if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf;
else kspacebbox(cuthalf,&dist[0]);
int nlo,nhi;
nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) *
nx_pppm/xprd + shift) - OFFSET;
nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) *
nx_pppm/xprd + shift) - OFFSET;
nxlo_out = nlo + nlower;
nxhi_out = nhi + nupper;
nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) *
ny_pppm/yprd + shift) - OFFSET;
nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) *
ny_pppm/yprd + shift) - OFFSET;
nylo_out = nlo + nlower;
nyhi_out = nhi + nupper;
nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) *
nz_pppm/zprd_slab + shift) - OFFSET;
nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) *
nz_pppm/zprd_slab + shift) - OFFSET;
nzlo_out = nlo + nlower;
nzhi_out = nhi + nupper;
// for slab PPPM, change the grid boundary for processors at +z end
// to include the empty volume between periodically repeating slabs
// for slab PPPM, want charge data communicated from -z proc to +z proc,
// but not vice versa, also want field data communicated from +z proc to
// -z proc, but not vice versa
// this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells)
// also insure no other procs use ghost cells beyond +z limit
if (slabflag == 1) {
if (comm->myloc[2] == comm->procgrid[2]-1)
nzhi_in = nzhi_out = nz_pppm - 1;
nzhi_out = MIN(nzhi_out,nz_pppm-1);
}
// decomposition of FFT mesh
// global indices range from 0 to N-1
// proc owns entire x-dimension, clumps of columns in y,z dimensions
// npey_fft,npez_fft = # of procs in y,z dims
// if nprocs is small enough, proc can own 1 or more entire xy planes,
// else proc owns 2d sub-blocks of yz plane
// me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
// nlo_fft,nhi_fft = lower/upper limit of the section
// of the global FFT mesh that I own
int npey_fft,npez_fft;
if (nz_pppm >= nprocs) {
npey_fft = 1;
npez_fft = nprocs;
} else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
int me_y = me % npey_fft;
int me_z = me / npey_fft;
nxlo_fft = 0;
nxhi_fft = nx_pppm - 1;
nylo_fft = me_y*ny_pppm/npey_fft;
nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
nzlo_fft = me_z*nz_pppm/npez_fft;
nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
// PPPM grid pts owned by this proc, including ghosts
ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
(nzhi_out-nzlo_out+1);
// FFT grids owned by this proc, without ghosts
// nfft = FFT points in FFT decomposition on this proc
// nfft_brick = FFT points in 3d brick-decomposition on this proc
// nfft_both = greater of 2 values
nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) *
(nzhi_fft-nzlo_fft+1);
int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) *
(nzhi_in-nzlo_in+1);
nfft_both = MAX(nfft,nfft_brick);
}
/* ----------------------------------------------------------------------
@ -2568,7 +2413,7 @@ void PPPMKokkos<DeviceType>::operator()(TagPPPM_fieldforce_peratom, const int &i
------------------------------------------------------------------------- */
template<class DeviceType>
void PPPMKokkos<DeviceType>::pack_forward_kspace_kokkos(int flag, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index)
void PPPMKokkos<DeviceType>::pack_forward_grid_kokkos(int flag, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index)
{
typename AT::t_int_2d_um d_list = k_list.view<DeviceType>();
d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL());
@ -2624,11 +2469,12 @@ void PPPMKokkos<DeviceType>::operator()(TagPPPM_pack_forward2, const int &i) con
------------------------------------------------------------------------- */
template<class DeviceType>
void PPPMKokkos<DeviceType>::unpack_forward_kspace_kokkos(int flag, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index)
void PPPMKokkos<DeviceType>::unpack_forward_grid_kokkos(int flag, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index)
{
typename AT::t_int_2d_um d_list = k_list.view<DeviceType>();
d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL());
d_buf = k_buf.view<DeviceType>();
unpack_offset = offset;
nx = (nxhi_out-nxlo_out+1);
ny = (nyhi_out-nylo_out+1);
@ -2652,9 +2498,9 @@ void PPPMKokkos<DeviceType>::operator()(TagPPPM_unpack_forward1, const int &i) c
const int iz = (int) (dlist/(nx*ny));
const int iy = (int) ((dlist - iz*nx*ny)/nx);
const int ix = d_list_index[i] - iz*nx*ny - iy*nx;
d_vdx_brick(iz,iy,ix) = d_buf[3*i];
d_vdy_brick(iz,iy,ix) = d_buf[3*i+1];
d_vdz_brick(iz,iy,ix) = d_buf[3*i+2];
d_vdx_brick(iz,iy,ix) = d_buf[3*i + unpack_offset];
d_vdy_brick(iz,iy,ix) = d_buf[3*i+1 + unpack_offset];
d_vdz_brick(iz,iy,ix) = d_buf[3*i+2 + unpack_offset];
}
template<class DeviceType>
@ -2667,12 +2513,12 @@ void PPPMKokkos<DeviceType>::operator()(TagPPPM_unpack_forward2, const int &i) c
const int ix = d_list_index[i] - iz*nx*ny - iy*nx;
if (eflag_atom) d_u_brick(iz,iy,ix) = d_buf[7*i];
if (vflag_atom) {
d_v0_brick(iz,iy,ix) = d_buf[7*i+1];
d_v1_brick(iz,iy,ix) = d_buf[7*i+2];
d_v2_brick(iz,iy,ix) = d_buf[7*i+3];
d_v3_brick(iz,iy,ix) = d_buf[7*i+4];
d_v4_brick(iz,iy,ix) = d_buf[7*i+5];
d_v5_brick(iz,iy,ix) = d_buf[7*i+6];
d_v0_brick(iz,iy,ix) = d_buf[7*i+1 + unpack_offset];
d_v1_brick(iz,iy,ix) = d_buf[7*i+2 + unpack_offset];
d_v2_brick(iz,iy,ix) = d_buf[7*i+3 + unpack_offset];
d_v3_brick(iz,iy,ix) = d_buf[7*i+4 + unpack_offset];
d_v4_brick(iz,iy,ix) = d_buf[7*i+5 + unpack_offset];
d_v5_brick(iz,iy,ix) = d_buf[7*i+6 + unpack_offset];
}
}
@ -2681,7 +2527,7 @@ void PPPMKokkos<DeviceType>::operator()(TagPPPM_unpack_forward2, const int &i) c
------------------------------------------------------------------------- */
template<class DeviceType>
void PPPMKokkos<DeviceType>::pack_reverse_kspace_kokkos(int /*flag*/, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index)
void PPPMKokkos<DeviceType>::pack_reverse_grid_kokkos(int /*flag*/, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index)
{
typename AT::t_int_2d_um d_list = k_list.view<DeviceType>();
d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL());
@ -2711,11 +2557,12 @@ void PPPMKokkos<DeviceType>::operator()(TagPPPM_pack_reverse, const int &i) cons
------------------------------------------------------------------------- */
template<class DeviceType>
void PPPMKokkos<DeviceType>::unpack_reverse_kspace_kokkos(int /*flag*/, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index)
void PPPMKokkos<DeviceType>::unpack_reverse_grid_kokkos(int /*flag*/, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index)
{
typename AT::t_int_2d_um d_list = k_list.view<DeviceType>();
d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL());
d_buf = k_buf.view<DeviceType>();
unpack_offset = offset;
nx = (nxhi_out-nxlo_out+1);
ny = (nyhi_out-nylo_out+1);
@ -2733,7 +2580,7 @@ void PPPMKokkos<DeviceType>::operator()(TagPPPM_unpack_reverse, const int &i) co
const int iz = (int) (dlist/(nx*ny));
const int iy = (int) ((dlist - iz*nx*ny)/nx);
const int ix = d_list_index[i] - iz*nx*ny - iy*nx;
d_density_brick(iz,iy,ix) += d_buf[i];
d_density_brick(iz,iy,ix) += d_buf[i + unpack_offset];
}
/* ----------------------------------------------------------------------
@ -3044,7 +2891,9 @@ double PPPMKokkos<DeviceType>::memory_usage()
if (peratom_allocate_flag)
bytes += 6 * nbrick * sizeof(FFT_SCALAR);
if (cg) bytes += cg->memory_usage();
// two GridComm bufs
bytes += (ngc_buf1 + ngc_buf2) * npergrid * sizeof(FFT_SCALAR);
return bytes;
}


@ -311,6 +311,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT {
int nx,ny,nz;
typename AT::t_int_1d_um d_list_index;
typename FFT_AT::t_FFT_SCALAR_1d_um d_buf;
int unpack_offset;
DAT::tdual_int_scalar k_flag;
@ -353,10 +354,14 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT {
//double **acons;
typename Kokkos::DualView<F_FLOAT[8][7],Kokkos::LayoutRight,DeviceType>::t_host acons;
// FFTs and grid communication
FFT3dKokkos<DeviceType> *fft1,*fft2;
RemapKokkos<DeviceType> *remap;
GridCommKokkos<DeviceType> *cg;
GridCommKokkos<DeviceType> *cg_peratom;
GridCommKokkos<DeviceType> *gc;
FFT_DAT::tdual_FFT_SCALAR_1d k_gc_buf1,k_gc_buf2;
int ngc_buf1,ngc_buf2,npergrid;
//int **part2grid; // storage for particle -> grid mapping
typename AT::t_int_1d_3 d_part2grid;
@ -403,10 +408,10 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT {
// grid communication
void pack_forward_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int);
void unpack_forward_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int);
void pack_reverse_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int);
void unpack_reverse_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int);
void pack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int);
void unpack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int);
void pack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int);
void unpack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int);
// triclinic


@ -198,6 +198,7 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)
(FFT_SCALAR *) plan->scratch, plan->post_plan);
// scaling if required
if (flag == 1 && plan->scaled) {
norm = plan->norm;
num = plan->normnum;

File diff suppressed because it is too large.


@ -16,55 +16,57 @@
#include "pointers.h"
#ifdef FFT_SINGLE
typedef float FFT_SCALAR;
#define MPI_FFT_SCALAR MPI_FLOAT
#else
typedef double FFT_SCALAR;
#define MPI_FFT_SCALAR MPI_DOUBLE
#endif
namespace LAMMPS_NS {
class GridComm : protected Pointers {
public:
GridComm(class LAMMPS *, MPI_Comm, int, int,
GridComm(class LAMMPS *, MPI_Comm, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int);
GridComm(class LAMMPS *, MPI_Comm, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int);
GridComm(class LAMMPS *, MPI_Comm, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int);
~GridComm();
void ghost_notify();
int ghost_overlap();
void setup();
void forward_comm(class KSpace *, int);
void reverse_comm(class KSpace *, int);
double memory_usage();
virtual ~GridComm();
void setup(int &, int &);
int ghost_adjacent();
void forward_comm_kspace(class KSpace *, int, int, int,
void *, void *, MPI_Datatype);
void reverse_comm_kspace(class KSpace *, int, int, int,
void *, void *, MPI_Datatype);
private:
int me;
int nforward,nreverse;
MPI_Comm gridcomm;
MPI_Request request;
protected:
int me,nprocs;
int layout; // REGULAR or TILED
MPI_Comm gridcomm; // communicator for this class
// usually world, but MSM calls with subset
// in = inclusive indices of 3d grid chunk that I own
// out = inclusive indices of 3d grid chunk I own plus ghosts I use
// proc = 6 neighbor procs that surround me
// ghost = # of my owned grid planes needed from me
// by each of 6 neighbor procs to become their ghost planes
// inputs from caller via constructor
int inxlo,inxhi,inylo,inyhi,inzlo,inzhi;
int outxlo,outxhi,outylo,outyhi,outzlo,outzhi;
int outxlo_max,outxhi_max,outylo_max,outyhi_max,outzlo_max,outzhi_max;
int procxlo,procxhi,procylo,procyhi,proczlo,proczhi;
int ghostxlo,ghostxhi,ghostylo,ghostyhi,ghostzlo,ghostzhi;
int nx,ny,nz; // size of global grid in all 3 dims
int inxlo,inxhi; // inclusive extent of my grid chunk
int inylo,inyhi; // 0 <= in <= N-1
int inzlo,inzhi;
int outxlo,outxhi; // inclusive extent of my grid chunk plus
int outylo,outyhi; // ghost cells in all 6 directions
int outzlo,outzhi; // lo indices can be < 0, hi indices can be >= N
int fullxlo,fullxhi; // extent of grid chunk that caller stores
int fullylo,fullyhi; // can be same as out indices or larger
int fullzlo,fullzhi;
int nbuf;
FFT_SCALAR *buf1,*buf2;
// -------------------------------------------
// internal variables for REGULAR layout
// -------------------------------------------
int procxlo,procxhi; // 6 neighbor procs that adjoin me
int procylo,procyhi; // not used for comm_style = tiled
int proczlo,proczhi;
int ghostxlo,ghostxhi; // # of my owned grid planes needed
int ghostylo,ghostyhi; // by neighbor procs in each dir as their ghost planes
int ghostzlo,ghostzhi;
// swap = exchange of owned and ghost grid cells between 2 procs, including self
struct Swap {
int sendproc; // proc to send to for forward comm
@ -75,9 +77,130 @@ class GridComm : protected Pointers {
int *unpacklist; // 3d array offsets to unpack
};
int nswap;
int nswap,maxswap;
Swap *swap;
// -------------------------------------------
// internal variables for TILED layout
// -------------------------------------------
int *overlap_procs; // length of Nprocs in communicator
MPI_Request *requests; // length of max messages this proc receives
// RCB tree of cut info
// each proc contributes one value, except proc 0
struct RCBinfo {
int dim; // 0,1,2 = which dim the cut is in
int cut; // grid index of lowest cell in upper half of cut
};
RCBinfo *rcbinfo;
// overlap = a proc whose owned cells overlap with my extended ghost box
// includes overlaps across periodic boundaries, can also be self
struct Overlap {
int proc; // proc whose owned cells overlap my ghost cells
int box[6]; // box that overlaps otherproc's owned cells
// this box is wholly contained within global grid
int pbc[3]; // PBC offsets to convert box to a portion of my ghost box
// my ghost box may extend beyond global grid
};
int noverlap,maxoverlap;
Overlap *overlap;
// request = sent to each proc whose owned cells overlap my ghost cells
struct Request {
int sender; // sending proc
int index; // index of overlap on sender
int box[6]; // box that overlaps receiver's owned cells
// wholly contained within global grid
};
Request *srequest,*rrequest;
// response = reply from each proc whose owned cells overlap my ghost cells
struct Response {
int index; // index of my overlap for the initial request
int box[6]; // box that overlaps responder's owned cells
// wholly contained within global grid
// has to be unwrapped by PBC to map to my ghost cells
};
Response *sresponse,*rresponse;
// send = proc to send a subset of my owned cells to, for forward comm
// for reverse comm, proc I receive ghost overlaps with my owned cells from
// offset used in reverse comm to recv a message in middle of a large buffer
struct Send {
int proc;
int npack;
int *packlist;
int offset;
};
// recv = proc to recv a subset of my ghost cells from, for forward comm
// for reverse comm, proc I send a subset of my ghost cells to
// offset used in forward comm to recv a message in middle of a large buffer
struct Recv {
int proc;
int nunpack;
int *unpacklist;
int offset;
};
int adjacent; // 0 on a proc who receives ghosts from a non-neighbor proc
// copy = subset of my owned cells to copy into subset of my ghost cells
// that describes forward comm, for reverse comm it is the opposite
struct Copy {
int npack;
int nunpack;
int *packlist;
int *unpacklist;
};
int nsend,nrecv,ncopy;
Send *send;
Recv *recv;
Copy *copy;
// -------------------------------------------
// internal methods
// -------------------------------------------
void initialize(MPI_Comm, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int);
virtual void setup_regular(int &, int &);
virtual void setup_tiled(int &, int &);
void ghost_box_drop(int *, int *);
void box_drop_grid(int *, int, int, int &, int *);
int ghost_adjacent_regular();
int ghost_adjacent_tiled();
void forward_comm_kspace_regular(class KSpace *, int, int, int,
void *, void *, MPI_Datatype);
void forward_comm_kspace_tiled(class KSpace *, int, int, int,
void *, void *, MPI_Datatype);
void reverse_comm_kspace_regular(class KSpace *, int, int, int,
void *, void *, MPI_Datatype);
void reverse_comm_kspace_tiled(class KSpace *, int, int, int,
void *, void *, MPI_Datatype);
virtual void grow_swap();
void grow_overlap();
int indices(int *&, int, int, int, int, int, int);
};
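With this header, buffer management moves to the caller: setup() reports how many grid values each of the two exchange buffers must hold, the KSpace style scales those counts by the largest number of values it will ever send per grid point, and per-atom communication reuses the same GridComm object with enlarged buffers instead of a second object. A sketch of that ownership pattern, with variable names taken from the PPPM/MSM hunks elsewhere in this commit (illustrative, not a compilable excerpt):

// construction: global grid size, owned extent, owned+ghost extent
gc = new GridComm(lmp,world,nx_pppm,ny_pppm,nz_pppm,
                  nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                  nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out);
gc->setup(ngc_buf1,ngc_buf2);      // required buffer lengths, in grid values

npergrid = 3;                      // rho/E-field comm sends up to 3 values per point
memory->create(gc_buf1,npergrid*ngc_buf1,"pppm:gc_buf1");
memory->create(gc_buf2,npergrid*ngc_buf2,"pppm:gc_buf2");

// later, when per-atom energy/virial comm is needed (up to 7 values per
// point), the buffers are reallocated larger and the object is reused
npergrid = 7;
memory->destroy(gc_buf1);
memory->destroy(gc_buf2);
memory->create(gc_buf1,npergrid*ngc_buf1,"pppm:gc_buf1");
memory->create(gc_buf2,npergrid*ngc_buf2,"pppm:gc_buf2");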


@ -42,21 +42,24 @@ using namespace MathConst;
enum{REVERSE_RHO,REVERSE_AD,REVERSE_AD_PERATOM};
enum{FORWARD_RHO,FORWARD_AD,FORWARD_AD_PERATOM};
/* ---------------------------------------------------------------------- */
MSM::MSM(LAMMPS *lmp) : KSpace(lmp),
MSM::MSM(LAMMPS *lmp)
: KSpace(lmp),
factors(NULL), delxinv(NULL), delyinv(NULL), delzinv(NULL), nx_msm(NULL),
ny_msm(NULL), nz_msm(NULL), nxlo_in(NULL), nylo_in(NULL), nzlo_in(NULL),
nxhi_in(NULL), nyhi_in(NULL), nzhi_in(NULL), nxlo_out(NULL), nylo_out(NULL),
nzlo_out(NULL), nxhi_out(NULL), nyhi_out(NULL), nzhi_out(NULL), ngrid(NULL),
active_flag(NULL), alpha(NULL), betax(NULL), betay(NULL), betaz(NULL), peratom_allocate_flag(0),
levels(0), world_levels(NULL), qgrid(NULL), egrid(NULL), v0grid(NULL), v1grid(NULL),
v2grid(NULL), v3grid(NULL), v4grid(NULL), v5grid(NULL), g_direct(NULL),
v0_direct(NULL), v1_direct(NULL), v2_direct(NULL), v3_direct(NULL), v4_direct(NULL),
v5_direct(NULL), g_direct_top(NULL), v0_direct_top(NULL), v1_direct_top(NULL),
v2_direct_top(NULL), v3_direct_top(NULL), v4_direct_top(NULL), v5_direct_top(NULL),
phi1d(NULL), dphi1d(NULL), procneigh_levels(NULL), cg(NULL), cg_peratom(NULL),
cg_all(NULL), cg_peratom_all(NULL), part2grid(NULL), boxlo(NULL)
active_flag(NULL), alpha(NULL), betax(NULL), betay(NULL), betaz(NULL),
peratom_allocate_flag(0),levels(0),world_levels(NULL),qgrid(NULL),egrid(NULL),
v0grid(NULL), v1grid(NULL),v2grid(NULL),v3grid(NULL),v4grid(NULL),v5grid(NULL),
g_direct(NULL),v0_direct(NULL),v1_direct(NULL),v2_direct(NULL),v3_direct(NULL),
v4_direct(NULL),v5_direct(NULL),g_direct_top(NULL),v0_direct_top(NULL),
v1_direct_top(NULL),v2_direct_top(NULL),v3_direct_top(NULL),v4_direct_top(NULL),
v5_direct_top(NULL),phi1d(NULL),dphi1d(NULL),procneigh_levels(NULL),gcall(NULL),
gc(NULL),gcall_buf1(NULL),gcall_buf2(NULL),gc_buf1(NULL),gc_buf2(NULL),
ngc_buf1(NULL),ngc_buf2(NULL),part2grid(NULL),boxlo(NULL)
{
msmflag = 1;
@ -117,6 +120,7 @@ MSM::~MSM()
delete [] factors;
deallocate();
if (peratom_allocate_flag) deallocate_peratom();
deallocate_levels();
memory->destroy(part2grid);
memory->destroy(g_direct);
memory->destroy(g_direct_top);
@ -132,7 +136,6 @@ MSM::~MSM()
memory->destroy(v3_direct_top);
memory->destroy(v4_direct_top);
memory->destroy(v5_direct_top);
deallocate_levels();
}
/* ----------------------------------------------------------------------
@ -397,17 +400,6 @@ void MSM::setup()
// don't invoke allocate_peratom(), compute() will allocate when needed
allocate();
// setup commgrid
cg_all->ghost_notify();
cg_all->setup();
for (int n=0; n<levels; n++) {
if (!active_flag[n]) continue;
cg[n]->ghost_notify();
cg[n]->setup();
}
}
/* ----------------------------------------------------------------------
@ -448,16 +440,7 @@ void MSM::compute(int eflag, int vflag)
// invoke allocate_peratom() if needed for first time
if (vflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom_all->ghost_notify();
cg_peratom_all->setup();
for (int n=0; n<levels; n++) {
if (!active_flag[n]) continue;
cg_peratom[n]->ghost_notify();
cg_peratom[n]->setup();
}
}
if (vflag_atom && !peratom_allocate_flag) allocate_peratom();
// convert atoms from box to lamda coords
@ -483,7 +466,8 @@ void MSM::compute(int eflag, int vflag)
// to fully sum contribution in their 3d grid
current_level = 0;
cg_all->reverse_comm(this,REVERSE_RHO);
gcall->reverse_comm_kspace(this,1,sizeof(double),REVERSE_RHO,
gcall_buf1,gcall_buf2,MPI_DOUBLE);
// forward communicate charge density values to fill ghost grid points
// compute direct sum interaction and then restrict to coarser grid
@ -491,8 +475,8 @@ void MSM::compute(int eflag, int vflag)
for (int n=0; n<=levels-2; n++) {
if (!active_flag[n]) continue;
current_level = n;
cg[n]->forward_comm(this,FORWARD_RHO);
gc[n]->forward_comm_kspace(this,1,sizeof(double),FORWARD_RHO,
gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
direct(n);
restriction(n);
}
@ -503,11 +487,18 @@ void MSM::compute(int eflag, int vflag)
if (active_flag[levels-1]) {
if (domain->nonperiodic) {
current_level = levels-1;
cg[levels-1]->forward_comm(this,FORWARD_RHO);
gc[levels-1]->
forward_comm_kspace(this,1,sizeof(double),FORWARD_RHO,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
direct_top(levels-1);
cg[levels-1]->reverse_comm(this,REVERSE_AD);
gc[levels-1]->
reverse_comm_kspace(this,1,sizeof(double),REVERSE_AD,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
if (vflag_atom)
cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
gc[levels-1]->
reverse_comm_kspace(this,6,sizeof(double),REVERSE_AD_PERATOM,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
} else {
// Here using MPI_Allreduce is cheaper than using commgrid
grid_swap_forward(levels-1,qgrid[levels-1]);
@ -515,7 +506,9 @@ void MSM::compute(int eflag, int vflag)
grid_swap_reverse(levels-1,egrid[levels-1]);
current_level = levels-1;
if (vflag_atom)
cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
gc[levels-1]->
reverse_comm_kspace(this,6,sizeof(double),REVERSE_AD_PERATOM,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
}
}
@ -527,24 +520,28 @@ void MSM::compute(int eflag, int vflag)
prolongation(n);
current_level = n;
cg[n]->reverse_comm(this,REVERSE_AD);
gc[n]->reverse_comm_kspace(this,1,sizeof(double),REVERSE_AD,
gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
// extra per-atom virial communication
if (vflag_atom)
cg_peratom[n]->reverse_comm(this,REVERSE_AD_PERATOM);
gc[n]->reverse_comm_kspace(this,6,sizeof(double),REVERSE_AD_PERATOM,
gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
}
// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks
current_level = 0;
cg_all->forward_comm(this,FORWARD_AD);
gcall->forward_comm_kspace(this,1,sizeof(double),FORWARD_AD,
gcall_buf1,gcall_buf2,MPI_DOUBLE);
// extra per-atom energy/virial communication
if (vflag_atom)
cg_peratom_all->forward_comm(this,FORWARD_AD_PERATOM);
gcall->forward_comm_kspace(this,6,sizeof(double),FORWARD_AD_PERATOM,
gcall_buf1,gcall_buf2,MPI_DOUBLE);
// calculate the force on my particles (interpolation)
@ -603,8 +600,7 @@ void MSM::compute(int eflag, int vflag)
// convert atoms back from lamda to box coords
if (triclinic)
domain->lamda2x(atom->nlocal);
if (triclinic) domain->lamda2x(atom->nlocal);
}
/* ----------------------------------------------------------------------
@ -621,15 +617,18 @@ void MSM::allocate()
// commgrid using all processors for finest grid level
int (*procneigh_all)[2] = comm->procneigh;
gcall = new GridComm(lmp,world,1,nx_msm[0],ny_msm[0],nz_msm[0],
nxlo_in[0],nxhi_in[0],nylo_in[0],
nyhi_in[0],nzlo_in[0],nzhi_in[0],
nxlo_out_all,nxhi_out_all,nylo_out_all,
nyhi_out_all,nzlo_out_all,nzhi_out_all,
nxlo_out[0],nxhi_out[0],nylo_out[0],
nyhi_out[0],nzlo_out[0],nzhi_out[0]);
cg_all = new GridComm(lmp,world,1,1,
nxlo_in[0],nxhi_in[0],nylo_in[0],nyhi_in[0],nzlo_in[0],nzhi_in[0],
nxlo_out_all,nxhi_out_all,nylo_out_all,nyhi_out_all,nzlo_out_all,nzhi_out_all,
nxlo_out[0],nxhi_out[0],nylo_out[0],nyhi_out[0],nzlo_out[0],nzhi_out[0],
procneigh_all[0][0],procneigh_all[0][1],procneigh_all[1][0],
procneigh_all[1][1],procneigh_all[2][0],procneigh_all[2][1]);
gcall->setup(ngcall_buf1,ngcall_buf2);
npergrid = 1;
memory->create(gcall_buf1,npergrid*ngcall_buf1,"msm:gcall_buf1");
memory->create(gcall_buf2,npergrid*ngcall_buf2,"msm:gcall_buf2");
// allocate memory for each grid level
@ -644,12 +643,23 @@ void MSM::allocate()
if (active_flag[n]) {
int **procneigh = procneigh_levels[n];
cg[n] = new GridComm(lmp,world_levels[n],1,1,
nxlo_in[n],nxhi_in[n],nylo_in[n],nyhi_in[n],nzlo_in[n],nzhi_in[n],
nxlo_out[n],nxhi_out[n],nylo_out[n],nyhi_out[n],nzlo_out[n],nzhi_out[n],
gc[n] = new GridComm(lmp,world_levels[n],2,nx_msm[n],ny_msm[n],nz_msm[n],
nxlo_in[n],nxhi_in[n],nylo_in[n],nyhi_in[n],
nzlo_in[n],nzhi_in[n],
nxlo_out[n],nxhi_out[n],nylo_out[n],nyhi_out[n],
nzlo_out[n],nzhi_out[n],
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
} else cg[n] = nullptr;
gc[n]->setup(ngc_buf1[n],ngc_buf2[n]);
npergrid = 1;
memory->create(gc_buf1[n],npergrid*ngc_buf1[n],"msm:gc_buf1");
memory->create(gc_buf2[n],npergrid*ngc_buf2[n],"msm:gc_buf2");
} else {
gc[n] = nullptr;
gc_buf1[n] = gc_buf2[n] = nullptr;
}
}
}
@ -662,8 +672,11 @@ void MSM::deallocate()
memory->destroy2d_offset(phi1d,-order_allocated);
memory->destroy2d_offset(dphi1d,-order_allocated);
if (cg_all) delete cg_all;
cg_all = nullptr;
if (gcall) delete gcall;
memory->destroy(gcall_buf1);
memory->destroy(gcall_buf2);
gcall = nullptr;
gcall_buf1 = gcall_buf2 = nullptr;
for (int n=0; n<levels; n++) {
if (qgrid[n])
@ -676,10 +689,13 @@ void MSM::deallocate()
if (world_levels[n] != MPI_COMM_NULL)
MPI_Comm_free(&world_levels[n]);
if (cg) {
if (cg[n]) {
delete cg[n];
cg[n] = nullptr;
if (gc) {
if (gc[n]) {
delete gc[n];
memory->destroy(gc_buf1[n]);
memory->destroy(gc_buf2[n]);
gc[n] = nullptr;
gc_buf1[n] = gc_buf2[n] = nullptr;
}
}
}
@ -695,15 +711,11 @@ void MSM::allocate_peratom()
// create commgrid object for per-atom virial using all processors
int (*procneigh_all)[2] = comm->procneigh;
cg_peratom_all =
new GridComm(lmp,world,6,6,
nxlo_in[0],nxhi_in[0],nylo_in[0],nyhi_in[0],nzlo_in[0],nzhi_in[0],
nxlo_out_all,nxhi_out_all,nylo_out_all,nyhi_out_all,nzlo_out_all,nzhi_out_all,
nxlo_out[0],nxhi_out[0],nylo_out[0],nyhi_out[0],nzlo_out[0],nzhi_out[0],
procneigh_all[0][0],procneigh_all[0][1],procneigh_all[1][0],
procneigh_all[1][1],procneigh_all[2][0],procneigh_all[2][1]);
npergrid = 6;
memory->destroy(gcall_buf1);
memory->destroy(gcall_buf2);
memory->create(gcall_buf1,npergrid*ngcall_buf1,"pppm:gcall_buf1");
memory->create(gcall_buf2,npergrid*ngcall_buf2,"pppm:gcall_buf2");
// allocate memory for each grid level
@ -724,13 +736,11 @@ void MSM::allocate_peratom()
// create commgrid object for per-atom virial
if (active_flag[n]) {
int **procneigh = procneigh_levels[n];
cg_peratom[n] =
new GridComm(lmp,world_levels[n],6,6,
nxlo_in[n],nxhi_in[n],nylo_in[n],nyhi_in[n],nzlo_in[n],nzhi_in[n],
nxlo_out[n],nxhi_out[n],nylo_out[n],nyhi_out[n],nzlo_out[n],nzhi_out[n],
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
npergrid = 6;
memory->destroy(gc_buf1[n]);
memory->destroy(gc_buf2[n]);
memory->create(gc_buf1[n],npergrid*ngc_buf1[n],"pppm:gc_buf1");
memory->create(gc_buf2[n],npergrid*ngc_buf2[n],"pppm:gc_buf2");
}
}
}
@ -743,8 +753,6 @@ void MSM::deallocate_peratom()
{
peratom_allocate_flag = 0;
if (cg_peratom_all) delete cg_peratom_all;
for (int n=0; n<levels; n++) {
if (v0grid[n])
memory->destroy3d_offset(v0grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
@ -758,9 +766,6 @@ void MSM::deallocate_peratom()
memory->destroy3d_offset(v4grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
if (v5grid[n])
memory->destroy3d_offset(v5grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
if (cg_peratom)
if (cg_peratom[n]) delete cg_peratom[n];
}
}
@ -772,8 +777,11 @@ void MSM::allocate_levels()
{
ngrid = new int[levels];
cg = new GridComm*[levels];
cg_peratom = new GridComm*[levels];
gc = new GridComm*[levels];
gc_buf1 = new double*[levels];
gc_buf2 = new double*[levels];
ngc_buf1 = new int[levels];
ngc_buf2 = new int[levels];
memory->create(procneigh_levels,levels,3,2,"msm:procneigh_levels");
world_levels = new MPI_Comm[levels];
@ -819,9 +827,8 @@ void MSM::allocate_levels()
v5grid = new double***[levels];
for (int n=0; n<levels; n++) {
cg[n] = NULL;
gc[n] = NULL;
world_levels[n] = MPI_COMM_NULL;
cg_peratom[n] = NULL;
qgrid[n] = NULL;
egrid[n] = NULL;
@ -833,7 +840,6 @@ void MSM::allocate_levels()
v4grid[n] = NULL;
v5grid[n] = NULL;
}
}
/* ----------------------------------------------------------------------
@ -842,15 +848,18 @@ void MSM::allocate_levels()
void MSM::deallocate_levels()
{
if (cg) deallocate();
delete [] ngrid;
ngrid = nullptr;
memory->destroy(procneigh_levels);
delete [] world_levels;
delete [] active_flag;
delete [] cg;
delete [] cg_peratom;
delete [] gc;
delete [] gc_buf1;
delete [] gc_buf2;
delete [] ngc_buf1;
delete [] ngc_buf2;
delete [] alpha;
delete [] betax;
@ -893,8 +902,8 @@ void MSM::deallocate_levels()
world_levels = nullptr;
active_flag = nullptr;
cg = nullptr;
cg_peratom = nullptr;
gc = nullptr;
gc_buf1 = gc_buf2 = nullptr;
alpha = nullptr;
betax = nullptr;
@ -1377,7 +1386,7 @@ void MSM::set_proc_grid(int n)
// define a new MPI communicator for this grid level that only includes active procs
if(world_levels[n] != MPI_COMM_NULL) MPI_Comm_free(&world_levels[n]);
if (world_levels[n] != MPI_COMM_NULL) MPI_Comm_free(&world_levels[n]);
MPI_Comm_split(world,color,me,&world_levels[n]);
if (!active_flag[n]) return;
@ -2434,6 +2443,7 @@ void MSM::prolongation(int n)
be cheaper than using nearest-neighbor communication (commgrid), right
now only works for periodic boundary conditions
------------------------------------------------------------------------- */
void MSM::grid_swap_forward(int n, double*** &gridn)
{
double ***gridn_tmp;
@ -2533,32 +2543,31 @@ void MSM::grid_swap_reverse(int n, double*** &gridn)
pack own values to buf to send to another proc (used by commgrid)
------------------------------------------------------------------------- */
void MSM::pack_forward(int flag, double *buf, int nlist, int *list)
void MSM::pack_forward_grid(int flag, void *vbuf, int nlist, int *list)
{
double *buf = (double *) vbuf;
int n = current_level;
int k = 0;
if (flag == FORWARD_RHO) {
double ***qgridn = qgrid[n];
double *qsrc = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++) {
buf[k++] = qsrc[list[i]];
}
} else if (flag == FORWARD_AD) {
double ***egridn = egrid[n];
double *src = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++)
buf[i] = src[list[i]];
} else if (flag == FORWARD_AD_PERATOM) {
double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];
int k = 0;
if (flag == FORWARD_RHO) {
double *qsrc = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++) {
buf[k++] = qsrc[list[i]];
}
} else if (flag == FORWARD_AD) {
double *src = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++)
buf[i] = src[list[i]];
} else if (flag == FORWARD_AD_PERATOM) {
double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
@ -2580,32 +2589,31 @@ void MSM::pack_forward(int flag, double *buf, int nlist, int *list)
unpack another proc's own values from buf and set own ghost values
------------------------------------------------------------------------- */
void MSM::unpack_forward(int flag, double *buf, int nlist, int *list)
void MSM::unpack_forward_grid(int flag, void *vbuf, int nlist, int *list)
{
double *buf = (double *) vbuf;
int n = current_level;
int k = 0;
if (flag == FORWARD_RHO) {
double ***qgridn = qgrid[n];
double *dest = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++) {
dest[list[i]] = buf[k++];
}
} else if (flag == FORWARD_AD) {
double ***egridn = egrid[n];
double *dest = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++)
dest[list[i]] = buf[k++];
} else if (flag == FORWARD_AD_PERATOM) {
double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];
int k = 0;
if (flag == FORWARD_RHO) {
double *dest = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++) {
dest[list[i]] = buf[k++];
}
} else if (flag == FORWARD_AD) {
double *dest = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++)
dest[list[i]] = buf[k++];
} else if (flag == FORWARD_AD_PERATOM) {
double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
@ -2627,32 +2635,31 @@ void MSM::unpack_forward(int flag, double *buf, int nlist, int *list)
pack ghost values into buf to send to another proc
------------------------------------------------------------------------- */
void MSM::pack_reverse(int flag, double *buf, int nlist, int *list)
void MSM::pack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
{
double *buf = (double *) vbuf;
int n = current_level;
int k = 0;
if (flag == REVERSE_RHO) {
double ***qgridn = qgrid[n];
double *qsrc = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++) {
buf[k++] = qsrc[list[i]];
}
} else if (flag == REVERSE_AD) {
double ***egridn = egrid[n];
double *src = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++)
buf[i] = src[list[i]];
} else if (flag == REVERSE_AD_PERATOM) {
double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];
int k = 0;
if (flag == REVERSE_RHO) {
double *qsrc = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++) {
buf[k++] = qsrc[list[i]];
}
} else if (flag == REVERSE_AD) {
double *src = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++)
buf[i] = src[list[i]];
} else if (flag == REVERSE_AD_PERATOM) {
double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
@ -2674,32 +2681,31 @@ void MSM::pack_reverse(int flag, double *buf, int nlist, int *list)
unpack another proc's ghost values from buf and add to own values
------------------------------------------------------------------------- */
void MSM::unpack_reverse(int flag, double *buf, int nlist, int *list)
void MSM::unpack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
{
double *buf = (double *) vbuf;
int n = current_level;
int k = 0;
if (flag == REVERSE_RHO) {
double ***qgridn = qgrid[n];
double *dest = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++) {
dest[list[i]] += buf[k++];
}
} else if (flag == REVERSE_AD) {
double ***egridn = egrid[n];
double *dest = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++)
dest[list[i]] += buf[k++];
} else if (flag == REVERSE_AD_PERATOM) {
double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];
int k = 0;
if (flag == REVERSE_RHO) {
double *dest = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++) {
dest[list[i]] += buf[k++];
}
} else if (flag == REVERSE_AD) {
double *dest = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++)
dest[list[i]] += buf[k++];
} else if (flag == REVERSE_AD_PERATOM) {
double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
@ -3416,3 +3422,24 @@ void MSM::get_virial_direct_top(int n)
}
}
}
/* ----------------------------------------------------------------------
memory usage of local arrays
------------------------------------------------------------------------- */
double MSM::memory_usage()
{
double bytes = 0;
// NOTE: Stan, fill in other memory allocations here
// all GridComm bufs
bytes += (ngcall_buf1 + ngcall_buf2) * npergrid * sizeof(double);
for (int n=0; n<levels; n++)
if (active_flag[n])
bytes += (ngc_buf1[n] + ngc_buf2[n]) * npergrid * sizeof(double);
return bytes;
}


@ -32,6 +32,7 @@ class MSM : public KSpace {
void setup();
virtual void settings(int, char **);
virtual void compute(int, int);
virtual double memory_usage();
protected:
int me,nprocs;
@ -79,16 +80,21 @@ class MSM : public KSpace {
int procgrid[3]; // procs assigned in each dim of 3d grid
int myloc[3]; // which proc I am in each dim
int ***procneigh_levels; // my 6 neighboring procs, 0/1 = left/right
class GridComm **cg;
class GridComm **cg_peratom;
class GridComm *cg_all;
class GridComm *cg_peratom_all;
class GridComm *gcall; // GridComm class for finest level grid
class GridComm **gc; // GridComm classes for each hierarchical level
double *gcall_buf1,*gcall_buf2;
double **gc_buf1,**gc_buf2;
int ngcall_buf1,ngcall_buf2,npergrid;
int *ngc_buf1,*ngc_buf2;
int current_level;
int **part2grid; // storage for particle -> grid mapping
int nmax;
int triclinic;
double *boxlo;
void set_grid_global();
@ -126,15 +132,12 @@ class MSM : public KSpace {
void get_g_direct_top(int);
void get_virial_direct_top(int);
// triclinic
int triclinic;
// grid communication
void pack_forward(int, double *, int, int *);
void unpack_forward(int, double *, int, int *);
void pack_reverse(int, double *, int, int *);
void unpack_reverse(int, double *, int, int *);
void pack_forward_grid(int, void *, int, int *);
void unpack_forward_grid(int, void *, int, int *);
void pack_reverse_grid(int, void *, int, int *);
void unpack_reverse_grid(int, void *, int, int *);
};
}


@ -91,17 +91,7 @@ void MSMCG::compute(int eflag, int vflag)
// invoke allocate_peratom() if needed for first time
if (vflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom_all->ghost_notify();
cg_peratom_all->setup();
for (int n=0; n<levels; n++) {
if (!active_flag[n]) continue;
cg_peratom[n]->ghost_notify();
cg_peratom[n]->setup();
}
peratom_allocate_flag = 1;
}
if (vflag_atom && !peratom_allocate_flag) allocate_peratom();
// extend size of per-atom arrays if necessary
@ -171,7 +161,8 @@ void MSMCG::compute(int eflag, int vflag)
// to fully sum contribution in their 3d grid
current_level = 0;
cg_all->reverse_comm(this,REVERSE_RHO);
gcall->reverse_comm_kspace(this,1,sizeof(double),REVERSE_RHO,
gcall_buf1,gcall_buf2,MPI_DOUBLE);
// forward communicate charge density values to fill ghost grid points
// compute direct sum interaction and then restrict to coarser grid
@ -179,24 +170,30 @@ void MSMCG::compute(int eflag, int vflag)
for (int n=0; n<=levels-2; n++) {
if (!active_flag[n]) continue;
current_level = n;
cg[n]->forward_comm(this,FORWARD_RHO);
gc[n]->forward_comm_kspace(this,1,sizeof(double),FORWARD_RHO,
gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
direct(n);
restriction(n);
}
// compute direct interaction for top grid level for non-periodic
// and for second from top grid level for periodic
if (active_flag[levels-1]) {
if (domain->nonperiodic) {
current_level = levels-1;
cg[levels-1]->forward_comm(this,FORWARD_RHO);
gc[levels-1]->
forward_comm_kspace(this,1,sizeof(double),FORWARD_RHO,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
direct_top(levels-1);
cg[levels-1]->reverse_comm(this,REVERSE_AD);
gc[levels-1]->
reverse_comm_kspace(this,1,sizeof(double),REVERSE_AD,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
if (vflag_atom)
cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
gc[levels-1]->
reverse_comm_kspace(this,6,sizeof(double),REVERSE_AD_PERATOM,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
} else {
// Here using MPI_Allreduce is cheaper than using commgrid
grid_swap_forward(levels-1,qgrid[levels-1]);
@ -204,7 +201,9 @@ void MSMCG::compute(int eflag, int vflag)
grid_swap_reverse(levels-1,egrid[levels-1]);
current_level = levels-1;
if (vflag_atom)
cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
gc[levels-1]->
reverse_comm_kspace(this,6,sizeof(double),REVERSE_AD_PERATOM,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
}
}
@ -216,24 +215,28 @@ void MSMCG::compute(int eflag, int vflag)
prolongation(n);
current_level = n;
cg[n]->reverse_comm(this,REVERSE_AD);
gc[n]->reverse_comm_kspace(this,1,sizeof(double),REVERSE_AD,
gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
// extra per-atom virial communication
if (vflag_atom)
cg_peratom[n]->reverse_comm(this,REVERSE_AD_PERATOM);
gc[n]->reverse_comm_kspace(this,6,sizeof(double),REVERSE_AD_PERATOM,
gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
}
// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks
current_level = 0;
cg_all->forward_comm(this,FORWARD_AD);
gcall->forward_comm_kspace(this,1,sizeof(double),FORWARD_AD,
gcall_buf1,gcall_buf2,MPI_DOUBLE);
// extra per-atom energy/virial communication
if (vflag_atom)
cg_peratom_all->forward_comm(this,FORWARD_AD_PERATOM);
gcall->forward_comm_kspace(this,6,sizeof(double),FORWARD_AD_PERATOM,
gcall_buf1,gcall_buf2,MPI_DOUBLE);
// calculate the force on my particles (interpolation)
@ -536,6 +539,9 @@ void MSMCG::fieldforce_peratom()
}
}
/* ----------------------------------------------------------------------
memory usage of local arrays
------------------------------------------------------------------------- */
double MSMCG::memory_usage()
{


@ -70,11 +70,12 @@ PPPM::PPPM(LAMMPS *lmp) : KSpace(lmp),
u_brick(NULL), v0_brick(NULL), v1_brick(NULL), v2_brick(NULL), v3_brick(NULL),
v4_brick(NULL), v5_brick(NULL), greensfn(NULL), vg(NULL), fkx(NULL), fky(NULL),
fkz(NULL), density_fft(NULL), work1(NULL), work2(NULL), gf_b(NULL), rho1d(NULL),
rho_coeff(NULL), drho1d(NULL), drho_coeff(NULL), sf_precoeff1(NULL), sf_precoeff2(NULL),
sf_precoeff3(NULL), sf_precoeff4(NULL), sf_precoeff5(NULL), sf_precoeff6(NULL),
acons(NULL), density_A_brick(NULL), density_B_brick(NULL), density_A_fft(NULL),
density_B_fft(NULL), fft1(NULL), fft2(NULL), remap(NULL), cg(NULL), cg_peratom(NULL),
part2grid(NULL), boxlo(NULL)
rho_coeff(NULL), drho1d(NULL), drho_coeff(NULL),
sf_precoeff1(NULL), sf_precoeff2(NULL), sf_precoeff3(NULL),
sf_precoeff4(NULL), sf_precoeff5(NULL), sf_precoeff6(NULL),
acons(NULL), fft1(NULL), fft2(NULL), remap(NULL), gc(NULL),
gc_buf1(NULL), gc_buf2(NULL), density_A_brick(NULL), density_B_brick(NULL), density_A_fft(NULL),
density_B_fft(NULL), part2grid(NULL), boxlo(NULL)
{
peratom_allocate_flag = 0;
group_allocate_flag = 0;
@ -117,8 +118,8 @@ PPPM::PPPM(LAMMPS *lmp) : KSpace(lmp),
fft1 = fft2 = NULL;
remap = NULL;
cg = NULL;
cg_peratom = NULL;
gc = NULL;
gc_buf1 = gc_buf2 = NULL;
nmax = 0;
part2grid = NULL;
@ -205,9 +206,6 @@ void PPPM::init()
"slab correction");
if (domain->dimension == 2)
error->all(FLERR,"Cannot use PPPM with 2d simulation");
if (comm->style != 0)
error->universe_all(FLERR,"PPPM can only currently be used with "
"comm_style brick");
if (!atom->q_flag)
error->all(FLERR,"Kspace style requires atom attribute q");
@ -297,9 +295,7 @@ void PPPM::init()
// or overlap is allowed, then done
// else reduce order and try again
int (*procneigh)[2] = comm->procneigh;
GridComm *cgtmp = NULL;
GridComm *gctmp = NULL;
int iteration = 0;
while (order >= minorder) {
@ -312,24 +308,24 @@ void PPPM::init()
set_grid_local();
if (overlap_allowed) break;
cgtmp = new GridComm(lmp,world,1,1,
gctmp = new GridComm(lmp,world,nx_pppm,ny_pppm,nz_pppm,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
cgtmp->ghost_notify();
if (!cgtmp->ghost_overlap()) break;
delete cgtmp;
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out);
int tmp1,tmp2;
gctmp->setup(tmp1,tmp2);
if (gctmp->ghost_adjacent()) break;
delete gctmp;
order--;
iteration++;
}
if (order < minorder) error->all(FLERR,"PPPM order < minimum allowed order");
if (!overlap_allowed && cgtmp->ghost_overlap())
if (!overlap_allowed && !gctmp->ghost_adjacent())
error->all(FLERR,"PPPM grid stencil extends "
"beyond nearest neighbor processor");
if (cgtmp) delete cgtmp;
if (gctmp) delete gctmp;
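This retry loop keeps shrinking the interpolation order until the ghost region produced by set_grid_local() stays within nearest-neighbor processors, as reported by GridComm::ghost_adjacent() on a throwaway GridComm. A standalone sketch of the pattern, with toy stand-ins for those two calls (not LAMMPS code):

#include <cstdio>

static int stencil_extent(int order) { return (order - 1) / 2 + 1; }     // toy ghost extent
static bool ghost_fits(int order) { return stencil_extent(order) <= 3; } // toy adjacency check

int main()
{
  int order = 7;
  const int minorder = 2;
  while (order >= minorder) {
    if (ghost_fits(order)) break;   // ghost cells fit in neighbor procs: done
    order--;                        // else reduce order and try again
  }
  if (order < minorder) printf("order < minimum allowed order\n");
  else printf("using order %d\n", order);
  return 0;
}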
// adjust g_ewald
@ -363,8 +359,6 @@ void PPPM::init()
// don't invoke allocate peratom() or group(), will be allocated when needed
allocate();
cg->ghost_notify();
cg->setup();
// pre-compute Green's function denominator expansion
// pre-compute 1d charge distribution coefficients
@ -578,11 +572,9 @@ void PPPM::setup_grid()
allocate();
cg->ghost_notify();
if (overlap_allowed == 0 && cg->ghost_overlap())
if (!overlap_allowed && !gc->ghost_adjacent())
error->all(FLERR,"PPPM grid stencil extends "
"beyond nearest neighbor processor");
cg->setup();
// pre-compute Green's function denominator expansion
// pre-compute 1d charge distribution coefficients
@ -591,7 +583,7 @@ void PPPM::setup_grid()
if (differentiation_flag == 1) compute_sf_precoeff();
compute_rho_coeff();
// pre-compute volume-dependent coeffs
// pre-compute volume-dependent coeffs for portion of grid I now own
setup();
}
@ -609,11 +601,7 @@ void PPPM::compute(int eflag, int vflag)
ev_init(eflag,vflag);
if (evflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom->ghost_notify();
cg_peratom->setup();
}
if (evflag_atom && !peratom_allocate_flag) allocate_peratom();
// if atom count has changed, update qsum and qsqsum
@ -652,7 +640,8 @@ void PPPM::compute(int eflag, int vflag)
// to fully sum contribution in their 3d bricks
// remap from 3d decomposition to FFT decomposition
cg->reverse_comm(this,REVERSE_RHO);
gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
brick2fft();
// compute potential gradient on my FFT grid and
@ -665,16 +654,22 @@ void PPPM::compute(int eflag, int vflag)
// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks
if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
else cg->forward_comm(this,FORWARD_IK);
if (differentiation_flag == 1)
gc->forward_comm_kspace(this,1,sizeof(FFT_SCALAR),FORWARD_AD,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
else
gc->forward_comm_kspace(this,3,sizeof(FFT_SCALAR),FORWARD_IK,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
// extra per-atom energy/virial communication
if (evflag_atom) {
if (differentiation_flag == 1 && vflag_atom)
cg_peratom->forward_comm(this,FORWARD_AD_PERATOM);
gc->forward_comm_kspace(this,6,sizeof(FFT_SCALAR),FORWARD_AD_PERATOM,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
else if (differentiation_flag == 0)
cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
gc->forward_comm_kspace(this,7,sizeof(FFT_SCALAR),FORWARD_IK_PERATOM,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
}
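Each call above now states how many values travel per grid point (1 for the ad potential, 3 for the ik E-field, 6 or 7 for the per-atom terms), the datum size, and two flat caller-owned buffers; the pack/unpack callbacks shown further down in this diff fill and drain those buffers. A self-contained sketch of such a 3-value pack/unpack pair, with hypothetical names rather than the LAMMPS virtuals:

typedef double FFT_SCALAR;          // assumes double-precision FFT_SCALAR

struct FieldBricks {                // hypothetical flattened 3d bricks, ghosts included
  FFT_SCALAR *ex, *ey, *ez;
};

// copy my values for the listed cells into the flat buffer (3 per cell)
static void pack3(const FieldBricks &f, void *vbuf, int nlist, const int *list)
{
  FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;
  int n = 0;
  for (int i = 0; i < nlist; i++) {
    buf[n++] = f.ex[list[i]];
    buf[n++] = f.ey[list[i]];
    buf[n++] = f.ez[list[i]];
  }
}

// copy received values from the flat buffer into my ghost cells
static void unpack3(FieldBricks &f, void *vbuf, int nlist, const int *list)
{
  FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;
  int n = 0;
  for (int i = 0; i < nlist; i++) {
    f.ex[list[i]] = buf[n++];
    f.ey[list[i]] = buf[n++];
    f.ez[list[i]] = buf[n++];
  }
}

int main()
{
  FFT_SCALAR ex[4] = {0,1,2,3}, ey[4] = {0,10,20,30}, ez[4] = {0,100,200,300};
  FieldBricks f = {ex, ey, ez};
  const int list[2] = {1, 3};        // cells touched by this swap
  FFT_SCALAR buf[6];                 // nlist * 3 values
  pack3(f, buf, 2, list);
  unpack3(f, buf, 2, list);          // round trip onto the same cells
  return 0;
}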
// calculate the force on my particles
@ -821,21 +816,19 @@ void PPPM::allocate()
1,0,0,FFT_PRECISION,collective_flag);
// create ghost grid object for rho and electric field communication
// also create 2 bufs for ghost grid cell comm, passed to GridComm methods
int (*procneigh)[2] = comm->procneigh;
gc = new GridComm(lmp,world,nx_pppm,ny_pppm,nz_pppm,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out);
if (differentiation_flag == 1)
cg = new GridComm(lmp,world,1,1,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
else
cg = new GridComm(lmp,world,3,1,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
gc->setup(ngc_buf1,ngc_buf2);
if (differentiation_flag) npergrid = 1;
else npergrid = 3;
memory->create(gc_buf1,npergrid*ngc_buf1,"pppm:gc_buf1");
memory->create(gc_buf2,npergrid*ngc_buf2,"pppm:gc_buf2");
}
/* ----------------------------------------------------------------------
@ -886,7 +879,9 @@ void PPPM::deallocate()
delete fft1;
delete fft2;
delete remap;
delete cg;
delete gc;
memory->destroy(gc_buf1);
memory->destroy(gc_buf2);
}
/* ----------------------------------------------------------------------
@ -915,24 +910,16 @@ void PPPM::allocate_peratom()
memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:v5_brick");
// create ghost grid object for rho and electric field communication
// use same GC ghost grid object for peratom grid communication
// but need to reallocate a larger gc_buf1 and gc_buf2
int (*procneigh)[2] = comm->procneigh;
if (differentiation_flag) npergrid = 6;
else npergrid = 7;
if (differentiation_flag == 1)
cg_peratom =
new GridComm(lmp,world,6,1,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
else
cg_peratom =
new GridComm(lmp,world,7,1,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
memory->destroy(gc_buf1);
memory->destroy(gc_buf2);
memory->create(gc_buf1,npergrid*ngc_buf1,"pppm:gc_buf1");
memory->create(gc_buf2,npergrid*ngc_buf2,"pppm:gc_buf2");
}
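Instead of a second GridComm for per-atom output, the same gc is kept and only gc_buf1/gc_buf2 grow to hold the larger per-point payload (6 values with ad differentiation, 7 with ik). A standalone sketch of that sizing convention, assuming the counts returned by setup() are numbers of grid points and using std::vector purely for illustration:

#include <vector>

typedef double FFT_SCALAR;

int main()
{
  const int ngc_buf1 = 1000, ngc_buf2 = 1200;  // made-up counts from setup()
  std::vector<FFT_SCALAR> buf1, buf2;

  int npergrid = 3;                            // ik field comm: Ex,Ey,Ez
  buf1.resize(npergrid * ngc_buf1);
  buf2.resize(npergrid * ngc_buf2);

  npergrid = 7;                                // ik per-atom comm: u plus 6 virial terms
  buf1.resize(npergrid * ngc_buf1);            // regrow, same comm object reused
  buf2.resize(npergrid * ngc_buf2);
  return 0;
}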
/* ----------------------------------------------------------------------
@ -952,8 +939,6 @@ void PPPM::deallocate_peratom()
if (differentiation_flag != 1)
memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out);
delete cg_peratom;
}
/* ----------------------------------------------------------------------
@ -1002,7 +987,8 @@ void PPPM::set_grid_global()
int count = 0;
while (1) {
// set grid dimension
// set grid dimensions
nx_pppm = static_cast<int> (xprd/h_x);
ny_pppm = static_cast<int> (yprd/h_y);
nz_pppm = static_cast<int> (zprd_slab/h_z);
@ -1011,31 +997,16 @@ void PPPM::set_grid_global()
if (ny_pppm <= 1) ny_pppm = 2;
if (nz_pppm <= 1) nz_pppm = 2;
//set local grid dimension
int npey_fft,npez_fft;
if (nz_pppm >= nprocs) {
npey_fft = 1;
npez_fft = nprocs;
} else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
int me_y = me % npey_fft;
int me_z = me / npey_fft;
nxlo_fft = 0;
nxhi_fft = nx_pppm - 1;
nylo_fft = me_y*ny_pppm/npey_fft;
nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
nzlo_fft = me_z*nz_pppm/npez_fft;
nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
// estimate Kspace force error
double df_kspace = compute_df_kspace();
count++;
// break loop if the accuracy has been reached or
// too many loops have been performed
count++;
if (df_kspace <= accuracy) break;
if (count > 500) error->all(FLERR, "Could not compute grid size");
h *= 0.95;
h_x = h_y = h_z = h;
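The search above rebuilds the grid from the current spacing h, estimates the k-space force error with compute_df_kspace(), and shrinks h by 5% per pass until the error meets the requested accuracy, giving up after 500 passes. A standalone sketch of that refinement loop with a toy stand-in for the error estimate:

#include <cstdio>

static double err_estimate(double h) { return 0.05 * h * h; }  // toy model of compute_df_kspace()

int main()
{
  const double accuracy = 1.0e-4;
  double h = 1.0;
  int count = 0;
  while (1) {
    count++;
    if (err_estimate(h) <= accuracy) break;   // accuracy reached
    if (count > 500) { printf("Could not compute grid size\n"); return 1; }
    h *= 0.95;                                // finer grid, try again
  }
  printf("h = %g after %d passes\n", h, count);
  return 0;
}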
@ -1163,7 +1134,11 @@ double PPPM::compute_df_kspace()
double PPPM::compute_qopt()
{
double qopt = 0.0;
int k,l,m,nx,ny,nz;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double u1,u2,sqk;
double sum1,sum2,sum3,sum4,dot2;
double *prd = domain->prd;
const double xprd = prd[0];
@ -1176,30 +1151,30 @@ double PPPM::compute_qopt()
const double unitky = (MY_2PI/yprd);
const double unitkz = (MY_2PI/zprd_slab);
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double u1, u2, sqk;
double sum1,sum2,sum3,sum4,dot2;
int k,l,m,nx,ny,nz;
const int twoorder = 2*order;
for (m = nzlo_fft; m <= nzhi_fft; m++) {
// loop over entire FFT grid
// each proc calculates contributions from every Pth grid point
bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm;
int nxy_pppm = nx_pppm * ny_pppm;
double qopt = 0.0;
for (bigint i = me; i < ngridtotal; i += nprocs) {
k = i % nx_pppm;
l = (i/nx_pppm) % ny_pppm;
m = i / nxy_pppm;
const int kper = k - nx_pppm*(2*k/nx_pppm);
const int lper = l - ny_pppm*(2*l/ny_pppm);
const int mper = m - nz_pppm*(2*m/nz_pppm);
for (l = nylo_fft; l <= nyhi_fft; l++) {
const int lper = l - ny_pppm*(2*l/ny_pppm);
for (k = nxlo_fft; k <= nxhi_fft; k++) {
const int kper = k - nx_pppm*(2*k/nx_pppm);
sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
if (sqk == 0.0) continue;
if (sqk != 0.0) {
sum1 = sum2 = sum3 = sum4 = 0.0;
sum1 = 0.0;
sum2 = 0.0;
sum3 = 0.0;
sum4 = 0.0;
for (nx = -2; nx <= 2; nx++) {
qx = unitkx*(kper+nx_pppm*nx);
sx = exp(-0.25*square(qx/g_ewald));
@ -1224,6 +1199,7 @@ double PPPM::compute_qopt()
dot2 = qx+qy+qz;
u1 = sx*sy*sz;
u2 = wx*wy*wz;
sum1 += u1*u1/dot2*MY_4PI*MY_4PI;
sum2 += u1 * u2 * MY_4PI;
sum3 += u2;
@ -1231,12 +1207,13 @@ double PPPM::compute_qopt()
}
}
}
sum2 *= sum2;
qopt += sum1 - sum2/(sum3*sum4);
}
}
}
}
// sum qopt over all procs
double qopt_all;
MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world);
return qopt_all;
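The rewritten loop no longer walks this proc's own x-pencil portion of the FFT mesh; every rank now strides through the full nx_pppm*ny_pppm*nz_pppm grid, taking every nprocs-th point and recovering (k,l,m) from the flat index, which spreads the qopt work evenly regardless of the decomposition. A standalone sketch of that index arithmetic with illustrative sizes:

#include <cstdint>
#include <cstdio>

int main()
{
  const int nx = 4, ny = 3, nz = 2;        // toy global grid
  const int me = 1, nprocs = 3;            // toy rank and comm size
  const int64_t ntotal = (int64_t) nx * ny * nz;
  const int nxy = nx * ny;

  for (int64_t i = me; i < ntotal; i += nprocs) {
    const int k = (int) (i % nx);          // fastest-varying: x
    const int l = (int) ((i / nx) % ny);   // then y
    const int m = (int) (i / nxy);         // slowest: z
    printf("i=%lld -> (k,l,m) = (%d,%d,%d)\n", (long long) i, k, l, m);
  }
  return 0;
}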
@ -1349,7 +1326,9 @@ void PPPM::set_grid_local()
// nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
// global PPPM grid that I own without ghost cells
// for slab PPPM, assign z grid as if it were not extended
// both non-tiled and tiled proc layouts use 0-1 fractional subdomain info
if (comm->layout != Comm::LAYOUT_TILED) {
nxlo_in = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_pppm);
nxhi_in = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1;
@ -1361,6 +1340,17 @@ void PPPM::set_grid_local()
nzhi_in = static_cast<int>
(comm->zsplit[comm->myloc[2]+1] * nz_pppm/slab_volfactor) - 1;
} else {
nxlo_in = static_cast<int> (comm->mysplit[0][0] * nx_pppm);
nxhi_in = static_cast<int> (comm->mysplit[0][1] * nx_pppm) - 1;
nylo_in = static_cast<int> (comm->mysplit[1][0] * ny_pppm);
nyhi_in = static_cast<int> (comm->mysplit[1][1] * ny_pppm) - 1;
nzlo_in = static_cast<int> (comm->mysplit[2][0] * nz_pppm/slab_volfactor);
nzhi_in = static_cast<int> (comm->mysplit[2][1] * nz_pppm/slab_volfactor) - 1;
}
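Both branches apply the same rule: scale the proc's fractional subdomain bounds (0-1, from xsplit/ysplit/zsplit or mysplit) by the global grid size to get the first and last owned grid points. A standalone sketch of that mapping for one dimension:

#include <cstdio>

// map a fractional subdomain [lo,hi) onto owned global grid indices
static void owned_range(double lo, double hi, int nglobal, int &ilo, int &ihi)
{
  ilo = static_cast<int> (lo * nglobal);       // first owned grid point
  ihi = static_cast<int> (hi * nglobal) - 1;   // last owned grid point
}

int main()
{
  int ilo, ihi;
  owned_range(0.25, 0.50, 32, ilo, ihi);       // own the 2nd quarter of a 32-point axis
  printf("own %d..%d\n", ilo, ihi);            // prints: own 8..15
  return 0;
}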
// nlower,nupper = stencil size for mapping particles to PPPM grid
nlower = -(order-1)/2;
@ -1446,22 +1436,26 @@ void PPPM::set_grid_local()
// -z proc, but not vice versa
// this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells)
// also insure no other procs use ghost cells beyond +z limit
// different logic for non-tiled vs tiled decomposition
if (slabflag == 1) {
if (comm->myloc[2] == comm->procgrid[2]-1)
nzhi_in = nzhi_out = nz_pppm - 1;
if (comm->layout != Comm::LAYOUT_TILED) {
if (comm->myloc[2] == comm->procgrid[2]-1) nzhi_in = nzhi_out = nz_pppm - 1;
} else {
if (comm->mysplit[2][1] == 1.0) nzhi_in = nzhi_out = nz_pppm - 1;
}
nzhi_out = MIN(nzhi_out,nz_pppm-1);
}
// decomposition of FFT mesh
// x-pencil decomposition of FFT mesh
// global indices range from 0 to N-1
// proc owns entire x-dimension, clumps of columns in y,z dimensions
// each proc owns entire x-dimension, clumps of columns in y,z dimensions
// npey_fft,npez_fft = # of procs in y,z dims
// if nprocs is small enough, proc can own 1 or more entire xy planes,
// else proc owns 2d sub-blocks of yz plane
// me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
// nlo_fft,nhi_fft = lower/upper limit of the section
// of the global FFT mesh that I own
// of the global FFT mesh that I own in x-pencil decomposition
int npey_fft,npez_fft;
if (nz_pppm >= nprocs) {
@ -1479,13 +1473,13 @@ void PPPM::set_grid_local()
nzlo_fft = me_z*nz_pppm/npez_fft;
nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
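The x-pencil ownership described in the comments above boils down to factoring nprocs into npey_fft*npez_fft and slicing only the y and z dimensions. A standalone sketch of that arithmetic, with fixed toy factors in place of procs2grid2d():

#include <cstdio>

int main()
{
  const int ny = 12, nz = 8;          // toy global FFT mesh in y,z
  const int npey = 2, npez = 3;       // assumed y,z proc factors (6 ranks total)
  for (int me = 0; me < npey*npez; me++) {
    const int me_y = me % npey;
    const int me_z = me / npey;
    const int nylo = me_y*ny/npey,  nyhi = (me_y+1)*ny/npey - 1;
    const int nzlo = me_z*nz/npez,  nzhi = (me_z+1)*nz/npez - 1;
    printf("rank %d owns all of x, y %d..%d, z %d..%d\n", me, nylo, nyhi, nzlo, nzhi);
  }
  return 0;
}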
// PPPM grid pts owned by this proc, including ghosts
// ngrid = count of PPPM grid pts owned by this proc, including ghosts
ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
(nzhi_out-nzlo_out+1);
// FFT grids owned by this proc, without ghosts
// nfft = FFT points in FFT decomposition on this proc
// count of FFT grid pts owned by this proc, without ghosts
// nfft = FFT points in x-pencil FFT decomposition on this proc
// nfft_brick = FFT points in 3d brick-decomposition on this proc
// nfft_both = greater of 2 values
@ -2637,8 +2631,10 @@ void PPPM::fieldforce_peratom()
pack own values to buf to send to another proc
------------------------------------------------------------------------- */
void PPPM::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPM::pack_forward_grid(int flag, void *vbuf, int nlist, int *list)
{
FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;
int n = 0;
if (flag == FORWARD_IK) {
@ -2695,8 +2691,10 @@ void PPPM::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
unpack another proc's own values from buf and set own ghost values
------------------------------------------------------------------------- */
void PPPM::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPM::unpack_forward_grid(int flag, void *vbuf, int nlist, int *list)
{
FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;
int n = 0;
if (flag == FORWARD_IK) {
@ -2753,8 +2751,10 @@ void PPPM::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
pack ghost values into buf to send to another proc
------------------------------------------------------------------------- */
void PPPM::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPM::pack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
{
FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;
if (flag == REVERSE_RHO) {
FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++)
@ -2766,8 +2766,10 @@ void PPPM::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
unpack another proc's ghost values from buf and add to own values
------------------------------------------------------------------------- */
void PPPM::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPM::unpack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
{
FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;
if (flag == REVERSE_RHO) {
FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++)
@ -3056,6 +3058,7 @@ int PPPM::timing_3d(int n, double &time3d)
double PPPM::memory_usage()
{
double bytes = nmax*3 * sizeof(double);
int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
(nzhi_out-nzlo_out+1);
if (differentiation_flag == 1) {
@ -3063,6 +3066,7 @@ double PPPM::memory_usage()
} else {
bytes += 4 * nbrick * sizeof(FFT_SCALAR);
}
if (triclinic) bytes += 3 * nfft_both * sizeof(double);
bytes += 6 * nfft_both * sizeof(double);
bytes += nfft_both * sizeof(double);
@ -3076,7 +3080,9 @@ double PPPM::memory_usage()
bytes += 2 * nfft_both * sizeof(FFT_SCALAR);
}
if (cg) bytes += cg->memory_usage();
// two GridComm bufs
bytes += (ngc_buf1 + ngc_buf2) * npergrid * sizeof(FFT_SCALAR);
return bytes;
}
@ -3134,7 +3140,8 @@ void PPPM::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag)
density_brick = density_A_brick;
density_fft = density_A_fft;
cg->reverse_comm(this,REVERSE_RHO);
gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
brick2fft();
// group B
@ -3142,7 +3149,8 @@ void PPPM::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag)
density_brick = density_B_brick;
density_fft = density_B_fft;
cg->reverse_comm(this,REVERSE_RHO);
gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
brick2fft();
// switch back pointers
@ -3436,7 +3444,7 @@ void PPPM::poisson_groups_triclinic()
}
/* ----------------------------------------------------------------------
Slab-geometry correction term to dampen inter-slab interactions between
slab-geometry correction term to dampen inter-slab interactions between
periodically repeating slabs. Yields good approximation to 2D Ewald if
adequate empty space is left between repeating slabs (J. Chem. Phys.
111, 3155). Slabs defined here to be parallel to the xy plane. Also


@ -96,17 +96,21 @@ class PPPM : public KSpace {
double sf_coeff[6]; // coefficients for calculating ad self-forces
double **acons;
// FFTs and grid communication
class FFT3d *fft1,*fft2;
class Remap *remap;
class GridComm *gc;
FFT_SCALAR *gc_buf1,*gc_buf2;
int ngc_buf1,ngc_buf2,npergrid;
// group-group interactions
int group_allocate_flag;
FFT_SCALAR ***density_A_brick,***density_B_brick;
FFT_SCALAR *density_A_fft,*density_B_fft;
class FFT3d *fft1,*fft2;
class Remap *remap;
class GridComm *cg;
class GridComm *cg_peratom;
int **part2grid; // storage for particle -> grid mapping
int nmax;
@ -160,10 +164,10 @@ class PPPM : public KSpace {
// grid communication
virtual void pack_forward(int, FFT_SCALAR *, int, int *);
virtual void unpack_forward(int, FFT_SCALAR *, int, int *);
virtual void pack_reverse(int, FFT_SCALAR *, int, int *);
virtual void unpack_reverse(int, FFT_SCALAR *, int, int *);
virtual void pack_forward_grid(int, void *, int, int *);
virtual void unpack_forward_grid(int, void *, int, int *);
virtual void pack_reverse_grid(int, void *, int, int *);
virtual void unpack_reverse_grid(int, void *, int, int *);
// triclinic


@ -90,11 +90,7 @@ void PPPMCG::compute(int eflag, int vflag)
ev_init(eflag,vflag);
if (evflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom->ghost_notify();
cg_peratom->setup();
}
if (evflag_atom && !peratom_allocate_flag) allocate_peratom();
// if atom count has changed, update qsum and qsqsum
@ -162,6 +158,7 @@ void PPPMCG::compute(int eflag, int vflag)
}
// only need to rebuild this list after a neighbor list update
if (neighbor->ago == 0) {
num_charged = 0;
for (int i = 0; i < atom->nlocal; ++i) {
@ -182,7 +179,8 @@ void PPPMCG::compute(int eflag, int vflag)
// to fully sum contribution in their 3d bricks
// remap from 3d decomposition to FFT decomposition
cg->reverse_comm(this,REVERSE_RHO);
gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
brick2fft();
// compute potential gradient on my FFT grid and
@ -195,16 +193,22 @@ void PPPMCG::compute(int eflag, int vflag)
// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks
if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
else cg->forward_comm(this,FORWARD_IK);
if (differentiation_flag == 1)
gc->forward_comm_kspace(this,1,sizeof(FFT_SCALAR),FORWARD_AD,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
else
gc->forward_comm_kspace(this,3,sizeof(FFT_SCALAR),FORWARD_IK,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
// extra per-atom energy/virial communication
if (evflag_atom) {
if (differentiation_flag == 1 && vflag_atom)
cg_peratom->forward_comm(this,FORWARD_AD_PERATOM);
gc->forward_comm_kspace(this,6,sizeof(FFT_SCALAR),FORWARD_AD_PERATOM,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
else if (differentiation_flag == 0)
cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
gc->forward_comm_kspace(this,7,sizeof(FFT_SCALAR),FORWARD_IK_PERATOM,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
}
// calculate the force on my particles


@ -79,8 +79,7 @@ PPPMDipole::PPPMDipole(LAMMPS *lmp) : PPPM(lmp),
dipoleflag = 1;
group_group_enable = 0;
cg_dipole = NULL;
cg_peratom_dipole = NULL;
gc_dipole = NULL;
}
/* ----------------------------------------------------------------------
@ -93,10 +92,6 @@ PPPMDipole::~PPPMDipole()
deallocate();
if (peratom_allocate_flag) deallocate_peratom();
fft1 = NULL;
fft2 = NULL;
remap = NULL;
cg_dipole = NULL;
}
/* ----------------------------------------------------------------------
@ -195,9 +190,7 @@ void PPPMDipole::init()
// or overlap is allowed, then done
// else reduce order and try again
int (*procneigh)[2] = comm->procneigh;
GridComm *cgtmp = NULL;
GridComm *gctmp = NULL;
int iteration = 0;
while (order >= minorder) {
@ -210,24 +203,24 @@ void PPPMDipole::init()
set_grid_local();
if (overlap_allowed) break;
cgtmp = new GridComm(lmp,world,1,1,
gctmp = new GridComm(lmp,world,nx_pppm,ny_pppm,nz_pppm,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
cgtmp->ghost_notify();
if (!cgtmp->ghost_overlap()) break;
delete cgtmp;
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out);
int tmp1,tmp2;
gctmp->setup(tmp1,tmp2);
if (gctmp->ghost_adjacent()) break;
delete gctmp;
order--;
iteration++;
}
if (order < minorder) error->all(FLERR,"PPPMDipole order < minimum allowed order");
if (!overlap_allowed && cgtmp->ghost_overlap())
if (!overlap_allowed && !gctmp->ghost_adjacent())
error->all(FLERR,"PPPMDipole grid stencil extends "
"beyond nearest neighbor processor");
if (cgtmp) delete cgtmp;
if (gctmp) delete gctmp;
// adjust g_ewald
@ -261,8 +254,6 @@ void PPPMDipole::init()
// don't invoke allocate peratom(), will be allocated when needed
allocate();
cg_dipole->ghost_notify();
cg_dipole->setup();
// pre-compute Green's function denominator expansion
// pre-compute 1d charge distribution coefficients
@ -385,11 +376,9 @@ void PPPMDipole::setup_grid()
allocate();
cg_dipole->ghost_notify();
if (overlap_allowed == 0 && cg_dipole->ghost_overlap())
if (!overlap_allowed && !gc_dipole->ghost_adjacent())
error->all(FLERR,"PPPMDipole grid stencil extends "
"beyond nearest neighbor processor");
cg_dipole->setup();
// pre-compute Green's function denominator expansion
// pre-compute 1d charge distribution coefficients
@ -421,11 +410,7 @@ void PPPMDipole::compute(int eflag, int vflag)
error->all(FLERR,"Cannot (yet) compute per-atom virial "
"with kspace style pppm/dipole");
if (evflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom_dipole->ghost_notify();
cg_peratom_dipole->setup();
}
if (evflag_atom && !peratom_allocate_flag) allocate_peratom();
// if atom count has changed, update qsum and qsqsum
@ -460,7 +445,8 @@ void PPPMDipole::compute(int eflag, int vflag)
// to fully sum contribution in their 3d bricks
// remap from 3d decomposition to FFT decomposition
cg_dipole->reverse_comm(this,REVERSE_MU);
gc_dipole->reverse_comm_kspace(this,3,sizeof(FFT_SCALAR),REVERSE_MU,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
brick2fft_dipole();
// compute potential gradient on my FFT grid and
@ -473,13 +459,14 @@ void PPPMDipole::compute(int eflag, int vflag)
// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks
cg_dipole->forward_comm(this,FORWARD_MU);
gc_dipole->forward_comm_kspace(this,9,sizeof(FFT_SCALAR),FORWARD_MU,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
// extra per-atom energy/virial communication
if (evflag_atom) {
cg_peratom_dipole->forward_comm(this,FORWARD_MU_PERATOM);
}
if (evflag_atom)
gc->forward_comm_kspace(this,18,sizeof(FFT_SCALAR),FORWARD_MU_PERATOM,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
// calculate the force on my particles
@ -522,7 +509,8 @@ void PPPMDipole::compute(int eflag, int vflag)
if (eflag_atom) {
for (i = 0; i < nlocal; i++) {
eatom[i] *= 0.5;
eatom[i] -= (mu[i][0]*mu[i][0] + mu[i][1]*mu[i][1] + mu[i][2]*mu[i][2])*2.0*g3/3.0/MY_PIS;
eatom[i] -= (mu[i][0]*mu[i][0] + mu[i][1]*mu[i][1] +
mu[i][2]*mu[i][2])*2.0*g3/3.0/MY_PIS;
eatom[i] *= qscale;
}
}
@ -619,14 +607,18 @@ void PPPMDipole::allocate()
1,0,0,FFT_PRECISION,collective_flag);
// create ghost grid object for rho and electric field communication
// also create 2 bufs for ghost grid cell comm, passed to GridComm methods
int (*procneigh)[2] = comm->procneigh;
cg_dipole = new GridComm(lmp,world,9,3,
gc_dipole = new GridComm(lmp,world,nx_pppm,ny_pppm,nz_pppm,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out);
gc->setup(ngc_buf1,ngc_buf2);
npergrid = 9;
memory->create(gc_buf1,npergrid*ngc_buf1,"pppm:gc_buf1");
memory->create(gc_buf2,npergrid*ngc_buf2,"pppm:gc_buf2");
}
/* ----------------------------------------------------------------------
@ -674,7 +666,9 @@ void PPPMDipole::deallocate()
delete fft1;
delete fft2;
delete remap;
delete cg_dipole;
delete gc_dipole;
memory->destroy(gc_buf1);
memory->destroy(gc_buf2);
}
/* ----------------------------------------------------------------------
@ -724,16 +718,15 @@ void PPPMDipole::allocate_peratom()
memory->create3d_offset(v5z_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm_dipole:v5z_brick_dipole");
// create ghost grid object for rho and electric field communication
// use same GC ghost grid object for peratom grid communication
// but need to reallocate a larger gc_buf1 and gc_buf2
int (*procneigh)[2] = comm->procneigh;
npergrid = 18;
cg_peratom_dipole =
new GridComm(lmp,world,18,1,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
memory->destroy(gc_buf1);
memory->destroy(gc_buf2);
memory->create(gc_buf1,npergrid*ngc_buf1,"pppm:gc_buf1");
memory->create(gc_buf2,npergrid*ngc_buf2,"pppm:gc_buf2");
}
/* ----------------------------------------------------------------------
@ -764,8 +757,6 @@ void PPPMDipole::deallocate_peratom()
memory->destroy3d_offset(v3z_brick_dipole,nzlo_out,nylo_out,nxlo_out);
memory->destroy3d_offset(v4z_brick_dipole,nzlo_out,nylo_out,nxlo_out);
memory->destroy3d_offset(v5z_brick_dipole,nzlo_out,nylo_out,nxlo_out);
delete cg_peratom_dipole;
}
/* ----------------------------------------------------------------------
@ -2171,8 +2162,10 @@ void PPPMDipole::fieldforce_peratom_dipole()
pack own values to buf to send to another proc
------------------------------------------------------------------------- */
void PPPMDipole::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPMDipole::pack_forward_grid(int flag, void *vbuf, int nlist, int *list)
{
FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;
int n = 0;
if (flag == FORWARD_MU) {
@ -2242,8 +2235,10 @@ void PPPMDipole::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
unpack another proc's own values from buf and set own ghost values
------------------------------------------------------------------------- */
void PPPMDipole::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPMDipole::unpack_forward_grid(int flag, void *vbuf, int nlist, int *list)
{
FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;
int n = 0;
if (flag == FORWARD_MU) {
@ -2313,8 +2308,10 @@ void PPPMDipole::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
pack ghost values into buf to send to another proc
------------------------------------------------------------------------- */
void PPPMDipole::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPMDipole::pack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
{
FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;
int n = 0;
if (flag == REVERSE_MU) {
FFT_SCALAR *src_dipole0 = &densityx_brick_dipole[nzlo_out][nylo_out][nxlo_out];
@ -2332,8 +2329,10 @@ void PPPMDipole::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
unpack another proc's ghost values from buf and add to own values
------------------------------------------------------------------------- */
void PPPMDipole::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPMDipole::unpack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
{
FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;
int n = 0;
if (flag == REVERSE_MU) {
FFT_SCALAR *dest_dipole0 = &densityx_brick_dipole[nzlo_out][nylo_out][nxlo_out];
@ -2484,6 +2483,7 @@ int PPPMDipole::timing_3d(int n, double &time3d)
double PPPMDipole::memory_usage()
{
double bytes = nmax*3 * sizeof(double);
int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
(nzhi_out-nzlo_out+1);
bytes += 6 * nfft_both * sizeof(double); // vg
@ -2495,8 +2495,9 @@ double PPPMDipole::memory_usage()
if (peratom_allocate_flag)
bytes += 21 * nbrick * sizeof(FFT_SCALAR);
if (cg_dipole) bytes += cg_dipole->memory_usage();
if (cg_peratom_dipole) bytes += cg_peratom_dipole->memory_usage();
// two GridComm bufs
bytes += (ngc_buf1 + ngc_buf2) * npergrid * sizeof(FFT_SCALAR);
return bytes;
}


@ -50,10 +50,10 @@ class PPPMDipole : public PPPM {
// grid communication
void pack_forward(int, FFT_SCALAR *, int, int *);
void unpack_forward(int, FFT_SCALAR *, int, int *);
void pack_reverse(int, FFT_SCALAR *, int, int *);
void unpack_reverse(int, FFT_SCALAR *, int, int *);
void pack_forward_grid(int, void *, int, int *);
void unpack_forward_grid(int, void *, int, int *);
void pack_reverse_grid(int, void *, int, int *);
void unpack_reverse_grid(int, void *, int, int *);
// dipole
@ -69,10 +69,12 @@ class PPPMDipole : public PPPM {
FFT_SCALAR ***v3z_brick_dipole,***v4z_brick_dipole,***v5z_brick_dipole;
FFT_SCALAR *work3,*work4;
FFT_SCALAR *densityx_fft_dipole,*densityy_fft_dipole,*densityz_fft_dipole;
class GridComm *cg_dipole;
class GridComm *cg_peratom_dipole;
class GridComm *gc_dipole;
int only_dipole_flag;
double musum,musqsum,mu2;
double find_gewald_dipole(double, double, bigint, double, double);
double newton_raphson_f_dipole(double, double, bigint, double, double);
double derivf_dipole(double, double, bigint, double, double);
@ -87,7 +89,6 @@ class PPPMDipole : public PPPM {
void fieldforce_peratom_dipole();
double final_accuracy_dipole();
void musum_musq();
};
}


@ -81,7 +81,7 @@ PPPMDipoleSpin::~PPPMDipoleSpin()
fft1 = NULL;
fft2 = NULL;
remap = NULL;
cg_dipole = NULL;
gc_dipole = NULL;
}
/* ----------------------------------------------------------------------
@ -175,9 +175,7 @@ void PPPMDipoleSpin::init()
// or overlap is allowed, then done
// else reduce order and try again
int (*procneigh)[2] = comm->procneigh;
GridComm *cgtmp = NULL;
GridComm *gctmp = NULL;
int iteration = 0;
while (order >= minorder) {
@ -190,24 +188,24 @@ void PPPMDipoleSpin::init()
set_grid_local();
if (overlap_allowed) break;
cgtmp = new GridComm(lmp,world,1,1,
gctmp = new GridComm(lmp,world,nx_pppm,ny_pppm,nz_pppm,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
cgtmp->ghost_notify();
if (!cgtmp->ghost_overlap()) break;
delete cgtmp;
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out);
int tmp1,tmp2;
gctmp->setup(tmp1,tmp2);
if (gctmp->ghost_adjacent()) break;
delete gctmp;
order--;
iteration++;
}
if (order < minorder) error->all(FLERR,"PPPMDipoleSpin order < minimum allowed order");
if (!overlap_allowed && cgtmp->ghost_overlap())
if (!overlap_allowed && !gctmp->ghost_adjacent())
error->all(FLERR,"PPPMDipoleSpin grid stencil extends "
"beyond nearest neighbor processor");
if (cgtmp) delete cgtmp;
if (gctmp) delete gctmp;
// adjust g_ewald
@ -241,8 +239,6 @@ void PPPMDipoleSpin::init()
// don't invoke allocate peratom(), will be allocated when needed
allocate();
cg_dipole->ghost_notify();
cg_dipole->setup();
// pre-compute Green's function denominator expansion
// pre-compute 1d charge distribution coefficients
@ -270,11 +266,7 @@ void PPPMDipoleSpin::compute(int eflag, int vflag)
error->all(FLERR,"Cannot (yet) compute per-atom virial "
"with kspace style pppm/dipole/spin");
if (evflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom_dipole->ghost_notify();
cg_peratom_dipole->setup();
}
if (evflag_atom && !peratom_allocate_flag) allocate_peratom();
// if atom count has changed, update qsum and qsqsum
@ -309,7 +301,8 @@ void PPPMDipoleSpin::compute(int eflag, int vflag)
// to fully sum contribution in their 3d bricks
// remap from 3d decomposition to FFT decomposition
cg_dipole->reverse_comm(this,REVERSE_MU);
gc_dipole->reverse_comm_kspace(this,3,sizeof(FFT_SCALAR),REVERSE_MU,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
brick2fft_dipole();
// compute potential gradient on my FFT grid and
@ -322,13 +315,14 @@ void PPPMDipoleSpin::compute(int eflag, int vflag)
// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks
cg_dipole->forward_comm(this,FORWARD_MU);
gc_dipole->forward_comm_kspace(this,9,sizeof(FFT_SCALAR),FORWARD_MU,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
// extra per-atom energy/virial communication
if (evflag_atom) {
cg_peratom_dipole->forward_comm(this,FORWARD_MU_PERATOM);
}
if (evflag_atom)
gc->forward_comm_kspace(this,18,sizeof(FFT_SCALAR),FORWARD_MU_PERATOM,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
// calculate the force on my particles


@ -46,7 +46,6 @@ class PPPMDipoleSpin : public PPPMDipole {
void fieldforce_ik_spin();
void fieldforce_peratom_spin();
void spsum_spsq();
};
}

File diff suppressed because it is too large


@ -42,7 +42,6 @@ typedef double FFT_SCALAR;
namespace LAMMPS_NS {
#define EWALD_MAXORDER 6
#define EWALD_FUNCS 4
@ -190,15 +189,14 @@ Variables needed for calculating the 1/r and 1/r^6 potential
FFT_SCALAR *work1,*work2;
FFT_SCALAR *work1_6, *work2_6;
class FFT3d *fft1,*fft2 ;
class FFT3d *fft1_6, *fft2_6;
class Remap *remap;
class Remap *remap_6;
class GridComm *cg;
class GridComm *cg_peratom;
class GridComm *cg_6;
class GridComm *cg_peratom_6;
class FFT3d *fft1_6,*fft2_6;
class Remap *remap,*remap_6;
class GridComm *gc,*gc6;
FFT_SCALAR *gc_buf1,*gc_buf2,*gc6_buf1,*gc6_buf2;
int ngc_buf1,ngc_buf2,npergrid;
int ngc6_buf1,ngc6_buf2,npergrid6;
int **part2grid; // storage for particle -> grid mapping
int **part2grid_6;
@ -257,7 +255,6 @@ Variables needed for calculating the 1/r and 1/r^6 potential
void compute_gf_denom(double*, int);
double gf_denom(double, double, double, double*, int);
void compute_sf_precoeff(int, int, int, int,
int, int, int,
int, int, int,
@ -268,7 +265,6 @@ Variables needed for calculating the 1/r and 1/r^6 potential
void compute_gf_6();
void compute_sf_coeff_6();
virtual void particle_map(double, double, double,
double, int **, int, int,
int, int, int,
@ -295,8 +291,10 @@ Variables needed for calculating the 1/r and 1/r^6 potential
int, int, int, double&, double *,
double *, double *, double *,
double *, double *, double *,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***, double *, double **, double **,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***, double *, double **, double **,
FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***);
virtual void poisson_ad(FFT_SCALAR*, FFT_SCALAR*,
@ -317,14 +315,18 @@ Variables needed for calculating the 1/r and 1/r^6 potential
virtual void poisson_2s_ik(FFT_SCALAR *, FFT_SCALAR *,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***);
virtual void poisson_2s_ad(FFT_SCALAR *, FFT_SCALAR *,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***);
virtual void poisson_2s_peratom(FFT_SCALAR***, FFT_SCALAR***, FFT_SCALAR***,
@ -339,9 +341,11 @@ Variables needed for calculating the 1/r and 1/r^6 potential
virtual void poisson_none_ik(int, int, FFT_SCALAR *, FFT_SCALAR *,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ****, FFT_SCALAR ****, FFT_SCALAR ****, FFT_SCALAR ****,
FFT_SCALAR ****, FFT_SCALAR ****, FFT_SCALAR ****,
FFT_SCALAR ****,
FFT_SCALAR ****, FFT_SCALAR ****, FFT_SCALAR ****);
virtual void poisson_none_peratom(int, int, FFT_SCALAR***, FFT_SCALAR***, FFT_SCALAR***,
virtual void poisson_none_peratom(int, int,
FFT_SCALAR***, FFT_SCALAR***, FFT_SCALAR***,
FFT_SCALAR***, FFT_SCALAR***, FFT_SCALAR***,
FFT_SCALAR***, FFT_SCALAR***, FFT_SCALAR***,
FFT_SCALAR***, FFT_SCALAR***, FFT_SCALAR***);
@ -369,10 +373,10 @@ Variables needed for calculating the 1/r and 1/r^6 potential
// grid communication
void pack_forward(int, FFT_SCALAR *, int, int *);
void unpack_forward(int, FFT_SCALAR *, int, int *);
void pack_reverse(int, FFT_SCALAR *, int, int *);
void unpack_reverse(int, FFT_SCALAR *, int, int *);
void pack_forward_grid(int, void *, int, int *);
void unpack_forward_grid(int, void *, int, int *);
void pack_reverse_grid(int, void *, int, int *);
void unpack_reverse_grid(int, void *, int, int *);
};
}


@ -123,11 +123,7 @@ void PPPMStagger::compute(int eflag, int vflag)
ev_init(eflag,vflag);
if (evflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom->ghost_notify();
cg_peratom->setup();
}
if (evflag_atom && !peratom_allocate_flag) allocate_peratom();
// convert atoms from box to lamda coords
@ -160,7 +156,8 @@ void PPPMStagger::compute(int eflag, int vflag)
// to fully sum contribution in their 3d bricks
// remap from 3d decomposition to FFT decomposition
cg->reverse_comm(this,REVERSE_RHO);
gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
brick2fft();
// compute potential gradient on my FFT grid and
@ -173,16 +170,22 @@ void PPPMStagger::compute(int eflag, int vflag)
// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks
if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
else cg->forward_comm(this,FORWARD_IK);
if (differentiation_flag == 1)
gc->forward_comm_kspace(this,1,sizeof(FFT_SCALAR),FORWARD_AD,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
else
gc->forward_comm_kspace(this,3,sizeof(FFT_SCALAR),FORWARD_IK,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
// extra per-atom energy/virial communication
if (evflag_atom) {
if (differentiation_flag == 1 && vflag_atom)
cg_peratom->forward_comm(this,FORWARD_AD_PERATOM);
gc->forward_comm_kspace(this,6,sizeof(FFT_SCALAR),FORWARD_AD_PERATOM,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
else if (differentiation_flag == 0)
cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
gc->forward_comm_kspace(this,7,sizeof(FFT_SCALAR),FORWARD_IK_PERATOM,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
}
// calculate the force on my particles
@ -268,10 +271,16 @@ void PPPMStagger::compute(int eflag, int vflag)
double PPPMStagger::compute_qopt()
{
if (differentiation_flag == 1)
return compute_qopt_ad();
if (differentiation_flag == 1) return compute_qopt_ad();
int k,l,m,nx,ny,nz;
double snx,sny,snz;
double cnx,cny,cnz;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double sum1,sum2,dot1,dot2;
double numerator,denominator;
double u1,u2,u3,sqk;
double qopt = 0.0;
const double * const prd = domain->prd;
const double xprd = prd[0];
@ -282,43 +291,43 @@ double PPPMStagger::compute_qopt()
const double unitky = (MY_2PI/yprd);
const double unitkz = (MY_2PI/zprd_slab);
double snx,sny,snz;
double cnx,cny,cnz;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double sum1,sum2,dot1,dot2;
double numerator,denominator;
double u1,u2,u3,sqk;
int k,l,m,nx,ny,nz,kper,lper,mper;
const int nbx = 2;
const int nby = 2;
const int nbz = 2;
const int twoorder = 2*order;
for (m = nzlo_fft; m <= nzhi_fft; m++) {
mper = m - nz_pppm*(2*m/nz_pppm);
// loop over entire FFT grid
// each proc calculates contributions from every Pth grid point
bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm;
int nxy_pppm = nx_pppm * ny_pppm;
double qopt = 0.0;
for (bigint i = me; i < ngridtotal; i += nprocs) {
k = i % nx_pppm;
l = (i/nx_pppm) % ny_pppm;
m = i / nxy_pppm;
const int kper = k - nx_pppm*(2*k/nx_pppm);
const int lper = l - ny_pppm*(2*l/ny_pppm);
const int mper = m - nz_pppm*(2*m/nz_pppm);
sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
if (sqk == 0.0) continue;
snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));
cnx = cos(0.5*unitkx*kper*xprd/nx_pppm);
sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));
cny = cos(0.5*unitky*lper*yprd/ny_pppm);
snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm));
cnz = cos(0.5*unitkz*mper*zprd_slab/nz_pppm);
for (l = nylo_fft; l <= nyhi_fft; l++) {
lper = l - ny_pppm*(2*l/ny_pppm);
sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));
cny = cos(0.5*unitky*lper*yprd/ny_pppm);
for (k = nxlo_fft; k <= nxhi_fft; k++) {
kper = k - nx_pppm*(2*k/nx_pppm);
snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));
cnx = cos(0.5*unitkx*kper*xprd/nx_pppm);
sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
if (sqk != 0.0) {
numerator = MY_4PI/sqk;
denominator = 0.5*(gf_denom(snx,sny,snz) + gf_denom2(cnx,cny,cnz));
sum1 = 0.0;
sum2 = 0.0;
sum1 = sum2 = 0.0;
for (nx = -nbx; nx <= nbx; nx++) {
qx = unitkx*(kper+nx_pppm*nx);
@ -348,11 +357,10 @@ double PPPMStagger::compute_qopt()
}
}
}
qopt += sum1 - sum2/denominator;
}
}
}
}
double qopt_all;
MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world);
return qopt_all;
@ -364,7 +372,11 @@ double PPPMStagger::compute_qopt()
double PPPMStagger::compute_qopt_ad()
{
double qopt = 0.0;
int k,l,m,nx,ny,nz;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double sum1,sum2,sum3,sum4,sum5,sum6,dot2;
double u1,u2,sqk;
const double * const prd = domain->prd;
const double xprd = prd[0];
@ -375,36 +387,33 @@ double PPPMStagger::compute_qopt_ad()
const double unitky = (MY_2PI/yprd);
const double unitkz = (MY_2PI/zprd_slab);
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double sum1,sum2,sum3,sum4,sum5,sum6,dot2;
double u1,u2,sqk;
int k,l,m,nx,ny,nz,kper,lper,mper;
const int nbx = 2;
const int nby = 2;
const int nbz = 2;
const int twoorder = 2*order;
for (m = nzlo_fft; m <= nzhi_fft; m++) {
mper = m - nz_pppm*(2*m/nz_pppm);
// loop over entire FFT grid
// each proc calculates contributions from every Pth grid point
for (l = nylo_fft; l <= nyhi_fft; l++) {
lper = l - ny_pppm*(2*l/ny_pppm);
bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm;
int nxy_pppm = nx_pppm * ny_pppm;
for (k = nxlo_fft; k <= nxhi_fft; k++) {
kper = k - nx_pppm*(2*k/nx_pppm);
double qopt = 0.0;
for (bigint i = me; i < ngridtotal; i += nprocs) {
k = i % nx_pppm;
l = (i/nx_pppm) % ny_pppm;
m = i / nxy_pppm;
const int kper = k - nx_pppm*(2*k/nx_pppm);
const int lper = l - ny_pppm*(2*l/ny_pppm);
const int mper = m - nz_pppm*(2*m/nz_pppm);
sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
if (sqk == 0.0) continue;
if (sqk != 0.0) {
sum1 = 0.0;
sum2 = 0.0;
sum3 = 0.0;
sum4 = 0.0;
sum5 = 0.0;
sum6 = 0.0;
sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0;
for (nx = -nbx; nx <= nbx; nx++) {
qx = unitkx*(kper+nx_pppm*nx);
@ -436,11 +445,10 @@ double PPPMStagger::compute_qopt_ad()
}
}
}
qopt += sum1 - sum2/(0.5*(sum3*sum4 + sum5*sum6));
}
}
}
}
double qopt_all;
MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world);
return qopt_all;


@ -50,7 +50,6 @@ class PPPMStagger : public PPPM {
virtual void fieldforce_ad();
virtual void fieldforce_peratom();
inline double gf_denom2(const double &x, const double &y,
const double &z) const
{


@ -131,9 +131,10 @@ void PPPMDispIntel::init()
// For vectorization, we need some padding at the end
// The first thread computes on the global density
if ((comm->nthreads > 1) && !_use_lrt) {
int mygrid = MAX(ngrid,ngrid_6);
memory->destroy(perthread_density);
memory->create(perthread_density, comm->nthreads-1,
ngrid + INTEL_P3M_ALIGNED_MAXORDER,
mygrid + INTEL_P3M_ALIGNED_MAXORDER,
"pppmdispintel:perthread_density");
}
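The scratch arrays are now sized to the larger of the Coulomb and dispersion bricks, presumably so the same per-thread buffers can serve either density accumulation, plus the usual vector-alignment padding. A standalone sketch of that sizing rule, with PAD standing in for INTEL_P3M_ALIGNED_MAXORDER and all numbers made up:

#include <algorithm>
#include <vector>

int main()
{
  const int ngrid = 4096, ngrid_6 = 5120;  // Coulomb vs dispersion brick sizes
  const int PAD = 8;                       // stand-in for INTEL_P3M_ALIGNED_MAXORDER
  const int nthreads = 4;
  const int mygrid = std::max(ngrid, ngrid_6);
  // one private scratch density per extra thread; thread 0 accumulates on the global brick
  std::vector< std::vector<double> > scratch(nthreads - 1,
      std::vector<double>(mygrid + PAD, 0.0));
  (void) scratch;
  return 0;
}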
@ -173,23 +174,15 @@ void PPPMDispIntel::compute(int eflag, int vflag)
return;
}
#endif
int i;
// convert atoms from box to lamda coords
ev_init(eflag,vflag);
if (evflag_atom && !peratom_allocate_flag) {
allocate_peratom();
if (function[0]) {
cg_peratom->ghost_notify();
cg_peratom->setup();
}
if (function[1] + function[2] + function[3]) {
cg_peratom_6->ghost_notify();
cg_peratom_6->setup();
}
peratom_allocate_flag = 1;
}
if (evflag_atom && !peratom_allocate_flag) allocate_peratom();
if (triclinic == 0) boxlo = domain->boxlo;
else {
boxlo = domain->boxlo_lamda;
@ -299,7 +292,8 @@ void PPPMDispIntel::compute(int eflag, int vflag)
make_rho_c<float,float>(fix->get_single_buffers());
}
cg->reverse_comm(this,REVERSE_RHO);
gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
brick2fft(nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in,
density_brick, density_fft, work1,remap);
@ -312,7 +306,8 @@ void PPPMDispIntel::compute(int eflag, int vflag)
energy_1, greensfn, virial_1, vg,vg2, u_brick, v0_brick,
v1_brick, v2_brick, v3_brick, v4_brick, v5_brick);
cg->forward_comm(this,FORWARD_AD);
gc->forward_comm_kspace(this,1,sizeof(FFT_SCALAR),FORWARD_AD,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
fieldforce_c_ad<float,double>(fix->get_mixed_buffers());
@ -322,7 +317,9 @@ void PPPMDispIntel::compute(int eflag, int vflag)
fieldforce_c_ad<float,float>(fix->get_single_buffers());
}
if (vflag_atom) cg_peratom->forward_comm(this, FORWARD_AD_PERATOM);
if (vflag_atom)
gc->forward_comm_kspace(this,6,sizeof(FFT_SCALAR),FORWARD_AD_PERATOM,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
} else {
poisson_ik(work1, work2, density_fft, fft1, fft2,
@ -334,7 +331,8 @@ void PPPMDispIntel::compute(int eflag, int vflag)
u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick,
v5_brick);
cg->forward_comm(this, FORWARD_IK);
gc->forward_comm_kspace(this,3,sizeof(FFT_SCALAR),FORWARD_IK,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
fieldforce_c_ik<float,double>(fix->get_mixed_buffers());
@ -344,12 +342,15 @@ void PPPMDispIntel::compute(int eflag, int vflag)
fieldforce_c_ik<float,float>(fix->get_single_buffers());
}
if (evflag_atom) cg_peratom->forward_comm(this, FORWARD_IK_PERATOM);
if (evflag_atom)
gc->forward_comm_kspace(this,7,sizeof(FFT_SCALAR),FORWARD_IK_PERATOM,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
}
if (evflag_atom) fieldforce_c_peratom();
}
if (function[1]) {
//perform calculations for geometric mixing
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
@ -375,14 +376,13 @@ void PPPMDispIntel::compute(int eflag, int vflag)
make_rho_g<float,float>(fix->get_single_buffers());
}
cg_6->reverse_comm(this, REVERSE_RHO_G);
gc6->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO_G,
gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);
brick2fft(nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
density_brick_g, density_fft_g, work1_6,remap_6);
if (differentiation_flag == 1) {
poisson_ad(work1_6, work2_6, density_fft_g, fft1_6, fft2_6,
nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6,
@ -391,7 +391,8 @@ void PPPMDispIntel::compute(int eflag, int vflag)
virial_6, vg_6, vg2_6, u_brick_g, v0_brick_g, v1_brick_g,
v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g);
cg_6->forward_comm(this,FORWARD_AD_G);
gc6->forward_comm_kspace(this,1,sizeof(FFT_SCALAR),FORWARD_AD_G,
gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
fieldforce_g_ad<float,double>(fix->get_mixed_buffers());
@ -401,7 +402,9 @@ void PPPMDispIntel::compute(int eflag, int vflag)
fieldforce_g_ad<float,float>(fix->get_single_buffers());
}
if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_G);
if (vflag_atom)
gc6->forward_comm_kspace(this,7,sizeof(FFT_SCALAR),FORWARD_AD_PERATOM_G,
gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);
} else {
poisson_ik(work1_6, work2_6, density_fft_g, fft1_6, fft2_6,
@ -413,7 +416,8 @@ void PPPMDispIntel::compute(int eflag, int vflag)
vdz_brick_g, virial_6, vg_6, vg2_6, u_brick_g, v0_brick_g,
v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g);
cg_6->forward_comm(this,FORWARD_IK_G);
gc6->forward_comm_kspace(this,3,sizeof(FFT_SCALAR),FORWARD_IK_G,
gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
fieldforce_g_ik<float,double>(fix->get_mixed_buffers());
@ -423,9 +427,11 @@ void PPPMDispIntel::compute(int eflag, int vflag)
fieldforce_g_ik<float,float>(fix->get_single_buffers());
}
if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_G);
if (evflag_atom)
gc6->forward_comm_kspace(this,6,sizeof(FFT_SCALAR),FORWARD_IK_PERATOM_G,
gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);
}
if (evflag_atom) fieldforce_g_peratom();
}
@ -455,12 +461,12 @@ void PPPMDispIntel::compute(int eflag, int vflag)
make_rho_a<float,float>(fix->get_single_buffers());
}
cg_6->reverse_comm(this, REVERSE_RHO_A);
gc->reverse_comm_kspace(this,7,sizeof(FFT_SCALAR),REVERSE_RHO_A,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
brick2fft_a();
if ( differentiation_flag == 1) {
if (differentiation_flag == 1) {
poisson_ad(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6,
nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, nxlo_fft_6,
nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
@ -481,7 +487,8 @@ void PPPMDispIntel::compute(int eflag, int vflag)
v5_brick_a2, u_brick_a4, v0_brick_a4, v1_brick_a4,
v2_brick_a4, v3_brick_a4, v4_brick_a4, v5_brick_a4);
cg_6->forward_comm(this, FORWARD_AD_A);
gc6->forward_comm_kspace(this,7,sizeof(FFT_SCALAR),FORWARD_AD_A,
gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
fieldforce_a_ad<float,double>(fix->get_mixed_buffers());
@ -491,10 +498,11 @@ void PPPMDispIntel::compute(int eflag, int vflag)
fieldforce_a_ad<float,float>(fix->get_single_buffers());
}
if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_AD_PERATOM_A);
if (evflag_atom)
gc6->forward_comm_kspace(this,42,sizeof(FFT_SCALAR),FORWARD_AD_PERATOM_A,
gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);
} else {
poisson_ik(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6,
nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, nxlo_fft_6,
nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,
@ -522,7 +530,8 @@ void PPPMDispIntel::compute(int eflag, int vflag)
u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4,
v3_brick_a4, v4_brick_a4, v5_brick_a4);
cg_6->forward_comm(this, FORWARD_IK_A);
gc6->forward_comm_kspace(this,18,sizeof(FFT_SCALAR),FORWARD_IK_A,
gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
fieldforce_a_ik<float,double>(fix->get_mixed_buffers());
@ -532,13 +541,17 @@ void PPPMDispIntel::compute(int eflag, int vflag)
fieldforce_a_ik<float,float>(fix->get_single_buffers());
}
if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_A);
if (evflag_atom)
gc6->forward_comm_kspace(this,49,sizeof(FFT_SCALAR),FORWARD_IK_PERATOM_A,
gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);
}
if (evflag_atom) fieldforce_a_peratom();
}
if (function[3]) {
//perform calculations if no mixing rule applies
// perform calculations if no mixing rule applies
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
particle_map<float,double>(delxinv_6, delyinv_6, delzinv_6, shift_6,
@ -563,7 +576,8 @@ void PPPMDispIntel::compute(int eflag, int vflag)
make_rho_none<float,float>(fix->get_single_buffers());
}
cg_6->reverse_comm(this, REVERSE_RHO_NONE);
gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO_NONE,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
brick2fft_none();
@ -578,7 +592,8 @@ void PPPMDispIntel::compute(int eflag, int vflag)
n += 2;
}
cg_6->forward_comm(this,FORWARD_AD_NONE);
gc6->forward_comm_kspace(this,1,sizeof(FFT_SCALAR),FORWARD_AD_NONE,
gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
fieldforce_none_ad<float,double>(fix->get_mixed_buffers());
@ -588,7 +603,9 @@ void PPPMDispIntel::compute(int eflag, int vflag)
fieldforce_none_ad<float,float>(fix->get_single_buffers());
}
if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_NONE);
if (vflag_atom)
gc6->forward_comm_kspace(this,6*nsplit_alloc,sizeof(FFT_SCALAR),FORWARD_AD_PERATOM_NONE,
gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);
} else {
int n = 0;
@ -604,7 +621,8 @@ void PPPMDispIntel::compute(int eflag, int vflag)
n += 2;
}
cg_6->forward_comm(this,FORWARD_IK_NONE);
gc6->forward_comm_kspace(this,3*nsplit_alloc,sizeof(FFT_SCALAR),FORWARD_IK_NONE,
gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
fieldforce_none_ik<float,double>(fix->get_mixed_buffers());
@ -615,8 +633,10 @@ void PPPMDispIntel::compute(int eflag, int vflag)
}
if (evflag_atom)
cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_NONE);
gc6->forward_comm_kspace(this,7*nsplit_alloc,sizeof(FFT_SCALAR),FORWARD_IK_PERATOM_NONE,
gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);
}
if (evflag_atom) fieldforce_none_peratom();
}
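
The second argument of each call has to equal the number of FFT_SCALAR values that pack_forward_grid()/pack_reverse_grid() write per grid point for that message type. A compact reading aid for the dispersion-grid messages above, assuming the PPPMDisp pack routines loop over the seven arithmetic-mixing bricks (a0..a6) and over nsplit_alloc bricks in the no-mixing case; the helper itself is illustrative and not part of the class:

// illustrative only: values per grid point for the dispersion-grid messages above
// (1 potential value for ad, 3 field components for ik, 6 or 7 per-atom terms,
//  multiplied by the number of density bricks of the mixing mode)
static int disp_nper(int which, int nsplit_alloc)
{
  switch (which) {
    case FORWARD_AD_A:            return 7;        // u for bricks a0..a6
    case FORWARD_IK_A:            return 3*7;      // x/y/z field for a0..a6
    case FORWARD_AD_PERATOM_A:    return 6*7;      // v0..v5 for a0..a6
    case FORWARD_IK_PERATOM_A:    return 7*7;      // u + v0..v5 for a0..a6
    case REVERSE_RHO_NONE:        return nsplit_alloc;
    case FORWARD_AD_NONE:         return nsplit_alloc;
    case FORWARD_IK_NONE:         return 3*nsplit_alloc;
    case FORWARD_AD_PERATOM_NONE: return 6*nsplit_alloc;
    case FORWARD_IK_PERATOM_NONE: return 7*nsplit_alloc;
    default:                      return 0;
  }
}
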
@ -655,7 +675,7 @@ void PPPMDispIntel::compute(int eflag, int vflag)
for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
MPI_Allreduce(virial_6,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
for (i = 0; i < 6; i++) virial[i] += 0.5*volume*virial_all[i];
if (function[1]+function[2]+function[3]){
if (function[1]+function[2]+function[3]) {
double a = MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij;
virial[0] -= a;
virial[1] -= a;
@ -2964,10 +2984,10 @@ void PPPMDispIntel::fieldforce_none_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
void PPPMDispIntel::precompute_rho()
{
half_rho_scale = (rho_points - 1.)/2.;
half_rho_scale_plus = half_rho_scale + 0.5;
if (function[0]) {
for (int i = 0; i < rho_points; i++) {
FFT_SCALAR dx = -1. + 1./half_rho_scale * (FFT_SCALAR)i;
#if defined(LMP_SIMD_COMPILER)
@ -2999,6 +3019,9 @@ void PPPMDispIntel::precompute_rho()
}
}
}
}
if (function[1]+function[2]+function[3]) {
for (int i = 0; i < rho_points; i++) {
FFT_SCALAR dx = -1. + 1./half_rho_scale * (FFT_SCALAR)i;
#if defined(LMP_SIMD_COMPILER)
@ -3030,6 +3053,7 @@ void PPPMDispIntel::precompute_rho()
}
}
}
}
}
/* ----------------------------------------------------------------------

View File

@ -70,8 +70,6 @@ namespace LAMMPS_NS {
FFT_SCALAR *particle_eky6;
FFT_SCALAR *particle_ekz6;
int _use_table;
int rho_points;
FFT_SCALAR **rho_lookup;
@ -82,7 +80,6 @@ namespace LAMMPS_NS {
int _use_packing;
#ifdef _LMP_INTEL_OFFLOAD
int _use_base;
#endif

View File

@ -164,11 +164,7 @@ void PPPMIntel::compute_first(int eflag, int vflag)
ev_init(eflag,vflag);
if (evflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom->ghost_notify();
cg_peratom->setup();
}
if (evflag_atom && !peratom_allocate_flag) allocate_peratom();
// if atom count has changed, update qsum and qsqsum
@ -232,7 +228,8 @@ void PPPMIntel::compute_first(int eflag, int vflag)
// to fully sum contribution in their 3d bricks
// remap from 3d decomposition to FFT decomposition
cg->reverse_comm(this,REVERSE_RHO);
gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
brick2fft();
// compute potential gradient on my FFT grid and
@ -246,16 +243,22 @@ void PPPMIntel::compute_first(int eflag, int vflag)
// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks
if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
else cg->forward_comm(this,FORWARD_IK);
if (differentiation_flag == 1)
gc->forward_comm_kspace(this,1,sizeof(FFT_SCALAR),FORWARD_AD,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
else
gc->forward_comm_kspace(this,3,sizeof(FFT_SCALAR),FORWARD_IK,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
// extra per-atom energy/virial communication
if (evflag_atom) {
if (differentiation_flag == 1 && vflag_atom)
cg_peratom->forward_comm(this,FORWARD_AD_PERATOM);
gc->forward_comm_kspace(this,6,sizeof(FFT_SCALAR),FORWARD_AD_PERATOM,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
else if (differentiation_flag == 0)
cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
gc->forward_comm_kspace(this,7,sizeof(FFT_SCALAR),FORWARD_IK_PERATOM,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
}
}
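
Dropping the ghost_notify()/setup() pair works because per-atom communication now reuses the same gc object; allocate_peratom() in the PPPM base class only has to regrow the shared buffers for the larger per-atom messages. A rough sketch of that step, assuming the member names visible at the call sites (npergrid and ngc_buf1/2 may be named differently in pppm.cpp):

// per-atom messages carry more values per grid point than the field comms,
// so the shared gc buffers are reallocated once per-atom data is requested
if (differentiation_flag) npergrid = 6;   // v0..v5 only (u already sent by FORWARD_AD)
else npergrid = 7;                        // u plus v0..v5

memory->destroy(gc_buf1);
memory->destroy(gc_buf2);
memory->create(gc_buf1,npergrid*ngc_buf1,"pppm:gc_buf1");
memory->create(gc_buf2,npergrid*ngc_buf2,"pppm:gc_buf2");
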

View File

@ -95,17 +95,7 @@ void MSMCGOMP::compute(int eflag, int vflag)
// invoke allocate_peratom() if needed for first time
if (vflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom_all->ghost_notify();
cg_peratom_all->setup();
for (int n=0; n<levels; n++) {
if (!active_flag[n]) continue;
cg_peratom[n]->ghost_notify();
cg_peratom[n]->setup();
}
peratom_allocate_flag = 1;
}
if (vflag_atom && !peratom_allocate_flag) allocate_peratom();
// extend size of per-atom arrays if necessary
@ -175,7 +165,8 @@ void MSMCGOMP::compute(int eflag, int vflag)
// to fully sum contribution in their 3d grid
current_level = 0;
cg_all->reverse_comm(this,REVERSE_RHO);
gcall->reverse_comm_kspace(this,1,sizeof(double),REVERSE_RHO,
gcall_buf1,gcall_buf2,MPI_DOUBLE);
// forward communicate charge density values to fill ghost grid points
// compute direct sum interaction and then restrict to coarser grid
@ -183,24 +174,30 @@ void MSMCGOMP::compute(int eflag, int vflag)
for (int n=0; n<=levels-2; n++) {
if (!active_flag[n]) continue;
current_level = n;
cg[n]->forward_comm(this,FORWARD_RHO);
gc[n]->forward_comm_kspace(this,1,sizeof(double),FORWARD_RHO,
gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
direct(n);
restriction(n);
}
// compute direct interaction for top grid level for non-periodic
// and for second from top grid level for periodic
if (active_flag[levels-1]) {
if (domain->nonperiodic) {
current_level = levels-1;
cg[levels-1]->forward_comm(this,FORWARD_RHO);
gc[levels-1]->
forward_comm_kspace(this,1,sizeof(double),FORWARD_RHO,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
direct_top(levels-1);
cg[levels-1]->reverse_comm(this,REVERSE_AD);
gc[levels-1]->
reverse_comm_kspace(this,1,sizeof(double),REVERSE_AD,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
if (vflag_atom)
cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
gc[levels-1]->
reverse_comm_kspace(this,6,sizeof(double),REVERSE_AD_PERATOM,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
} else {
// Here using MPI_Allreduce is cheaper than using commgrid
grid_swap_forward(levels-1,qgrid[levels-1]);
@ -208,7 +205,9 @@ void MSMCGOMP::compute(int eflag, int vflag)
grid_swap_reverse(levels-1,egrid[levels-1]);
current_level = levels-1;
if (vflag_atom)
cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
gc[levels-1]->
reverse_comm_kspace(this,6,sizeof(double),REVERSE_AD_PERATOM,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
}
}
@ -220,24 +219,28 @@ void MSMCGOMP::compute(int eflag, int vflag)
prolongation(n);
current_level = n;
cg[n]->reverse_comm(this,REVERSE_AD);
gc[n]->reverse_comm_kspace(this,1,sizeof(double),REVERSE_AD,
gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
// extra per-atom virial communication
if (vflag_atom)
cg_peratom[n]->reverse_comm(this,REVERSE_AD_PERATOM);
gc[n]->reverse_comm_kspace(this,6,sizeof(double),REVERSE_AD_PERATOM,
gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
}
// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks
current_level = 0;
cg_all->forward_comm(this,FORWARD_AD);
gcall->forward_comm_kspace(this,1,sizeof(double),FORWARD_AD,
gcall_buf1,gcall_buf2,MPI_DOUBLE);
// extra per-atom energy/virial communication
if (vflag_atom)
cg_peratom_all->forward_comm(this,FORWARD_AD_PERATOM);
gcall->forward_comm_kspace(this,6,sizeof(double),FORWARD_AD_PERATOM,
gcall_buf1,gcall_buf2,MPI_DOUBLE);
// calculate the force on my particles (interpolation)
@ -556,6 +559,7 @@ void MSMCGOMP::fieldforce_peratom()
}
}
/* ---------------------------------------------------------------------- */
double MSMCGOMP::memory_usage()
{

View File

@ -387,20 +387,20 @@ void Balance::command(int narg, char **arg)
MPI_Wtime()-start_time);
mesg += fmt::format(" iteration count = {}\n",niter);
for (int i = 0; i < nimbalance; ++i) mesg += imbalances[i]->info();
mesg += fmt::format(" initial/final maximal load/proc = {} {}\n"
" initial/final imbalance factor = {:.6g} {:.6g}\n",
mesg += fmt::format(" initial/final maximal load/proc = {:.8} {:.8}\n"
" initial/final imbalance factor = {:.8} {:.8}\n",
maxinit,maxfinal,imbinit,imbfinal);
if (style != BISECTION) {
mesg += " x cuts:";
for (int i = 0; i <= comm->procgrid[0]; i++)
mesg += fmt::format(" {}",comm->xsplit[i]);
mesg += fmt::format(" {:.8}",comm->xsplit[i]);
mesg += "\n y cuts:";
for (int i = 0; i <= comm->procgrid[1]; i++)
mesg += fmt::format(" {}",comm->ysplit[i]);
mesg += fmt::format(" {:.8}",comm->ysplit[i]);
mesg += "\n z cuts:";
for (int i = 0; i <= comm->procgrid[2]; i++)
mesg += fmt::format(" {}",comm->zsplit[i]);
mesg += fmt::format(" {:.8}",comm->zsplit[i]);
mesg += "\n";
}
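
The format change from {} (and {:.6g}) to {:.8} prints the cut fractions and load numbers in fmt's general floating-point format with 8 significant digits instead of shortest-round-trip output; a self-contained illustration with a made-up value:

// standalone illustration of the fmt format spec used above (example value only)
#include <fmt/format.h>
#include <string>

int main() {
  double xsplit = 1.0/3.0;
  std::string a = fmt::format(" {}",    xsplit);  // " 0.3333333333333333" (shortest round-trip)
  std::string b = fmt::format(" {:.8}", xsplit);  // " 0.33333333" (8 significant digits)
  fmt::print("{}\n{}\n", a, b);
  return 0;
}
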

View File

@ -664,10 +664,6 @@ void Force::create_kspace(const std::string &style, int trysuffix)
int sflag;
kspace = new_kspace(style,trysuffix,sflag);
store_style(kspace_style,style,sflag);
if (comm->style == 1 && !kspace_match("ewald",0))
error->all(FLERR,
"Cannot yet use KSpace solver with grid with comm style tiled");
}
/* ----------------------------------------------------------------------

View File

@ -121,10 +121,10 @@ class KSpace : protected Pointers {
virtual void compute(int, int) = 0;
virtual void compute_group_group(int, int, int) {};
virtual void pack_forward(int, FFT_SCALAR *, int, int *) {};
virtual void unpack_forward(int, FFT_SCALAR *, int, int *) {};
virtual void pack_reverse(int, FFT_SCALAR *, int, int *) {};
virtual void unpack_reverse(int, FFT_SCALAR *, int, int *) {};
virtual void pack_forward_grid(int, void *, int, int *) {};
virtual void unpack_forward_grid(int, void *, int, int *) {};
virtual void pack_reverse_grid(int, void *, int, int *) {};
virtual void unpack_reverse_grid(int, void *, int, int *) {};
virtual int timing(int, double &, double &) {return 0;}
virtual int timing_1d(int, double &) {return 0;}
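
Switching the buffer parameter from FFT_SCALAR * to void * lets one GridComm code path move grid data of any element type; each style casts the buffer back to its own type inside the hook. A minimal, hypothetical override (class and brick names are invented for illustration):

// hypothetical KSpace style, for illustration only
void MyKSpaceStyle::pack_forward_grid(int flag, void *vbuf, int nlist, int *list)
{
  FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;                       // recover the real element type
  FFT_SCALAR *src = &my_brick[nzlo_out][nylo_out][nxlo_out];   // invented grid array
  for (int i = 0; i < nlist; i++)
    buf[i] = src[list[i]];                                     // one value per listed grid point
}

The reverse-direction unpack hooks accumulate into the destination brick instead of assigning, since reverse communication sums ghost-cell contributions back onto the owning processor.
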

View File

@ -0,0 +1,94 @@
---
lammps_version: 30 Jun 2020
date_generated: Sun Jul 12 19:14:30 202
epsilon: 7.5e-14
prerequisites: ! |
atom full
pair coul/long
kspace pppm/cg
pre_commands: ! ""
post_commands: ! |
set atom 22*23 charge 0.0
set atom 25*26 charge 0.0
set atom 28*29 charge 0.0
set type 5 charge 0.0
comm_style tiled
pair_modify compute no
kspace_style pppm/cg 1.0e-6
kspace_modify gewald 0.3
balance 0.0 rcb
input_file: in.fourmol
pair_style: coul/long 8.0
pair_coeff: ! |
* *
extract: ! ""
natoms: 29
init_vdwl: 0
init_coul: 0
init_stress: ! |2-
0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
init_forces: ! |2
1 -2.6277259148670029e-01 6.3371291789473080e-02 2.5711876152341273e-01
2 7.2367296933642605e-02 -2.6377287441660424e-01 -7.6051513821789138e-02
3 -2.0405952297641711e-02 -7.8572370620207509e-03 1.6901994166300244e-02
4 1.3913539877995293e-01 2.2591757798275315e-03 -4.0397816193944613e-02
5 7.8220840962476321e-02 5.8018673691041286e-02 -6.0298251586383647e-02
6 1.6642601835640428e-02 5.4813636460583826e-01 -5.9948263006137181e-01
7 7.4422159409041455e-02 -5.2976037824286260e-01 2.8745376504092163e-01
8 5.1715645698484136e-01 -8.4224353212928849e-01 3.9252943496496401e-01
9 -2.8698074414725644e-01 3.8846246184711297e-01 -8.8735165696061394e-02
10 -1.4119088048501882e-01 1.7101059256234674e-01 -2.1093128275588843e-02
11 -1.9802109162868770e-01 2.3799905238132363e-01 -6.9205308272233729e-02
12 5.9829442228003460e-01 -6.6570589694338889e-01 1.8052289946052447e-01
13 -1.4432183109305963e-01 2.1119613677613439e-01 -3.7953547334912435e-02
14 -2.1057539442776216e-01 1.9833568369952351e-01 -4.3863660550604380e-02
15 -1.8510184013940739e-01 1.4558620355697388e-01 -6.3736986322272177e-02
16 -8.7068540567994446e-01 8.0781838347116530e-01 7.4023620199055895e-01
17 6.8573068239024604e-01 -5.4365714223217854e-01 -9.1523962812961923e-01
18 9.0711753065407197e-01 1.6157336473023263e+00 -1.6708191809635238e+00
19 -3.2812987976759644e-01 -8.1654277506424433e-01 9.6628327138680947e-01
20 -4.4090177907687206e-01 -7.7838783137249912e-01 8.4583048867481425e-01
21 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
22 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
23 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
24 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
25 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
26 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
27 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
28 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
29 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
run_vdwl: 0
run_coul: 0
run_stress: ! |2-
0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
run_forces: ! |2
1 -2.5272870059014418e-01 7.7993693891669019e-02 2.7413192480528514e-01
2 6.0317817543279174e-02 -2.7726219497773258e-01 -9.2328466795041211e-02
3 -2.0381925802263216e-02 -7.4169903634883827e-03 1.7718770630259381e-02
4 1.4007637687058852e-01 6.0294233349724097e-04 -4.4118635982017075e-02
5 7.7661555892458781e-02 5.6660597072423394e-02 -6.2527393985284924e-02
6 1.8472776517392236e-02 5.4080910460464371e-01 -6.2683322637258831e-01
7 7.1172361943358894e-02 -5.3339808852688553e-01 3.1182624682757953e-01
8 5.1931682876258478e-01 -8.4009257264924786e-01 4.1437713723636871e-01
9 -2.8926485235033661e-01 3.8545218869697995e-01 -9.6445715585120301e-02
10 -1.4130393273230371e-01 1.7154601844601167e-01 -2.4319589810512892e-02
11 -1.9820651458435656e-01 2.3998861307221755e-01 -7.2850168561931314e-02
12 6.0180481293740029e-01 -6.6352386739919189e-01 1.9241766558675899e-01
13 -1.4512210035912929e-01 2.1001807770143230e-01 -4.0833849803347325e-02
14 -2.1221325118484802e-01 1.9911652392243312e-01 -4.7359593324180368e-02
15 -1.8584239277230100e-01 1.4355785962059972e-01 -6.8625732013099072e-02
16 -8.6818324744564379e-01 8.1248951063969599e-01 7.2669223824162077e-01
17 6.8330419262914510e-01 -5.4635175697697436e-01 -9.0599066050100585e-01
18 9.4795190549104413e-01 1.6607978960168899e+00 -1.6003547491287762e+00
19 -3.4101870572315529e-01 -8.2840317570845334e-01 9.3657904772766076e-01
20 -4.6581300504277079e-01 -8.0258437941651817e-01 8.0884475080737095e-01
21 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
22 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
23 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
24 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
25 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
26 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
27 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
28 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
29 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
...
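
The new YAML files are reference data for the force-style unit tests: init_forces and run_forces are compared entry by entry against a fresh run of the listed input, with epsilon as the permitted relative deviation. A minimal sketch of such a check, assuming a plain relative-error criterion (the actual harness in unittest/force-styles may differ in detail):

// sketch of the epsilon check the reference data above is meant for; the real
// test harness may apply additional bookkeeping.
#include <cmath>
#include <cstdio>

static bool close_enough(double ref, double val, double epsilon) {
  double denom = std::fabs(ref) > 1.0 ? std::fabs(ref) : 1.0;  // assumed mixed abs/rel test
  return std::fabs(val - ref) / denom <= epsilon;
}

int main() {
  double ref = -2.6277259148670029e-01;   // force component from the table above
  double val = -2.6277259148670100e-01;   // hypothetical freshly computed value
  std::printf("%s\n", close_enough(ref, val, 7.5e-14) ? "ok" : "mismatch");
  return 0;
}
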

View File

@ -0,0 +1,99 @@
---
lammps_version: 21 Jul 2020
date_generated: Thu Aug 20 23:05:29 202
epsilon: 2.5e-13
prerequisites: ! |
atom full
pair lj/long/coul/long
kspace pppm/disp
pre_commands: ! ""
post_commands: ! |
pair_modify compute no
kspace_style pppm/disp 1.0e-4
kspace_modify gewald 0.5
kspace_modify force/disp/real 0.001
kspace_modify force/disp/kspace 0.005
kspace_modify diff ad
input_file: in.fourmol
pair_style: lj/long/coul/long long off 7.0
pair_coeff: ! |
1 1 0.02 2.5
2 2 0.005 1.0
2 4 0.005 0.5
3 3 0.02 3.2
4 4 0.015 3.1
5 5 0.015 3.1
extract: ! |
epsilon 2
sigma 2
cut_coul 0
natoms: 29
init_vdwl: 0
init_coul: 0
init_stress: ! |2-
0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
init_forces: ! |2
1 6.9724769730763625e-11 -6.2917196472420914e-11 -1.7553582754737637e-11
2 7.6094811340731255e-13 -1.1540411942409495e-12 1.6298559231496556e-13
3 3.9430185022927007e-11 -1.9017673718233126e-11 -1.6651673285376923e-13
4 1.5917087679856494e-12 -7.5219417810712124e-13 3.9732355364959770e-13
5 1.3754210730241487e-12 -5.2393248740046831e-13 -5.8786972569898321e-13
6 2.4324626429202970e-11 -5.9541975521081885e-12 1.1615602625372913e-11
7 3.9335920395601978e-11 -4.5805387197461763e-12 4.9818207226960900e-11
8 1.9840262237451370e-11 6.2076746170376731e-12 -4.0963480752166205e-13
9 6.7032491388560462e-14 2.8806602906466370e-14 -5.1168576901689580e-13
10 -9.5639153188399775e-12 1.6260508185384930e-11 5.9852790016982401e-12
11 -2.1047310133618033e-13 7.9345765138879343e-13 7.5666446278699520e-13
12 -2.9173061030991263e-11 3.7633064986308384e-12 1.7460466677353034e-11
13 -1.4524529065361568e-12 3.3470067557100611e-13 6.2185091609144014e-13
14 -7.3138369212027322e-13 1.0248711043717948e-13 1.1305113696402957e-12
15 -9.5037504870165340e-13 -3.5714658367712204e-13 3.9441042026885766e-13
16 -2.2415639659259037e-11 2.7640113997479121e-11 -1.1642230752605363e-11
17 -2.1208473166391679e-11 3.8522058586534765e-11 -4.6846216614724886e-11
18 -1.8040308770875740e-11 -5.7279008382924961e-11 7.2376750640813174e-11
19 -3.8160168113867275e-14 -1.0975133018216530e-12 1.5219157003617188e-12
20 -8.2805108197841928e-13 -1.1974415437385753e-12 1.4978331961150702e-12
21 -8.9475625142751869e-11 6.3090972691297525e-11 7.4019219615381729e-11
22 -1.5546663673414098e-12 1.1916129263874209e-12 1.5208406092298733e-12
23 -1.6319521654857783e-12 1.1826485038559488e-12 1.4904048534083759e-12
24 -1.5641127395622623e-11 -5.8519640828036183e-11 -7.6239449591252700e-11
25 1.1910315964875356e-13 -1.1889645936751337e-12 -1.4555354381968814e-12
26 -7.1757130498965896e-13 -1.1745493969901884e-12 -1.5212605958363234e-12
27 8.0820809208387831e-11 6.3170487105530568e-11 -6.5474684007301223e-11
28 1.6363504379062180e-12 1.1991903709725124e-12 -1.1721879859607168e-12
29 1.5216247551162307e-12 1.1846825159982258e-12 -1.4010202583443579e-12
run_vdwl: 0
run_coul: 0
run_stress: ! |2-
0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
run_forces: ! |2
1 6.9638112975627505e-11 -6.2952655433984510e-11 -1.7520579341404558e-11
2 7.5441280063952564e-13 -1.1565278235052112e-12 1.5428964869214424e-13
3 3.9495752841065062e-11 -1.9092781431624742e-11 -1.4273797798954887e-13
4 1.5925753071119834e-12 -7.5018590838065349e-13 4.0694176768714727e-13
5 1.3773389032846222e-12 -5.2518353267880336e-13 -5.8790169309916104e-13
6 2.4346713154520441e-11 -6.0219217022078622e-12 1.1556637696922417e-11
7 3.9348886643182682e-11 -4.5159528947341432e-12 4.9885111934485096e-11
8 1.9778099538765962e-11 6.2897367226276710e-12 -4.3131958485496316e-13
9 6.3695687649969267e-14 3.3892963902521213e-14 -5.1413748665229894e-13
10 -9.6012722021579935e-12 1.6233041014363616e-11 6.0003996378633241e-12
11 -2.0949350984685278e-13 7.9562479682529135e-13 7.6275214731002570e-13
12 -2.9254571969811829e-11 3.7981192304699482e-12 1.7545839680382277e-11
13 -1.4582673940336726e-12 3.2818654537516632e-13 6.2347550527444227e-13
14 -7.3106004851428835e-13 1.1022739695233736e-13 1.1369294512424691e-12
15 -9.4060413626822731e-13 -3.5126188279055496e-13 3.8676904179416883e-13
16 -2.2408460889929651e-11 2.7657483426256059e-11 -1.1741964403647136e-11
17 -2.1298916095589507e-11 3.8489167711847335e-11 -4.6910251494943480e-11
18 -1.7957271474310740e-11 -5.7234788024451294e-11 7.2383088876515692e-11
19 -3.6288894307680100e-14 -1.0943572602766692e-12 1.5210320121776364e-12
20 -8.3935575478300129e-13 -1.1966327701053927e-12 1.4966158763399045e-12
21 -8.9447202173297866e-11 6.3082241565048134e-11 7.4014350079233694e-11
22 -1.5510607489175700e-12 1.1907296946509683e-12 1.5210644252358221e-12
23 -1.6318182177140126e-12 1.1841207226717162e-12 1.4885209322729697e-12
24 -1.5676362809971050e-11 -5.8529118094744956e-11 -7.6215807688416359e-11
25 1.0742373795185689e-13 -1.1875074064094843e-12 -1.4571756878225340e-12
26 -7.1286524148732120e-13 -1.1757034686897436e-12 -1.5207002392359802e-12
27 8.0828500068665394e-11 6.3165679701490667e-11 -6.5424272562687016e-11
28 1.6362386625442489e-12 1.1989853840181747e-12 -1.1753814351696325e-12
29 1.5246414975786595e-12 1.1846002567244795e-12 -1.4036250137375415e-12
...

View File

@ -0,0 +1,98 @@
---
lammps_version: 21 Jul 2020
date_generated: Thu Aug 20 23:05:29 202
epsilon: 2.5e-13
prerequisites: ! |
atom full
pair lj/long/coul/long
kspace pppm/disp
pre_commands: ! ""
post_commands: ! |
pair_modify compute no
kspace_style pppm/disp 1.0e-4
kspace_modify gewald 0.5
kspace_modify force/disp/real 0.001
kspace_modify force/disp/kspace 0.005
input_file: in.fourmol
pair_style: lj/long/coul/long long off 7.0
pair_coeff: ! |
1 1 0.02 2.5
2 2 0.005 1.0
2 4 0.005 0.5
3 3 0.02 3.2
4 4 0.015 3.1
5 5 0.015 3.1
extract: ! |
epsilon 2
sigma 2
cut_coul 0
natoms: 29
init_vdwl: 0
init_coul: 0
init_stress: ! |2-
0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
init_forces: ! |2
1 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
2 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
3 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
4 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
5 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
6 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
7 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
8 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
9 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
10 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
11 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
12 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
13 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
14 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
15 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
16 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
17 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
18 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
19 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
20 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
21 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
22 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
23 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
24 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
25 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
26 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
27 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
28 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
29 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
run_vdwl: 0
run_coul: 0
run_stress: ! |2-
0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
run_forces: ! |2
1 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
2 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
3 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
4 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
5 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
6 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
7 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
8 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
9 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
10 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
11 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
12 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
13 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
14 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
15 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
16 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
17 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
18 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
19 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
20 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
21 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
22 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
23 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
24 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
25 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
26 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
27 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
28 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
29 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
...

View File

@ -0,0 +1,90 @@
---
lammps_version: 21 Jul 2020
date_generated: Mon Aug 3 23:27:35 202
epsilon: 7.5e-14
prerequisites: ! |
atom full
pair coul/long
kspace pppm/stagger
pre_commands: ! ""
post_commands: ! |
comm_style tiled
pair_modify compute no
kspace_style pppm/stagger 1.0e-6
kspace_modify gewald 0.3
balance 0.0 rcb
input_file: in.fourmol
pair_style: coul/long 8.0
pair_coeff: ! |
* *
extract: ! ""
natoms: 29
init_vdwl: 0
init_coul: 0
init_stress: ! |2-
0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
init_forces: ! |2
1 -5.2227715900845750e-01 8.1950519891037896e-02 2.1568864750376832e-01
2 2.1709329984201631e-01 -2.7910826043908610e-01 -1.3501796562404628e-01
3 -3.4431410110235108e-02 -9.3026609475793179e-03 1.9970153652663108e-02
4 1.6294612553322482e-01 2.8820208915782589e-02 -7.8041537992552090e-02
5 1.6018614143950777e-01 7.5432356753115826e-02 -3.7722588054429150e-02
6 5.6492988309033354e-01 4.1696814706534630e-01 -6.7673476362799612e-01
7 -3.4216205471660566e-01 -4.0009165524095192e-01 3.9325085501474977e-01
8 -1.4121658810415072e-01 -6.1694926607232958e-01 3.3967088097622000e-01
9 1.8213420211015810e-01 3.2023550501078568e-01 5.0865716213608053e-02
10 -5.1659753410710330e-02 1.1063713004312387e-01 -1.4434062323990951e-02
11 -8.4675426404111437e-02 1.5093767469541053e-01 -3.9273222694910556e-02
12 4.5743278086428246e-01 -4.2657803621344026e-01 3.4765791147615285e-02
13 -1.5598326968651899e-01 1.1609333380277490e-01 2.6827585674639255e-02
14 -1.7229830712699734e-01 1.3660265515995373e-01 1.0364293545061238e-02
15 -1.3779624948415931e-01 8.5611514314178239e-02 -1.4417936578185790e-02
16 -3.4309620718952316e-01 4.3358259515061448e-01 5.3264911488862143e-01
17 1.3394866253126306e-01 -4.1287506953147796e-01 -7.8819987038465467e-01
18 7.3032854805187175e-01 1.5459002190369358e+00 -1.3876618467094484e+00
19 -2.5946241349817661e-01 -7.7450328984918526e-01 7.7107291114413901e-01
20 -3.9364379163465191e-01 -7.0318115064463305e-01 7.3145133273582563e-01
21 5.1854207039779388e-01 5.4316140431986648e-01 -1.1630561612460866e+00
22 -2.9474924657955082e-01 -1.2314722330232061e-01 5.8311514555951505e-01
23 -2.8773056143507980e-01 -2.9281856868591627e-01 5.5619506923778372e-01
24 6.2752036539684239e-02 1.7441478631220706e+00 -2.7849000426516041e-01
25 1.2955239352175907e-01 -7.0427160638100861e-01 2.2582608971039136e-01
26 -2.2227598237037974e-01 -9.7470415379148345e-01 7.4514829391794934e-02
27 -8.5917766336431800e-01 1.6506499588643100e+00 -9.3704596621935576e-01
28 5.7091533654326099e-01 -9.1760299361767794e-01 5.4073727763352530e-01
29 4.1187460365847078e-01 -8.0559715142821642e-01 4.4313023169089372e-01
run_vdwl: 0
run_coul: 0
run_stress: ! |2-
0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
run_forces: ! |2
1 -5.0888488962950285e-01 8.3430404180325907e-02 2.3883070350075583e-01
2 2.0290231222263222e-01 -2.8552521628160221e-01 -1.4795322214007761e-01
3 -3.4147227596864727e-02 -9.1044423234983921e-03 2.1050772312966981e-02
4 1.6403639693437871e-01 2.7510909903057143e-02 -8.1700222473303191e-02
5 1.5793837855428028e-01 7.5134684921661865e-02 -4.3070400148934276e-02
6 5.5874989482134696e-01 4.1097082705147381e-01 -7.0880717747024347e-01
7 -3.4244419763693645e-01 -4.0336810330768602e-01 4.1416013762933518e-01
8 -1.2867011168103898e-01 -6.1360944335177758e-01 3.7373621646565264e-01
9 1.7179361391284365e-01 3.1570434896702448e-01 2.9166730933101400e-02
10 -5.3386439917058298e-02 1.1157104966535075e-01 -1.9069019877901452e-02
11 -8.6641568612711800e-02 1.5447778751280100e-01 -4.3302035139313286e-02
12 4.6277613291799979e-01 -4.2640261166652782e-01 5.4642934467963714e-02
13 -1.5768096764682599e-01 1.1678927823658505e-01 2.0726708870285715e-02
14 -1.7362409264878331e-01 1.3769878891777848e-01 5.6914483198930487e-03
15 -1.3804334923445472e-01 8.4232584350085404e-02 -2.2242211531143450e-02
16 -3.5505453326610353e-01 4.4007379281236625e-01 5.1108930583976653e-01
17 1.4393262437860022e-01 -4.0681625150263201e-01 -7.6550494257092949e-01
18 7.7411316765167837e-01 1.6013845677120058e+00 -1.3406161587038208e+00
19 -2.7109483507409249e-01 -7.9220736692047411e-01 7.5606765561780620e-01
20 -4.2022391372143653e-01 -7.3398564926630305e-01 7.0862056036484811e-01
21 5.2066744810063004e-01 4.5601151336785911e-01 -1.1109669200204870e+00
22 -2.9131419157235938e-01 -8.0660148622295896e-02 5.6037505807873789e-01
23 -2.8851937111607945e-01 -2.5707916023569388e-01 5.3115241105748146e-01
24 7.7132406285747662e-02 1.6936935947231240e+00 -2.6197671637095088e-01
25 1.1739827770831035e-01 -6.8172720908194795e-01 2.1292828073010400e-01
26 -2.2399952098390596e-01 -9.4806186651755442e-01 6.5237310702708273e-02
27 -8.6781949013350235e-01 1.6453631212617197e+00 -8.8867885380482659e-01
28 5.7526918079966904e-01 -9.1110567278710985e-01 5.1441520143469011e-01
29 4.1483886618353916e-01 -8.0439411171811659e-01 4.1599644392583446e-01
...

View File

@ -0,0 +1,90 @@
---
lammps_version: 30 Jun 2020
date_generated: Sun Jul 12 19:14:29 202
epsilon: 7.5e-14
prerequisites: ! |
atom full
pair coul/long
kspace pppm
pre_commands: ! ""
post_commands: ! |
comm_style tiled
pair_modify compute no
kspace_style pppm 1.0e-6
kspace_modify gewald 0.3
balance 0.0 rcb
input_file: in.fourmol
pair_style: coul/long 8.0
pair_coeff: ! |
* *
extract: ! ""
natoms: 29
init_vdwl: 0
init_coul: 0
init_stress: ! |2-
0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
init_forces: ! |2
1 -5.2239274535568314e-01 8.2051545744881466e-02 2.1533594847972076e-01
2 2.1712968366442176e-01 -2.7928074334318026e-01 -1.3471540076656802e-01
3 -3.4442019165638028e-02 -9.3084265599194874e-03 1.9948062571124484e-02
4 1.6298334373562443e-01 2.8852998088186425e-02 -7.8001870103674154e-02
5 1.6024289196964533e-01 7.5428818157230709e-02 -3.7746220978715959e-02
6 5.6503043686117405e-01 4.1669523647698320e-01 -6.7638762712651512e-01
7 -3.4224573570118516e-01 -3.9969025602522534e-01 3.9331747529410527e-01
8 -1.4133104801408738e-01 -6.1685378954692482e-01 3.3931746208503027e-01
9 1.8219762821810317e-01 3.2009822401929577e-01 5.0881307357289934e-02
10 -5.1688860353236589e-02 1.1069131959908671e-01 -1.4422029744161480e-02
11 -8.4689878918105269e-02 1.5099315110947911e-01 -3.9231342126204188e-02
12 4.5754413540574290e-01 -4.2644798683690410e-01 3.4587713233253971e-02
13 -1.5596780753830558e-01 1.1607584778590280e-01 2.6865880696619902e-02
14 -1.7231427615749528e-01 1.3653099035839830e-01 1.0392517888507409e-02
15 -1.3787738509698347e-01 8.5569383216123673e-02 -1.4365596072224287e-02
16 -3.4322564010548312e-01 4.3371633953160166e-01 5.3259611401138551e-01
17 1.3414272886699793e-01 -4.1322529572771644e-01 -7.8812435933765979e-01
18 7.3073447759345089e-01 1.5456517688814524e+00 -1.3881786173290165e+00
19 -2.5943625025418654e-01 -7.7424664728587522e-01 7.7105598737678260e-01
20 -3.9409193260988501e-01 -7.0311103001458264e-01 7.3171724652214931e-01
21 5.1856078926614546e-01 5.4286369838352699e-01 -1.1629548434823531e+00
22 -2.9453203152655405e-01 -1.2298517567747463e-01 5.8298446261040782e-01
23 -2.8798525475710529e-01 -2.9277384277527774e-01 5.5631883166904628e-01
24 6.2753212217437501e-02 1.7443957830145815e+00 -2.7814103479849506e-01
25 1.2986161832727383e-01 -7.0443921770565177e-01 2.2578528867489417e-01
26 -2.2254044464386455e-01 -9.7470640011041609e-01 7.4360754308868779e-02
27 -8.5917998510192983e-01 1.6512375326941557e+00 -9.3680672362601536e-01
28 5.7118802253451917e-01 -9.1790362039827855e-01 5.4063664700585301e-01
29 4.1157232663919069e-01 -8.0588020505345637e-01 4.4297396570656278e-01
run_vdwl: 0
run_coul: 0
run_stress: ! |2-
0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
run_forces: ! |2
1 -5.0900177524332380e-01 8.3541312360770559e-02 2.3847626933913957e-01
2 2.0293760437213748e-01 -2.8570861986354196e-01 -1.4764907529922344e-01
3 -3.4158170718364404e-02 -9.1103261103567309e-03 2.1028676255224140e-02
4 1.6407381836362508e-01 2.7543938443804754e-02 -8.1661581263582694e-02
5 1.5799678256598612e-01 7.5131693181657300e-02 -4.3093731729494773e-02
6 5.5884729319027959e-01 4.1070142887655381e-01 -7.0845348552640675e-01
7 -3.4252252488516066e-01 -4.0296980848156633e-01 4.1421689597710110e-01
8 -1.2877561673829283e-01 -6.1352311819313976e-01 3.7338100957587267e-01
9 1.7184965305565320e-01 3.1557352532666366e-01 2.9184166085517198e-02
10 -5.3416314160604578e-02 1.1162618970928689e-01 -1.9056641624128120e-02
11 -8.6656970143397835e-02 1.5453217862588842e-01 -4.3260381230061914e-02
12 4.6288691229484780e-01 -4.2627837697985460e-01 5.4462739307820834e-02
13 -1.5766510975402218e-01 1.1677248221389504e-01 2.0765584899621815e-02
14 -1.7364037400975366e-01 1.3763067517397995e-01 5.7227400340144263e-03
15 -1.3812467553300772e-01 8.4189156813104071e-02 -2.2189341317061961e-02
16 -3.5518552554301230e-01 4.4020942653600509e-01 5.1103809754957041e-01
17 1.4412978936104159e-01 -4.0716931333084577e-01 -7.6542748703258823e-01
18 7.7452008907550252e-01 1.6011444029485182e+00 -1.3411322152836536e+00
19 -2.7106696803117064e-01 -7.9194312531475608e-01 7.5606651125297675e-01
20 -4.2067655880426469e-01 -7.3392054862184575e-01 7.0888202878660511e-01
21 5.2066009642687827e-01 4.5571443252966459e-01 -1.1108598505670140e+00
22 -2.9107834596471255e-01 -8.0497477430417727e-02 5.6024739795440914e-01
23 -2.8876713232015855e-01 -2.5703387138535833e-01 5.3126570223700098e-01
24 7.7134065884385553e-02 1.6939505170091371e+00 -2.6162012529552015e-01
25 1.1769649007263155e-01 -6.8190526638151339e-01 2.1287906681296132e-01
26 -2.2425351267435142e-01 -9.4806947839154443e-01 6.5072664668127181e-02
27 -8.6783221113018416e-01 1.6459489466934927e+00 -8.8842216340655056e-01
28 5.7554532106965894e-01 -9.1140444835486800e-01 5.1430957288504053e-01
29 4.1454386992115411e-01 -8.0467652760281361e-01 4.1582695595428282e-01
...