mirror of https://github.com/lammps/lammps.git
Merge pull request #2280 from lammps/gridcomm-tiled
Support for tiled decompositions in PPPM
commit 4c46119a48
@ -73,7 +73,7 @@ ensure that the new bonds created by this style do not create spurious
Specifically 1-2 interactions must have weights of zero, 1-3
interactions must either have weights of unity or :doc:`special_bonds
angle yes <special_bonds>` must be used, and 1-4 interactions must
have weights of unity or :doc:`special_bonds dihedral yes <special_bonds>`
must be used.

If this command is used to create bonded interactions between
@ -95,7 +95,7 @@ compute interactions for individual pairs of atoms. Manybody potentials
are not compatible in general, but also some other pair styles are missing
the required functionality and thus will cause an error.

This command is not compatible with long-range Coulombic interactions. If a
:doc:`kspace_style <kspace_style>` is declared, an error will be issued.

Related commands
@ -203,11 +203,7 @@ void PPPMGPU::compute(int eflag, int vflag)
// If need per-atom energies/virials, allocate per-atom arrays here
// so that particle map on host can be done concurrently with GPU calculations

if (evflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom->ghost_notify();
cg_peratom->setup();
}
if (evflag_atom && !peratom_allocate_flag) allocate_peratom();

if (triclinic == 0) {
bool success = true;
@ -258,10 +254,12 @@ void PPPMGPU::compute(int eflag, int vflag)
// remap from 3d decomposition to FFT decomposition

if (triclinic == 0) {
cg->reverse_comm(this,REVERSE_RHO_GPU);
gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO_GPU,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
brick2fft_gpu();
} else {
cg->reverse_comm(this,REVERSE_RHO);
gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
PPPM::brick2fft();
}
@ -274,16 +272,22 @@ void PPPMGPU::compute(int eflag, int vflag)
// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks

if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
else cg->forward_comm(this,FORWARD_IK);
if (differentiation_flag == 1)
gc->forward_comm_kspace(this,1,sizeof(FFT_SCALAR),FORWARD_AD,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
else
gc->forward_comm_kspace(this,3,sizeof(FFT_SCALAR),FORWARD_IK,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);

// extra per-atom energy/virial communication

if (evflag_atom) {
if (differentiation_flag == 1 && vflag_atom)
cg_peratom->forward_comm(this,FORWARD_AD_PERATOM);
gc->forward_comm_kspace(this,6,sizeof(FFT_SCALAR),FORWARD_AD_PERATOM,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
else if (differentiation_flag == 0)
cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
gc->forward_comm_kspace(this,7,sizeof(FFT_SCALAR),FORWARD_IK_PERATOM,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
}

poisson_time += MPI_Wtime()-t3;
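The hunks above capture the core API shift of this PR: the old GridComm required a ghost_notify()/setup() handshake and carried its own internal buffers, while the new one reports its two buffer sizes from setup() and lets the caller own and pass them. A minimal sketch of the new calling convention, assembled from names that appear in this diff (not a literal excerpt):

  // create the ghost-grid communicator, size the two exchange buffers,
  // then pass them to every forward/reverse exchange with a per-cell count
  gc = new GridComm(lmp,world,nx_pppm,ny_pppm,nz_pppm,
                    nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                    nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out);
  gc->setup(ngc_buf1,ngc_buf2);          // reports required buffer lengths
  memory->create(gc_buf1,npergrid*ngc_buf1,"pppm:gc_buf1");
  memory->create(gc_buf2,npergrid*ngc_buf2,"pppm:gc_buf2");
  gc->forward_comm_kspace(this,3,sizeof(FFT_SCALAR),FORWARD_IK,
                          gc_buf1,gc_buf2,MPI_FFT_SCALAR);  // 3 values/cell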
@ -510,8 +514,10 @@ void PPPMGPU::poisson_ik()
pack own values to buf to send to another proc
------------------------------------------------------------------------- */

void PPPMGPU::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPMGPU::pack_forward_grid(int flag, void *vbuf, int nlist, int *list)
{
FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;

int n = 0;

if (flag == FORWARD_IK) {
@ -568,8 +574,10 @@ void PPPMGPU::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
unpack another proc's own values from buf and set own ghost values
------------------------------------------------------------------------- */

void PPPMGPU::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPMGPU::unpack_forward_grid(int flag, void *vbuf, int nlist, int *list)
{
FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;

int n = 0;

if (flag == FORWARD_IK) {
@ -626,8 +634,10 @@ void PPPMGPU::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
pack ghost values into buf to send to another proc
------------------------------------------------------------------------- */

void PPPMGPU::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPMGPU::pack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
{
FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;

if (flag == REVERSE_RHO_GPU) {
FFT_SCALAR *src = &density_brick_gpu[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++)
@ -643,8 +653,10 @@ void PPPMGPU::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
unpack another proc's ghost values from buf and add to own values
------------------------------------------------------------------------- */

void PPPMGPU::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPMGPU::unpack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
{
FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;

if (flag == REVERSE_RHO_GPU) {
FFT_SCALAR *dest = &density_brick_gpu[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++)
@ -818,7 +830,8 @@ void PPPMGPU::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag)
density_brick = density_A_brick;
density_fft = density_A_fft;

cg->reverse_comm(this,REVERSE_RHO);
gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
brick2fft();

// group B
@ -826,7 +839,8 @@ void PPPMGPU::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag)
density_brick = density_B_brick;
density_fft = density_B_fft;

cg->reverse_comm(this,REVERSE_RHO);
gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
brick2fft();

// switch back pointers
@ -46,10 +46,10 @@ class PPPMGPU : public PPPM {
void brick2fft_gpu();
virtual void poisson_ik();

void pack_forward(int, FFT_SCALAR *, int, int *);
void unpack_forward(int, FFT_SCALAR *, int, int *);
void pack_reverse(int, FFT_SCALAR *, int, int *);
void unpack_reverse(int, FFT_SCALAR *, int, int *);
void pack_forward_grid(int, void *, int, int *);
void unpack_forward_grid(int, void *, int, int *);
void pack_reverse_grid(int, void *, int, int *);
void unpack_reverse_grid(int, void *, int, int *);

FFT_SCALAR ***create_3d_offset(int, int, int, int, int, int, const char *,
FFT_SCALAR *, int);
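Note the signature change in this header: the grid pack/unpack hooks now take void * instead of FFT_SCALAR *, so the base-class communicator can invoke any derived grid type through a single virtual interface. Each override recovers its element type first, as the pack_forward_grid hunks earlier in this diff show (a sketch of the pattern, not a complete function):

  void PPPMGPU::pack_forward_grid(int flag, void *vbuf, int nlist, int *list)
  {
    FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;  // cast once, then pack as before
    // ... per-flag packing of brick values into buf, unchanged otherwise
  }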
@ -1560,7 +1560,7 @@ void FixWallGran::unpack_restart(int nlocal, int nth)

// skip to Nth set of extra values
// unpack the Nth first values this way because other fixes pack them

int m = 0;
for (int i = 0; i < nth; i++) m += static_cast<int> (extra[nlocal][m]);
m++;
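The loop above works because each fix's block in extra[] is length-prefixed: the first value of a block is that block's own length, so skipping to the Nth block is a chain of m += length hops. A standalone illustration with hypothetical data (not LAMMPS code):

  #include <cstdio>

  int main() {
    // three length-prefixed blocks: {3,...}, {2,...}, {4,...}
    double extra[] = {3, 1.5, 2.5,  2, 9.0,  4, 0.1, 0.2, 0.3};
    int nth = 2, m = 0;
    for (int i = 0; i < nth; i++) m += (int) extra[m];  // hop block to block
    printf("block %d starts at %d with length %d\n", nth, m, (int) extra[m]);
    m++;  // step past the length value, as unpack_restart() does
    return 0;
  }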
@ -498,7 +498,7 @@ void FixWallGranRegion::unpack_restart(int nlocal, int nth)

// skip to Nth set of extra values
// unpack the Nth first values this way because other fixes pack them

int m = 0;
for (int i = 0; i < nth; i++) m += static_cast<int> (extra[nlocal][m]);
m++;
File diff suppressed because it is too large
@ -14,81 +14,59 @@
#ifndef LMP_GRIDCOMM_KOKKOS_H
#define LMP_GRIDCOMM_KOKKOS_H

#include "pointers.h"
#include "gridcomm.h"
#include "kokkos_type.h"
#include "fftdata_kokkos.h"

#ifdef FFT_SINGLE
typedef float FFT_SCALAR;
#define MPI_FFT_SCALAR MPI_FLOAT
#else
typedef double FFT_SCALAR;
#define MPI_FFT_SCALAR MPI_DOUBLE
#endif

namespace LAMMPS_NS {

template<class DeviceType>
class GridCommKokkos : protected Pointers {
class GridCommKokkos : public GridComm {
public:
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
typedef FFTArrayTypes<DeviceType> FFT_AT;

GridCommKokkos(class LAMMPS *, MPI_Comm, int, int,
GridCommKokkos(class LAMMPS *, MPI_Comm, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int);
GridCommKokkos(class LAMMPS *, MPI_Comm, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int);
GridCommKokkos(class LAMMPS *, MPI_Comm, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int);
~GridCommKokkos();
void ghost_notify();
int ghost_overlap();
void setup();
void forward_comm(class KSpace *, int);
void reverse_comm(class KSpace *, int);
double memory_usage();
virtual ~GridCommKokkos();
void forward_comm_kspace(class KSpace *, int, int,
FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype);
void reverse_comm_kspace(class KSpace *, int, int,
FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype);

private:
int me;
int nforward,nreverse;
MPI_Comm gridcomm;
MPI_Request request;
DAT::tdual_int_2d k_swap_packlist;
DAT::tdual_int_2d k_swap_unpacklist;

// in = inclusive indices of 3d grid chunk that I own
// out = inclusive indices of 3d grid chunk I own plus ghosts I use
// proc = 6 neighbor procs that surround me
// ghost = # of my owned grid planes needed from me
// by each of 6 neighbor procs to become their ghost planes
DAT::tdual_int_2d k_send_packlist;

int inxlo,inxhi,inylo,inyhi,inzlo,inzhi;
int outxlo,outxhi,outylo,outyhi,outzlo,outzhi;
int outxlo_max,outxhi_max,outylo_max,outyhi_max,outzlo_max,outzhi_max;
int procxlo,procxhi,procylo,procyhi,proczlo,proczhi;
int ghostxlo,ghostxhi,ghostylo,ghostyhi,ghostzlo,ghostzhi;
DAT::tdual_int_2d k_recv_unpacklist;

int nbuf;
//FFT_SCALAR *buf1,*buf2;
FFT_DAT::tdual_FFT_SCALAR_1d k_buf1;
FFT_DAT::tdual_FFT_SCALAR_1d k_buf2;
DAT::tdual_int_2d k_copy_packlist;
DAT::tdual_int_2d k_copy_unpacklist;

struct Swap {
int sendproc; // proc to send to for forward comm
int recvproc; // proc to recv from for forward comm
int npack; // # of datums to pack
int nunpack; // # of datums to unpack
//int *packlist; // 3d array offsets to pack
//int *unpacklist; // 3d array offsets to unpack
};
// -------------------------------------------
// internal methods
// -------------------------------------------

DAT::tdual_int_2d k_packlist;
DAT::tdual_int_2d k_unpacklist;
void setup_regular(int &, int &);
void setup_tiled(int &, int &);

int nswap;
Swap *swap;
void forward_comm_kspace_regular(class KSpace *, int, int,
FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype);
void forward_comm_kspace_tiled(class KSpace *, int, int,
FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype);
void reverse_comm_kspace_regular(class KSpace *, int, int,
FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype);
void reverse_comm_kspace_tiled(class KSpace *, int, int,
FFT_DAT::tdual_FFT_SCALAR_1d &, FFT_DAT::tdual_FFT_SCALAR_1d &, MPI_Datatype);

void grow_swap();

int indices(DAT::tdual_int_2d &, int, int, int, int, int, int, int);
};
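In this Kokkos subclass the base class's flat int* pack/unpack lists become tdual_int_2d DualViews (one row per swap/send/recv), so the pack and unpack loops can run as device kernels. Each list entry is a linear offset into the ghost-extended 3d brick; a hedged sketch of the decode that the PPPMKokkos kernels later in this diff perform:

  // sketch: decode a flattened brick offset back to (iz,iy,ix) on the device,
  // where nx,ny are the ghost-extended brick extents in x and y
  KOKKOS_INLINE_FUNCTION
  void operator()(const int &i) const {
    const int dlist = d_list_index[i];
    const int iz = dlist/(nx*ny);
    const int iy = (dlist - iz*nx*ny)/nx;
    const int ix = dlist - iz*nx*ny - iy*nx;
    d_density_brick(iz,iy,ix) += d_buf[i + unpack_offset];
  }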
@ -23,10 +23,10 @@ class KokkosBaseFFT {
KokkosBaseFFT() {}

//Kspace
virtual void pack_forward_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
virtual void unpack_forward_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
virtual void pack_reverse_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
virtual void unpack_reverse_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
virtual void pack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
virtual void unpack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {};
virtual void pack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
virtual void unpack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int) {};
};

}
@ -718,7 +718,7 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPTransformUi,const int ia
const int iatom = iatom_mod + iatom_div * 32;
if (iatom >= chunk_size) return;

if (j > twojmax) return;

int elem_count = chemflag ? nelements : 1;
@ -739,7 +739,7 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPTransformUi,const int ia

// Store
my_sna.ulisttot_pack(iatom_mod, idxu, ielem, iatom_div) = { utot_re, utot_im };

// Also zero yi
my_sna.ylist_pack_re(iatom_mod, idxu_half, ielem, iatom_div) = 0.;
my_sna.ylist_pack_im(iatom_mod, idxu_half, ielem, iatom_div) = 0.;
@ -944,7 +944,7 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPTransformUiCPU, const in

if (iatom >= chunk_size) return;

if (j > twojmax) return;

int elem_count = chemflag ? nelements : 1;
@ -96,8 +96,7 @@ PPPMKokkos<DeviceType>::PPPMKokkos(LAMMPS *lmp) : PPPM(lmp)

fft1 = fft2 = NULL;
remap = NULL;
cg = NULL;
cg_peratom = NULL;
gc = NULL;

nmax = 0;
//part2grid = NULL;
@ -192,9 +191,6 @@ void PPPMKokkos<DeviceType>::init()
"slab correction");
if (domain->dimension == 2) error->all(FLERR,
"Cannot use PPPM with 2d simulation");
if (comm->style != 0)
error->universe_all(FLERR,"PPPM can only currently be used with "
"comm_style brick");

if (!atomKK->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
@ -255,9 +251,7 @@ void PPPMKokkos<DeviceType>::init()
// or overlap is allowed, then done
// else reduce order and try again

int (*procneigh)[2] = comm->procneigh;

GridCommKokkos<DeviceType> *cgtmp = NULL;
GridCommKokkos<DeviceType> *gctmp = NULL;
int iteration = 0;

while (order >= minorder) {
@ -269,24 +263,23 @@ void PPPMKokkos<DeviceType>::init()
set_grid_local();
if (overlap_allowed) break;

cgtmp = new GridCommKokkos<DeviceType>(lmp,world,1,1,
gctmp = new GridCommKokkos<DeviceType>(lmp,world,nx_pppm,ny_pppm,nz_pppm,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
cgtmp->ghost_notify();
if (!cgtmp->ghost_overlap()) break;
delete cgtmp;
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out);
int tmp1,tmp2;
gctmp->setup(tmp1,tmp2);
if (!gctmp->ghost_adjacent()) break;
delete gctmp;

order--;
iteration++;
}

if (order < minorder) error->all(FLERR,"PPPM order < minimum allowed order");
if (!overlap_allowed && cgtmp->ghost_overlap())
if (!overlap_allowed && gctmp->ghost_adjacent())
error->all(FLERR,"PPPM grid stencil extends "
"beyond nearest neighbor processor");
if (cgtmp) delete cgtmp;
if (gctmp) delete gctmp;

// adjust g_ewald
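The loop above trades interpolation order for locality: whenever the stencil would need ghost cells from beyond the six face-adjacent processors, the order is reduced and the grid rebuilt. ghost_adjacent() reports whether the current stencil still fits; a hedged illustration of the predicate for the regular layout (assumed logic, not the PR's literal code):

  // a stencil is "adjacent" in one dimension when the ghost planes needed on
  // each side do not exceed the number of planes the face neighbor owns
  static int stencil_is_adjacent(int ghostlo, int ghosthi, int nowned)
  {
    return ghostlo <= nowned && ghosthi <= nowned;
  }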
@ -320,8 +313,6 @@ void PPPMKokkos<DeviceType>::init()
// don't invoke allocate peratom(), will be allocated when needed

allocate();
cg->ghost_notify();
cg->setup();

// pre-compute Green's function denominator expansion
// pre-compute 1d charge distribution coefficients
@ -564,11 +555,9 @@ void PPPMKokkos<DeviceType>::setup_grid()

allocate();

cg->ghost_notify();
if (overlap_allowed == 0 && cg->ghost_overlap())
if (!overlap_allowed && !gc->ghost_adjacent())
error->all(FLERR,"PPPM grid stencil extends "
"beyond nearest neighbor processor");
cg->setup();

// pre-compute Green's function denominator expansion
// pre-compute 1d charge distribution coefficients

@ -576,7 +565,7 @@ void PPPMKokkos<DeviceType>::setup_grid()
compute_gf_denom();
compute_rho_coeff();

// pre-compute volume-dependent coeffs
// pre-compute volume-dependent coeffs for portion of grid I now own

setup();
}
@ -609,11 +598,8 @@ void PPPMKokkos<DeviceType>::compute(int eflag, int vflag)
d_vatom = k_vatom.view<DeviceType>();
}

if (evflag_atom && !peratom_allocate_flag) {
if (evflag_atom && !peratom_allocate_flag)
allocate_peratom();
cg_peratom->ghost_notify();
cg_peratom->setup();
}

x = atomKK->k_x.view<DeviceType>();
f = atomKK->k_f.view<DeviceType>();
@ -667,7 +653,8 @@ void PPPMKokkos<DeviceType>::compute(int eflag, int vflag)
// to fully sum contribution in their 3d bricks
// remap from 3d decomposition to FFT decomposition

cg->reverse_comm(this,REVERSE_RHO);
gc->reverse_comm_kspace(this,1,REVERSE_RHO,
k_gc_buf1,k_gc_buf2,MPI_FFT_SCALAR);
brick2fft();

// compute potential gradient on my FFT grid and
@ -680,12 +667,14 @@ void PPPMKokkos<DeviceType>::compute(int eflag, int vflag)
// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks

cg->forward_comm(this,FORWARD_IK);
gc->forward_comm_kspace(this,3,FORWARD_IK,
k_gc_buf1,k_gc_buf2,MPI_FFT_SCALAR);

// extra per-atom energy/virial communication

if (evflag_atom)
cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
gc->forward_comm_kspace(this,7,FORWARD_IK_PERATOM,
k_gc_buf1,k_gc_buf2,MPI_FFT_SCALAR);

// calculate the force on my particles
@ -844,14 +833,18 @@ void PPPMKokkos<DeviceType>::allocate()
1,0,0,FFT_PRECISION,collective_flag,cuda_aware_flag);

// create ghost grid object for rho and electric field communication
// also create 2 bufs for ghost grid cell comm, passed to GridComm methods

int (*procneigh)[2] = comm->procneigh;

cg = new GridCommKokkos<DeviceType>(lmp,world,3,1,
gc = new GridCommKokkos<DeviceType>(lmp,world,nx_pppm,ny_pppm,nz_pppm,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out);

gc->setup(ngc_buf1,ngc_buf2);

npergrid = 3;

k_gc_buf1 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1);
k_gc_buf2 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2);
}

/* ----------------------------------------------------------------------
@ -876,8 +869,8 @@ void PPPMKokkos<DeviceType>::deallocate()
fft2 = NULL;
delete remap;
remap = NULL;
delete cg;
cg = NULL;
delete gc;
gc = NULL;
}

/* ----------------------------------------------------------------------
@ -899,16 +892,13 @@ void PPPMKokkos<DeviceType>::allocate_peratom()
d_v5_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v5_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1);

// create ghost grid object for rho and electric field communication
// use same GC ghost grid object for peratom grid communication
// but need to reallocate a larger gc_buf1 and gc_buf2

int (*procneigh)[2] = comm->procneigh;
npergrid = 7;

cg_peratom =
new GridCommKokkos<DeviceType>(lmp,world,7,1,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
k_gc_buf1 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf1",npergrid*ngc_buf1);
k_gc_buf2 = FFT_DAT::tdual_FFT_SCALAR_1d("pppm:gc_buf2",npergrid*ngc_buf2);
}

/* ----------------------------------------------------------------------
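As the comment in this hunk says, the per-atom path reuses the same GridComm object; only the message width changes (7 values per grid cell instead of 3), so just the two buffers are re-created at the larger npergrid while the communication pattern is kept. The CPU-side equivalent would look roughly like this (a sketch, not a literal excerpt):

  npergrid = 7;                                 // u + 6 virial components
  memory->destroy(gc_buf1);
  memory->destroy(gc_buf2);
  memory->create(gc_buf1,npergrid*ngc_buf1,"pppm:gc_buf1");
  memory->create(gc_buf2,npergrid*ngc_buf2,"pppm:gc_buf2");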
@ -919,9 +909,6 @@ template<class DeviceType>
void PPPMKokkos<DeviceType>::deallocate_peratom()
{
peratom_allocate_flag = 0;

delete cg_peratom;
cg_peratom = NULL;
}

/* ----------------------------------------------------------------------
@ -1185,153 +1172,11 @@ double PPPMKokkos<DeviceType>::final_accuracy()
template<class DeviceType>
void PPPMKokkos<DeviceType>::set_grid_local()
{
// global indices of PPPM grid range from 0 to N-1
// nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
// global PPPM grid that I own without ghost cells
// for slab PPPM, assign z grid as if it were not extended
PPPM::set_grid_local();

nxlo_in = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_pppm);
nxhi_in = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1;

nylo_in = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_pppm);
nyhi_in = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1;

nzlo_in = static_cast<int>
(comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor);
nzhi_in = static_cast<int>
(comm->zsplit[comm->myloc[2]+1] * nz_pppm/slab_volfactor) - 1;

// nlower,nupper = stencil size for mapping particles to PPPM grid

nlower = -(order-1)/2;
nupper = order/2;

// shift values for particle <-> grid mapping
// add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1

if (order % 2) shift = OFFSET + 0.5;
else shift = OFFSET;
if (order % 2) shiftone = 0.0;
else shiftone = 0.5;

// nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
// global PPPM grid that my particles can contribute charge to
// effectively nlo_in,nhi_in + ghost cells
// nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
// position a particle in my box can be at
// dist[3] = particle position bound = subbox + skin/2.0 + qdist
// qdist = offset due to TIP4P fictitious charge
// convert to triclinic if necessary
// nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
// for slab PPPM, assign z grid as if it were not extended

double *prd,*sublo,*subhi;

if (triclinic == 0) {
prd = domain->prd;
boxlo[0] = domain->boxlo[0];
boxlo[1] = domain->boxlo[1];
boxlo[2] = domain->boxlo[2];
sublo = domain->sublo;
subhi = domain->subhi;
} else {
prd = domain->prd_lamda;
boxlo[0] = domain->boxlo_lamda[0];
boxlo[1] = domain->boxlo_lamda[1];
boxlo[2] = domain->boxlo_lamda[2];
domain->x2lamda(atomKK->nlocal);
sublo = domain->sublo_lamda;
subhi = domain->subhi_lamda;
}

double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double zprd_slab = zprd*slab_volfactor;

double dist[3];
double cuthalf = 0.5*neighbor->skin + qdist;
if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf;
else kspacebbox(cuthalf,&dist[0]);

int nlo,nhi;

nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) *
nx_pppm/xprd + shift) - OFFSET;
nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) *
nx_pppm/xprd + shift) - OFFSET;
nxlo_out = nlo + nlower;
nxhi_out = nhi + nupper;

nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) *
ny_pppm/yprd + shift) - OFFSET;
nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) *
ny_pppm/yprd + shift) - OFFSET;
nylo_out = nlo + nlower;
nyhi_out = nhi + nupper;

nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) *
nz_pppm/zprd_slab + shift) - OFFSET;
nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) *
nz_pppm/zprd_slab + shift) - OFFSET;
nzlo_out = nlo + nlower;
nzhi_out = nhi + nupper;

// for slab PPPM, change the grid boundary for processors at +z end
// to include the empty volume between periodically repeating slabs
// for slab PPPM, want charge data communicated from -z proc to +z proc,
// but not vice versa, also want field data communicated from +z proc to
// -z proc, but not vice versa
// this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells)
// also ensure no other procs use ghost cells beyond +z limit

if (slabflag == 1) {
if (comm->myloc[2] == comm->procgrid[2]-1)
nzhi_in = nzhi_out = nz_pppm - 1;
nzhi_out = MIN(nzhi_out,nz_pppm-1);
}

// decomposition of FFT mesh
// global indices range from 0 to N-1
// proc owns entire x-dimension, clumps of columns in y,z dimensions
// npey_fft,npez_fft = # of procs in y,z dims
// if nprocs is small enough, proc can own 1 or more entire xy planes,
// else proc owns 2d sub-blocks of yz plane
// me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
// nlo_fft,nhi_fft = lower/upper limit of the section
// of the global FFT mesh that I own

int npey_fft,npez_fft;
if (nz_pppm >= nprocs) {
npey_fft = 1;
npez_fft = nprocs;
} else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);

int me_y = me % npey_fft;
int me_z = me / npey_fft;

nxlo_fft = 0;
nxhi_fft = nx_pppm - 1;
nylo_fft = me_y*ny_pppm/npey_fft;
nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
nzlo_fft = me_z*nz_pppm/npez_fft;
nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;

// PPPM grid pts owned by this proc, including ghosts

ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
(nzhi_out-nzlo_out+1);

// FFT grids owned by this proc, without ghosts
// nfft = FFT points in FFT decomposition on this proc
// nfft_brick = FFT points in 3d brick-decomposition on this proc
// nfft_both = greater of 2 values

nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) *
(nzhi_fft-nzlo_fft+1);
int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) *
(nzhi_in-nzlo_in+1);
nfft_both = MAX(nfft,nfft_brick);
boxlo[0] = domain->boxlo[0];
boxlo[1] = domain->boxlo[1];
boxlo[2] = domain->boxlo[2];
}

/* ----------------------------------------------------------------------
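A quick standalone demonstration of the OFFSET trick mentioned in the removed mapping code above: plain int() truncates toward zero, so int(-0.75) is 0 where the stencil mapping needs -1; adding a large positive OFFSET before truncating and subtracting it afterwards floors the value instead (OFFSET is 16384 in PPPM; any sufficiently large integer works for the illustration):

  #include <cstdio>

  int main() {
    const int OFFSET = 16384;
    double x = -0.75;
    printf("%d %d\n", (int) x, (int) (x + OFFSET) - OFFSET);  // prints: 0 -1
    return 0;
  }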
@ -2568,7 +2413,7 @@ void PPPMKokkos<DeviceType>::operator()(TagPPPM_fieldforce_peratom, const int &i
------------------------------------------------------------------------- */

template<class DeviceType>
void PPPMKokkos<DeviceType>::pack_forward_kspace_kokkos(int flag, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index)
void PPPMKokkos<DeviceType>::pack_forward_grid_kokkos(int flag, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index)
{
typename AT::t_int_2d_um d_list = k_list.view<DeviceType>();
d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL());
@ -2624,11 +2469,12 @@ void PPPMKokkos<DeviceType>::operator()(TagPPPM_pack_forward2, const int &i) con
------------------------------------------------------------------------- */

template<class DeviceType>
void PPPMKokkos<DeviceType>::unpack_forward_kspace_kokkos(int flag, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index)
void PPPMKokkos<DeviceType>::unpack_forward_grid_kokkos(int flag, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index)
{
typename AT::t_int_2d_um d_list = k_list.view<DeviceType>();
d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL());
d_buf = k_buf.view<DeviceType>();
unpack_offset = offset;

nx = (nxhi_out-nxlo_out+1);
ny = (nyhi_out-nylo_out+1);
@ -2652,9 +2498,9 @@ void PPPMKokkos<DeviceType>::operator()(TagPPPM_unpack_forward1, const int &i) c
const int iz = (int) (dlist/(nx*ny));
const int iy = (int) ((dlist - iz*nx*ny)/nx);
const int ix = d_list_index[i] - iz*nx*ny - iy*nx;
d_vdx_brick(iz,iy,ix) = d_buf[3*i];
d_vdy_brick(iz,iy,ix) = d_buf[3*i+1];
d_vdz_brick(iz,iy,ix) = d_buf[3*i+2];
d_vdx_brick(iz,iy,ix) = d_buf[3*i + unpack_offset];
d_vdy_brick(iz,iy,ix) = d_buf[3*i+1 + unpack_offset];
d_vdz_brick(iz,iy,ix) = d_buf[3*i+2 + unpack_offset];
}

template<class DeviceType>
@ -2667,12 +2513,12 @@ void PPPMKokkos<DeviceType>::operator()(TagPPPM_unpack_forward2, const int &i) c
const int ix = d_list_index[i] - iz*nx*ny - iy*nx;
if (eflag_atom) d_u_brick(iz,iy,ix) = d_buf[7*i];
if (vflag_atom) {
d_v0_brick(iz,iy,ix) = d_buf[7*i+1];
d_v1_brick(iz,iy,ix) = d_buf[7*i+2];
d_v2_brick(iz,iy,ix) = d_buf[7*i+3];
d_v3_brick(iz,iy,ix) = d_buf[7*i+4];
d_v4_brick(iz,iy,ix) = d_buf[7*i+5];
d_v5_brick(iz,iy,ix) = d_buf[7*i+6];
d_v0_brick(iz,iy,ix) = d_buf[7*i+1 + unpack_offset];
d_v1_brick(iz,iy,ix) = d_buf[7*i+2 + unpack_offset];
d_v2_brick(iz,iy,ix) = d_buf[7*i+3 + unpack_offset];
d_v3_brick(iz,iy,ix) = d_buf[7*i+4 + unpack_offset];
d_v4_brick(iz,iy,ix) = d_buf[7*i+5 + unpack_offset];
d_v5_brick(iz,iy,ix) = d_buf[7*i+6 + unpack_offset];
}
}
@ -2681,7 +2527,7 @@ void PPPMKokkos<DeviceType>::operator()(TagPPPM_unpack_forward2, const int &i) c
------------------------------------------------------------------------- */

template<class DeviceType>
void PPPMKokkos<DeviceType>::pack_reverse_kspace_kokkos(int /*flag*/, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index)
void PPPMKokkos<DeviceType>::pack_reverse_grid_kokkos(int /*flag*/, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index)
{
typename AT::t_int_2d_um d_list = k_list.view<DeviceType>();
d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL());
@ -2711,11 +2557,12 @@ void PPPMKokkos<DeviceType>::operator()(TagPPPM_pack_reverse, const int &i) cons
------------------------------------------------------------------------- */

template<class DeviceType>
void PPPMKokkos<DeviceType>::unpack_reverse_kspace_kokkos(int /*flag*/, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index)
void PPPMKokkos<DeviceType>::unpack_reverse_grid_kokkos(int /*flag*/, FFT_DAT::tdual_FFT_SCALAR_1d &k_buf, int offset, int nlist, DAT::tdual_int_2d &k_list, int index)
{
typename AT::t_int_2d_um d_list = k_list.view<DeviceType>();
d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL());
d_buf = k_buf.view<DeviceType>();
unpack_offset = offset;

nx = (nxhi_out-nxlo_out+1);
ny = (nyhi_out-nylo_out+1);
@ -2733,7 +2580,7 @@ void PPPMKokkos<DeviceType>::operator()(TagPPPM_unpack_reverse, const int &i) co
const int iz = (int) (dlist/(nx*ny));
const int iy = (int) ((dlist - iz*nx*ny)/nx);
const int ix = d_list_index[i] - iz*nx*ny - iy*nx;
d_density_brick(iz,iy,ix) += d_buf[i];
d_density_brick(iz,iy,ix) += d_buf[i + unpack_offset];
}

/* ----------------------------------------------------------------------
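unpack_offset exists because the tiled layout can receive several messages into one large buffer; each Recv records where its message starts, and that grid-cell offset is scaled by the per-cell datum count at call time. A hedged sketch of how such receives could be posted, with names matching the GridComm header later in this diff (assumed usage, not a literal excerpt):

  // land each incoming message at its own offset within buf2
  for (int m = 0; m < nrecv; m++)
    MPI_Irecv(&buf2[nper*recv[m].offset],nper*recv[m].nunpack,datatype,
              recv[m].proc,0,gridcomm,&requests[m]);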
@ -3044,7 +2891,9 @@ double PPPMKokkos<DeviceType>::memory_usage()
if (peratom_allocate_flag)
bytes += 6 * nbrick * sizeof(FFT_SCALAR);

if (cg) bytes += cg->memory_usage();
// two GridComm bufs

bytes += (ngc_buf1 + ngc_buf2) * npergrid * sizeof(FFT_SCALAR);

return bytes;
}
@ -311,6 +311,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT {
int nx,ny,nz;
typename AT::t_int_1d_um d_list_index;
typename FFT_AT::t_FFT_SCALAR_1d_um d_buf;
int unpack_offset;

DAT::tdual_int_scalar k_flag;
@ -353,10 +354,14 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT {
//double **acons;
typename Kokkos::DualView<F_FLOAT[8][7],Kokkos::LayoutRight,DeviceType>::t_host acons;

// FFTs and grid communication

FFT3dKokkos<DeviceType> *fft1,*fft2;
RemapKokkos<DeviceType> *remap;
GridCommKokkos<DeviceType> *cg;
GridCommKokkos<DeviceType> *cg_peratom;
GridCommKokkos<DeviceType> *gc;

FFT_DAT::tdual_FFT_SCALAR_1d k_gc_buf1,k_gc_buf2;
int ngc_buf1,ngc_buf2,npergrid;

//int **part2grid; // storage for particle -> grid mapping
typename AT::t_int_1d_3 d_part2grid;
@ -403,10 +408,10 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT {

// grid communication

void pack_forward_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int);
void unpack_forward_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int);
void pack_reverse_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int);
void unpack_reverse_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int);
void pack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int);
void unpack_forward_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int);
void pack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int);
void unpack_reverse_grid_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, int, DAT::tdual_int_2d &, int);

// triclinic
@ -55,7 +55,7 @@ public:
typedef Kokkos::View<SNAcomplex**[3], DeviceType> t_sna_3c3;
typedef Kokkos::View<SNAcomplex*****, DeviceType> t_sna_5c;

typedef Kokkos::View<CayleyKleinPack**, DeviceType> t_sna_2ckp;

inline
SNAKokkos() {};
@ -120,7 +120,7 @@ void SNAKokkos<DeviceType>::build_indexlist()
idxu_max = idxu_count;
Kokkos::deep_copy(idxu_block,h_idxu_block);

// index list for half uarray
idxu_half_block = Kokkos::View<int*, DeviceType>("SNAKokkos::idxu_half_block",jdim);
auto h_idxu_half_block = Kokkos::create_mirror_view(idxu_half_block);
@ -316,7 +316,7 @@ void SNAKokkos<DeviceType>::grow_rij(int newnatom, int newnmax)

/* ----------------------------------------------------------------------
Precompute the Cayley-Klein parameters and the derivatives thereof.
This routine better exploits parallelism than the GPU ComputeUi and
ComputeFusedDeidrj, which are one warp per atom-neighbor pair.
------------------------------------------------------------------------- */
@ -339,7 +339,7 @@ void SNAKokkos<DeviceType>::compute_cayley_klein(const int& iatom, const int& jn
const double dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq;

//const double wj_local = wj(iatom, jnbor);
double sfac, dsfac;
compute_s_dsfac(r, rcut, sfac, dsfac);
sfac *= wj_local;
dsfac *= wj_local;
@ -643,7 +643,7 @@ void SNAKokkos<DeviceType>::compute_zi(const int& iatom_mod, const int& jjz, con
/* ----------------------------------------------------------------------
compute Bi by summing conj(Ui)*Zi
AoSoA data layout to take advantage of coalescing, avoiding warp
divergence.
------------------------------------------------------------------------- */

template<class DeviceType>
@ -1414,7 +1414,7 @@ void SNAKokkos<DeviceType>::compute_deidrj_cpu(const typename Kokkos::TeamPolicy
for(int ma = 0; ma <= j; ma++) {
sum_tmp.x += dulist(jju_cache,iatom,jnbor,0).re * ylist(jju_half,jelem,iatom).re +
dulist(jju_cache,iatom,jnbor,0).im * ylist(jju_half,jelem,iatom).im;
sum_tmp.y += dulist(jju_cache,iatom,jnbor,1).re * ylist(jju_half,jelem,iatom).re +
dulist(jju_cache,iatom,jnbor,1).im * ylist(jju_half,jelem,iatom).im;
sum_tmp.z += dulist(jju_cache,iatom,jnbor,2).re * ylist(jju_half,jelem,iatom).re +
dulist(jju_cache,iatom,jnbor,2).im * ylist(jju_half,jelem,iatom).im;

@ -1429,9 +1429,9 @@ void SNAKokkos<DeviceType>::compute_deidrj_cpu(const typename Kokkos::TeamPolicy
for(int ma = 0; ma < mb; ma++) {
sum_tmp.x += dulist(jju_cache,iatom,jnbor,0).re * ylist(jju_half,jelem,iatom).re +
dulist(jju_cache,iatom,jnbor,0).im * ylist(jju_half,jelem,iatom).im;
sum_tmp.y += dulist(jju_cache,iatom,jnbor,1).re * ylist(jju_half,jelem,iatom).re +
dulist(jju_cache,iatom,jnbor,1).im * ylist(jju_half,jelem,iatom).im;
sum_tmp.z += dulist(jju_cache,iatom,jnbor,2).re * ylist(jju_half,jelem,iatom).re +
dulist(jju_cache,iatom,jnbor,2).im * ylist(jju_half,jelem,iatom).im;
jju_half++; jju_cache++;
}
@ -1439,9 +1439,9 @@ void SNAKokkos<DeviceType>::compute_deidrj_cpu(const typename Kokkos::TeamPolicy
//int ma = mb;
sum_tmp.x += (dulist(jju_cache,iatom,jnbor,0).re * ylist(jju_half,jelem,iatom).re +
dulist(jju_cache,iatom,jnbor,0).im * ylist(jju_half,jelem,iatom).im)*0.5;
sum_tmp.y += (dulist(jju_cache,iatom,jnbor,1).re * ylist(jju_half,jelem,iatom).re +
dulist(jju_cache,iatom,jnbor,1).im * ylist(jju_half,jelem,iatom).im)*0.5;
sum_tmp.z += (dulist(jju_cache,iatom,jnbor,2).re * ylist(jju_half,jelem,iatom).re +
dulist(jju_cache,iatom,jnbor,2).im * ylist(jju_half,jelem,iatom).im)*0.5;
} // end if jeven
@ -2170,7 +2170,7 @@ double SNAKokkos<DeviceType>::memory_usage()

#ifdef LMP_KOKKOS_GPU
if (std::is_same<DeviceType,Kokkos::Cuda>::value) {

auto natom_pad = (natom+32-1)/32;

bytes += natom * idxu_half_max * nelements * sizeof(double); // ulisttot_re
@ -198,6 +198,7 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)
(FFT_SCALAR *) plan->scratch, plan->post_plan);

// scaling if required

if (flag == 1 && plan->scaled) {
norm = plan->norm;
num = plan->normnum;
File diff suppressed because it is too large
@ -16,55 +16,57 @@

#include "pointers.h"

#ifdef FFT_SINGLE
typedef float FFT_SCALAR;
#define MPI_FFT_SCALAR MPI_FLOAT
#else
typedef double FFT_SCALAR;
#define MPI_FFT_SCALAR MPI_DOUBLE
#endif

namespace LAMMPS_NS {

class GridComm : protected Pointers {
public:
GridComm(class LAMMPS *, MPI_Comm, int, int,
GridComm(class LAMMPS *, MPI_Comm, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int);
GridComm(class LAMMPS *, MPI_Comm, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int);
GridComm(class LAMMPS *, MPI_Comm, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int);
~GridComm();
void ghost_notify();
int ghost_overlap();
void setup();
void forward_comm(class KSpace *, int);
void reverse_comm(class KSpace *, int);
double memory_usage();
virtual ~GridComm();
void setup(int &, int &);
int ghost_adjacent();
void forward_comm_kspace(class KSpace *, int, int, int,
void *, void *, MPI_Datatype);
void reverse_comm_kspace(class KSpace *, int, int, int,
void *, void *, MPI_Datatype);

private:
int me;
int nforward,nreverse;
MPI_Comm gridcomm;
MPI_Request request;
protected:
int me,nprocs;
int layout; // REGULAR or TILED
MPI_Comm gridcomm; // communicator for this class
// usually world, but MSM calls with subset

// in = inclusive indices of 3d grid chunk that I own
// out = inclusive indices of 3d grid chunk I own plus ghosts I use
// proc = 6 neighbor procs that surround me
// ghost = # of my owned grid planes needed from me
// by each of 6 neighbor procs to become their ghost planes
// inputs from caller via constructor

int inxlo,inxhi,inylo,inyhi,inzlo,inzhi;
int outxlo,outxhi,outylo,outyhi,outzlo,outzhi;
int outxlo_max,outxhi_max,outylo_max,outyhi_max,outzlo_max,outzhi_max;
int procxlo,procxhi,procylo,procyhi,proczlo,proczhi;
int ghostxlo,ghostxhi,ghostylo,ghostyhi,ghostzlo,ghostzhi;
int nx,ny,nz; // size of global grid in all 3 dims
int inxlo,inxhi; // inclusive extent of my grid chunk
int inylo,inyhi; // 0 <= in <= N-1
int inzlo,inzhi;
int outxlo,outxhi; // inclusive extent of my grid chunk plus
int outylo,outyhi; // ghost cells in all 6 directions
int outzlo,outzhi; // lo indices can be < 0, hi indices can be >= N
int fullxlo,fullxhi; // extent of grid chunk that caller stores
int fullylo,fullyhi; // can be same as out indices or larger
int fullzlo,fullzhi;

int nbuf;
FFT_SCALAR *buf1,*buf2;
// -------------------------------------------
// internal variables for REGULAR layout
// -------------------------------------------

int procxlo,procxhi; // 6 neighbor procs that adjoin me
int procylo,procyhi; // not used for comm_style = tiled
int proczlo,proczhi;

int ghostxlo,ghostxhi; // # of my owned grid planes needed
int ghostylo,ghostyhi; // by neighbor procs in each dir as their ghost planes
int ghostzlo,ghostzhi;

// swap = exchange of owned and ghost grid cells between 2 procs, including self

struct Swap {
int sendproc; // proc to send to for forward comm

@ -75,9 +77,130 @@ class GridComm : protected Pointers {
int *unpacklist; // 3d array offsets to unpack
};

int nswap;
int nswap,maxswap;
Swap *swap;

// -------------------------------------------
// internal variables for TILED layout
// -------------------------------------------

int *overlap_procs; // length of Nprocs in communicator
MPI_Request *requests; // length of max messages this proc receives

// RCB tree of cut info
// each proc contributes one value, except proc 0

struct RCBinfo {
int dim; // 0,1,2 = which dim the cut is in
int cut; // grid index of lowest cell in upper half of cut
};

RCBinfo *rcbinfo;

// overlap = a proc whose owned cells overlap with my extended ghost box
// includes overlaps across periodic boundaries, can also be self

struct Overlap {
int proc; // proc whose owned cells overlap my ghost cells
int box[6]; // box that overlaps otherproc's owned cells
// this box is wholly contained within global grid
int pbc[3]; // PBC offsets to convert box to a portion of my ghost box
// my ghost box may extend beyond global grid
};

int noverlap,maxoverlap;
Overlap *overlap;

// request = sent to each proc whose owned cells overlap my ghost cells

struct Request {
int sender; // sending proc
int index; // index of overlap on sender
int box[6]; // box that overlaps receiver's owned cells
// wholly contained within global grid
};

Request *srequest,*rrequest;

// response = reply from each proc whose owned cells overlap my ghost cells

struct Response {
int index; // index of my overlap for the initial request
int box[6]; // box that overlaps responder's owned cells
// wholly contained within global grid
// has to be unwrapped by PBC to map to my ghost cells
};

Response *sresponse,*rresponse;

// send = proc to send a subset of my owned cells to, for forward comm
// for reverse comm, proc I receive ghost overlaps with my owned cells from
// offset used in reverse comm to recv a message in middle of a large buffer

struct Send {
int proc;
int npack;
int *packlist;
int offset;
};

// recv = proc to recv a subset of my ghost cells from, for forward comm
// for reverse comm, proc I send a subset of my ghost cells to
// offset used in forward comm to recv a message in middle of a large buffer

struct Recv {
int proc;
int nunpack;
int *unpacklist;
int offset;
};

int adjacent; // 0 on a proc who receives ghosts from a non-neighbor proc

// copy = subset of my owned cells to copy into subset of my ghost cells
// that describes forward comm, for reverse comm it is the opposite

struct Copy {
int npack;
int nunpack;
int *packlist;
int *unpacklist;
};

int nsend,nrecv,ncopy;
Send *send;
Recv *recv;
Copy *copy;

// -------------------------------------------
// internal methods
// -------------------------------------------

void initialize(MPI_Comm, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int);
virtual void setup_regular(int &, int &);
virtual void setup_tiled(int &, int &);
void ghost_box_drop(int *, int *);
void box_drop_grid(int *, int, int, int &, int *);

int ghost_adjacent_regular();
int ghost_adjacent_tiled();

void forward_comm_kspace_regular(class KSpace *, int, int, int,
void *, void *, MPI_Datatype);
void forward_comm_kspace_tiled(class KSpace *, int, int, int,
void *, void *, MPI_Datatype);
void reverse_comm_kspace_regular(class KSpace *, int, int, int,
void *, void *, MPI_Datatype);
void reverse_comm_kspace_tiled(class KSpace *, int, int, int,
void *, void *, MPI_Datatype);

virtual void grow_swap();
void grow_overlap();

int indices(int *&, int, int, int, int, int, int);
};
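The TILED machinery above works as a three-step handshake: drop my ghost box down the RCB cut tree to find every owner it overlaps, send each owner a Request, and receive a Response carrying the PBC-unwrapped box I will actually get. A hedged illustration of the recursive box drop (hypothetical helper; the real ghost_box_drop()/box_drop_grid() signatures are declared above):

  // sketch: recurse down RCB cuts until a single proc's domain remains;
  // box = {xlo,xhi,ylo,yhi,zlo,zhi}, procs [plo,phi) own this subtree
  void drop(int *box, int plo, int phi)
  {
    if (phi - plo == 1) { overlap_procs[noverlap++] = plo; return; }
    int pmid = plo + (phi-plo)/2;                 // hypothetical balanced split
    int dim = rcbinfo[pmid].dim;
    int cut = rcbinfo[pmid].cut;
    if (box[2*dim] < cut) drop(box,plo,pmid);     // overlaps lower half
    if (box[2*dim+1] >= cut) drop(box,pmid,phi);  // overlaps upper half
  }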
@ -42,21 +42,24 @@ using namespace MathConst;

enum{REVERSE_RHO,REVERSE_AD,REVERSE_AD_PERATOM};
enum{FORWARD_RHO,FORWARD_AD,FORWARD_AD_PERATOM};

/* ---------------------------------------------------------------------- */

MSM::MSM(LAMMPS *lmp) : KSpace(lmp),
factors(NULL), delxinv(NULL), delyinv(NULL), delzinv(NULL), nx_msm(NULL),
ny_msm(NULL), nz_msm(NULL), nxlo_in(NULL), nylo_in(NULL), nzlo_in(NULL),
nxhi_in(NULL), nyhi_in(NULL), nzhi_in(NULL), nxlo_out(NULL), nylo_out(NULL),
nzlo_out(NULL), nxhi_out(NULL), nyhi_out(NULL), nzhi_out(NULL), ngrid(NULL),
active_flag(NULL), alpha(NULL), betax(NULL), betay(NULL), betaz(NULL), peratom_allocate_flag(0),
levels(0), world_levels(NULL), qgrid(NULL), egrid(NULL), v0grid(NULL), v1grid(NULL),
v2grid(NULL), v3grid(NULL), v4grid(NULL), v5grid(NULL), g_direct(NULL),
v0_direct(NULL), v1_direct(NULL), v2_direct(NULL), v3_direct(NULL), v4_direct(NULL),
v5_direct(NULL), g_direct_top(NULL), v0_direct_top(NULL), v1_direct_top(NULL),
v2_direct_top(NULL), v3_direct_top(NULL), v4_direct_top(NULL), v5_direct_top(NULL),
phi1d(NULL), dphi1d(NULL), procneigh_levels(NULL), cg(NULL), cg_peratom(NULL),
cg_all(NULL), cg_peratom_all(NULL), part2grid(NULL), boxlo(NULL)
MSM::MSM(LAMMPS *lmp)
: KSpace(lmp),
factors(NULL), delxinv(NULL), delyinv(NULL), delzinv(NULL), nx_msm(NULL),
ny_msm(NULL), nz_msm(NULL), nxlo_in(NULL), nylo_in(NULL), nzlo_in(NULL),
nxhi_in(NULL), nyhi_in(NULL), nzhi_in(NULL), nxlo_out(NULL), nylo_out(NULL),
nzlo_out(NULL), nxhi_out(NULL), nyhi_out(NULL), nzhi_out(NULL), ngrid(NULL),
active_flag(NULL), alpha(NULL), betax(NULL), betay(NULL), betaz(NULL),
peratom_allocate_flag(0),levels(0),world_levels(NULL),qgrid(NULL),egrid(NULL),
v0grid(NULL), v1grid(NULL),v2grid(NULL),v3grid(NULL),v4grid(NULL),v5grid(NULL),
g_direct(NULL),v0_direct(NULL),v1_direct(NULL),v2_direct(NULL),v3_direct(NULL),
v4_direct(NULL),v5_direct(NULL),g_direct_top(NULL),v0_direct_top(NULL),
v1_direct_top(NULL),v2_direct_top(NULL),v3_direct_top(NULL),v4_direct_top(NULL),
v5_direct_top(NULL),phi1d(NULL),dphi1d(NULL),procneigh_levels(NULL),gcall(NULL),
gc(NULL),gcall_buf1(NULL),gcall_buf2(NULL),gc_buf1(NULL),gc_buf2(NULL),
ngc_buf1(NULL),ngc_buf2(NULL),part2grid(NULL),boxlo(NULL)
{
msmflag = 1;
@ -117,6 +120,7 @@ MSM::~MSM()
delete [] factors;
deallocate();
if (peratom_allocate_flag) deallocate_peratom();
deallocate_levels();
memory->destroy(part2grid);
memory->destroy(g_direct);
memory->destroy(g_direct_top);
@ -132,7 +136,6 @@ MSM::~MSM()
memory->destroy(v3_direct_top);
memory->destroy(v4_direct_top);
memory->destroy(v5_direct_top);
deallocate_levels();
}

/* ----------------------------------------------------------------------
@ -397,17 +400,6 @@ void MSM::setup()
// don't invoke allocate_peratom(), compute() will allocate when needed

allocate();

// setup commgrid

cg_all->ghost_notify();
cg_all->setup();
for (int n=0; n<levels; n++) {
if (!active_flag[n]) continue;
cg[n]->ghost_notify();
cg[n]->setup();
}

}

/* ----------------------------------------------------------------------
@ -448,16 +440,7 @@ void MSM::compute(int eflag, int vflag)

// invoke allocate_peratom() if needed for first time

if (vflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom_all->ghost_notify();
cg_peratom_all->setup();
for (int n=0; n<levels; n++) {
if (!active_flag[n]) continue;
cg_peratom[n]->ghost_notify();
cg_peratom[n]->setup();
}
}
if (vflag_atom && !peratom_allocate_flag) allocate_peratom();

// convert atoms from box to lamda coords
@ -483,7 +466,8 @@ void MSM::compute(int eflag, int vflag)
// to fully sum contribution in their 3d grid

current_level = 0;
cg_all->reverse_comm(this,REVERSE_RHO);
gcall->reverse_comm_kspace(this,1,sizeof(double),REVERSE_RHO,
gcall_buf1,gcall_buf2,MPI_DOUBLE);

// forward communicate charge density values to fill ghost grid points
// compute direct sum interaction and then restrict to coarser grid
@ -491,8 +475,8 @@ void MSM::compute(int eflag, int vflag)
for (int n=0; n<=levels-2; n++) {
if (!active_flag[n]) continue;
current_level = n;
cg[n]->forward_comm(this,FORWARD_RHO);

gc[n]->forward_comm_kspace(this,1,sizeof(double),FORWARD_RHO,
gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
direct(n);
restriction(n);
}
@ -503,11 +487,18 @@ void MSM::compute(int eflag, int vflag)
if (active_flag[levels-1]) {
if (domain->nonperiodic) {
current_level = levels-1;
cg[levels-1]->forward_comm(this,FORWARD_RHO);
gc[levels-1]->
forward_comm_kspace(this,1,sizeof(double),FORWARD_RHO,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
direct_top(levels-1);
cg[levels-1]->reverse_comm(this,REVERSE_AD);
gc[levels-1]->
reverse_comm_kspace(this,1,sizeof(double),REVERSE_AD,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
if (vflag_atom)
cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
gc[levels-1]->
reverse_comm_kspace(this,6,sizeof(double),REVERSE_AD_PERATOM,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);

} else {
// Here using MPI_Allreduce is cheaper than using commgrid
grid_swap_forward(levels-1,qgrid[levels-1]);
@ -515,7 +506,9 @@ void MSM::compute(int eflag, int vflag)
grid_swap_reverse(levels-1,egrid[levels-1]);
current_level = levels-1;
if (vflag_atom)
cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
gc[levels-1]->
reverse_comm_kspace(this,6,sizeof(double),REVERSE_AD_PERATOM,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
}
}
@ -527,24 +520,28 @@ void MSM::compute(int eflag, int vflag)
prolongation(n);

current_level = n;
cg[n]->reverse_comm(this,REVERSE_AD);
gc[n]->reverse_comm_kspace(this,1,sizeof(double),REVERSE_AD,
gc_buf1[n],gc_buf2[n],MPI_DOUBLE);

// extra per-atom virial communication

if (vflag_atom)
cg_peratom[n]->reverse_comm(this,REVERSE_AD_PERATOM);
gc[n]->reverse_comm_kspace(this,6,sizeof(double),REVERSE_AD_PERATOM,
gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
}

// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks

current_level = 0;
cg_all->forward_comm(this,FORWARD_AD);
gcall->forward_comm_kspace(this,1,sizeof(double),FORWARD_AD,
gcall_buf1,gcall_buf2,MPI_DOUBLE);

// extra per-atom energy/virial communication

if (vflag_atom)
cg_peratom_all->forward_comm(this,FORWARD_AD_PERATOM);
gcall->forward_comm_kspace(this,6,sizeof(double),FORWARD_AD_PERATOM,
gcall_buf1,gcall_buf2,MPI_DOUBLE);

// calculate the force on my particles (interpolation)
@ -603,8 +600,7 @@ void MSM::compute(int eflag, int vflag)
|
|||
|
||||
// convert atoms back from lamda to box coords
|
||||
|
||||
if (triclinic)
|
||||
domain->lamda2x(atom->nlocal);
|
||||
if (triclinic) domain->lamda2x(atom->nlocal);
|
||||
}
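Taken together, the hunks above keep MSM's multilevel control flow intact while swapping in the new GridComm calls: sum ghost charge on the finest grid, descend the hierarchy (fill ghosts, direct sum, restrict), handle the top level, then ascend (prolongate, sum ghost energies) before the final ghost fill of the E-field. A self-contained sketch of that ordering, with all communication and math reduced to print stubs; the function names and level count below are illustrative reading aids, not LAMMPS code:

#include <cstdio>

const int levels = 3;
bool active(int) { return true; }            // stand-in for active_flag[n]

void reverse_sum_rho()    { puts("gcall: sum ghost rho into owned cells"); }
void forward_fill(int n)  { printf("gc[%d]: fill ghost rho\n", n); }
void direct_sum(int n)    { printf("level %d: direct sum\n", n); }
void restrict_down(int n) { printf("level %d -> %d: restriction\n", n, n+1); }
void prolongate_up(int n) { printf("level %d -> %d: prolongation\n", n+1, n); }
void reverse_energy(int n){ printf("gc[%d]: sum ghost energy\n", n); }

int main() {
  reverse_sum_rho();                         // finest grid, all procs
  for (int n = 0; n <= levels-2; n++) {      // descend the hierarchy
    if (!active(n)) continue;
    forward_fill(n); direct_sum(n); restrict_down(n);
  }
  direct_sum(levels-1);                      // top (coarsest) level
  for (int n = levels-2; n >= 0; n--) {      // ascend again
    if (!active(n)) continue;
    prolongate_up(n); reverse_energy(n);
  }
}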
/* ----------------------------------------------------------------------

@@ -621,15 +617,18 @@ void MSM::allocate()

// commgrid using all processors for finest grid level

int (*procneigh_all)[2] = comm->procneigh;
gcall = new GridComm(lmp,world,1,nx_msm[0],ny_msm[0],nz_msm[0],
                     nxlo_in[0],nxhi_in[0],nylo_in[0],
                     nyhi_in[0],nzlo_in[0],nzhi_in[0],
                     nxlo_out_all,nxhi_out_all,nylo_out_all,
                     nyhi_out_all,nzlo_out_all,nzhi_out_all,
                     nxlo_out[0],nxhi_out[0],nylo_out[0],
                     nyhi_out[0],nzlo_out[0],nzhi_out[0]);

cg_all = new GridComm(lmp,world,1,1,
                      nxlo_in[0],nxhi_in[0],nylo_in[0],nyhi_in[0],nzlo_in[0],nzhi_in[0],
                      nxlo_out_all,nxhi_out_all,nylo_out_all,nyhi_out_all,nzlo_out_all,nzhi_out_all,
                      nxlo_out[0],nxhi_out[0],nylo_out[0],nyhi_out[0],nzlo_out[0],nzhi_out[0],
                      procneigh_all[0][0],procneigh_all[0][1],procneigh_all[1][0],
                      procneigh_all[1][1],procneigh_all[2][0],procneigh_all[2][1]);
gcall->setup(ngcall_buf1,ngcall_buf2);
npergrid = 1;
memory->create(gcall_buf1,npergrid*ngcall_buf1,"msm:gcall_buf1");
memory->create(gcall_buf2,npergrid*ngcall_buf2,"msm:gcall_buf2");

// allocate memory for each grid level

@@ -644,12 +643,23 @@ void MSM::allocate()

if (active_flag[n]) {
int **procneigh = procneigh_levels[n];
cg[n] = new GridComm(lmp,world_levels[n],1,1,
                     nxlo_in[n],nxhi_in[n],nylo_in[n],nyhi_in[n],nzlo_in[n],nzhi_in[n],
                     nxlo_out[n],nxhi_out[n],nylo_out[n],nyhi_out[n],nzlo_out[n],nzhi_out[n],
                     procneigh[0][0],procneigh[0][1],procneigh[1][0],
                     procneigh[1][1],procneigh[2][0],procneigh[2][1]);
} else cg[n] = nullptr;
gc[n] = new GridComm(lmp,world_levels[n],2,nx_msm[n],ny_msm[n],nz_msm[n],
                     nxlo_in[n],nxhi_in[n],nylo_in[n],nyhi_in[n],
                     nzlo_in[n],nzhi_in[n],
                     nxlo_out[n],nxhi_out[n],nylo_out[n],nyhi_out[n],
                     nzlo_out[n],nzhi_out[n],
                     procneigh[0][0],procneigh[0][1],procneigh[1][0],
                     procneigh[1][1],procneigh[2][0],procneigh[2][1]);

gc[n]->setup(ngc_buf1[n],ngc_buf2[n]);
npergrid = 1;
memory->create(gc_buf1[n],npergrid*ngc_buf1[n],"msm:gc_buf1");
memory->create(gc_buf2[n],npergrid*ngc_buf2[n],"msm:gc_buf2");

} else {
gc[n] = nullptr;
gc_buf1[n] = gc_buf2[n] = nullptr;
}
}
}
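The allocation pattern above replaces the old ghost_notify()/setup() handshake: setup() now reports the send/receive counts in grid points, and the caller owns the buffers, sized as npergrid values per point. A minimal self-contained model of that contract; GridCommStub and its sizes are invented for illustration:

#include <cstdio>
#include <cstdlib>

struct GridCommStub {
  // stands in for GridComm::setup(int &, int &) as used above
  void setup(int &nbuf1, int &nbuf2) { nbuf1 = 128; nbuf2 = 256; }
};

int main() {
  GridCommStub gc;
  int ngc_buf1, ngc_buf2;
  gc.setup(ngc_buf1, ngc_buf2);          // sizes in grid points, not bytes
  int npergrid = 1;                      // 1 value per point for rho/energy
  double *gc_buf1 = (double *) malloc(npergrid*ngc_buf1*sizeof(double));
  double *gc_buf2 = (double *) malloc(npergrid*ngc_buf2*sizeof(double));
  printf("buf1: %d doubles, buf2: %d doubles\n",
         npergrid*ngc_buf1, npergrid*ngc_buf2);
  free(gc_buf1); free(gc_buf2);
}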
@@ -662,8 +672,11 @@ void MSM::deallocate()

memory->destroy2d_offset(phi1d,-order_allocated);
memory->destroy2d_offset(dphi1d,-order_allocated);

if (cg_all) delete cg_all;
cg_all = nullptr;
if (gcall) delete gcall;
memory->destroy(gcall_buf1);
memory->destroy(gcall_buf2);
gcall = nullptr;
gcall_buf1 = gcall_buf2 = nullptr;

for (int n=0; n<levels; n++) {
if (qgrid[n])

@@ -676,10 +689,13 @@ void MSM::deallocate()

if (world_levels[n] != MPI_COMM_NULL)
MPI_Comm_free(&world_levels[n]);

if (cg) {
if (cg[n]) {
delete cg[n];
cg[n] = nullptr;
if (gc) {
if (gc[n]) {
delete gc[n];
memory->destroy(gc_buf1[n]);
memory->destroy(gc_buf2[n]);
gc[n] = nullptr;
gc_buf1[n] = gc_buf2[n] = nullptr;
}
}
}

@@ -695,15 +711,11 @@ void MSM::allocate_peratom()

// create commgrid object for per-atom virial using all processors

int (*procneigh_all)[2] = comm->procneigh;

cg_peratom_all =
new GridComm(lmp,world,6,6,
             nxlo_in[0],nxhi_in[0],nylo_in[0],nyhi_in[0],nzlo_in[0],nzhi_in[0],
             nxlo_out_all,nxhi_out_all,nylo_out_all,nyhi_out_all,nzlo_out_all,nzhi_out_all,
             nxlo_out[0],nxhi_out[0],nylo_out[0],nyhi_out[0],nzlo_out[0],nzhi_out[0],
             procneigh_all[0][0],procneigh_all[0][1],procneigh_all[1][0],
             procneigh_all[1][1],procneigh_all[2][0],procneigh_all[2][1]);
npergrid = 6;
memory->destroy(gcall_buf1);
memory->destroy(gcall_buf2);
memory->create(gcall_buf1,npergrid*ngcall_buf1,"pppm:gcall_buf1");
memory->create(gcall_buf2,npergrid*ngcall_buf2,"pppm:gcall_buf2");

// allocate memory for each grid level

@@ -724,13 +736,11 @@ void MSM::allocate_peratom()

// create commgrid object for per-atom virial

if (active_flag[n]) {
int **procneigh = procneigh_levels[n];
cg_peratom[n] =
new GridComm(lmp,world_levels[n],6,6,
             nxlo_in[n],nxhi_in[n],nylo_in[n],nyhi_in[n],nzlo_in[n],nzhi_in[n],
             nxlo_out[n],nxhi_out[n],nylo_out[n],nyhi_out[n],nzlo_out[n],nzhi_out[n],
             procneigh[0][0],procneigh[0][1],procneigh[1][0],
             procneigh[1][1],procneigh[2][0],procneigh[2][1]);
npergrid = 6;
memory->destroy(gc_buf1[n]);
memory->destroy(gc_buf2[n]);
memory->create(gc_buf1[n],npergrid*ngc_buf1[n],"pppm:gc_buf1");
memory->create(gc_buf2[n],npergrid*ngc_buf2[n],"pppm:gc_buf2");
}
}
}

@@ -743,8 +753,6 @@ void MSM::deallocate_peratom()

{
peratom_allocate_flag = 0;

if (cg_peratom_all) delete cg_peratom_all;

for (int n=0; n<levels; n++) {
if (v0grid[n])
memory->destroy3d_offset(v0grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);

@@ -758,9 +766,6 @@ void MSM::deallocate_peratom()

memory->destroy3d_offset(v4grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
if (v5grid[n])
memory->destroy3d_offset(v5grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);

if (cg_peratom)
if (cg_peratom[n]) delete cg_peratom[n];
}
}

@@ -772,8 +777,11 @@ void MSM::allocate_levels()

{
ngrid = new int[levels];

cg = new GridComm*[levels];
cg_peratom = new GridComm*[levels];
gc = new GridComm*[levels];
gc_buf1 = new double*[levels];
gc_buf2 = new double*[levels];
ngc_buf1 = new int[levels];
ngc_buf2 = new int[levels];

memory->create(procneigh_levels,levels,3,2,"msm:procneigh_levels");
world_levels = new MPI_Comm[levels];

@@ -819,9 +827,8 @@ void MSM::allocate_levels()

v5grid = new double***[levels];

for (int n=0; n<levels; n++) {
cg[n] = NULL;
gc[n] = NULL;
world_levels[n] = MPI_COMM_NULL;
cg_peratom[n] = NULL;

qgrid[n] = NULL;
egrid[n] = NULL;

@@ -833,7 +840,6 @@ void MSM::allocate_levels()

v4grid[n] = NULL;
v5grid[n] = NULL;
}

}

/* ----------------------------------------------------------------------

@@ -842,15 +848,18 @@ void MSM::allocate_levels()

void MSM::deallocate_levels()
{
if (cg) deallocate();
delete [] ngrid;
ngrid = nullptr;

memory->destroy(procneigh_levels);
delete [] world_levels;
delete [] active_flag;
delete [] cg;
delete [] cg_peratom;

delete [] gc;
delete [] gc_buf1;
delete [] gc_buf2;
delete [] ngc_buf1;
delete [] ngc_buf2;

delete [] alpha;
delete [] betax;

@@ -893,8 +902,8 @@ void MSM::deallocate_levels()

world_levels = nullptr;
active_flag = nullptr;
cg = nullptr;
cg_peratom = nullptr;
gc = nullptr;
gc_buf1 = gc_buf2 = nullptr;

alpha = nullptr;
betax = nullptr;

@@ -1377,7 +1386,7 @@ void MSM::set_proc_grid(int n)

// define a new MPI communicator for this grid level that only includes active procs

if(world_levels[n] != MPI_COMM_NULL) MPI_Comm_free(&world_levels[n]);
if (world_levels[n] != MPI_COMM_NULL) MPI_Comm_free(&world_levels[n]);
MPI_Comm_split(world,color,me,&world_levels[n]);

if (!active_flag[n]) return;

@@ -2434,6 +2443,7 @@ void MSM::prolongation(int n)

be cheaper than using nearest-neighbor communication (commgrid), right
now only works for periodic boundary conditions
------------------------------------------------------------------------- */

void MSM::grid_swap_forward(int n, double*** &gridn)
{
double ***gridn_tmp;

@@ -2533,32 +2543,31 @@ void MSM::grid_swap_reverse(int n, double*** &gridn)

pack own values to buf to send to another proc (used by commgrid)
------------------------------------------------------------------------- */

void MSM::pack_forward(int flag, double *buf, int nlist, int *list)
void MSM::pack_forward_grid(int flag, void *vbuf, int nlist, int *list)
{
double *buf = (double *) vbuf;

int n = current_level;

double ***qgridn = qgrid[n];
double ***egridn = egrid[n];

double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];

int k = 0;

if (flag == FORWARD_RHO) {
double ***qgridn = qgrid[n];
double *qsrc = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++) {
buf[k++] = qsrc[list[i]];
}
} else if (flag == FORWARD_AD) {
double ***egridn = egrid[n];
double *src = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++)
buf[i] = src[list[i]];
} else if (flag == FORWARD_AD_PERATOM) {
double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];
double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];

@@ -2580,32 +2589,31 @@ void MSM::pack_forward(int flag, double *buf, int nlist, int *list)

unpack another proc's own values from buf and set own ghost values
------------------------------------------------------------------------- */

void MSM::unpack_forward(int flag, double *buf, int nlist, int *list)
void MSM::unpack_forward_grid(int flag, void *vbuf, int nlist, int *list)
{
double *buf = (double *) vbuf;

int n = current_level;

double ***qgridn = qgrid[n];
double ***egridn = egrid[n];

double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];

int k = 0;

if (flag == FORWARD_RHO) {
double ***qgridn = qgrid[n];
double *dest = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++) {
dest[list[i]] = buf[k++];
}
} else if (flag == FORWARD_AD) {
double ***egridn = egrid[n];
double *dest = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++)
dest[list[i]] = buf[k++];
} else if (flag == FORWARD_AD_PERATOM) {
double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];
double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];

@@ -2627,32 +2635,31 @@ void MSM::unpack_forward(int flag, double *buf, int nlist, int *list)

pack ghost values into buf to send to another proc
------------------------------------------------------------------------- */

void MSM::pack_reverse(int flag, double *buf, int nlist, int *list)
void MSM::pack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
{
double *buf = (double *) vbuf;

int n = current_level;

double ***qgridn = qgrid[n];
double ***egridn = egrid[n];

double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];

int k = 0;

if (flag == REVERSE_RHO) {
double ***qgridn = qgrid[n];
double *qsrc = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++) {
buf[k++] = qsrc[list[i]];
}
} else if (flag == REVERSE_AD) {
double ***egridn = egrid[n];
double *src = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++)
buf[i] = src[list[i]];
} else if (flag == REVERSE_AD_PERATOM) {
double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];
double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];

@@ -2674,32 +2681,31 @@ void MSM::pack_reverse(int flag, double *buf, int nlist, int *list)

unpack another proc's ghost values from buf and add to own values
------------------------------------------------------------------------- */

void MSM::unpack_reverse(int flag, double *buf, int nlist, int *list)
void MSM::unpack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
{
double *buf = (double *) vbuf;

int n = current_level;

double ***qgridn = qgrid[n];
double ***egridn = egrid[n];

double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];

int k = 0;

if (flag == REVERSE_RHO) {
double ***qgridn = qgrid[n];
double *dest = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++) {
dest[list[i]] += buf[k++];
}
} else if (flag == REVERSE_AD) {
double ***egridn = egrid[n];
double *dest = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
for (int i = 0; i < nlist; i++)
dest[list[i]] += buf[k++];
} else if (flag == REVERSE_AD_PERATOM) {
double ***v0gridn = v0grid[n];
double ***v1gridn = v1grid[n];
double ***v2gridn = v2grid[n];
double ***v3gridn = v3grid[n];
double ***v4gridn = v4grid[n];
double ***v5gridn = v5grid[n];
double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];

@@ -3416,3 +3422,24 @@ void MSM::get_virial_direct_top(int n)

}
}
}

/* ----------------------------------------------------------------------
memory usage of local arrays
------------------------------------------------------------------------- */

double MSM::memory_usage()
{
double bytes = 0;

// NOTE: Stan, fill in other memory allocations here

// all GridComm bufs

bytes += (ngcall_buf1 + ngcall_buf2) * npergrid * sizeof(double);

for (int n=0; n<levels; n++)
if (active_flag[n])
bytes += (ngc_buf1[n] + ngc_buf2[n]) * npergrid * sizeof(double);

return bytes;
}
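The pack_*_grid/unpack_*_grid rewrites above type-erase the buffer to void * so one GridComm can service both double (MSM) and FFT_SCALAR (PPPM) payloads, with the datum size passed separately to the comm call. A self-contained model of the round trip for the reverse (ghost-sum) direction; pack_grid/unpack_grid_add and all values are illustrative, not the LAMMPS API:

#include <cstdio>
#include <vector>

static void pack_grid(const double *grid, void *vbuf,
                      int nlist, const int *list) {
  double *buf = (double *) vbuf;          // same cast as pack_reverse_grid()
  for (int i = 0; i < nlist; i++) buf[i] = grid[list[i]];
}

static void unpack_grid_add(double *grid, const void *vbuf,
                            int nlist, const int *list) {
  const double *buf = (const double *) vbuf;
  for (int i = 0; i < nlist; i++) grid[list[i]] += buf[i];  // reverse comm sums
}

int main() {
  std::vector<double> owned(8, 1.0), ghost(8, 0.5);
  int list[] = {2, 3, 4};                 // flattened ghost-cell offsets
  std::vector<double> buf(3);
  pack_grid(ghost.data(), buf.data(), 3, list);
  unpack_grid_add(owned.data(), buf.data(), 3, list);
  printf("owned[2] = %g\n", owned[2]);    // 1.5 after the ghost sum
}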
@@ -32,6 +32,7 @@ class MSM : public KSpace {

void setup();
virtual void settings(int, char **);
virtual void compute(int, int);
virtual double memory_usage();

protected:
int me,nprocs;

@@ -79,16 +80,21 @@ class MSM : public KSpace {

int procgrid[3];            // procs assigned in each dim of 3d grid
int myloc[3];               // which proc I am in each dim
int ***procneigh_levels;    // my 6 neighboring procs, 0/1 = left/right
class GridComm **cg;
class GridComm **cg_peratom;
class GridComm *cg_all;
class GridComm *cg_peratom_all;

class GridComm *gcall;      // GridComm class for finest level grid
class GridComm **gc;        // GridComm classes for each hierarchical level

double *gcall_buf1,*gcall_buf2;
double **gc_buf1,**gc_buf2;
int ngcall_buf1,ngcall_buf2,npergrid;
int *ngc_buf1,*ngc_buf2;

int current_level;

int **part2grid;            // storage for particle -> grid mapping
int nmax;

int triclinic;
double *boxlo;

void set_grid_global();

@@ -126,15 +132,12 @@ class MSM : public KSpace {

void get_g_direct_top(int);
void get_virial_direct_top(int);

// triclinic

int triclinic;

// grid communication

void pack_forward(int, double *, int, int *);
void unpack_forward(int, double *, int, int *);
void pack_reverse(int, double *, int, int *);
void unpack_reverse(int, double *, int, int *);

void pack_forward_grid(int, void *, int, int *);
void unpack_forward_grid(int, void *, int, int *);
void pack_reverse_grid(int, void *, int, int *);
void unpack_reverse_grid(int, void *, int, int *);
};

}
@@ -91,17 +91,7 @@ void MSMCG::compute(int eflag, int vflag)

// invoke allocate_peratom() if needed for first time

if (vflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom_all->ghost_notify();
cg_peratom_all->setup();
for (int n=0; n<levels; n++) {
if (!active_flag[n]) continue;
cg_peratom[n]->ghost_notify();
cg_peratom[n]->setup();
}
peratom_allocate_flag = 1;
}
if (vflag_atom && !peratom_allocate_flag) allocate_peratom();

// extend size of per-atom arrays if necessary

@@ -171,7 +161,8 @@ void MSMCG::compute(int eflag, int vflag)

// to fully sum contribution in their 3d grid

current_level = 0;
cg_all->reverse_comm(this,REVERSE_RHO);
gcall->reverse_comm_kspace(this,1,sizeof(double),REVERSE_RHO,
                           gcall_buf1,gcall_buf2,MPI_DOUBLE);

// forward communicate charge density values to fill ghost grid points
// compute direct sum interaction and then restrict to coarser grid

@@ -179,24 +170,30 @@ void MSMCG::compute(int eflag, int vflag)

for (int n=0; n<=levels-2; n++) {
if (!active_flag[n]) continue;
current_level = n;
cg[n]->forward_comm(this,FORWARD_RHO);
gc[n]->forward_comm_kspace(this,1,sizeof(double),FORWARD_RHO,
                           gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
direct(n);
restriction(n);
}

// compute direct interaction for top grid level for non-periodic
// and for second from top grid level for periodic

if (active_flag[levels-1]) {
if (domain->nonperiodic) {
current_level = levels-1;
cg[levels-1]->forward_comm(this,FORWARD_RHO);
gc[levels-1]->
  forward_comm_kspace(this,1,sizeof(double),FORWARD_RHO,
                      gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
direct_top(levels-1);
cg[levels-1]->reverse_comm(this,REVERSE_AD);
gc[levels-1]->
  reverse_comm_kspace(this,1,sizeof(double),REVERSE_AD,
                      gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
if (vflag_atom)
cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
gc[levels-1]->
  reverse_comm_kspace(this,6,sizeof(double),REVERSE_AD_PERATOM,
                      gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);

} else {
// Here using MPI_Allreduce is cheaper than using commgrid
grid_swap_forward(levels-1,qgrid[levels-1]);

@@ -204,7 +201,9 @@ void MSMCG::compute(int eflag, int vflag)

grid_swap_reverse(levels-1,egrid[levels-1]);
current_level = levels-1;
if (vflag_atom)
cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
gc[levels-1]->
  reverse_comm_kspace(this,6,sizeof(double),REVERSE_AD_PERATOM,
                      gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
}
}

@@ -216,24 +215,28 @@ void MSMCG::compute(int eflag, int vflag)

prolongation(n);

current_level = n;
cg[n]->reverse_comm(this,REVERSE_AD);
gc[n]->reverse_comm_kspace(this,1,sizeof(double),REVERSE_AD,
                           gc_buf1[n],gc_buf2[n],MPI_DOUBLE);

// extra per-atom virial communication

if (vflag_atom)
cg_peratom[n]->reverse_comm(this,REVERSE_AD_PERATOM);
gc[n]->reverse_comm_kspace(this,6,sizeof(double),REVERSE_AD_PERATOM,
                           gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
}

// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks

current_level = 0;
cg_all->forward_comm(this,FORWARD_AD);
gcall->forward_comm_kspace(this,1,sizeof(double),FORWARD_AD,
                           gcall_buf1,gcall_buf2,MPI_DOUBLE);

// extra per-atom energy/virial communication

if (vflag_atom)
cg_peratom_all->forward_comm(this,FORWARD_AD_PERATOM);
gcall->forward_comm_kspace(this,6,sizeof(double),FORWARD_AD_PERATOM,
                           gcall_buf1,gcall_buf2,MPI_DOUBLE);

// calculate the force on my particles (interpolation)

@@ -536,6 +539,9 @@ void MSMCG::fieldforce_peratom()

}
}

/* ----------------------------------------------------------------------
memory usage of local arrays
------------------------------------------------------------------------- */

double MSMCG::memory_usage()
{
@@ -70,11 +70,12 @@ PPPM::PPPM(LAMMPS *lmp) : KSpace(lmp),

u_brick(NULL), v0_brick(NULL), v1_brick(NULL), v2_brick(NULL), v3_brick(NULL),
v4_brick(NULL), v5_brick(NULL), greensfn(NULL), vg(NULL), fkx(NULL), fky(NULL),
fkz(NULL), density_fft(NULL), work1(NULL), work2(NULL), gf_b(NULL), rho1d(NULL),
rho_coeff(NULL), drho1d(NULL), drho_coeff(NULL), sf_precoeff1(NULL), sf_precoeff2(NULL),
sf_precoeff3(NULL), sf_precoeff4(NULL), sf_precoeff5(NULL), sf_precoeff6(NULL),
acons(NULL), density_A_brick(NULL), density_B_brick(NULL), density_A_fft(NULL),
density_B_fft(NULL), fft1(NULL), fft2(NULL), remap(NULL), cg(NULL), cg_peratom(NULL),
part2grid(NULL), boxlo(NULL)
rho_coeff(NULL), drho1d(NULL), drho_coeff(NULL),
sf_precoeff1(NULL), sf_precoeff2(NULL), sf_precoeff3(NULL),
sf_precoeff4(NULL), sf_precoeff5(NULL), sf_precoeff6(NULL),
acons(NULL), fft1(NULL), fft2(NULL), remap(NULL), gc(NULL),
gc_buf1(NULL), gc_buf2(NULL), density_A_brick(NULL), density_B_brick(NULL), density_A_fft(NULL),
density_B_fft(NULL), part2grid(NULL), boxlo(NULL)
{
peratom_allocate_flag = 0;
group_allocate_flag = 0;

@@ -117,8 +118,8 @@ PPPM::PPPM(LAMMPS *lmp) : KSpace(lmp),

fft1 = fft2 = NULL;
remap = NULL;
cg = NULL;
cg_peratom = NULL;
gc = NULL;
gc_buf1 = gc_buf2 = NULL;

nmax = 0;
part2grid = NULL;

@@ -205,9 +206,6 @@ void PPPM::init()

"slab correction");
if (domain->dimension == 2)
error->all(FLERR,"Cannot use PPPM with 2d simulation");
if (comm->style != 0)
error->universe_all(FLERR,"PPPM can only currently be used with "
                    "comm_style brick");

if (!atom->q_flag)
error->all(FLERR,"Kspace style requires atom attribute q");

@@ -297,9 +295,7 @@ void PPPM::init()

// or overlap is allowed, then done
// else reduce order and try again

int (*procneigh)[2] = comm->procneigh;

GridComm *cgtmp = NULL;
GridComm *gctmp = NULL;
int iteration = 0;

while (order >= minorder) {

@@ -312,24 +308,24 @@ void PPPM::init()

set_grid_local();
if (overlap_allowed) break;

cgtmp = new GridComm(lmp,world,1,1,
gctmp = new GridComm(lmp,world,nx_pppm,ny_pppm,nz_pppm,
                     nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                     nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                     procneigh[0][0],procneigh[0][1],procneigh[1][0],
                     procneigh[1][1],procneigh[2][0],procneigh[2][1]);
cgtmp->ghost_notify();
if (!cgtmp->ghost_overlap()) break;
delete cgtmp;
                     nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out);

int tmp1,tmp2;
gctmp->setup(tmp1,tmp2);
if (gctmp->ghost_adjacent()) break;
delete gctmp;

order--;
iteration++;
}

if (order < minorder) error->all(FLERR,"PPPM order < minimum allowed order");
if (!overlap_allowed && cgtmp->ghost_overlap())
if (!overlap_allowed && !gctmp->ghost_adjacent())
error->all(FLERR,"PPPM grid stencil extends "
           "beyond nearest neighbor processor");
if (cgtmp) delete cgtmp;
if (gctmp) delete gctmp;
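The loop above keeps shrinking the interpolation order until the ghost stencil fits within nearest-neighbor procs, as reported by the new ghost_adjacent() test (the old code asked the inverse question via ghost_overlap()). A sketch of that retry shape, with ghost_adjacent() stubbed and all numbers invented for illustration:

#include <cstdio>

bool ghost_adjacent(int order) { return order <= 5; }  // stub predicate

int main() {
  int order = 7, minorder = 2, iteration = 0;
  while (order >= minorder) {
    // ...recompute grid and ghost extents for this order here...
    if (ghost_adjacent(order)) break;   // stencil fits: keep this order
    order--;                            // else shrink stencil and retry
    iteration++;
  }
  if (order < minorder) puts("error: order < minimum allowed order");
  else printf("kept order %d after %d retries\n", order, iteration);
}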
// adjust g_ewald

@@ -363,8 +359,6 @@ void PPPM::init()

// don't invoke allocate peratom() or group(), will be allocated when needed

allocate();
cg->ghost_notify();
cg->setup();

// pre-compute Green's function denominator expansion
// pre-compute 1d charge distribution coefficients

@@ -578,11 +572,9 @@ void PPPM::setup_grid()

allocate();

cg->ghost_notify();
if (overlap_allowed == 0 && cg->ghost_overlap())
if (!overlap_allowed && !gc->ghost_adjacent())
error->all(FLERR,"PPPM grid stencil extends "
           "beyond nearest neighbor processor");
cg->setup();

// pre-compute Green's function denominator expansion
// pre-compute 1d charge distribution coefficients

@@ -591,7 +583,7 @@ void PPPM::setup_grid()

if (differentiation_flag == 1) compute_sf_precoeff();
compute_rho_coeff();

// pre-compute volume-dependent coeffs
// pre-compute volume-dependent coeffs for portion of grid I now own

setup();
}

@@ -609,11 +601,7 @@ void PPPM::compute(int eflag, int vflag)

ev_init(eflag,vflag);

if (evflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom->ghost_notify();
cg_peratom->setup();
}
if (evflag_atom && !peratom_allocate_flag) allocate_peratom();

// if atom count has changed, update qsum and qsqsum

@@ -652,7 +640,8 @@ void PPPM::compute(int eflag, int vflag)

// to fully sum contribution in their 3d bricks
// remap from 3d decomposition to FFT decomposition

cg->reverse_comm(this,REVERSE_RHO);
gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
                        gc_buf1,gc_buf2,MPI_FFT_SCALAR);
brick2fft();

// compute potential gradient on my FFT grid and

@@ -665,16 +654,22 @@ void PPPM::compute(int eflag, int vflag)

// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks

if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
else cg->forward_comm(this,FORWARD_IK);
if (differentiation_flag == 1)
gc->forward_comm_kspace(this,1,sizeof(FFT_SCALAR),FORWARD_AD,
                        gc_buf1,gc_buf2,MPI_FFT_SCALAR);
else
gc->forward_comm_kspace(this,3,sizeof(FFT_SCALAR),FORWARD_IK,
                        gc_buf1,gc_buf2,MPI_FFT_SCALAR);

// extra per-atom energy/virial communication

if (evflag_atom) {
if (differentiation_flag == 1 && vflag_atom)
cg_peratom->forward_comm(this,FORWARD_AD_PERATOM);
gc->forward_comm_kspace(this,6,sizeof(FFT_SCALAR),FORWARD_AD_PERATOM,
                        gc_buf1,gc_buf2,MPI_FFT_SCALAR);
else if (differentiation_flag == 0)
cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
gc->forward_comm_kspace(this,7,sizeof(FFT_SCALAR),FORWARD_IK_PERATOM,
                        gc_buf1,gc_buf2,MPI_FFT_SCALAR);
}

// calculate the force on my particles

@@ -821,21 +816,19 @@ void PPPM::allocate()

1,0,0,FFT_PRECISION,collective_flag);

// create ghost grid object for rho and electric field communication
// also create 2 bufs for ghost grid cell comm, passed to GridComm methods

int (*procneigh)[2] = comm->procneigh;
gc = new GridComm(lmp,world,nx_pppm,ny_pppm,nz_pppm,
                  nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                  nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out);

if (differentiation_flag == 1)
cg = new GridComm(lmp,world,1,1,
                  nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                  nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                  procneigh[0][0],procneigh[0][1],procneigh[1][0],
                  procneigh[1][1],procneigh[2][0],procneigh[2][1]);
else
cg = new GridComm(lmp,world,3,1,
                  nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                  nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
                  procneigh[0][0],procneigh[0][1],procneigh[1][0],
                  procneigh[1][1],procneigh[2][0],procneigh[2][1]);
gc->setup(ngc_buf1,ngc_buf2);

if (differentiation_flag) npergrid = 1;
else npergrid = 3;

memory->create(gc_buf1,npergrid*ngc_buf1,"pppm:gc_buf1");
memory->create(gc_buf2,npergrid*ngc_buf2,"pppm:gc_buf2");
}

/* ----------------------------------------------------------------------

@@ -886,7 +879,9 @@ void PPPM::deallocate()

delete fft1;
delete fft2;
delete remap;
delete cg;
delete gc;
memory->destroy(gc_buf1);
memory->destroy(gc_buf2);
}

/* ----------------------------------------------------------------------

@@ -915,24 +910,16 @@ void PPPM::allocate_peratom()

memory->create3d_offset(v5_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
                        nxlo_out,nxhi_out,"pppm:v5_brick");

// create ghost grid object for rho and electric field communication
// use same GC ghost grid object for peratom grid communication
// but need to reallocate a larger gc_buf1 and gc_buf2

int (*procneigh)[2] = comm->procneigh;
if (differentiation_flag) npergrid = 6;
else npergrid = 7;

if (differentiation_flag == 1)
cg_peratom =
new GridComm(lmp,world,6,1,
             nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
             nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
             procneigh[0][0],procneigh[0][1],procneigh[1][0],
             procneigh[1][1],procneigh[2][0],procneigh[2][1]);
else
cg_peratom =
new GridComm(lmp,world,7,1,
             nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
             nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
             procneigh[0][0],procneigh[0][1],procneigh[1][0],
             procneigh[1][1],procneigh[2][0],procneigh[2][1]);
memory->destroy(gc_buf1);
memory->destroy(gc_buf2);
memory->create(gc_buf1,npergrid*ngc_buf1,"pppm:gc_buf1");
memory->create(gc_buf2,npergrid*ngc_buf2,"pppm:gc_buf2");
}
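The reallocation above exists because one GridComm object is now reused with different payload widths: 1 (ad) or 3 (ik) values per grid point for the E-field pass, then 6 or 7 for the per-atom pass. A hypothetical helper that regrows the two buffers for a new width, with malloc/free standing in for memory->create/destroy:

#include <cstdlib>

static void regrow_bufs(double *&buf1, double *&buf2,
                        int ngc_buf1, int ngc_buf2, int npergrid) {
  free(buf1); free(buf2);   // stands in for memory->destroy()
  buf1 = (double *) malloc((size_t) npergrid*ngc_buf1*sizeof(double));
  buf2 = (double *) malloc((size_t) npergrid*ngc_buf2*sizeof(double));
}

int main() {
  double *b1 = nullptr, *b2 = nullptr;
  regrow_bufs(b1, b2, 128, 256, 3);   // baseline ik: 3 values per point
  regrow_bufs(b1, b2, 128, 256, 7);   // per-atom ik: 7 values per point
  free(b1); free(b2);
}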
/* ----------------------------------------------------------------------

@@ -952,8 +939,6 @@ void PPPM::deallocate_peratom()

if (differentiation_flag != 1)
memory->destroy3d_offset(u_brick,nzlo_out,nylo_out,nxlo_out);

delete cg_peratom;
}

/* ----------------------------------------------------------------------

@@ -1002,7 +987,8 @@ void PPPM::set_grid_global()

int count = 0;
while (1) {

// set grid dimension
// set grid dimensions

nx_pppm = static_cast<int> (xprd/h_x);
ny_pppm = static_cast<int> (yprd/h_y);
nz_pppm = static_cast<int> (zprd_slab/h_z);

@@ -1011,31 +997,16 @@ void PPPM::set_grid_global()

if (ny_pppm <= 1) ny_pppm = 2;
if (nz_pppm <= 1) nz_pppm = 2;

// set local grid dimension
int npey_fft,npez_fft;
if (nz_pppm >= nprocs) {
npey_fft = 1;
npez_fft = nprocs;
} else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);

int me_y = me % npey_fft;
int me_z = me / npey_fft;

nxlo_fft = 0;
nxhi_fft = nx_pppm - 1;
nylo_fft = me_y*ny_pppm/npey_fft;
nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
nzlo_fft = me_z*nz_pppm/npez_fft;
nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
// estimate Kspace force error

double df_kspace = compute_df_kspace();

count++;

// break loop if the accuracy has been reached or
// too many loops have been performed

count++;
if (df_kspace <= accuracy) break;

if (count > 500) error->all(FLERR, "Could not compute grid size");
h *= 0.95;
h_x = h_y = h_z = h;

@@ -1163,7 +1134,11 @@ double PPPM::compute_df_kspace()

double PPPM::compute_qopt()
{
double qopt = 0.0;
int k,l,m,nx,ny,nz;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double u1,u2,sqk;
double sum1,sum2,sum3,sum4,dot2;

double *prd = domain->prd;

const double xprd = prd[0];

@@ -1176,67 +1151,69 @@ double PPPM::compute_qopt()

const double unitky = (MY_2PI/yprd);
const double unitkz = (MY_2PI/zprd_slab);

double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double u1, u2, sqk;
double sum1,sum2,sum3,sum4,dot2;

int k,l,m,nx,ny,nz;
const int twoorder = 2*order;

for (m = nzlo_fft; m <= nzhi_fft; m++) {
// loop over entire FFT grid
// each proc calculates contributions from every Pth grid point

bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm;
int nxy_pppm = nx_pppm * ny_pppm;

double qopt = 0.0;

for (bigint i = me; i < ngridtotal; i += nprocs) {
k = i % nx_pppm;
l = (i/nx_pppm) % ny_pppm;
m = i / nxy_pppm;

const int kper = k - nx_pppm*(2*k/nx_pppm);
const int lper = l - ny_pppm*(2*l/ny_pppm);
const int mper = m - nz_pppm*(2*m/nz_pppm);

for (l = nylo_fft; l <= nyhi_fft; l++) {
const int lper = l - ny_pppm*(2*l/ny_pppm);
sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
if (sqk == 0.0) continue;

for (k = nxlo_fft; k <= nxhi_fft; k++) {
const int kper = k - nx_pppm*(2*k/nx_pppm);
sum1 = sum2 = sum3 = sum4 = 0.0;

sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
for (nx = -2; nx <= 2; nx++) {
qx = unitkx*(kper+nx_pppm*nx);
sx = exp(-0.25*square(qx/g_ewald));
argx = 0.5*qx*xprd/nx_pppm;
wx = powsinxx(argx,twoorder);
qx *= qx;

if (sqk != 0.0) {
for (ny = -2; ny <= 2; ny++) {
qy = unitky*(lper+ny_pppm*ny);
sy = exp(-0.25*square(qy/g_ewald));
argy = 0.5*qy*yprd/ny_pppm;
wy = powsinxx(argy,twoorder);
qy *= qy;

sum1 = 0.0;
sum2 = 0.0;
sum3 = 0.0;
sum4 = 0.0;
for (nx = -2; nx <= 2; nx++) {
qx = unitkx*(kper+nx_pppm*nx);
sx = exp(-0.25*square(qx/g_ewald));
argx = 0.5*qx*xprd/nx_pppm;
wx = powsinxx(argx,twoorder);
qx *= qx;
for (nz = -2; nz <= 2; nz++) {
qz = unitkz*(mper+nz_pppm*nz);
sz = exp(-0.25*square(qz/g_ewald));
argz = 0.5*qz*zprd_slab/nz_pppm;
wz = powsinxx(argz,twoorder);
qz *= qz;

for (ny = -2; ny <= 2; ny++) {
qy = unitky*(lper+ny_pppm*ny);
sy = exp(-0.25*square(qy/g_ewald));
argy = 0.5*qy*yprd/ny_pppm;
wy = powsinxx(argy,twoorder);
qy *= qy;
dot2 = qx+qy+qz;
u1 = sx*sy*sz;
u2 = wx*wy*wz;

for (nz = -2; nz <= 2; nz++) {
qz = unitkz*(mper+nz_pppm*nz);
sz = exp(-0.25*square(qz/g_ewald));
argz = 0.5*qz*zprd_slab/nz_pppm;
wz = powsinxx(argz,twoorder);
qz *= qz;

dot2 = qx+qy+qz;
u1 = sx*sy*sz;
u2 = wx*wy*wz;
sum1 += u1*u1/dot2*MY_4PI*MY_4PI;
sum2 += u1 * u2 * MY_4PI;
sum3 += u2;
sum4 += dot2*u2;
}
}
}
sum2 *= sum2;
qopt += sum1 - sum2/(sum3*sum4);
sum1 += u1*u1/dot2*MY_4PI*MY_4PI;
sum2 += u1 * u2 * MY_4PI;
sum3 += u2;
sum4 += dot2*u2;
}
}
}

sum2 *= sum2;
qopt += sum1 - sum2/(sum3*sum4);
}

// sum qopt over all procs

double qopt_all;
MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world);
return qopt_all;
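The rewritten loop replaces the triple (m,l,k) loop over this proc's FFT sub-brick with a round-robin stride over the whole flattened grid, so the work division no longer depends on the FFT decomposition. A self-contained check of the index arithmetic, with x the fastest-varying dimension; the grid sizes below are arbitrary:

#include <cassert>

int main() {
  const int nx = 4, ny = 3, nz = 5, nprocs = 7;
  const long long ntotal = (long long) nx*ny*nz;
  for (int me = 0; me < nprocs; me++)
    for (long long i = me; i < ntotal; i += nprocs) {
      int k = i % nx;                       // x index (fastest)
      int l = (i/nx) % ny;                  // y index
      int m = (int) (i / ((long long) nx*ny));  // z index (slowest)
      // recover the flattened index from (k,l,m) and compare
      assert(i == k + (long long) nx*(l + (long long) ny*m));
    }
  return 0;
}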
@@ -1349,18 +1326,31 @@ void PPPM::set_grid_local()

// nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
//   global PPPM grid that I own without ghost cells
// for slab PPPM, assign z grid as if it were not extended
// both non-tiled and tiled proc layouts use 0-1 fractional subdomain info

nxlo_in = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_pppm);
nxhi_in = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1;
if (comm->layout != Comm::LAYOUT_TILED) {
nxlo_in = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_pppm);
nxhi_in = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1;

nylo_in = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_pppm);
nyhi_in = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1;
nylo_in = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_pppm);
nyhi_in = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1;

nzlo_in = static_cast<int>
nzlo_in = static_cast<int>
  (comm->zsplit[comm->myloc[2]] * nz_pppm/slab_volfactor);
nzhi_in = static_cast<int>
nzhi_in = static_cast<int>
  (comm->zsplit[comm->myloc[2]+1] * nz_pppm/slab_volfactor) - 1;

} else {
nxlo_in = static_cast<int> (comm->mysplit[0][0] * nx_pppm);
nxhi_in = static_cast<int> (comm->mysplit[0][1] * nx_pppm) - 1;

nylo_in = static_cast<int> (comm->mysplit[1][0] * ny_pppm);
nyhi_in = static_cast<int> (comm->mysplit[1][1] * ny_pppm) - 1;

nzlo_in = static_cast<int> (comm->mysplit[2][0] * nz_pppm/slab_volfactor);
nzhi_in = static_cast<int> (comm->mysplit[2][1] * nz_pppm/slab_volfactor) - 1;
}
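For tiled layouts there are no per-dimension split arrays, so the bounds come from comm->mysplit, each proc's 0-1 fractional extent in each dimension; the arithmetic is the same mapping from fractions to grid planes in both branches. A tiny sketch of that mapping with made-up fractions:

#include <cstdio>

int main() {
  const int nx_pppm = 100;
  double lo = 0.25, hi = 0.50;   // this proc owns [0.25,0.50) of x
  int nxlo_in = (int) (lo * nx_pppm);       // first owned grid plane
  int nxhi_in = (int) (hi * nx_pppm) - 1;   // last owned grid plane
  printf("owned x planes: %d..%d\n", nxlo_in, nxhi_in);  // 25..49
}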
// nlower,nupper = stencil size for mapping particles to PPPM grid

nlower = -(order-1)/2;

@@ -1446,22 +1436,26 @@ void PPPM::set_grid_local()

// -z proc, but not vice versa
// this is accomplished by nzhi_in = nzhi_out on +z end (no ghost cells)
// also ensure no other procs use ghost cells beyond +z limit
// different logic for non-tiled vs tiled decomposition

if (slabflag == 1) {
if (comm->myloc[2] == comm->procgrid[2]-1)
nzhi_in = nzhi_out = nz_pppm - 1;
if (comm->layout != Comm::LAYOUT_TILED) {
if (comm->myloc[2] == comm->procgrid[2]-1) nzhi_in = nzhi_out = nz_pppm - 1;
} else {
if (comm->mysplit[2][1] == 1.0) nzhi_in = nzhi_out = nz_pppm - 1;
}
nzhi_out = MIN(nzhi_out,nz_pppm-1);
}

// decomposition of FFT mesh
// x-pencil decomposition of FFT mesh
// global indices range from 0 to N-1
// proc owns entire x-dimension, clumps of columns in y,z dimensions
// each proc owns entire x-dimension, clumps of columns in y,z dimensions
// npey_fft,npez_fft = # of procs in y,z dims
// if nprocs is small enough, proc can own 1 or more entire xy planes,
//   else proc owns 2d sub-blocks of yz plane
// me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
// nlo_fft,nhi_fft = lower/upper limit of the section
//   of the global FFT mesh that I own
//   of the global FFT mesh that I own in x-pencil decomposition

int npey_fft,npez_fft;
if (nz_pppm >= nprocs) {

@@ -1479,13 +1473,13 @@ void PPPM::set_grid_local()

nzlo_fft = me_z*nz_pppm/npez_fft;
nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;

// PPPM grid pts owned by this proc, including ghosts
// ngrid = count of PPPM grid pts owned by this proc, including ghosts

ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
        (nzhi_out-nzlo_out+1);

// FFT grids owned by this proc, without ghosts
// nfft = FFT points in FFT decomposition on this proc
// count of FFT grids pts owned by this proc, without ghosts
// nfft = FFT points in x-pencil FFT decomposition on this proc
// nfft_brick = FFT points in 3d brick-decomposition on this proc
// nfft_both = greater of 2 values

@@ -2637,8 +2631,10 @@ void PPPM::fieldforce_peratom()

pack own values to buf to send to another proc
------------------------------------------------------------------------- */

void PPPM::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPM::pack_forward_grid(int flag, void *vbuf, int nlist, int *list)
{
FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;

int n = 0;

if (flag == FORWARD_IK) {

@@ -2695,8 +2691,10 @@ void PPPM::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)

unpack another proc's own values from buf and set own ghost values
------------------------------------------------------------------------- */

void PPPM::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPM::unpack_forward_grid(int flag, void *vbuf, int nlist, int *list)
{
FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;

int n = 0;

if (flag == FORWARD_IK) {

@@ -2753,8 +2751,10 @@ void PPPM::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)

pack ghost values into buf to send to another proc
------------------------------------------------------------------------- */

void PPPM::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPM::pack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
{
FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;

if (flag == REVERSE_RHO) {
FFT_SCALAR *src = &density_brick[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++)

@@ -2766,8 +2766,10 @@ void PPPM::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)

unpack another proc's ghost values from buf and add to own values
------------------------------------------------------------------------- */

void PPPM::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPM::unpack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
{
FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;

if (flag == REVERSE_RHO) {
FFT_SCALAR *dest = &density_brick[nzlo_out][nylo_out][nxlo_out];
for (int i = 0; i < nlist; i++)

@@ -3056,6 +3058,7 @@ int PPPM::timing_3d(int n, double &time3d)

double PPPM::memory_usage()
{
double bytes = nmax*3 * sizeof(double);

int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
             (nzhi_out-nzlo_out+1);
if (differentiation_flag == 1) {

@@ -3063,6 +3066,7 @@ double PPPM::memory_usage()

} else {
bytes += 4 * nbrick * sizeof(FFT_SCALAR);
}

if (triclinic) bytes += 3 * nfft_both * sizeof(double);
bytes += 6 * nfft_both * sizeof(double);
bytes += nfft_both * sizeof(double);

@@ -3076,7 +3080,9 @@ double PPPM::memory_usage()

bytes += 2 * nfft_both * sizeof(FFT_SCALAR);
}

if (cg) bytes += cg->memory_usage();
// two GridComm bufs

bytes += (ngc_buf1 + ngc_buf2) * npergrid * sizeof(FFT_SCALAR);

return bytes;
}

@@ -3134,7 +3140,8 @@ void PPPM::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag)

density_brick = density_A_brick;
density_fft = density_A_fft;

cg->reverse_comm(this,REVERSE_RHO);
gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
                        gc_buf1,gc_buf2,MPI_FFT_SCALAR);
brick2fft();

// group B

@@ -3142,7 +3149,8 @@ void PPPM::compute_group_group(int groupbit_A, int groupbit_B, int AA_flag)

density_brick = density_B_brick;
density_fft = density_B_fft;

cg->reverse_comm(this,REVERSE_RHO);
gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
                        gc_buf1,gc_buf2,MPI_FFT_SCALAR);
brick2fft();

// switch back pointers

@@ -3436,7 +3444,7 @@ void PPPM::poisson_groups_triclinic()

}

/* ----------------------------------------------------------------------
Slab-geometry correction term to dampen inter-slab interactions between
slab-geometry correction term to dampen inter-slab interactions between
periodically repeating slabs. Yields good approximation to 2D Ewald if
adequate empty space is left between repeating slabs (J. Chem. Phys.
111, 3155). Slabs defined here to be parallel to the xy plane. Also
@@ -96,17 +96,21 @@ class PPPM : public KSpace {

double sf_coeff[6];   // coefficients for calculating ad self-forces
double **acons;

// FFTs and grid communication

class FFT3d *fft1,*fft2;
class Remap *remap;
class GridComm *gc;

FFT_SCALAR *gc_buf1,*gc_buf2;
int ngc_buf1,ngc_buf2,npergrid;

// group-group interactions

int group_allocate_flag;
FFT_SCALAR ***density_A_brick,***density_B_brick;
FFT_SCALAR *density_A_fft,*density_B_fft;

class FFT3d *fft1,*fft2;
class Remap *remap;
class GridComm *cg;
class GridComm *cg_peratom;

int **part2grid;      // storage for particle -> grid mapping
int nmax;

@@ -160,10 +164,10 @@ class PPPM : public KSpace {

// grid communication

virtual void pack_forward(int, FFT_SCALAR *, int, int *);
virtual void unpack_forward(int, FFT_SCALAR *, int, int *);
virtual void pack_reverse(int, FFT_SCALAR *, int, int *);
virtual void unpack_reverse(int, FFT_SCALAR *, int, int *);
virtual void pack_forward_grid(int, void *, int, int *);
virtual void unpack_forward_grid(int, void *, int, int *);
virtual void pack_reverse_grid(int, void *, int, int *);
virtual void unpack_reverse_grid(int, void *, int, int *);

// triclinic
@@ -90,11 +90,7 @@ void PPPMCG::compute(int eflag, int vflag)

ev_init(eflag,vflag);

if (evflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom->ghost_notify();
cg_peratom->setup();
}
if (evflag_atom && !peratom_allocate_flag) allocate_peratom();

// if atom count has changed, update qsum and qsqsum

@@ -162,6 +158,7 @@ void PPPMCG::compute(int eflag, int vflag)

}

// only need to rebuild this list after a neighbor list update

if (neighbor->ago == 0) {
num_charged = 0;
for (int i = 0; i < atom->nlocal; ++i) {

@@ -182,7 +179,8 @@ void PPPMCG::compute(int eflag, int vflag)

// to fully sum contribution in their 3d bricks
// remap from 3d decomposition to FFT decomposition

cg->reverse_comm(this,REVERSE_RHO);
gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
                        gc_buf1,gc_buf2,MPI_FFT_SCALAR);
brick2fft();

// compute potential gradient on my FFT grid and

@@ -195,16 +193,22 @@ void PPPMCG::compute(int eflag, int vflag)

// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks

if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
else cg->forward_comm(this,FORWARD_IK);
if (differentiation_flag == 1)
gc->forward_comm_kspace(this,1,sizeof(FFT_SCALAR),FORWARD_AD,
                        gc_buf1,gc_buf2,MPI_FFT_SCALAR);
else
gc->forward_comm_kspace(this,3,sizeof(FFT_SCALAR),FORWARD_IK,
                        gc_buf1,gc_buf2,MPI_FFT_SCALAR);

// extra per-atom energy/virial communication

if (evflag_atom) {
if (differentiation_flag == 1 && vflag_atom)
cg_peratom->forward_comm(this,FORWARD_AD_PERATOM);
gc->forward_comm_kspace(this,6,sizeof(FFT_SCALAR),FORWARD_AD_PERATOM,
                        gc_buf1,gc_buf2,MPI_FFT_SCALAR);
else if (differentiation_flag == 0)
cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
gc->forward_comm_kspace(this,7,sizeof(FFT_SCALAR),FORWARD_IK_PERATOM,
                        gc_buf1,gc_buf2,MPI_FFT_SCALAR);
}

// calculate the force on my particles
@ -79,8 +79,7 @@ PPPMDipole::PPPMDipole(LAMMPS *lmp) : PPPM(lmp),
|
|||
dipoleflag = 1;
|
||||
group_group_enable = 0;
|
||||
|
||||
cg_dipole = NULL;
|
||||
cg_peratom_dipole = NULL;
|
||||
gc_dipole = NULL;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
@ -93,10 +92,6 @@ PPPMDipole::~PPPMDipole()
|
|||
|
||||
deallocate();
|
||||
if (peratom_allocate_flag) deallocate_peratom();
|
||||
fft1 = NULL;
|
||||
fft2 = NULL;
|
||||
remap = NULL;
|
||||
cg_dipole = NULL;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
@ -195,9 +190,7 @@ void PPPMDipole::init()
|
|||
// or overlap is allowed, then done
|
||||
// else reduce order and try again
|
||||
|
||||
int (*procneigh)[2] = comm->procneigh;
|
||||
|
||||
GridComm *cgtmp = NULL;
|
||||
GridComm *gctmp = NULL;
|
||||
int iteration = 0;
|
||||
|
||||
while (order >= minorder) {
|
||||
|
@ -210,24 +203,24 @@ void PPPMDipole::init()
|
|||
set_grid_local();
|
||||
if (overlap_allowed) break;
|
||||
|
||||
cgtmp = new GridComm(lmp,world,1,1,
|
||||
gctmp = new GridComm(lmp,world,nx_pppm,ny_pppm,nz_pppm,
|
||||
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
|
||||
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
|
||||
procneigh[0][0],procneigh[0][1],procneigh[1][0],
|
||||
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
|
||||
cgtmp->ghost_notify();
|
||||
if (!cgtmp->ghost_overlap()) break;
|
||||
delete cgtmp;
|
||||
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out);
|
||||
|
||||
int tmp1,tmp2;
|
||||
gctmp->setup(tmp1,tmp2);
|
||||
if (gctmp->ghost_adjacent()) break;
|
||||
delete gctmp;
|
||||
|
||||
order--;
|
||||
iteration++;
|
||||
}
|
||||
|
||||
if (order < minorder) error->all(FLERR,"PPPMDipole order < minimum allowed order");
|
||||
if (!overlap_allowed && cgtmp->ghost_overlap())
|
||||
if (!overlap_allowed && !gctmp->ghost_adjacent())
|
||||
error->all(FLERR,"PPPMDipole grid stencil extends "
|
||||
"beyond nearest neighbor processor");
|
||||
if (cgtmp) delete cgtmp;
|
||||
if (gctmp) delete gctmp;
|
||||
|
||||
// adjust g_ewald
|
||||
|
||||
|
@ -261,8 +254,6 @@ void PPPMDipole::init()
|
|||
// don't invoke allocate peratom(), will be allocated when needed
|
||||
|
||||
allocate();
|
||||
cg_dipole->ghost_notify();
|
||||
cg_dipole->setup();
|
||||
|
||||
// pre-compute Green's function denomiator expansion
|
||||
// pre-compute 1d charge distribution coefficients
|
||||
|
@ -385,11 +376,9 @@ void PPPMDipole::setup_grid()
|
|||
|
||||
allocate();
|
||||
|
||||
cg_dipole->ghost_notify();
|
||||
if (overlap_allowed == 0 && cg_dipole->ghost_overlap())
|
||||
if (!overlap_allowed && !gc_dipole->ghost_adjacent())
|
||||
error->all(FLERR,"PPPMDipole grid stencil extends "
|
||||
"beyond nearest neighbor processor");
|
||||
cg_dipole->setup();
|
||||
|
||||
// pre-compute Green's function denomiator expansion
|
||||
// pre-compute 1d charge distribution coefficients
|
||||
|
@ -421,11 +410,7 @@ void PPPMDipole::compute(int eflag, int vflag)
|
|||
error->all(FLERR,"Cannot (yet) compute per-atom virial "
|
||||
"with kspace style pppm/dipole");
|
||||
|
||||
if (evflag_atom && !peratom_allocate_flag) {
|
||||
allocate_peratom();
|
||||
cg_peratom_dipole->ghost_notify();
|
||||
cg_peratom_dipole->setup();
|
||||
}
|
||||
if (evflag_atom && !peratom_allocate_flag) allocate_peratom();
|
||||
|
||||
// if atom count has changed, update qsum and qsqsum
|
||||
|
||||
|
@ -460,7 +445,8 @@ void PPPMDipole::compute(int eflag, int vflag)
|
|||
// to fully sum contribution in their 3d bricks
|
||||
// remap from 3d decomposition to FFT decomposition
|
||||
|
||||
cg_dipole->reverse_comm(this,REVERSE_MU);
|
||||
gc_dipole->reverse_comm_kspace(this,3,sizeof(FFT_SCALAR),REVERSE_MU,
|
||||
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
|
||||
brick2fft_dipole();
|
||||
|
||||
// compute potential gradient on my FFT grid and
|
||||
|
@ -473,13 +459,14 @@ void PPPMDipole::compute(int eflag, int vflag)
|
|||
// all procs communicate E-field values
|
||||
// to fill ghost cells surrounding their 3d bricks
|
||||
|
||||
cg_dipole->forward_comm(this,FORWARD_MU);
|
||||
gc_dipole->forward_comm_kspace(this,9,sizeof(FFT_SCALAR),FORWARD_MU,
|
||||
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
|
||||
|
||||
// extra per-atom energy/virial communication
|
||||
|
||||
if (evflag_atom) {
|
||||
cg_peratom_dipole->forward_comm(this,FORWARD_MU_PERATOM);
|
||||
}
|
||||
if (evflag_atom)
|
||||
gc->forward_comm_kspace(this,18,sizeof(FFT_SCALAR),FORWARD_MU_PERATOM,
|
||||
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
|
||||
|
||||
// calculate the force on my particles
|
||||
|
||||
|
@ -522,7 +509,8 @@ void PPPMDipole::compute(int eflag, int vflag)
|
|||
if (eflag_atom) {
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
eatom[i] *= 0.5;
|
||||
eatom[i] -= (mu[i][0]*mu[i][0] + mu[i][1]*mu[i][1] + mu[i][2]*mu[i][2])*2.0*g3/3.0/MY_PIS;
|
||||
eatom[i] -= (mu[i][0]*mu[i][0] + mu[i][1]*mu[i][1] +
|
||||
mu[i][2]*mu[i][2])*2.0*g3/3.0/MY_PIS;
|
||||
eatom[i] *= qscale;
|
||||
}
|
||||
}
|
||||
|
@@ -619,14 +607,18 @@ void PPPMDipole::allocate()
1,0,0,FFT_PRECISION,collective_flag);

// create ghost grid object for rho and electric field communication
// also create 2 bufs for ghost grid cell comm, passed to GridComm methods

int (*procneigh)[2] = comm->procneigh;

cg_dipole = new GridComm(lmp,world,9,3,
gc_dipole = new GridComm(lmp,world,nx_pppm,ny_pppm,nz_pppm,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out);

gc->setup(ngc_buf1,ngc_buf2);

npergrid = 9;

memory->create(gc_buf1,npergrid*ngc_buf1,"pppm:gc_buf1");
memory->create(gc_buf2,npergrid*ngc_buf2,"pppm:gc_buf2");
}

/* ----------------------------------------------------------------------
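This allocate() hunk shows the core API change of the PR in one place: the old GridComm constructor took per-point value counts (9,3) plus explicit neighbor-proc ranks, while the new one takes only the global grid size and this proc's owned ("in") and ghost ("out") brick bounds, so it can serve tiled as well as brick decompositions, and it leaves buffer ownership with the caller. A minimal sketch of the new call sequence, with names and signatures taken from the hunks in this diff (not independently verified against any other LAMMPS version):

// Sketch of the new GridComm usage pattern, following this diff.
// 1) construct with global grid size + owned and ghost brick bounds
GridComm *gc = new GridComm(lmp,world,nx_pppm,ny_pppm,nz_pppm,
                            nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
                            nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out);

// 2) setup() reports how many grid points each of the two comm bufs must hold
int ngc_buf1,ngc_buf2;
gc->setup(ngc_buf1,ngc_buf2);

// 3) the caller owns the buffers, sized by values per grid point (npergrid)
int npergrid = 9;   // 3 density + 6 field components in the dipole case above
FFT_SCALAR *gc_buf1,*gc_buf2;
memory->create(gc_buf1,npergrid*ngc_buf1,"pppm:gc_buf1");
memory->create(gc_buf2,npergrid*ngc_buf2,"pppm:gc_buf2");

// 4) the value count is passed per call, so one GridComm object can serve
//    several communication flavors with different widths
gc->forward_comm_kspace(this,npergrid,sizeof(FFT_SCALAR),FORWARD_MU,
                        gc_buf1,gc_buf2,MPI_FFT_SCALAR);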
@@ -674,7 +666,9 @@ void PPPMDipole::deallocate()
delete fft1;
delete fft2;
delete remap;
delete cg_dipole;
delete gc_dipole;
memory->destroy(gc_buf1);
memory->destroy(gc_buf2);
}

/* ----------------------------------------------------------------------

@@ -724,16 +718,15 @@ void PPPMDipole::allocate_peratom()
memory->create3d_offset(v5z_brick_dipole,nzlo_out,nzhi_out,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm_dipole:v5z_brick_dipole");

// create ghost grid object for rho and electric field communication
// use same GC ghost grid object for peratom grid communication
// but need to reallocate a larger gc_buf1 and gc_buf2

int (*procneigh)[2] = comm->procneigh;
npergrid = 18;

cg_peratom_dipole =
new GridComm(lmp,world,18,1,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
memory->destroy(gc_buf1);
memory->destroy(gc_buf2);
memory->create(gc_buf1,npergrid*ngc_buf1,"pppm:gc_buf1");
memory->create(gc_buf2,npergrid*ngc_buf2,"pppm:gc_buf2");
}

/* ----------------------------------------------------------------------

@@ -764,8 +757,6 @@ void PPPMDipole::deallocate_peratom()
memory->destroy3d_offset(v3z_brick_dipole,nzlo_out,nylo_out,nxlo_out);
memory->destroy3d_offset(v4z_brick_dipole,nzlo_out,nylo_out,nxlo_out);
memory->destroy3d_offset(v5z_brick_dipole,nzlo_out,nylo_out,nxlo_out);

delete cg_peratom_dipole;
}

/* ----------------------------------------------------------------------
@@ -2171,8 +2162,10 @@ void PPPMDipole::fieldforce_peratom_dipole()
pack own values to buf to send to another proc
------------------------------------------------------------------------- */

void PPPMDipole::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPMDipole::pack_forward_grid(int flag, void *vbuf, int nlist, int *list)
{
FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;

int n = 0;

if (flag == FORWARD_MU) {

@@ -2242,8 +2235,10 @@ void PPPMDipole::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
unpack another proc's own values from buf and set own ghost values
------------------------------------------------------------------------- */

void PPPMDipole::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPMDipole::unpack_forward_grid(int flag, void *vbuf, int nlist, int *list)
{
FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;

int n = 0;

if (flag == FORWARD_MU) {

@@ -2313,8 +2308,10 @@ void PPPMDipole::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list)
pack ghost values into buf to send to another proc
------------------------------------------------------------------------- */

void PPPMDipole::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPMDipole::pack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
{
FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;

int n = 0;
if (flag == REVERSE_MU) {
FFT_SCALAR *src_dipole0 = &densityx_brick_dipole[nzlo_out][nylo_out][nxlo_out];

@@ -2332,8 +2329,10 @@ void PPPMDipole::pack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
unpack another proc's ghost values from buf and add to own values
------------------------------------------------------------------------- */

void PPPMDipole::unpack_reverse(int flag, FFT_SCALAR *buf, int nlist, int *list)
void PPPMDipole::unpack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
{
FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;

int n = 0;
if (flag == REVERSE_MU) {
FFT_SCALAR *dest_dipole0 = &densityx_brick_dipole[nzlo_out][nylo_out][nxlo_out];
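Taken together, these four hunks show the renamed grid hooks accepting a type-erased void* buffer that each implementation casts back to FFT_SCALAR*, so the GridComm machinery itself no longer depends on the grid's scalar type. The reverse direction sums ghost contributions into owned grid points; a hedged sketch of what the elided unpack_reverse_grid body plausibly does (only the cast and the dest_dipole0 setup appear in the hunk above, the accumulation loop is a hypothetical reconstruction):

// Illustrative only: pointer setup from the hunk above, loop body assumed.
void PPPMDipole::unpack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
{
  FFT_SCALAR *buf = (FFT_SCALAR *) vbuf;   // recover the typed view

  int n = 0;
  if (flag == REVERSE_MU) {
    FFT_SCALAR *dest_dipole0 = &densityx_brick_dipole[nzlo_out][nylo_out][nxlo_out];
    // reverse comm adds ghost contributions into the owner's grid points
    for (int i = 0; i < nlist; i++)
      dest_dipole0[list[i]] += buf[n++];
  }
}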
@@ -2484,6 +2483,7 @@ int PPPMDipole::timing_3d(int n, double &time3d)
double PPPMDipole::memory_usage()
{
double bytes = nmax*3 * sizeof(double);

int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
(nzhi_out-nzlo_out+1);
bytes += 6 * nfft_both * sizeof(double); // vg

@@ -2495,8 +2495,9 @@ double PPPMDipole::memory_usage()
if (peratom_allocate_flag)
bytes += 21 * nbrick * sizeof(FFT_SCALAR);

if (cg_dipole) bytes += cg_dipole->memory_usage();
if (cg_peratom_dipole) bytes += cg_peratom_dipole->memory_usage();
// two GridComm bufs

bytes += (ngc_buf1 + ngc_buf2) * npergrid * sizeof(FFT_SCALAR);

return bytes;
}
@@ -50,10 +50,10 @@ class PPPMDipole : public PPPM {

// grid communication

void pack_forward(int, FFT_SCALAR *, int, int *);
void unpack_forward(int, FFT_SCALAR *, int, int *);
void pack_reverse(int, FFT_SCALAR *, int, int *);
void unpack_reverse(int, FFT_SCALAR *, int, int *);
void pack_forward_grid(int, void *, int, int *);
void unpack_forward_grid(int, void *, int, int *);
void pack_reverse_grid(int, void *, int, int *);
void unpack_reverse_grid(int, void *, int, int *);

// dipole

@@ -69,10 +69,12 @@ class PPPMDipole : public PPPM {
FFT_SCALAR ***v3z_brick_dipole,***v4z_brick_dipole,***v5z_brick_dipole;
FFT_SCALAR *work3,*work4;
FFT_SCALAR *densityx_fft_dipole,*densityy_fft_dipole,*densityz_fft_dipole;
class GridComm *cg_dipole;
class GridComm *cg_peratom_dipole;

class GridComm *gc_dipole;

int only_dipole_flag;
double musum,musqsum,mu2;

double find_gewald_dipole(double, double, bigint, double, double);
double newton_raphson_f_dipole(double, double, bigint, double, double);
double derivf_dipole(double, double, bigint, double, double);

@@ -87,7 +89,6 @@ class PPPMDipole : public PPPM {
void fieldforce_peratom_dipole();
double final_accuracy_dipole();
void musum_musq();

};

}
@@ -81,7 +81,7 @@ PPPMDipoleSpin::~PPPMDipoleSpin()
fft1 = NULL;
fft2 = NULL;
remap = NULL;
cg_dipole = NULL;
gc_dipole = NULL;
}

/* ----------------------------------------------------------------------

@@ -175,9 +175,7 @@ void PPPMDipoleSpin::init()
// or overlap is allowed, then done
// else reduce order and try again

int (*procneigh)[2] = comm->procneigh;

GridComm *cgtmp = NULL;
GridComm *gctmp = NULL;
int iteration = 0;

while (order >= minorder) {

@@ -190,24 +188,24 @@ void PPPMDipoleSpin::init()
set_grid_local();
if (overlap_allowed) break;

cgtmp = new GridComm(lmp,world,1,1,
gctmp = new GridComm(lmp,world,nx_pppm,ny_pppm,nz_pppm,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out,
procneigh[0][0],procneigh[0][1],procneigh[1][0],
procneigh[1][1],procneigh[2][0],procneigh[2][1]);
cgtmp->ghost_notify();
if (!cgtmp->ghost_overlap()) break;
delete cgtmp;
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out);

int tmp1,tmp2;
gctmp->setup(tmp1,tmp2);
if (gctmp->ghost_adjacent()) break;
delete gctmp;

order--;
iteration++;
}

if (order < minorder) error->all(FLERR,"PPPMDipoleSpin order < minimum allowed order");
if (!overlap_allowed && cgtmp->ghost_overlap())
if (!overlap_allowed && !gctmp->ghost_adjacent())
error->all(FLERR,"PPPMDipoleSpin grid stencil extends "
"beyond nearest neighbor processor");
if (cgtmp) delete cgtmp;
if (gctmp) delete gctmp;

// adjust g_ewald

@@ -241,8 +239,6 @@ void PPPMDipoleSpin::init()
// don't invoke allocate peratom(), will be allocated when needed

allocate();
cg_dipole->ghost_notify();
cg_dipole->setup();

// pre-compute Green's function denominator expansion
// pre-compute 1d charge distribution coefficients

@@ -270,11 +266,7 @@ void PPPMDipoleSpin::compute(int eflag, int vflag)
error->all(FLERR,"Cannot (yet) compute per-atom virial "
"with kspace style pppm/dipole/spin");

if (evflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom_dipole->ghost_notify();
cg_peratom_dipole->setup();
}
if (evflag_atom && !peratom_allocate_flag) allocate_peratom();

// if atom count has changed, update qsum and qsqsum

@@ -309,7 +301,8 @@ void PPPMDipoleSpin::compute(int eflag, int vflag)
// to fully sum contribution in their 3d bricks
// remap from 3d decomposition to FFT decomposition

cg_dipole->reverse_comm(this,REVERSE_MU);
gc_dipole->reverse_comm_kspace(this,3,sizeof(FFT_SCALAR),REVERSE_MU,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
brick2fft_dipole();

// compute potential gradient on my FFT grid and

@@ -322,13 +315,14 @@ void PPPMDipoleSpin::compute(int eflag, int vflag)
// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks

cg_dipole->forward_comm(this,FORWARD_MU);
gc_dipole->forward_comm_kspace(this,9,sizeof(FFT_SCALAR),FORWARD_MU,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);

// extra per-atom energy/virial communication

if (evflag_atom) {
cg_peratom_dipole->forward_comm(this,FORWARD_MU_PERATOM);
}
if (evflag_atom)
gc->forward_comm_kspace(this,18,sizeof(FFT_SCALAR),FORWARD_MU_PERATOM,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);

// calculate the force on my particles


@@ -46,7 +46,6 @@ class PPPMDipoleSpin : public PPPMDipole {
void fieldforce_ik_spin();
void fieldforce_peratom_spin();
void spsum_spsq();

};

}
File diff suppressed because it is too large
@@ -42,7 +42,6 @@ typedef double FFT_SCALAR;

namespace LAMMPS_NS {


#define EWALD_MAXORDER 6
#define EWALD_FUNCS 4

@@ -190,15 +189,14 @@ Variables needed for calculating the 1/r and 1/r^6 potential
FFT_SCALAR *work1,*work2;
FFT_SCALAR *work1_6, *work2_6;


class FFT3d *fft1,*fft2 ;
class FFT3d *fft1_6, *fft2_6;
class Remap *remap;
class Remap *remap_6;
class GridComm *cg;
class GridComm *cg_peratom;
class GridComm *cg_6;
class GridComm *cg_peratom_6;
class FFT3d *fft1_6,*fft2_6;
class Remap *remap,*remap_6;
class GridComm *gc,*gc6;

FFT_SCALAR *gc_buf1,*gc_buf2,*gc6_buf1,*gc6_buf2;
int ngc_buf1,ngc_buf2,npergrid;
int ngc6_buf1,ngc6_buf2,npergrid6;

int **part2grid; // storage for particle -> grid mapping
int **part2grid_6;

@@ -257,7 +255,6 @@ Variables needed for calculating the 1/r and 1/r^6 potential
void compute_gf_denom(double*, int);
double gf_denom(double, double, double, double*, int);


void compute_sf_precoeff(int, int, int, int,
int, int, int,
int, int, int,

@@ -268,7 +265,6 @@ Variables needed for calculating the 1/r and 1/r^6 potential
void compute_gf_6();
void compute_sf_coeff_6();


virtual void particle_map(double, double, double,
double, int **, int, int,
int, int, int,

@@ -295,8 +291,10 @@ Variables needed for calculating the 1/r and 1/r^6 potential
int, int, int, double&, double *,
double *, double *, double *,
double *, double *, double *,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***, double *, double **, double **,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***, double *, double **, double **,
FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***);

virtual void poisson_ad(FFT_SCALAR*, FFT_SCALAR*,

@@ -317,14 +315,18 @@ Variables needed for calculating the 1/r and 1/r^6 potential
virtual void poisson_2s_ik(FFT_SCALAR *, FFT_SCALAR *,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***);
virtual void poisson_2s_ad(FFT_SCALAR *, FFT_SCALAR *,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***);

virtual void poisson_2s_peratom(FFT_SCALAR***, FFT_SCALAR***, FFT_SCALAR***,

@@ -339,9 +341,11 @@ Variables needed for calculating the 1/r and 1/r^6 potential
virtual void poisson_none_ik(int, int, FFT_SCALAR *, FFT_SCALAR *,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ***, FFT_SCALAR ***, FFT_SCALAR ***,
FFT_SCALAR ****, FFT_SCALAR ****, FFT_SCALAR ****, FFT_SCALAR ****,
FFT_SCALAR ****, FFT_SCALAR ****, FFT_SCALAR ****,
FFT_SCALAR ****,
FFT_SCALAR ****, FFT_SCALAR ****, FFT_SCALAR ****);
virtual void poisson_none_peratom(int, int, FFT_SCALAR***, FFT_SCALAR***, FFT_SCALAR***,
virtual void poisson_none_peratom(int, int,
FFT_SCALAR***, FFT_SCALAR***, FFT_SCALAR***,
FFT_SCALAR***, FFT_SCALAR***, FFT_SCALAR***,
FFT_SCALAR***, FFT_SCALAR***, FFT_SCALAR***,
FFT_SCALAR***, FFT_SCALAR***, FFT_SCALAR***);

@@ -369,10 +373,10 @@ Variables needed for calculating the 1/r and 1/r^6 potential

// grid communication

void pack_forward(int, FFT_SCALAR *, int, int *);
void unpack_forward(int, FFT_SCALAR *, int, int *);
void pack_reverse(int, FFT_SCALAR *, int, int *);
void unpack_reverse(int, FFT_SCALAR *, int, int *);
void pack_forward_grid(int, void *, int, int *);
void unpack_forward_grid(int, void *, int, int *);
void pack_reverse_grid(int, void *, int, int *);
void unpack_reverse_grid(int, void *, int, int *);
};

}
@@ -123,11 +123,7 @@ void PPPMStagger::compute(int eflag, int vflag)

ev_init(eflag,vflag);

if (evflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom->ghost_notify();
cg_peratom->setup();
}
if (evflag_atom && !peratom_allocate_flag) allocate_peratom();

// convert atoms from box to lamda coords

@@ -160,7 +156,8 @@ void PPPMStagger::compute(int eflag, int vflag)
// to fully sum contribution in their 3d bricks
// remap from 3d decomposition to FFT decomposition

cg->reverse_comm(this,REVERSE_RHO);
gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
brick2fft();

// compute potential gradient on my FFT grid and

@@ -173,16 +170,22 @@ void PPPMStagger::compute(int eflag, int vflag)
// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks

if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
else cg->forward_comm(this,FORWARD_IK);
if (differentiation_flag == 1)
gc->forward_comm_kspace(this,1,sizeof(FFT_SCALAR),FORWARD_AD,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
else
gc->forward_comm_kspace(this,3,sizeof(FFT_SCALAR),FORWARD_IK,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);

// extra per-atom energy/virial communication

if (evflag_atom) {
if (differentiation_flag == 1 && vflag_atom)
cg_peratom->forward_comm(this,FORWARD_AD_PERATOM);
gc->forward_comm_kspace(this,6,sizeof(FFT_SCALAR),FORWARD_AD_PERATOM,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
else if (differentiation_flag == 0)
cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
gc->forward_comm_kspace(this,7,sizeof(FFT_SCALAR),FORWARD_IK_PERATOM,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
}

// calculate the force on my particles

@@ -268,10 +271,16 @@ void PPPMStagger::compute(int eflag, int vflag)

double PPPMStagger::compute_qopt()
{
if (differentiation_flag == 1)
return compute_qopt_ad();
if (differentiation_flag == 1) return compute_qopt_ad();

int k,l,m,nx,ny,nz;
double snx,sny,snz;
double cnx,cny,cnz;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double sum1,sum2,dot1,dot2;
double numerator,denominator;
double u1,u2,u3,sqk;

double qopt = 0.0;
const double * const prd = domain->prd;

const double xprd = prd[0];

@@ -282,77 +291,76 @@ double PPPMStagger::compute_qopt()
const double unitky = (MY_2PI/yprd);
const double unitkz = (MY_2PI/zprd_slab);

double snx,sny,snz;
double cnx,cny,cnz;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double sum1,sum2,dot1,dot2;
double numerator,denominator;
double u1,u2,u3,sqk;

int k,l,m,nx,ny,nz,kper,lper,mper;

const int nbx = 2;
const int nby = 2;
const int nbz = 2;

const int twoorder = 2*order;

for (m = nzlo_fft; m <= nzhi_fft; m++) {
mper = m - nz_pppm*(2*m/nz_pppm);
// loop over entire FFT grid
// each proc calculates contributions from every Pth grid point

bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm;
int nxy_pppm = nx_pppm * ny_pppm;

double qopt = 0.0;

for (bigint i = me; i < ngridtotal; i += nprocs) {
k = i % nx_pppm;
l = (i/nx_pppm) % ny_pppm;
m = i / nxy_pppm;

const int kper = k - nx_pppm*(2*k/nx_pppm);
const int lper = l - ny_pppm*(2*l/ny_pppm);
const int mper = m - nz_pppm*(2*m/nz_pppm);

sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
if (sqk == 0.0) continue;

snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));
cnx = cos(0.5*unitkx*kper*xprd/nx_pppm);
sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));
cny = cos(0.5*unitky*lper*yprd/ny_pppm);
snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm));
cnz = cos(0.5*unitkz*mper*zprd_slab/nz_pppm);

for (l = nylo_fft; l <= nyhi_fft; l++) {
lper = l - ny_pppm*(2*l/ny_pppm);
sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));
cny = cos(0.5*unitky*lper*yprd/ny_pppm);
numerator = MY_4PI/sqk;
denominator = 0.5*(gf_denom(snx,sny,snz) + gf_denom2(cnx,cny,cnz));

for (k = nxlo_fft; k <= nxhi_fft; k++) {
kper = k - nx_pppm*(2*k/nx_pppm);
snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));
cnx = cos(0.5*unitkx*kper*xprd/nx_pppm);
sum1 = sum2 = 0.0;

sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
for (nx = -nbx; nx <= nbx; nx++) {
qx = unitkx*(kper+nx_pppm*nx);
sx = exp(-0.25*square(qx/g_ewald));
argx = 0.5*qx*xprd/nx_pppm;
wx = powsinxx(argx,twoorder);

if (sqk != 0.0) {
numerator = MY_4PI/sqk;
denominator = 0.5*(gf_denom(snx,sny,snz) + gf_denom2(cnx,cny,cnz));
sum1 = 0.0;
sum2 = 0.0;
for (ny = -nby; ny <= nby; ny++) {
qy = unitky*(lper+ny_pppm*ny);
sy = exp(-0.25*square(qy/g_ewald));
argy = 0.5*qy*yprd/ny_pppm;
wy = powsinxx(argy,twoorder);

for (nx = -nbx; nx <= nbx; nx++) {
qx = unitkx*(kper+nx_pppm*nx);
sx = exp(-0.25*square(qx/g_ewald));
argx = 0.5*qx*xprd/nx_pppm;
wx = powsinxx(argx,twoorder);
for (nz = -nbz; nz <= nbz; nz++) {
qz = unitkz*(mper+nz_pppm*nz);
sz = exp(-0.25*square(qz/g_ewald));
argz = 0.5*qz*zprd_slab/nz_pppm;
wz = powsinxx(argz,twoorder);

for (ny = -nby; ny <= nby; ny++) {
qy = unitky*(lper+ny_pppm*ny);
sy = exp(-0.25*square(qy/g_ewald));
argy = 0.5*qy*yprd/ny_pppm;
wy = powsinxx(argy,twoorder);

for (nz = -nbz; nz <= nbz; nz++) {
qz = unitkz*(mper+nz_pppm*nz);
sz = exp(-0.25*square(qz/g_ewald));
argz = 0.5*qz*zprd_slab/nz_pppm;
wz = powsinxx(argz,twoorder);

dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
dot2 = qx*qx + qy*qy + qz*qz;
u1 = sx*sy*sz;
u2 = wx*wy*wz;
u3 = numerator*u1*u2*dot1;
sum1 += u1*u1*MY_4PI*MY_4PI/dot2;
sum2 += u3*u3/dot2;
}
}
}
qopt += sum1 - sum2/denominator;
dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
dot2 = qx*qx + qy*qy + qz*qz;
u1 = sx*sy*sz;
u2 = wx*wy*wz;
u3 = numerator*u1*u2*dot1;
sum1 += u1*u1*MY_4PI*MY_4PI/dot2;
sum2 += u3*u3/dot2;
}
}
}

qopt += sum1 - sum2/denominator;
}

double qopt_all;
MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world);
return qopt_all;
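The qopt hunks above replace the triple loop over this proc's own FFT-decomposition indices with one flat loop over the entire k-space grid, strided by proc rank. That stays load-balanced under tiled decompositions, where per-proc FFT bricks can differ in size. A small standalone illustration of the index mapping and round-robin distribution (plain C++; the grid sizes and ranks are hypothetical):

#include <cstdio>

// Round-robin distribution of a flattened 3d grid over P procs, as in the
// restructured compute_qopt() loop above.
int main() {
  const int nx = 4, ny = 3, nz = 2;       // hypothetical global grid dims
  const int me = 1, nprocs = 3;           // hypothetical rank and proc count
  const long ngridtotal = (long) nx * ny * nz;

  for (long i = me; i < ngridtotal; i += nprocs) {
    int k = i % nx;                       // fastest-varying index
    int l = (i / nx) % ny;
    int m = i / (nx * ny);                // slowest-varying index
    printf("proc %d handles grid point (%d,%d,%d)\n", me, k, l, m);
  }
  return 0;
}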
@@ -364,7 +372,11 @@ double PPPMStagger::compute_qopt()

double PPPMStagger::compute_qopt_ad()
{
double qopt = 0.0;
int k,l,m,nx,ny,nz;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double sum1,sum2,sum3,sum4,sum5,sum6,dot2;
double u1,u2,sqk;

const double * const prd = domain->prd;

const double xprd = prd[0];

@@ -375,72 +387,68 @@ double PPPMStagger::compute_qopt_ad()
const double unitky = (MY_2PI/yprd);
const double unitkz = (MY_2PI/zprd_slab);

double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double sum1,sum2,sum3,sum4,sum5,sum6,dot2;
double u1,u2,sqk;

int k,l,m,nx,ny,nz,kper,lper,mper;

const int nbx = 2;
const int nby = 2;
const int nbz = 2;

const int twoorder = 2*order;

for (m = nzlo_fft; m <= nzhi_fft; m++) {
mper = m - nz_pppm*(2*m/nz_pppm);
// loop over entire FFT grid
// each proc calculates contributions from every Pth grid point

for (l = nylo_fft; l <= nyhi_fft; l++) {
lper = l - ny_pppm*(2*l/ny_pppm);
bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm;
int nxy_pppm = nx_pppm * ny_pppm;

for (k = nxlo_fft; k <= nxhi_fft; k++) {
kper = k - nx_pppm*(2*k/nx_pppm);
double qopt = 0.0;

sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
for (bigint i = me; i < ngridtotal; i += nprocs) {
k = i % nx_pppm;
l = (i/nx_pppm) % ny_pppm;
m = i / nxy_pppm;

if (sqk != 0.0) {
sum1 = 0.0;
sum2 = 0.0;
sum3 = 0.0;
sum4 = 0.0;
sum5 = 0.0;
sum6 = 0.0;
const int kper = k - nx_pppm*(2*k/nx_pppm);
const int lper = l - ny_pppm*(2*l/ny_pppm);
const int mper = m - nz_pppm*(2*m/nz_pppm);

for (nx = -nbx; nx <= nbx; nx++) {
qx = unitkx*(kper+nx_pppm*nx);
sx = exp(-0.25*square(qx/g_ewald));
argx = 0.5*qx*xprd/nx_pppm;
wx = powsinxx(argx,twoorder);
sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
if (sqk == 0.0) continue;

for (ny = -nby; ny <= nby; ny++) {
qy = unitky*(lper+ny_pppm*ny);
sy = exp(-0.25*square(qy/g_ewald));
argy = 0.5*qy*yprd/ny_pppm;
wy = powsinxx(argy,twoorder);
sum1 = sum2 = sum3 = sum4 = sum5 = sum6 = 0.0;

for (nz = -nbz; nz <= nbz; nz++) {
qz = unitkz*(mper+nz_pppm*nz);
sz = exp(-0.25*square(qz/g_ewald));
argz = 0.5*qz*zprd_slab/nz_pppm;
wz = powsinxx(argz,twoorder);
for (nx = -nbx; nx <= nbx; nx++) {
qx = unitkx*(kper+nx_pppm*nx);
sx = exp(-0.25*square(qx/g_ewald));
argx = 0.5*qx*xprd/nx_pppm;
wx = powsinxx(argx,twoorder);

dot2 = qx*qx + qy*qy + qz*qz;
u1 = sx*sy*sz;
u2 = wx*wy*wz;
sum1 += u1*u1/dot2*MY_4PI*MY_4PI;
sum2 += u1*u1*u2*u2*MY_4PI*MY_4PI;
sum3 += u2;
sum4 += dot2*u2;
sum5 += u2*powint(-1.0,nx+ny+nz);
sum6 += dot2*u2*powint(-1.0,nx+ny+nz);
}
}
}
qopt += sum1 - sum2/(0.5*(sum3*sum4 + sum5*sum6));
for (ny = -nby; ny <= nby; ny++) {
qy = unitky*(lper+ny_pppm*ny);
sy = exp(-0.25*square(qy/g_ewald));
argy = 0.5*qy*yprd/ny_pppm;
wy = powsinxx(argy,twoorder);

for (nz = -nbz; nz <= nbz; nz++) {
qz = unitkz*(mper+nz_pppm*nz);
sz = exp(-0.25*square(qz/g_ewald));
argz = 0.5*qz*zprd_slab/nz_pppm;
wz = powsinxx(argz,twoorder);

dot2 = qx*qx + qy*qy + qz*qz;
u1 = sx*sy*sz;
u2 = wx*wy*wz;
sum1 += u1*u1/dot2*MY_4PI*MY_4PI;
sum2 += u1*u1*u2*u2*MY_4PI*MY_4PI;
sum3 += u2;
sum4 += dot2*u2;
sum5 += u2*powint(-1.0,nx+ny+nz);
sum6 += dot2*u2*powint(-1.0,nx+ny+nz);
}
}
}

qopt += sum1 - sum2/(0.5*(sum3*sum4 + sum5*sum6));
}

double qopt_all;
MPI_Allreduce(&qopt,&qopt_all,1,MPI_DOUBLE,MPI_SUM,world);
return qopt_all;
@@ -50,7 +50,6 @@ class PPPMStagger : public PPPM {
virtual void fieldforce_ad();
virtual void fieldforce_peratom();


inline double gf_denom2(const double &x, const double &y,
const double &z) const
{

@@ -568,7 +568,7 @@ void FixGLD::unpack_restart(int nlocal, int nth)

// skip to the nth set of extended variables
// unpack the Nth first values this way because other fixes pack them


int m = 0;
for (int i = 0; i< nth; i++) m += static_cast<int> (extra[nlocal][m]);
m++;

@@ -674,7 +674,7 @@ void FixTTM::unpack_restart(int nlocal, int nth)

// skip to Nth set of extra values
// unpack the Nth first values this way because other fixes pack them


int m = 0;
for (int i = 0; i < nth; i++) m += static_cast<int> (extra[nlocal][m]);
m++;

@@ -1307,7 +1307,7 @@ void FixCMAP::unpack_restart(int nlocal, int nth)

// skip to Nth set of extra values
// unpack the Nth first values this way because other fixes pack them


int n = 0;
for (int i = 0; i < nth; i++) n += static_cast<int> (extra[nlocal][n]);

@@ -131,9 +131,10 @@ void PPPMDispIntel::init()
// For vectorization, we need some padding in the end
// The first thread computes on the global density
if ((comm->nthreads > 1) && !_use_lrt) {
int mygrid = MAX(ngrid,ngrid_6);
memory->destroy(perthread_density);
memory->create(perthread_density, comm->nthreads-1,
ngrid + INTEL_P3M_ALIGNED_MAXORDER,
mygrid + INTEL_P3M_ALIGNED_MAXORDER,
"pppmdispintel:perthread_density");
}

@@ -173,23 +174,15 @@ void PPPMDispIntel::compute(int eflag, int vflag)
return;
}
#endif

int i;

// convert atoms from box to lamda coords

ev_init(eflag,vflag);

if (evflag_atom && !peratom_allocate_flag) {
allocate_peratom();
if (function[0]) {
cg_peratom->ghost_notify();
cg_peratom->setup();
}
if (function[1] + function[2] + function[3]) {
cg_peratom_6->ghost_notify();
cg_peratom_6->setup();
}
peratom_allocate_flag = 1;
}
if (evflag_atom && !peratom_allocate_flag) allocate_peratom();

if (triclinic == 0) boxlo = domain->boxlo;
else {
boxlo = domain->boxlo_lamda;

@@ -299,7 +292,8 @@ void PPPMDispIntel::compute(int eflag, int vflag)
make_rho_c<float,float>(fix->get_single_buffers());
}

cg->reverse_comm(this,REVERSE_RHO);
gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);

brick2fft(nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in,
density_brick, density_fft, work1,remap);

@@ -312,7 +306,8 @@ void PPPMDispIntel::compute(int eflag, int vflag)
energy_1, greensfn, virial_1, vg,vg2, u_brick, v0_brick,
v1_brick, v2_brick, v3_brick, v4_brick, v5_brick);

cg->forward_comm(this,FORWARD_AD);
gc->forward_comm_kspace(this,1,sizeof(FFT_SCALAR),FORWARD_AD,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);

if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
fieldforce_c_ad<float,double>(fix->get_mixed_buffers());

@@ -322,7 +317,9 @@ void PPPMDispIntel::compute(int eflag, int vflag)
fieldforce_c_ad<float,float>(fix->get_single_buffers());
}

if (vflag_atom) cg_peratom->forward_comm(this, FORWARD_AD_PERATOM);
if (vflag_atom)
gc->forward_comm_kspace(this,6,sizeof(FFT_SCALAR),FORWARD_AD_PERATOM,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);

} else {
poisson_ik(work1, work2, density_fft, fft1, fft2,

@@ -334,7 +331,8 @@ void PPPMDispIntel::compute(int eflag, int vflag)
u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick,
v5_brick);

cg->forward_comm(this, FORWARD_IK);
gc->forward_comm_kspace(this,3,sizeof(FFT_SCALAR),FORWARD_IK,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);

if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
fieldforce_c_ik<float,double>(fix->get_mixed_buffers());

@@ -344,12 +342,15 @@ void PPPMDispIntel::compute(int eflag, int vflag)
fieldforce_c_ik<float,float>(fix->get_single_buffers());
}

if (evflag_atom) cg_peratom->forward_comm(this, FORWARD_IK_PERATOM);
if (evflag_atom)
gc->forward_comm_kspace(this,7,sizeof(FFT_SCALAR),FORWARD_IK_PERATOM,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
}
if (evflag_atom) fieldforce_c_peratom();
}

if (function[1]) {

//perform calculations for geometric mixing

if (fix->precision() == FixIntel::PREC_MODE_MIXED) {

@@ -375,14 +376,13 @@ void PPPMDispIntel::compute(int eflag, int vflag)
make_rho_g<float,float>(fix->get_single_buffers());
}


cg_6->reverse_comm(this, REVERSE_RHO_G);
gc6->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO_G,
gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);

brick2fft(nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6,
density_brick_g, density_fft_g, work1_6,remap_6);

if (differentiation_flag == 1) {

poisson_ad(work1_6, work2_6, density_fft_g, fft1_6, fft2_6,
nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6,
nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6,

@@ -391,17 +391,20 @@ void PPPMDispIntel::compute(int eflag, int vflag)
virial_6, vg_6, vg2_6, u_brick_g, v0_brick_g, v1_brick_g,
v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g);

cg_6->forward_comm(this,FORWARD_AD_G);
gc6->forward_comm_kspace(this,1,sizeof(FFT_SCALAR),FORWARD_AD_G,
gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);

if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
fieldforce_g_ad<float,double>(fix->get_mixed_buffers());
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
fieldforce_g_ad<double,double>(fix->get_double_buffers());
} else {
fieldforce_g_ad<float,float>(fix->get_single_buffers());
}
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
fieldforce_g_ad<float,double>(fix->get_mixed_buffers());
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
fieldforce_g_ad<double,double>(fix->get_double_buffers());
} else {
fieldforce_g_ad<float,float>(fix->get_single_buffers());
}

if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_G);
if (vflag_atom)
gc6->forward_comm_kspace(this,7,sizeof(FFT_SCALAR),FORWARD_AD_PERATOM_G,
gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);

} else {
poisson_ik(work1_6, work2_6, density_fft_g, fft1_6, fft2_6,

@@ -413,19 +416,22 @@ void PPPMDispIntel::compute(int eflag, int vflag)
vdz_brick_g, virial_6, vg_6, vg2_6, u_brick_g, v0_brick_g,
v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g);

cg_6->forward_comm(this,FORWARD_IK_G);
gc6->forward_comm_kspace(this,3,sizeof(FFT_SCALAR),FORWARD_IK_G,
gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);

if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
fieldforce_g_ik<float,double>(fix->get_mixed_buffers());
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
fieldforce_g_ik<double,double>(fix->get_double_buffers());
} else {
fieldforce_g_ik<float,float>(fix->get_single_buffers());
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
fieldforce_g_ik<float,double>(fix->get_mixed_buffers());
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
fieldforce_g_ik<double,double>(fix->get_double_buffers());
} else {
fieldforce_g_ik<float,float>(fix->get_single_buffers());
}

if (evflag_atom)
gc6->forward_comm_kspace(this,6,sizeof(FFT_SCALAR),FORWARD_IK_PERATOM_G,
gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);
}


if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_G);
}
if (evflag_atom) fieldforce_g_peratom();
}


@@ -455,12 +461,12 @@ void PPPMDispIntel::compute(int eflag, int vflag)
make_rho_a<float,float>(fix->get_single_buffers());
}

cg_6->reverse_comm(this, REVERSE_RHO_A);
gc->reverse_comm_kspace(this,7,sizeof(FFT_SCALAR),REVERSE_RHO_A,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);

brick2fft_a();

if ( differentiation_flag == 1) {

if (differentiation_flag == 1) {
poisson_ad(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6,
nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, nxlo_fft_6,
nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,

@@ -481,20 +487,22 @@ void PPPMDispIntel::compute(int eflag, int vflag)
v5_brick_a2, u_brick_a4, v0_brick_a4, v1_brick_a4,
v2_brick_a4, v3_brick_a4, v4_brick_a4, v5_brick_a4);

cg_6->forward_comm(this, FORWARD_AD_A);
gc6->forward_comm_kspace(this,7,sizeof(FFT_SCALAR),FORWARD_AD_A,
gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);

if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
fieldforce_a_ad<float,double>(fix->get_mixed_buffers());
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
fieldforce_a_ad<double,double>(fix->get_double_buffers());
} else {
fieldforce_a_ad<float,float>(fix->get_single_buffers());
}
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
fieldforce_a_ad<float,double>(fix->get_mixed_buffers());
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
fieldforce_a_ad<double,double>(fix->get_double_buffers());
} else {
fieldforce_a_ad<float,float>(fix->get_single_buffers());
}

if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_AD_PERATOM_A);
if (evflag_atom)
gc6->forward_comm_kspace(this,42,sizeof(FFT_SCALAR),FORWARD_AD_PERATOM_A,
gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);

} else {

poisson_ik(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6,
nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, nxlo_fft_6,
nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6,

@@ -522,7 +530,8 @@ void PPPMDispIntel::compute(int eflag, int vflag)
u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4,
v3_brick_a4, v4_brick_a4, v5_brick_a4);

cg_6->forward_comm(this, FORWARD_IK_A);
gc6->forward_comm_kspace(this,18,sizeof(FFT_SCALAR),FORWARD_IK_A,
gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);

if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
fieldforce_a_ik<float,double>(fix->get_mixed_buffers());

@@ -532,13 +541,17 @@ void PPPMDispIntel::compute(int eflag, int vflag)
fieldforce_a_ik<float,float>(fix->get_single_buffers());
}

if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_A);
if (evflag_atom)
gc6->forward_comm_kspace(this,49,sizeof(FFT_SCALAR),FORWARD_IK_PERATOM_A,
gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);
}

if (evflag_atom) fieldforce_a_peratom();
}

if (function[3]) {
//perform calculations if no mixing rule applies

// perform calculations if no mixing rule applies

if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
particle_map<float,double>(delxinv_6, delyinv_6, delzinv_6, shift_6,

@@ -563,7 +576,8 @@ void PPPMDispIntel::compute(int eflag, int vflag)
make_rho_none<float,float>(fix->get_single_buffers());
}

cg_6->reverse_comm(this, REVERSE_RHO_NONE);
gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO_NONE,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);

brick2fft_none();


@@ -578,17 +592,20 @@ void PPPMDispIntel::compute(int eflag, int vflag)
n += 2;
}

cg_6->forward_comm(this,FORWARD_AD_NONE);
gc6->forward_comm_kspace(this,1,sizeof(FFT_SCALAR),FORWARD_AD_NONE,
gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);

if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
fieldforce_none_ad<float,double>(fix->get_mixed_buffers());
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
fieldforce_none_ad<double,double>(fix->get_double_buffers());
} else {
fieldforce_none_ad<float,float>(fix->get_single_buffers());
}
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
fieldforce_none_ad<float,double>(fix->get_mixed_buffers());
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
fieldforce_none_ad<double,double>(fix->get_double_buffers());
} else {
fieldforce_none_ad<float,float>(fix->get_single_buffers());
}

if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_NONE);
if (vflag_atom)
gc6->forward_comm_kspace(this,6,sizeof(FFT_SCALAR),FORWARD_AD_PERATOM_NONE,
gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);

} else {
int n = 0;

@@ -604,19 +621,22 @@ void PPPMDispIntel::compute(int eflag, int vflag)
n += 2;
}

cg_6->forward_comm(this,FORWARD_IK_NONE);
gc6->forward_comm_kspace(this,3,sizeof(FFT_SCALAR),FORWARD_IK_NONE,
gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);

if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
fieldforce_none_ik<float,double>(fix->get_mixed_buffers());
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
fieldforce_none_ik<double,double>(fix->get_double_buffers());
} else {
fieldforce_none_ik<float,float>(fix->get_single_buffers());
}
if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
fieldforce_none_ik<float,double>(fix->get_mixed_buffers());
} else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
fieldforce_none_ik<double,double>(fix->get_double_buffers());
} else {
fieldforce_none_ik<float,float>(fix->get_single_buffers());
}

if (evflag_atom)
cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_NONE);
gc6->forward_comm_kspace(this,7,sizeof(FFT_SCALAR),FORWARD_IK_PERATOM_NONE,
gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);
}

if (evflag_atom) fieldforce_none_peratom();
}


@@ -655,7 +675,7 @@ void PPPMDispIntel::compute(int eflag, int vflag)
for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
MPI_Allreduce(virial_6,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
for (i = 0; i < 6; i++) virial[i] += 0.5*volume*virial_all[i];
if (function[1]+function[2]+function[3]){
if (function[1]+function[2]+function[3]) {
double a = MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij;
virial[0] -= a;
virial[1] -= a;
@@ -2964,69 +2984,73 @@ void PPPMDispIntel::fieldforce_none_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)

void PPPMDispIntel::precompute_rho()
{

half_rho_scale = (rho_points - 1.)/2.;
half_rho_scale_plus = half_rho_scale + 0.5;

for (int i = 0; i < rho_points; i++) {
FFT_SCALAR dx = -1. + 1./half_rho_scale * (FFT_SCALAR)i;
#if defined(LMP_SIMD_COMPILER)
#pragma simd
#endif
for (int k=nlower; k<=nupper;k++){
FFT_SCALAR r1 = ZEROF;
for(int l=order-1; l>=0; l--){
r1 = rho_coeff[l][k] + r1*dx;
}
rho_lookup[i][k-nlower] = r1;
}
for (int k = nupper-nlower+1; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho_lookup[i][k] = 0;
}
if (differentiation_flag == 1) {
if (function[0]) {
for (int i = 0; i < rho_points; i++) {
FFT_SCALAR dx = -1. + 1./half_rho_scale * (FFT_SCALAR)i;
#if defined(LMP_SIMD_COMPILER)
#pragma simd
#endif
for(int k=nlower; k<=nupper;k++){
for (int k=nlower; k<=nupper;k++){
FFT_SCALAR r1 = ZEROF;
for(int l=order-2; l>=0; l--){
r1 = drho_coeff[l][k] + r1*dx;
for(int l=order-1; l>=0; l--){
r1 = rho_coeff[l][k] + r1*dx;
}
drho_lookup[i][k-nlower] = r1;
rho_lookup[i][k-nlower] = r1;
}
for (int k = nupper-nlower+1; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
drho_lookup[i][k] = 0;
rho_lookup[i][k] = 0;
}
if (differentiation_flag == 1) {
#if defined(LMP_SIMD_COMPILER)
#pragma simd
#endif
for(int k=nlower; k<=nupper;k++){
FFT_SCALAR r1 = ZEROF;
for(int l=order-2; l>=0; l--){
r1 = drho_coeff[l][k] + r1*dx;
}
drho_lookup[i][k-nlower] = r1;
}
for (int k = nupper-nlower+1; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
drho_lookup[i][k] = 0;
}
}
}
}
for (int i = 0; i < rho_points; i++) {
FFT_SCALAR dx = -1. + 1./half_rho_scale * (FFT_SCALAR)i;
#if defined(LMP_SIMD_COMPILER)
#pragma simd
#endif
for (int k=nlower_6; k<=nupper_6;k++){
FFT_SCALAR r1 = ZEROF;
for(int l=order_6-1; l>=0; l--){
r1 = rho_coeff_6[l][k] + r1*dx;
}
rho6_lookup[i][k-nlower_6] = r1;
}
for (int k = nupper_6-nlower_6+1; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
rho6_lookup[i][k] = 0;
}
if (differentiation_flag == 1) {

if (function[1]+function[2]+function[3]) {
for (int i = 0; i < rho_points; i++) {
FFT_SCALAR dx = -1. + 1./half_rho_scale * (FFT_SCALAR)i;
#if defined(LMP_SIMD_COMPILER)
#pragma simd
#endif
for(int k=nlower_6; k<=nupper_6;k++){
for (int k=nlower_6; k<=nupper_6;k++){
FFT_SCALAR r1 = ZEROF;
for(int l=order_6-2; l>=0; l--){
r1 = drho_coeff_6[l][k] + r1*dx;
for(int l=order_6-1; l>=0; l--){
r1 = rho_coeff_6[l][k] + r1*dx;
}
drho6_lookup[i][k-nlower_6] = r1;
rho6_lookup[i][k-nlower_6] = r1;
}
for (int k = nupper_6-nlower_6+1; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
drho6_lookup[i][k] = 0;
rho6_lookup[i][k] = 0;
}
if (differentiation_flag == 1) {
#if defined(LMP_SIMD_COMPILER)
#pragma simd
#endif
for(int k=nlower_6; k<=nupper_6;k++){
FFT_SCALAR r1 = ZEROF;
for(int l=order_6-2; l>=0; l--){
r1 = drho_coeff_6[l][k] + r1*dx;
}
drho6_lookup[i][k-nlower_6] = r1;
}
for (int k = nupper_6-nlower_6+1; k < INTEL_P3M_ALIGNED_MAXORDER; k++) {
drho6_lookup[i][k] = 0;
}
}
}
}
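The precompute_rho() restructure above does not change the math; it wraps the same lookup-table fills in function[...] guards so tables are only built for the interaction terms actually in use. The inner recurrence r1 = rho_coeff[l][k] + r1*dx is a Horner evaluation of the stencil polynomial at offset dx. A tiny standalone illustration of that recurrence (hypothetical coefficients, not from the diff):

#include <cstdio>

// Horner's rule as used in the rho lookup-table fill: evaluate
// c[n-1]*x^(n-1) + ... + c[1]*x + c[0] with one multiply-add per term.
double horner(const double *c, int n, double x) {
  double r = 0.0;
  for (int l = n - 1; l >= 0; l--) r = c[l] + r * x;
  return r;
}

int main() {
  const double c[3] = {1.0, -2.0, 0.5};    // hypothetical stencil coefficients
  printf("%g\n", horner(c, 3, 0.25));      // 1 - 2*0.25 + 0.5*0.25^2 = 0.53125
  return 0;
}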
@@ -70,8 +70,6 @@ namespace LAMMPS_NS {
FFT_SCALAR *particle_eky6;
FFT_SCALAR *particle_ekz6;



int _use_table;
int rho_points;
FFT_SCALAR **rho_lookup;

@@ -82,7 +80,6 @@ namespace LAMMPS_NS {

int _use_packing;


#ifdef _LMP_INTEL_OFFLOAD
int _use_base;
#endif

@@ -164,11 +164,7 @@ void PPPMIntel::compute_first(int eflag, int vflag)

ev_init(eflag,vflag);

if (evflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom->ghost_notify();
cg_peratom->setup();
}
if (evflag_atom && !peratom_allocate_flag) allocate_peratom();

// if atom count has changed, update qsum and qsqsum

@@ -232,7 +228,8 @@ void PPPMIntel::compute_first(int eflag, int vflag)
// to fully sum contribution in their 3d bricks
// remap from 3d decomposition to FFT decomposition

cg->reverse_comm(this,REVERSE_RHO);
gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
brick2fft();

// compute potential gradient on my FFT grid and

@@ -246,16 +243,22 @@ void PPPMIntel::compute_first(int eflag, int vflag)
// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks

if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
else cg->forward_comm(this,FORWARD_IK);
if (differentiation_flag == 1)
gc->forward_comm_kspace(this,1,sizeof(FFT_SCALAR),FORWARD_AD,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
else
gc->forward_comm_kspace(this,3,sizeof(FFT_SCALAR),FORWARD_IK,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);

// extra per-atom energy/virial communication

if (evflag_atom) {
if (differentiation_flag == 1 && vflag_atom)
cg_peratom->forward_comm(this,FORWARD_AD_PERATOM);
gc->forward_comm_kspace(this,6,sizeof(FFT_SCALAR),FORWARD_AD_PERATOM,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
else if (differentiation_flag == 0)
cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
gc->forward_comm_kspace(this,7,sizeof(FFT_SCALAR),FORWARD_IK_PERATOM,
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
}
}

@@ -843,7 +843,7 @@ void FixGLE::unpack_restart(int nlocal, int nth)

// skip to the nth set of extended variables
// unpack the Nth first values this way because other fixes pack them


int m = 0;
for (int i = 0; i< nth; i++) m += static_cast<int> (extra[nlocal][m]);
m++;

@@ -572,7 +572,7 @@ void FixSRP::unpack_restart(int nlocal, int nth)

// skip to Nth set of extra values
// unpack the Nth first values this way because other fixes pack them


int m = 0;
for (int i = 0; i < nth; i++){
m += static_cast<int> (extra[nlocal][m]);

@@ -448,7 +448,7 @@ void PairMEAMSpline::coeff(int narg, char **arg)
if (map[j] == i) count++;
if (count != 1)
error->all(FLERR,"Pair style meam/spline requires one atom type per element");
}
}
}

#define MAXLINE 1024

@@ -438,14 +438,14 @@ void EwaldOMP::eik_dot_r_triclinic()
#pragma omp parallel LMP_DEFAULT_NONE
#endif
{


int i,ifrom,ito,k,l,m,n,ic,tid;
double cstr1,sstr1;
double sqk,clpm,slpm;
double unitk_lamda[3];

loop_setup_thr(ifrom,ito,tid,nlocal,nthreads);


double max_kvecs[3];
max_kvecs[0] = kxmax;
max_kvecs[1] = kymax;
@@ -95,17 +95,7 @@ void MSMCGOMP::compute(int eflag, int vflag)

// invoke allocate_peratom() if needed for first time

if (vflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom_all->ghost_notify();
cg_peratom_all->setup();
for (int n=0; n<levels; n++) {
if (!active_flag[n]) continue;
cg_peratom[n]->ghost_notify();
cg_peratom[n]->setup();
}
peratom_allocate_flag = 1;
}
if (vflag_atom && !peratom_allocate_flag) allocate_peratom();

// extend size of per-atom arrays if necessary

@@ -175,7 +165,8 @@ void MSMCGOMP::compute(int eflag, int vflag)
// to fully sum contribution in their 3d grid

current_level = 0;
cg_all->reverse_comm(this,REVERSE_RHO);
gcall->reverse_comm_kspace(this,1,sizeof(double),REVERSE_RHO,
gcall_buf1,gcall_buf2,MPI_DOUBLE);

// forward communicate charge density values to fill ghost grid points
// compute direct sum interaction and then restrict to coarser grid

@@ -183,24 +174,30 @@ void MSMCGOMP::compute(int eflag, int vflag)
for (int n=0; n<=levels-2; n++) {
if (!active_flag[n]) continue;
current_level = n;
cg[n]->forward_comm(this,FORWARD_RHO);

gc[n]->forward_comm_kspace(this,1,sizeof(double),FORWARD_RHO,
gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
direct(n);
restriction(n);
}


// compute direct interaction for top grid level for non-periodic
// and for second from top grid level for periodic

if (active_flag[levels-1]) {
if (domain->nonperiodic) {
current_level = levels-1;
cg[levels-1]->forward_comm(this,FORWARD_RHO);
gc[levels-1]->
forward_comm_kspace(this,1,sizeof(double),FORWARD_RHO,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
direct_top(levels-1);
cg[levels-1]->reverse_comm(this,REVERSE_AD);
gc[levels-1]->
reverse_comm_kspace(this,1,sizeof(double),REVERSE_AD,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
if (vflag_atom)
cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
gc[levels-1]->
reverse_comm_kspace(this,6,sizeof(double),REVERSE_AD_PERATOM,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);

} else {
// Here using MPI_Allreduce is cheaper than using commgrid
grid_swap_forward(levels-1,qgrid[levels-1]);

@@ -208,7 +205,9 @@ void MSMCGOMP::compute(int eflag, int vflag)
grid_swap_reverse(levels-1,egrid[levels-1]);
current_level = levels-1;
if (vflag_atom)
cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
gc[levels-1]->
reverse_comm_kspace(this,6,sizeof(double),REVERSE_AD_PERATOM,
gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
}
}

@@ -220,24 +219,28 @@ void MSMCGOMP::compute(int eflag, int vflag)
prolongation(n);

current_level = n;
cg[n]->reverse_comm(this,REVERSE_AD);
gc[n]->reverse_comm_kspace(this,1,sizeof(double),REVERSE_AD,
gc_buf1[n],gc_buf2[n],MPI_DOUBLE);

// extra per-atom virial communication

if (vflag_atom)
cg_peratom[n]->reverse_comm(this,REVERSE_AD_PERATOM);
gc[n]->reverse_comm_kspace(this,6,sizeof(double),REVERSE_AD_PERATOM,
gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
}

// all procs communicate E-field values
// to fill ghost cells surrounding their 3d bricks

current_level = 0;
cg_all->forward_comm(this,FORWARD_AD);
gcall->forward_comm_kspace(this,1,sizeof(double),FORWARD_AD,
gcall_buf1,gcall_buf2,MPI_DOUBLE);

// extra per-atom energy/virial communication

if (vflag_atom)
cg_peratom_all->forward_comm(this,FORWARD_AD_PERATOM);
gcall->forward_comm_kspace(this,6,sizeof(double),FORWARD_AD_PERATOM,
gcall_buf1,gcall_buf2,MPI_DOUBLE);

// calculate the force on my particles (interpolation)

@@ -556,6 +559,7 @@ void MSMCGOMP::fieldforce_peratom()
}
}

/* ---------------------------------------------------------------------- */

double MSMCGOMP::memory_usage()
{
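MSM differs from the PPPM variants above in that it keeps one GridComm per multigrid level plus one for the finest grid, each with its own buffer pair, and it uses MPI_DOUBLE rather than MPI_FFT_SCALAR. A short sketch of the per-level pattern from these hunks (the members gc, gc_buf1, gc_buf2 are the per-level arrays shown above; comments are mine):

// Sketch of MSM's per-level grid communication, following the hunks above.
for (int n = 0; n <= levels-2; n++) {
  if (!active_flag[n]) continue;        // skip levels this proc does not own
  current_level = n;                    // tells pack/unpack which grid to use
  gc[n]->forward_comm_kspace(this,1,sizeof(double),FORWARD_RHO,
                             gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
  direct(n);                            // direct sum on this level
  restriction(n);                       // restrict charge to the coarser level
}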
@@ -387,20 +387,20 @@ void Balance::command(int narg, char **arg)
MPI_Wtime()-start_time);
mesg += fmt::format(" iteration count = {}\n",niter);
for (int i = 0; i < nimbalance; ++i) mesg += imbalances[i]->info();
mesg += fmt::format(" initial/final maximal load/proc = {} {}\n"
" initial/final imbalance factor = {:.6g} {:.6g}\n",
mesg += fmt::format(" initial/final maximal load/proc = {:.8} {:.8}\n"
" initial/final imbalance factor = {:.8} {:.8}\n",
maxinit,maxfinal,imbinit,imbfinal);

if (style != BISECTION) {
mesg += " x cuts:";
for (int i = 0; i <= comm->procgrid[0]; i++)
mesg += fmt::format(" {}",comm->xsplit[i]);
mesg += fmt::format(" {:.8}",comm->xsplit[i]);
mesg += "\n y cuts:";
for (int i = 0; i <= comm->procgrid[1]; i++)
mesg += fmt::format(" {}",comm->ysplit[i]);
mesg += fmt::format(" {:.8}",comm->ysplit[i]);
mesg += "\n z cuts:";
for (int i = 0; i <= comm->procgrid[2]; i++)
mesg += fmt::format(" {}",comm->zsplit[i]);
mesg += fmt::format(" {:.8}",comm->zsplit[i]);
mesg += "\n";
}

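The balance hunk swaps the default {} spec for an explicit {:.8}, so split fractions and imbalance factors print with 8 significant digits instead of full double precision. A minimal check of the spec, assuming the fmt library bundled by LAMMPS behaves like upstream fmt:

#include <fmt/core.h>   // fmt is what the hunk above already uses

int main() {
  double xsplit = 1.0/3.0;
  fmt::print("default: {}\n", xsplit);    // 0.3333333333333333
  fmt::print("trimmed: {:.8}\n", xsplit); // 0.33333333
  return 0;
}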
@@ -853,7 +853,7 @@ int FixNeighHistory::pack_restart(int i, double *buf)
memcpy(&buf[m],&valuepartner[i][dnum*n],dnumbytes);
m += dnum;
}
// pack buf[0] this way because other fixes unpack it
// pack buf[0] this way because other fixes unpack it
buf[0] = m;
return m;
}
@@ -664,10 +664,6 @@ void Force::create_kspace(const std::string &style, int trysuffix)
int sflag;
kspace = new_kspace(style,trysuffix,sflag);
store_style(kspace_style,style,sflag);

if (comm->style == 1 && !kspace_match("ewald",0))
error->all(FLERR,
"Cannot yet use KSpace solver with grid with comm style tiled");
}

/* ----------------------------------------------------------------------
@ -121,10 +121,10 @@ class KSpace : protected Pointers {
  virtual void compute(int, int) = 0;
  virtual void compute_group_group(int, int, int) {};

  virtual void pack_forward(int, FFT_SCALAR *, int, int *) {};
  virtual void unpack_forward(int, FFT_SCALAR *, int, int *) {};
  virtual void pack_reverse(int, FFT_SCALAR *, int, int *) {};
  virtual void unpack_reverse(int, FFT_SCALAR *, int, int *) {};
  virtual void pack_forward_grid(int, void *, int, int *) {};
  virtual void unpack_forward_grid(int, void *, int, int *) {};
  virtual void pack_reverse_grid(int, void *, int, int *) {};
  virtual void unpack_reverse_grid(int, void *, int, int *) {};

  virtual int timing(int, double &, double &) {return 0;}
  virtual int timing_1d(int, double &) {return 0;}
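
Besides the _grid rename, the hooks change type: the pack/unpack callbacks now take void * rather than FFT_SCALAR *, so one GridComm implementation can move grids of any scalar type (the call sites above pass sizeof(FFT_SCALAR) or sizeof(double) together with the matching MPI datatype). A compilable sketch of an override under the new signatures; the flattened grid array and the index-list semantics are assumptions for illustration, not the actual PPPM code:

// Sketch: a derived style copies grid values in/out of a void* buffer using
// an index list supplied by the communicator.
struct MyGridStyle {
  double grid[64] = {};  // hypothetical flattened grid brick

  void pack_forward_grid(int /*which*/, void *vbuf, int n, int *list) {
    double *buf = static_cast<double *>(vbuf);  // cast to the grid's real type
    for (int i = 0; i < n; i++) buf[i] = grid[list[i]];
  }
  void unpack_forward_grid(int /*which*/, void *vbuf, int n, int *list) {
    double *buf = static_cast<double *>(vbuf);
    for (int i = 0; i < n; i++) grid[list[i]] = buf[i];
  }
};

int main() {
  MyGridStyle s;
  int list[2] = {3, 7};
  double buf[2] = {1.5, -2.5};
  s.unpack_forward_grid(0, buf, 2, list);  // scatter buffer into ghost cells
  s.pack_forward_grid(0, buf, 2, list);    // gather them back out
  return 0;
}
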
@ -0,0 +1,94 @@
---
lammps_version: 30 Jun 2020
date_generated: Sun Jul 12 19:14:30 2020
epsilon: 7.5e-14
prerequisites: ! |
  atom full
  pair coul/long
  kspace pppm/cg
pre_commands: ! ""
post_commands: ! |
  set atom 22*23 charge 0.0
  set atom 25*26 charge 0.0
  set atom 28*29 charge 0.0
  set type 5 charge 0.0
  comm_style tiled
  pair_modify compute no
  kspace_style pppm/cg 1.0e-6
  kspace_modify gewald 0.3
  balance 0.0 rcb
input_file: in.fourmol
pair_style: coul/long 8.0
pair_coeff: ! |
  * *
extract: ! ""
natoms: 29
init_vdwl: 0
init_coul: 0
init_stress: ! |2-
  0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
init_forces: ! |2
  1 -2.6277259148670029e-01 6.3371291789473080e-02 2.5711876152341273e-01
  2 7.2367296933642605e-02 -2.6377287441660424e-01 -7.6051513821789138e-02
  3 -2.0405952297641711e-02 -7.8572370620207509e-03 1.6901994166300244e-02
  4 1.3913539877995293e-01 2.2591757798275315e-03 -4.0397816193944613e-02
  5 7.8220840962476321e-02 5.8018673691041286e-02 -6.0298251586383647e-02
  6 1.6642601835640428e-02 5.4813636460583826e-01 -5.9948263006137181e-01
  7 7.4422159409041455e-02 -5.2976037824286260e-01 2.8745376504092163e-01
  8 5.1715645698484136e-01 -8.4224353212928849e-01 3.9252943496496401e-01
  9 -2.8698074414725644e-01 3.8846246184711297e-01 -8.8735165696061394e-02
  10 -1.4119088048501882e-01 1.7101059256234674e-01 -2.1093128275588843e-02
  11 -1.9802109162868770e-01 2.3799905238132363e-01 -6.9205308272233729e-02
  12 5.9829442228003460e-01 -6.6570589694338889e-01 1.8052289946052447e-01
  13 -1.4432183109305963e-01 2.1119613677613439e-01 -3.7953547334912435e-02
  14 -2.1057539442776216e-01 1.9833568369952351e-01 -4.3863660550604380e-02
  15 -1.8510184013940739e-01 1.4558620355697388e-01 -6.3736986322272177e-02
  16 -8.7068540567994446e-01 8.0781838347116530e-01 7.4023620199055895e-01
  17 6.8573068239024604e-01 -5.4365714223217854e-01 -9.1523962812961923e-01
  18 9.0711753065407197e-01 1.6157336473023263e+00 -1.6708191809635238e+00
  19 -3.2812987976759644e-01 -8.1654277506424433e-01 9.6628327138680947e-01
  20 -4.4090177907687206e-01 -7.7838783137249912e-01 8.4583048867481425e-01
  21 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  22 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  23 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  24 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  25 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  26 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  27 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  28 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  29 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
run_vdwl: 0
run_coul: 0
run_stress: ! |2-
  0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
run_forces: ! |2
  1 -2.5272870059014418e-01 7.7993693891669019e-02 2.7413192480528514e-01
  2 6.0317817543279174e-02 -2.7726219497773258e-01 -9.2328466795041211e-02
  3 -2.0381925802263216e-02 -7.4169903634883827e-03 1.7718770630259381e-02
  4 1.4007637687058852e-01 6.0294233349724097e-04 -4.4118635982017075e-02
  5 7.7661555892458781e-02 5.6660597072423394e-02 -6.2527393985284924e-02
  6 1.8472776517392236e-02 5.4080910460464371e-01 -6.2683322637258831e-01
  7 7.1172361943358894e-02 -5.3339808852688553e-01 3.1182624682757953e-01
  8 5.1931682876258478e-01 -8.4009257264924786e-01 4.1437713723636871e-01
  9 -2.8926485235033661e-01 3.8545218869697995e-01 -9.6445715585120301e-02
  10 -1.4130393273230371e-01 1.7154601844601167e-01 -2.4319589810512892e-02
  11 -1.9820651458435656e-01 2.3998861307221755e-01 -7.2850168561931314e-02
  12 6.0180481293740029e-01 -6.6352386739919189e-01 1.9241766558675899e-01
  13 -1.4512210035912929e-01 2.1001807770143230e-01 -4.0833849803347325e-02
  14 -2.1221325118484802e-01 1.9911652392243312e-01 -4.7359593324180368e-02
  15 -1.8584239277230100e-01 1.4355785962059972e-01 -6.8625732013099072e-02
  16 -8.6818324744564379e-01 8.1248951063969599e-01 7.2669223824162077e-01
  17 6.8330419262914510e-01 -5.4635175697697436e-01 -9.0599066050100585e-01
  18 9.4795190549104413e-01 1.6607978960168899e+00 -1.6003547491287762e+00
  19 -3.4101870572315529e-01 -8.2840317570845334e-01 9.3657904772766076e-01
  20 -4.6581300504277079e-01 -8.0258437941651817e-01 8.0884475080737095e-01
  21 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  22 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  23 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  24 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  25 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  26 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  27 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  28 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  29 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
...
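
The YAML documents in this commit are reference data for the force-style regression tests: each records the commands used to set up a run on in.fourmol plus the expected per-atom forces, and epsilon bounds how far recomputed forces may deviate from the stored values. A hedged sketch of the kind of comparison such a tolerance implies; the normalization here is an assumption, not necessarily the exact rule the test harness applies:

#include <cmath>
#include <cstdio>

// Relative-error check consistent with the 'epsilon' fields in these files.
bool within_epsilon(double ref, double val, double eps) {
  double scale = std::fmax(std::fabs(ref), std::fabs(val));
  if (scale == 0.0) return ref == val;  // exact-zero entries must match
  return std::fabs(ref - val) <= eps * scale;
}

int main() {
  // e.g. an x-component from init_forces above, perturbed in the last digit
  std::printf("%s\n", within_epsilon(-2.6277259148670029e-01,
                                     -2.6277259148670031e-01,
                                     7.5e-14) ? "pass" : "fail");
  return 0;
}
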
@ -0,0 +1,99 @@
---
lammps_version: 21 Jul 2020
date_generated: Thu Aug 20 23:05:29 2020
epsilon: 2.5e-13
prerequisites: ! |
  atom full
  pair lj/long/coul/long
  kspace pppm/disp
pre_commands: ! ""
post_commands: ! |
  pair_modify compute no
  kspace_style pppm/disp 1.0e-4
  kspace_modify gewald 0.5
  kspace_modify force/disp/real 0.001
  kspace_modify force/disp/kspace 0.005
  kspace_modify diff ad
input_file: in.fourmol
pair_style: lj/long/coul/long long off 7.0
pair_coeff: ! |
  1 1 0.02 2.5
  2 2 0.005 1.0
  2 4 0.005 0.5
  3 3 0.02 3.2
  4 4 0.015 3.1
  5 5 0.015 3.1
extract: ! |
  epsilon 2
  sigma 2
  cut_coul 0
natoms: 29
init_vdwl: 0
init_coul: 0
init_stress: ! |2-
  0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
init_forces: ! |2
  1 6.9724769730763625e-11 -6.2917196472420914e-11 -1.7553582754737637e-11
  2 7.6094811340731255e-13 -1.1540411942409495e-12 1.6298559231496556e-13
  3 3.9430185022927007e-11 -1.9017673718233126e-11 -1.6651673285376923e-13
  4 1.5917087679856494e-12 -7.5219417810712124e-13 3.9732355364959770e-13
  5 1.3754210730241487e-12 -5.2393248740046831e-13 -5.8786972569898321e-13
  6 2.4324626429202970e-11 -5.9541975521081885e-12 1.1615602625372913e-11
  7 3.9335920395601978e-11 -4.5805387197461763e-12 4.9818207226960900e-11
  8 1.9840262237451370e-11 6.2076746170376731e-12 -4.0963480752166205e-13
  9 6.7032491388560462e-14 2.8806602906466370e-14 -5.1168576901689580e-13
  10 -9.5639153188399775e-12 1.6260508185384930e-11 5.9852790016982401e-12
  11 -2.1047310133618033e-13 7.9345765138879343e-13 7.5666446278699520e-13
  12 -2.9173061030991263e-11 3.7633064986308384e-12 1.7460466677353034e-11
  13 -1.4524529065361568e-12 3.3470067557100611e-13 6.2185091609144014e-13
  14 -7.3138369212027322e-13 1.0248711043717948e-13 1.1305113696402957e-12
  15 -9.5037504870165340e-13 -3.5714658367712204e-13 3.9441042026885766e-13
  16 -2.2415639659259037e-11 2.7640113997479121e-11 -1.1642230752605363e-11
  17 -2.1208473166391679e-11 3.8522058586534765e-11 -4.6846216614724886e-11
  18 -1.8040308770875740e-11 -5.7279008382924961e-11 7.2376750640813174e-11
  19 -3.8160168113867275e-14 -1.0975133018216530e-12 1.5219157003617188e-12
  20 -8.2805108197841928e-13 -1.1974415437385753e-12 1.4978331961150702e-12
  21 -8.9475625142751869e-11 6.3090972691297525e-11 7.4019219615381729e-11
  22 -1.5546663673414098e-12 1.1916129263874209e-12 1.5208406092298733e-12
  23 -1.6319521654857783e-12 1.1826485038559488e-12 1.4904048534083759e-12
  24 -1.5641127395622623e-11 -5.8519640828036183e-11 -7.6239449591252700e-11
  25 1.1910315964875356e-13 -1.1889645936751337e-12 -1.4555354381968814e-12
  26 -7.1757130498965896e-13 -1.1745493969901884e-12 -1.5212605958363234e-12
  27 8.0820809208387831e-11 6.3170487105530568e-11 -6.5474684007301223e-11
  28 1.6363504379062180e-12 1.1991903709725124e-12 -1.1721879859607168e-12
  29 1.5216247551162307e-12 1.1846825159982258e-12 -1.4010202583443579e-12
run_vdwl: 0
run_coul: 0
run_stress: ! |2-
  0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
run_forces: ! |2
  1 6.9638112975627505e-11 -6.2952655433984510e-11 -1.7520579341404558e-11
  2 7.5441280063952564e-13 -1.1565278235052112e-12 1.5428964869214424e-13
  3 3.9495752841065062e-11 -1.9092781431624742e-11 -1.4273797798954887e-13
  4 1.5925753071119834e-12 -7.5018590838065349e-13 4.0694176768714727e-13
  5 1.3773389032846222e-12 -5.2518353267880336e-13 -5.8790169309916104e-13
  6 2.4346713154520441e-11 -6.0219217022078622e-12 1.1556637696922417e-11
  7 3.9348886643182682e-11 -4.5159528947341432e-12 4.9885111934485096e-11
  8 1.9778099538765962e-11 6.2897367226276710e-12 -4.3131958485496316e-13
  9 6.3695687649969267e-14 3.3892963902521213e-14 -5.1413748665229894e-13
  10 -9.6012722021579935e-12 1.6233041014363616e-11 6.0003996378633241e-12
  11 -2.0949350984685278e-13 7.9562479682529135e-13 7.6275214731002570e-13
  12 -2.9254571969811829e-11 3.7981192304699482e-12 1.7545839680382277e-11
  13 -1.4582673940336726e-12 3.2818654537516632e-13 6.2347550527444227e-13
  14 -7.3106004851428835e-13 1.1022739695233736e-13 1.1369294512424691e-12
  15 -9.4060413626822731e-13 -3.5126188279055496e-13 3.8676904179416883e-13
  16 -2.2408460889929651e-11 2.7657483426256059e-11 -1.1741964403647136e-11
  17 -2.1298916095589507e-11 3.8489167711847335e-11 -4.6910251494943480e-11
  18 -1.7957271474310740e-11 -5.7234788024451294e-11 7.2383088876515692e-11
  19 -3.6288894307680100e-14 -1.0943572602766692e-12 1.5210320121776364e-12
  20 -8.3935575478300129e-13 -1.1966327701053927e-12 1.4966158763399045e-12
  21 -8.9447202173297866e-11 6.3082241565048134e-11 7.4014350079233694e-11
  22 -1.5510607489175700e-12 1.1907296946509683e-12 1.5210644252358221e-12
  23 -1.6318182177140126e-12 1.1841207226717162e-12 1.4885209322729697e-12
  24 -1.5676362809971050e-11 -5.8529118094744956e-11 -7.6215807688416359e-11
  25 1.0742373795185689e-13 -1.1875074064094843e-12 -1.4571756878225340e-12
  26 -7.1286524148732120e-13 -1.1757034686897436e-12 -1.5207002392359802e-12
  27 8.0828500068665394e-11 6.3165679701490667e-11 -6.5424272562687016e-11
  28 1.6362386625442489e-12 1.1989853840181747e-12 -1.1753814351696325e-12
  29 1.5246414975786595e-12 1.1846002567244795e-12 -1.4036250137375415e-12
...
@ -0,0 +1,98 @@
---
lammps_version: 21 Jul 2020
date_generated: Thu Aug 20 23:05:29 2020
epsilon: 2.5e-13
prerequisites: ! |
  atom full
  pair lj/long/coul/long
  kspace pppm/disp
pre_commands: ! ""
post_commands: ! |
  pair_modify compute no
  kspace_style pppm/disp 1.0e-4
  kspace_modify gewald 0.5
  kspace_modify force/disp/real 0.001
  kspace_modify force/disp/kspace 0.005
input_file: in.fourmol
pair_style: lj/long/coul/long long off 7.0
pair_coeff: ! |
  1 1 0.02 2.5
  2 2 0.005 1.0
  2 4 0.005 0.5
  3 3 0.02 3.2
  4 4 0.015 3.1
  5 5 0.015 3.1
extract: ! |
  epsilon 2
  sigma 2
  cut_coul 0
natoms: 29
init_vdwl: 0
init_coul: 0
init_stress: ! |2-
  0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
init_forces: ! |2
  1 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  2 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  3 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  4 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  5 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  6 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  7 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  8 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  9 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  10 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  11 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  12 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  13 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  14 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  15 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  16 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  17 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  18 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  19 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  20 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  21 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  22 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  23 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  24 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  25 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  26 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  27 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  28 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  29 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
run_vdwl: 0
run_coul: 0
run_stress: ! |2-
  0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
run_forces: ! |2
  1 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  2 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  3 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  4 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  5 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  6 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  7 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  8 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  9 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  10 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  11 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  12 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  13 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  14 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  15 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  16 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  17 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  18 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  19 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  20 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  21 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  22 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  23 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  24 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  25 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  26 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  27 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  28 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
  29 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
...
@ -0,0 +1,90 @@
---
lammps_version: 21 Jul 2020
date_generated: Mon Aug 3 23:27:35 2020
epsilon: 7.5e-14
prerequisites: ! |
  atom full
  pair coul/long
  kspace pppm/stagger
pre_commands: ! ""
post_commands: ! |
  comm_style tiled
  pair_modify compute no
  kspace_style pppm/stagger 1.0e-6
  kspace_modify gewald 0.3
  balance 0.0 rcb
input_file: in.fourmol
pair_style: coul/long 8.0
pair_coeff: ! |
  * *
extract: ! ""
natoms: 29
init_vdwl: 0
init_coul: 0
init_stress: ! |2-
  0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
init_forces: ! |2
  1 -5.2227715900845750e-01 8.1950519891037896e-02 2.1568864750376832e-01
  2 2.1709329984201631e-01 -2.7910826043908610e-01 -1.3501796562404628e-01
  3 -3.4431410110235108e-02 -9.3026609475793179e-03 1.9970153652663108e-02
  4 1.6294612553322482e-01 2.8820208915782589e-02 -7.8041537992552090e-02
  5 1.6018614143950777e-01 7.5432356753115826e-02 -3.7722588054429150e-02
  6 5.6492988309033354e-01 4.1696814706534630e-01 -6.7673476362799612e-01
  7 -3.4216205471660566e-01 -4.0009165524095192e-01 3.9325085501474977e-01
  8 -1.4121658810415072e-01 -6.1694926607232958e-01 3.3967088097622000e-01
  9 1.8213420211015810e-01 3.2023550501078568e-01 5.0865716213608053e-02
  10 -5.1659753410710330e-02 1.1063713004312387e-01 -1.4434062323990951e-02
  11 -8.4675426404111437e-02 1.5093767469541053e-01 -3.9273222694910556e-02
  12 4.5743278086428246e-01 -4.2657803621344026e-01 3.4765791147615285e-02
  13 -1.5598326968651899e-01 1.1609333380277490e-01 2.6827585674639255e-02
  14 -1.7229830712699734e-01 1.3660265515995373e-01 1.0364293545061238e-02
  15 -1.3779624948415931e-01 8.5611514314178239e-02 -1.4417936578185790e-02
  16 -3.4309620718952316e-01 4.3358259515061448e-01 5.3264911488862143e-01
  17 1.3394866253126306e-01 -4.1287506953147796e-01 -7.8819987038465467e-01
  18 7.3032854805187175e-01 1.5459002190369358e+00 -1.3876618467094484e+00
  19 -2.5946241349817661e-01 -7.7450328984918526e-01 7.7107291114413901e-01
  20 -3.9364379163465191e-01 -7.0318115064463305e-01 7.3145133273582563e-01
  21 5.1854207039779388e-01 5.4316140431986648e-01 -1.1630561612460866e+00
  22 -2.9474924657955082e-01 -1.2314722330232061e-01 5.8311514555951505e-01
  23 -2.8773056143507980e-01 -2.9281856868591627e-01 5.5619506923778372e-01
  24 6.2752036539684239e-02 1.7441478631220706e+00 -2.7849000426516041e-01
  25 1.2955239352175907e-01 -7.0427160638100861e-01 2.2582608971039136e-01
  26 -2.2227598237037974e-01 -9.7470415379148345e-01 7.4514829391794934e-02
  27 -8.5917766336431800e-01 1.6506499588643100e+00 -9.3704596621935576e-01
  28 5.7091533654326099e-01 -9.1760299361767794e-01 5.4073727763352530e-01
  29 4.1187460365847078e-01 -8.0559715142821642e-01 4.4313023169089372e-01
run_vdwl: 0
run_coul: 0
run_stress: ! |2-
  0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
run_forces: ! |2
  1 -5.0888488962950285e-01 8.3430404180325907e-02 2.3883070350075583e-01
  2 2.0290231222263222e-01 -2.8552521628160221e-01 -1.4795322214007761e-01
  3 -3.4147227596864727e-02 -9.1044423234983921e-03 2.1050772312966981e-02
  4 1.6403639693437871e-01 2.7510909903057143e-02 -8.1700222473303191e-02
  5 1.5793837855428028e-01 7.5134684921661865e-02 -4.3070400148934276e-02
  6 5.5874989482134696e-01 4.1097082705147381e-01 -7.0880717747024347e-01
  7 -3.4244419763693645e-01 -4.0336810330768602e-01 4.1416013762933518e-01
  8 -1.2867011168103898e-01 -6.1360944335177758e-01 3.7373621646565264e-01
  9 1.7179361391284365e-01 3.1570434896702448e-01 2.9166730933101400e-02
  10 -5.3386439917058298e-02 1.1157104966535075e-01 -1.9069019877901452e-02
  11 -8.6641568612711800e-02 1.5447778751280100e-01 -4.3302035139313286e-02
  12 4.6277613291799979e-01 -4.2640261166652782e-01 5.4642934467963714e-02
  13 -1.5768096764682599e-01 1.1678927823658505e-01 2.0726708870285715e-02
  14 -1.7362409264878331e-01 1.3769878891777848e-01 5.6914483198930487e-03
  15 -1.3804334923445472e-01 8.4232584350085404e-02 -2.2242211531143450e-02
  16 -3.5505453326610353e-01 4.4007379281236625e-01 5.1108930583976653e-01
  17 1.4393262437860022e-01 -4.0681625150263201e-01 -7.6550494257092949e-01
  18 7.7411316765167837e-01 1.6013845677120058e+00 -1.3406161587038208e+00
  19 -2.7109483507409249e-01 -7.9220736692047411e-01 7.5606765561780620e-01
  20 -4.2022391372143653e-01 -7.3398564926630305e-01 7.0862056036484811e-01
  21 5.2066744810063004e-01 4.5601151336785911e-01 -1.1109669200204870e+00
  22 -2.9131419157235938e-01 -8.0660148622295896e-02 5.6037505807873789e-01
  23 -2.8851937111607945e-01 -2.5707916023569388e-01 5.3115241105748146e-01
  24 7.7132406285747662e-02 1.6936935947231240e+00 -2.6197671637095088e-01
  25 1.1739827770831035e-01 -6.8172720908194795e-01 2.1292828073010400e-01
  26 -2.2399952098390596e-01 -9.4806186651755442e-01 6.5237310702708273e-02
  27 -8.6781949013350235e-01 1.6453631212617197e+00 -8.8867885380482659e-01
  28 5.7526918079966904e-01 -9.1110567278710985e-01 5.1441520143469011e-01
  29 4.1483886618353916e-01 -8.0439411171811659e-01 4.1599644392583446e-01
...
@ -0,0 +1,90 @@
---
lammps_version: 30 Jun 2020
date_generated: Sun Jul 12 19:14:29 2020
epsilon: 7.5e-14
prerequisites: ! |
  atom full
  pair coul/long
  kspace pppm
pre_commands: ! ""
post_commands: ! |
  comm_style tiled
  pair_modify compute no
  kspace_style pppm 1.0e-6
  kspace_modify gewald 0.3
  balance 0.0 rcb
input_file: in.fourmol
pair_style: coul/long 8.0
pair_coeff: ! |
  * *
extract: ! ""
natoms: 29
init_vdwl: 0
init_coul: 0
init_stress: ! |2-
  0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
init_forces: ! |2
  1 -5.2239274535568314e-01 8.2051545744881466e-02 2.1533594847972076e-01
  2 2.1712968366442176e-01 -2.7928074334318026e-01 -1.3471540076656802e-01
  3 -3.4442019165638028e-02 -9.3084265599194874e-03 1.9948062571124484e-02
  4 1.6298334373562443e-01 2.8852998088186425e-02 -7.8001870103674154e-02
  5 1.6024289196964533e-01 7.5428818157230709e-02 -3.7746220978715959e-02
  6 5.6503043686117405e-01 4.1669523647698320e-01 -6.7638762712651512e-01
  7 -3.4224573570118516e-01 -3.9969025602522534e-01 3.9331747529410527e-01
  8 -1.4133104801408738e-01 -6.1685378954692482e-01 3.3931746208503027e-01
  9 1.8219762821810317e-01 3.2009822401929577e-01 5.0881307357289934e-02
  10 -5.1688860353236589e-02 1.1069131959908671e-01 -1.4422029744161480e-02
  11 -8.4689878918105269e-02 1.5099315110947911e-01 -3.9231342126204188e-02
  12 4.5754413540574290e-01 -4.2644798683690410e-01 3.4587713233253971e-02
  13 -1.5596780753830558e-01 1.1607584778590280e-01 2.6865880696619902e-02
  14 -1.7231427615749528e-01 1.3653099035839830e-01 1.0392517888507409e-02
  15 -1.3787738509698347e-01 8.5569383216123673e-02 -1.4365596072224287e-02
  16 -3.4322564010548312e-01 4.3371633953160166e-01 5.3259611401138551e-01
  17 1.3414272886699793e-01 -4.1322529572771644e-01 -7.8812435933765979e-01
  18 7.3073447759345089e-01 1.5456517688814524e+00 -1.3881786173290165e+00
  19 -2.5943625025418654e-01 -7.7424664728587522e-01 7.7105598737678260e-01
  20 -3.9409193260988501e-01 -7.0311103001458264e-01 7.3171724652214931e-01
  21 5.1856078926614546e-01 5.4286369838352699e-01 -1.1629548434823531e+00
  22 -2.9453203152655405e-01 -1.2298517567747463e-01 5.8298446261040782e-01
  23 -2.8798525475710529e-01 -2.9277384277527774e-01 5.5631883166904628e-01
  24 6.2753212217437501e-02 1.7443957830145815e+00 -2.7814103479849506e-01
  25 1.2986161832727383e-01 -7.0443921770565177e-01 2.2578528867489417e-01
  26 -2.2254044464386455e-01 -9.7470640011041609e-01 7.4360754308868779e-02
  27 -8.5917998510192983e-01 1.6512375326941557e+00 -9.3680672362601536e-01
  28 5.7118802253451917e-01 -9.1790362039827855e-01 5.4063664700585301e-01
  29 4.1157232663919069e-01 -8.0588020505345637e-01 4.4297396570656278e-01
run_vdwl: 0
run_coul: 0
run_stress: ! |2-
  0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
run_forces: ! |2
  1 -5.0900177524332380e-01 8.3541312360770559e-02 2.3847626933913957e-01
  2 2.0293760437213748e-01 -2.8570861986354196e-01 -1.4764907529922344e-01
  3 -3.4158170718364404e-02 -9.1103261103567309e-03 2.1028676255224140e-02
  4 1.6407381836362508e-01 2.7543938443804754e-02 -8.1661581263582694e-02
  5 1.5799678256598612e-01 7.5131693181657300e-02 -4.3093731729494773e-02
  6 5.5884729319027959e-01 4.1070142887655381e-01 -7.0845348552640675e-01
  7 -3.4252252488516066e-01 -4.0296980848156633e-01 4.1421689597710110e-01
  8 -1.2877561673829283e-01 -6.1352311819313976e-01 3.7338100957587267e-01
  9 1.7184965305565320e-01 3.1557352532666366e-01 2.9184166085517198e-02
  10 -5.3416314160604578e-02 1.1162618970928689e-01 -1.9056641624128120e-02
  11 -8.6656970143397835e-02 1.5453217862588842e-01 -4.3260381230061914e-02
  12 4.6288691229484780e-01 -4.2627837697985460e-01 5.4462739307820834e-02
  13 -1.5766510975402218e-01 1.1677248221389504e-01 2.0765584899621815e-02
  14 -1.7364037400975366e-01 1.3763067517397995e-01 5.7227400340144263e-03
  15 -1.3812467553300772e-01 8.4189156813104071e-02 -2.2189341317061961e-02
  16 -3.5518552554301230e-01 4.4020942653600509e-01 5.1103809754957041e-01
  17 1.4412978936104159e-01 -4.0716931333084577e-01 -7.6542748703258823e-01
  18 7.7452008907550252e-01 1.6011444029485182e+00 -1.3411322152836536e+00
  19 -2.7106696803117064e-01 -7.9194312531475608e-01 7.5606651125297675e-01
  20 -4.2067655880426469e-01 -7.3392054862184575e-01 7.0888202878660511e-01
  21 5.2066009642687827e-01 4.5571443252966459e-01 -1.1108598505670140e+00
  22 -2.9107834596471255e-01 -8.0497477430417727e-02 5.6024739795440914e-01
  23 -2.8876713232015855e-01 -2.5703387138535833e-01 5.3126570223700098e-01
  24 7.7134065884385553e-02 1.6939505170091371e+00 -2.6162012529552015e-01
  25 1.1769649007263155e-01 -6.8190526638151339e-01 2.1287906681296132e-01
  26 -2.2425351267435142e-01 -9.4806947839154443e-01 6.5072664668127181e-02
  27 -8.6783221113018416e-01 1.6459489466934927e+00 -8.8842216340655056e-01
  28 5.7554532106965894e-01 -9.1140444835486800e-01 5.1430957288504053e-01
  29 4.1454386992115411e-01 -8.0467652760281361e-01 4.1582695595428282e-01
...