forked from lijiext/lammps
Added compact arrays, removed unused openmp stuff
This commit is contained in:
parent
803e0631c5
commit
960a975e2a
|
@ -25,7 +25,6 @@
|
|||
#include "comm.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "openmp_snap.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
|
@ -115,20 +114,10 @@ ComputeSNAAtom::ComputeSNAAtom(LAMMPS *lmp, int narg, char **arg) :
|
|||
} else error->all(FLERR,"Illegal compute sna/atom command");
|
||||
}
|
||||
|
||||
nthreads = comm->nthreads;
|
||||
snaptr = new SNA*[nthreads];
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) shared(lmp,rfac0,twojmax,rmin0,switchflag,bzeroflag)
|
||||
#endif
|
||||
{
|
||||
int tid = omp_get_thread_num();
|
||||
snaptr = new SNA(lmp,rfac0,twojmax,diagonalstyle,
|
||||
rmin0,switchflag,bzeroflag);
|
||||
|
||||
// always unset use_shared_arrays since it does not work with computes
|
||||
snaptr[tid] = new SNA(lmp,rfac0,twojmax,diagonalstyle,
|
||||
0 /*use_shared_arrays*/, rmin0,switchflag,bzeroflag);
|
||||
}
|
||||
|
||||
ncoeff = snaptr[0]->ncoeff;
|
||||
ncoeff = snaptr->ncoeff;
|
||||
size_peratom_cols = ncoeff;
|
||||
if (quadraticflag) size_peratom_cols += (ncoeff*(ncoeff+1))/2;
|
||||
peratom_flag = 1;
|
||||
|
@ -136,7 +125,6 @@ ComputeSNAAtom::ComputeSNAAtom(LAMMPS *lmp, int narg, char **arg) :
|
|||
nmax = 0;
|
||||
njmax = 0;
|
||||
sna = NULL;
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -147,9 +135,7 @@ ComputeSNAAtom::~ComputeSNAAtom()
|
|||
memory->destroy(radelem);
|
||||
memory->destroy(wjelem);
|
||||
memory->destroy(cutsq);
|
||||
for (int tid = 0; tid<nthreads; tid++)
|
||||
delete snaptr[tid];
|
||||
delete [] snaptr;
|
||||
delete snaptr;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -176,13 +162,7 @@ void ComputeSNAAtom::init()
|
|||
if (strcmp(modify->compute[i]->style,"sna/atom") == 0) count++;
|
||||
if (count > 1 && comm->me == 0)
|
||||
error->warning(FLERR,"More than one compute sna/atom");
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none)
|
||||
#endif
|
||||
{
|
||||
int tid = omp_get_thread_num();
|
||||
snaptr[tid]->init();
|
||||
}
|
||||
snaptr->init();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -223,11 +203,7 @@ void ComputeSNAAtom::compute_peratom()
|
|||
double** const x = atom->x;
|
||||
const int* const mask = atom->mask;
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel for default(none)
|
||||
#endif
|
||||
for (int ii = 0; ii < inum; ii++) {
|
||||
const int tid = omp_get_thread_num();
|
||||
const int i = ilist[ii];
|
||||
if (mask[i] & groupbit) {
|
||||
|
||||
|
@ -241,7 +217,7 @@ void ComputeSNAAtom::compute_peratom()
|
|||
|
||||
// insure rij, inside, and typej are of size jnum
|
||||
|
||||
snaptr[tid]->grow_rij(jnum);
|
||||
snaptr->grow_rij(jnum);
|
||||
|
||||
// rij[][3] = displacements between atom I and those neighbors
|
||||
// inside = indices of neighbors of I within cutoff
|
||||
|
@ -258,26 +234,26 @@ void ComputeSNAAtom::compute_peratom()
|
|||
const double rsq = delx*delx + dely*dely + delz*delz;
|
||||
int jtype = type[j];
|
||||
if (rsq < cutsq[itype][jtype] && rsq>1e-20) {
|
||||
snaptr[tid]->rij[ninside][0] = delx;
|
||||
snaptr[tid]->rij[ninside][1] = dely;
|
||||
snaptr[tid]->rij[ninside][2] = delz;
|
||||
snaptr[tid]->inside[ninside] = j;
|
||||
snaptr[tid]->wj[ninside] = wjelem[jtype];
|
||||
snaptr[tid]->rcutij[ninside] = (radi+radelem[jtype])*rcutfac;
|
||||
snaptr->rij[ninside][0] = delx;
|
||||
snaptr->rij[ninside][1] = dely;
|
||||
snaptr->rij[ninside][2] = delz;
|
||||
snaptr->inside[ninside] = j;
|
||||
snaptr->wj[ninside] = wjelem[jtype];
|
||||
snaptr->rcutij[ninside] = (radi+radelem[jtype])*rcutfac;
|
||||
ninside++;
|
||||
}
|
||||
}
|
||||
|
||||
snaptr[tid]->compute_ui(ninside);
|
||||
snaptr[tid]->compute_zi();
|
||||
snaptr[tid]->compute_bi();
|
||||
snaptr[tid]->copy_bi2bvec();
|
||||
snaptr->compute_ui(ninside);
|
||||
snaptr->compute_zi();
|
||||
snaptr->compute_bi();
|
||||
snaptr->copy_bi2bvec();
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++)
|
||||
sna[i][icoeff] = snaptr[tid]->bvec[icoeff];
|
||||
sna[i][icoeff] = snaptr->bvec[icoeff];
|
||||
if (quadraticflag) {
|
||||
int ncount = ncoeff;
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
|
||||
double bi = snaptr[tid]->bvec[icoeff];
|
||||
double bi = snaptr->bvec[icoeff];
|
||||
|
||||
// diagonal element of quadratic matrix
|
||||
|
||||
|
@ -286,7 +262,7 @@ void ComputeSNAAtom::compute_peratom()
|
|||
// upper-triangular elements of quadratic matrix
|
||||
|
||||
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++)
|
||||
sna[i][ncount++] = bi*snaptr[tid]->bvec[jcoeff];
|
||||
sna[i][ncount++] = bi*snaptr->bvec[jcoeff];
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -305,7 +281,6 @@ double ComputeSNAAtom::memory_usage()
|
|||
double bytes = nmax*size_peratom_cols * sizeof(double);
|
||||
bytes += 3*njmax*sizeof(double);
|
||||
bytes += njmax*sizeof(int);
|
||||
bytes += snaptr[0]->memory_usage()*comm->nthreads;
|
||||
return bytes;
|
||||
}
|
||||
|
||||
|
|
|
@ -42,10 +42,9 @@ class ComputeSNAAtom : public Compute {
|
|||
double rcutfac;
|
||||
double *radelem;
|
||||
double *wjelem;
|
||||
class SNA** snaptr;
|
||||
class SNA* snaptr;
|
||||
double cutmax;
|
||||
int quadraticflag;
|
||||
int nthreads;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -25,7 +25,6 @@
|
|||
#include "comm.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "openmp_snap.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
|
@ -113,20 +112,10 @@ ComputeSNADAtom::ComputeSNADAtom(LAMMPS *lmp, int narg, char **arg) :
|
|||
} else error->all(FLERR,"Illegal compute snad/atom command");
|
||||
}
|
||||
|
||||
nthreads = comm->nthreads;
|
||||
snaptr = new SNA*[nthreads];
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) shared(lmp,rfac0,twojmax,rmin0,switchflag,bzeroflag)
|
||||
#endif
|
||||
{
|
||||
int tid = omp_get_thread_num();
|
||||
snaptr = new SNA(lmp,rfac0,twojmax,diagonalstyle,
|
||||
rmin0,switchflag,bzeroflag);
|
||||
|
||||
// always unset use_shared_arrays since it does not work with computes
|
||||
snaptr[tid] = new SNA(lmp,rfac0,twojmax,diagonalstyle,
|
||||
0 /*use_shared_arrays*/, rmin0,switchflag,bzeroflag);
|
||||
}
|
||||
|
||||
ncoeff = snaptr[0]->ncoeff;
|
||||
ncoeff = snaptr->ncoeff;
|
||||
nperdim = ncoeff;
|
||||
if (quadraticflag) nperdim += (ncoeff*(ncoeff+1))/2;
|
||||
yoffset = nperdim;
|
||||
|
@ -138,7 +127,6 @@ ComputeSNADAtom::ComputeSNADAtom(LAMMPS *lmp, int narg, char **arg) :
|
|||
nmax = 0;
|
||||
njmax = 0;
|
||||
snad = NULL;
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -149,9 +137,7 @@ ComputeSNADAtom::~ComputeSNADAtom()
|
|||
memory->destroy(radelem);
|
||||
memory->destroy(wjelem);
|
||||
memory->destroy(cutsq);
|
||||
for (int tid = 0; tid<nthreads; tid++)
|
||||
delete snaptr[tid];
|
||||
delete [] snaptr;
|
||||
delete snaptr;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -178,13 +164,7 @@ void ComputeSNADAtom::init()
|
|||
if (strcmp(modify->compute[i]->style,"snad/atom") == 0) count++;
|
||||
if (count > 1 && comm->me == 0)
|
||||
error->warning(FLERR,"More than one compute snad/atom");
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none)
|
||||
#endif
|
||||
{
|
||||
int tid = omp_get_thread_num();
|
||||
snaptr[tid]->init();
|
||||
}
|
||||
snaptr->init();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -235,11 +215,7 @@ void ComputeSNADAtom::compute_peratom()
|
|||
double** const x = atom->x;
|
||||
const int* const mask = atom->mask;
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel for default(none)
|
||||
#endif
|
||||
for (int ii = 0; ii < inum; ii++) {
|
||||
const int tid = omp_get_thread_num();
|
||||
const int i = ilist[ii];
|
||||
if (mask[i] & groupbit) {
|
||||
|
||||
|
@ -258,7 +234,7 @@ void ComputeSNADAtom::compute_peratom()
|
|||
|
||||
// insure rij, inside, and typej are of size jnum
|
||||
|
||||
snaptr[tid]->grow_rij(jnum);
|
||||
snaptr->grow_rij(jnum);
|
||||
|
||||
// rij[][3] = displacements between atom I and those neighbors
|
||||
// inside = indices of neighbors of I within cutoff
|
||||
|
@ -276,30 +252,30 @@ void ComputeSNADAtom::compute_peratom()
|
|||
const double rsq = delx*delx + dely*dely + delz*delz;
|
||||
int jtype = type[j];
|
||||
if (rsq < cutsq[itype][jtype]&&rsq>1e-20) {
|
||||
snaptr[tid]->rij[ninside][0] = delx;
|
||||
snaptr[tid]->rij[ninside][1] = dely;
|
||||
snaptr[tid]->rij[ninside][2] = delz;
|
||||
snaptr[tid]->inside[ninside] = j;
|
||||
snaptr[tid]->wj[ninside] = wjelem[jtype];
|
||||
snaptr[tid]->rcutij[ninside] = (radi+radelem[jtype])*rcutfac;
|
||||
snaptr->rij[ninside][0] = delx;
|
||||
snaptr->rij[ninside][1] = dely;
|
||||
snaptr->rij[ninside][2] = delz;
|
||||
snaptr->inside[ninside] = j;
|
||||
snaptr->wj[ninside] = wjelem[jtype];
|
||||
snaptr->rcutij[ninside] = (radi+radelem[jtype])*rcutfac;
|
||||
ninside++;
|
||||
}
|
||||
}
|
||||
|
||||
snaptr[tid]->compute_ui(ninside);
|
||||
snaptr[tid]->compute_zi();
|
||||
snaptr->compute_ui(ninside);
|
||||
snaptr->compute_zi();
|
||||
if (quadraticflag) {
|
||||
snaptr[tid]->compute_bi();
|
||||
snaptr[tid]->copy_bi2bvec();
|
||||
snaptr->compute_bi();
|
||||
snaptr->copy_bi2bvec();
|
||||
}
|
||||
|
||||
for (int jj = 0; jj < ninside; jj++) {
|
||||
const int j = snaptr[tid]->inside[jj];
|
||||
snaptr[tid]->compute_duidrj(snaptr[tid]->rij[jj],
|
||||
snaptr[tid]->wj[jj],
|
||||
snaptr[tid]->rcutij[jj]);
|
||||
snaptr[tid]->compute_dbidrj();
|
||||
snaptr[tid]->copy_dbi2dbvec();
|
||||
const int j = snaptr->inside[jj];
|
||||
snaptr->compute_duidrj(snaptr->rij[jj],
|
||||
snaptr->wj[jj],
|
||||
snaptr->rcutij[jj]);
|
||||
snaptr->compute_dbidrj();
|
||||
snaptr->copy_dbi2dbvec();
|
||||
|
||||
// Accumulate -dBi/dRi, -dBi/dRj
|
||||
|
||||
|
@ -307,12 +283,12 @@ void ComputeSNADAtom::compute_peratom()
|
|||
double *snadj = snad[j]+typeoffset;
|
||||
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
|
||||
snadi[icoeff] += snaptr[tid]->dbvec[icoeff][0];
|
||||
snadi[icoeff+yoffset] += snaptr[tid]->dbvec[icoeff][1];
|
||||
snadi[icoeff+zoffset] += snaptr[tid]->dbvec[icoeff][2];
|
||||
snadj[icoeff] -= snaptr[tid]->dbvec[icoeff][0];
|
||||
snadj[icoeff+yoffset] -= snaptr[tid]->dbvec[icoeff][1];
|
||||
snadj[icoeff+zoffset] -= snaptr[tid]->dbvec[icoeff][2];
|
||||
snadi[icoeff] += snaptr->dbvec[icoeff][0];
|
||||
snadi[icoeff+yoffset] += snaptr->dbvec[icoeff][1];
|
||||
snadi[icoeff+zoffset] += snaptr->dbvec[icoeff][2];
|
||||
snadj[icoeff] -= snaptr->dbvec[icoeff][0];
|
||||
snadj[icoeff+yoffset] -= snaptr->dbvec[icoeff][1];
|
||||
snadj[icoeff+zoffset] -= snaptr->dbvec[icoeff][2];
|
||||
}
|
||||
|
||||
if (quadraticflag) {
|
||||
|
@ -321,10 +297,10 @@ void ComputeSNADAtom::compute_peratom()
|
|||
snadj += quadraticoffset;
|
||||
int ncount = 0;
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
|
||||
double bi = snaptr[tid]->bvec[icoeff];
|
||||
double bix = snaptr[tid]->dbvec[icoeff][0];
|
||||
double biy = snaptr[tid]->dbvec[icoeff][1];
|
||||
double biz = snaptr[tid]->dbvec[icoeff][2];
|
||||
double bi = snaptr->bvec[icoeff];
|
||||
double bix = snaptr->dbvec[icoeff][0];
|
||||
double biy = snaptr->dbvec[icoeff][1];
|
||||
double biz = snaptr->dbvec[icoeff][2];
|
||||
|
||||
// diagonal elements of quadratic matrix
|
||||
|
||||
|
@ -343,12 +319,12 @@ void ComputeSNADAtom::compute_peratom()
|
|||
// upper-triangular elements of quadratic matrix
|
||||
|
||||
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
|
||||
double dbxtmp = bi*snaptr[tid]->dbvec[jcoeff][0]
|
||||
+ bix*snaptr[tid]->bvec[jcoeff];
|
||||
double dbytmp = bi*snaptr[tid]->dbvec[jcoeff][1]
|
||||
+ biy*snaptr[tid]->bvec[jcoeff];
|
||||
double dbztmp = bi*snaptr[tid]->dbvec[jcoeff][2]
|
||||
+ biz*snaptr[tid]->bvec[jcoeff];
|
||||
double dbxtmp = bi*snaptr->dbvec[jcoeff][0]
|
||||
+ bix*snaptr->bvec[jcoeff];
|
||||
double dbytmp = bi*snaptr->dbvec[jcoeff][1]
|
||||
+ biy*snaptr->bvec[jcoeff];
|
||||
double dbztmp = bi*snaptr->dbvec[jcoeff][2]
|
||||
+ biz*snaptr->bvec[jcoeff];
|
||||
|
||||
snadi[ncount] += dbxtmp;
|
||||
snadi[ncount+yoffset] += dbytmp;
|
||||
|
@ -408,6 +384,5 @@ double ComputeSNADAtom::memory_usage()
|
|||
bytes += 3*njmax*sizeof(double);
|
||||
bytes += njmax*sizeof(int);
|
||||
bytes += 3*nperdim*atom->ntypes;
|
||||
bytes += snaptr[0]->memory_usage()*comm->nthreads;
|
||||
return bytes;
|
||||
}
|
||||
|
|
|
@ -44,10 +44,9 @@ class ComputeSNADAtom : public Compute {
|
|||
double rcutfac;
|
||||
double *radelem;
|
||||
double *wjelem;
|
||||
class SNA** snaptr;
|
||||
class SNA* snaptr;
|
||||
double cutmax;
|
||||
int quadraticflag;
|
||||
int nthreads;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -25,7 +25,6 @@
|
|||
#include "comm.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
#include "openmp_snap.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
|
@ -109,20 +108,10 @@ ComputeSNAVAtom::ComputeSNAVAtom(LAMMPS *lmp, int narg, char **arg) :
|
|||
} else error->all(FLERR,"Illegal compute snav/atom command");
|
||||
}
|
||||
|
||||
nthreads = comm->nthreads;
|
||||
snaptr = new SNA*[nthreads];
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) shared(lmp,rfac0,twojmax,rmin0,switchflag,bzeroflag)
|
||||
#endif
|
||||
{
|
||||
int tid = omp_get_thread_num();
|
||||
snaptr = new SNA(lmp,rfac0,twojmax,diagonalstyle,
|
||||
rmin0,switchflag,bzeroflag);
|
||||
|
||||
// always unset use_shared_arrays since it does not work with computes
|
||||
snaptr[tid] = new SNA(lmp,rfac0,twojmax,diagonalstyle,
|
||||
0 /*use_shared_arrays*/, rmin0,switchflag,bzeroflag);
|
||||
}
|
||||
|
||||
ncoeff = snaptr[0]->ncoeff;
|
||||
ncoeff = snaptr->ncoeff;
|
||||
nperdim = ncoeff;
|
||||
if (quadraticflag) nperdim += (ncoeff*(ncoeff+1))/2;
|
||||
size_peratom_cols = 6*nperdim*atom->ntypes;
|
||||
|
@ -144,9 +133,7 @@ ComputeSNAVAtom::~ComputeSNAVAtom()
|
|||
memory->destroy(wjelem);
|
||||
memory->destroy(cutsq);
|
||||
|
||||
for (int tid = 0; tid<nthreads; tid++)
|
||||
delete snaptr[tid];
|
||||
delete [] snaptr;
|
||||
delete snaptr;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -174,13 +161,7 @@ void ComputeSNAVAtom::init()
|
|||
if (strcmp(modify->compute[i]->style,"snav/atom") == 0) count++;
|
||||
if (count > 1 && comm->me == 0)
|
||||
error->warning(FLERR,"More than one compute snav/atom");
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none)
|
||||
#endif
|
||||
{
|
||||
int tid = omp_get_thread_num();
|
||||
snaptr[tid]->init();
|
||||
}
|
||||
snaptr->init();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
@ -230,11 +211,7 @@ void ComputeSNAVAtom::compute_peratom()
|
|||
double** const x = atom->x;
|
||||
const int* const mask = atom->mask;
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel for default(none)
|
||||
#endif
|
||||
for (int ii = 0; ii < inum; ii++) {
|
||||
const int tid = omp_get_thread_num();
|
||||
const int i = ilist[ii];
|
||||
if (mask[i] & groupbit) {
|
||||
|
||||
|
@ -251,7 +228,7 @@ void ComputeSNAVAtom::compute_peratom()
|
|||
|
||||
// insure rij, inside, and typej are of size jnum
|
||||
|
||||
snaptr[tid]->grow_rij(jnum);
|
||||
snaptr->grow_rij(jnum);
|
||||
|
||||
// rij[][3] = displacements between atom I and those neighbors
|
||||
// inside = indices of neighbors of I within cutoff
|
||||
|
@ -269,31 +246,31 @@ void ComputeSNAVAtom::compute_peratom()
|
|||
const double rsq = delx*delx + dely*dely + delz*delz;
|
||||
int jtype = type[j];
|
||||
if (rsq < cutsq[itype][jtype]&&rsq>1e-20) {
|
||||
snaptr[tid]->rij[ninside][0] = delx;
|
||||
snaptr[tid]->rij[ninside][1] = dely;
|
||||
snaptr[tid]->rij[ninside][2] = delz;
|
||||
snaptr[tid]->inside[ninside] = j;
|
||||
snaptr[tid]->wj[ninside] = wjelem[jtype];
|
||||
snaptr[tid]->rcutij[ninside] = (radi+radelem[jtype])*rcutfac;
|
||||
snaptr->rij[ninside][0] = delx;
|
||||
snaptr->rij[ninside][1] = dely;
|
||||
snaptr->rij[ninside][2] = delz;
|
||||
snaptr->inside[ninside] = j;
|
||||
snaptr->wj[ninside] = wjelem[jtype];
|
||||
snaptr->rcutij[ninside] = (radi+radelem[jtype])*rcutfac;
|
||||
ninside++;
|
||||
}
|
||||
}
|
||||
|
||||
snaptr[tid]->compute_ui(ninside);
|
||||
snaptr[tid]->compute_zi();
|
||||
snaptr->compute_ui(ninside);
|
||||
snaptr->compute_zi();
|
||||
if (quadraticflag) {
|
||||
snaptr[tid]->compute_bi();
|
||||
snaptr[tid]->copy_bi2bvec();
|
||||
snaptr->compute_bi();
|
||||
snaptr->copy_bi2bvec();
|
||||
}
|
||||
|
||||
for (int jj = 0; jj < ninside; jj++) {
|
||||
const int j = snaptr[tid]->inside[jj];
|
||||
const int j = snaptr->inside[jj];
|
||||
|
||||
snaptr[tid]->compute_duidrj(snaptr[tid]->rij[jj],
|
||||
snaptr[tid]->wj[jj],
|
||||
snaptr[tid]->rcutij[jj]);
|
||||
snaptr[tid]->compute_dbidrj();
|
||||
snaptr[tid]->copy_dbi2dbvec();
|
||||
snaptr->compute_duidrj(snaptr->rij[jj],
|
||||
snaptr->wj[jj],
|
||||
snaptr->rcutij[jj]);
|
||||
snaptr->compute_dbidrj();
|
||||
snaptr->copy_dbi2dbvec();
|
||||
|
||||
// Accumulate -dBi/dRi*Ri, -dBi/dRj*Rj
|
||||
|
||||
|
@ -301,18 +278,18 @@ void ComputeSNAVAtom::compute_peratom()
|
|||
double *snavj = snav[j]+typeoffset;
|
||||
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
|
||||
snavi[icoeff] += snaptr[tid]->dbvec[icoeff][0]*xtmp;
|
||||
snavi[icoeff+nperdim] += snaptr[tid]->dbvec[icoeff][1]*ytmp;
|
||||
snavi[icoeff+2*nperdim] += snaptr[tid]->dbvec[icoeff][2]*ztmp;
|
||||
snavi[icoeff+3*nperdim] += snaptr[tid]->dbvec[icoeff][1]*ztmp;
|
||||
snavi[icoeff+4*nperdim] += snaptr[tid]->dbvec[icoeff][0]*ztmp;
|
||||
snavi[icoeff+5*nperdim] += snaptr[tid]->dbvec[icoeff][0]*ytmp;
|
||||
snavj[icoeff] -= snaptr[tid]->dbvec[icoeff][0]*x[j][0];
|
||||
snavj[icoeff+nperdim] -= snaptr[tid]->dbvec[icoeff][1]*x[j][1];
|
||||
snavj[icoeff+2*nperdim] -= snaptr[tid]->dbvec[icoeff][2]*x[j][2];
|
||||
snavj[icoeff+3*nperdim] -= snaptr[tid]->dbvec[icoeff][1]*x[j][2];
|
||||
snavj[icoeff+4*nperdim] -= snaptr[tid]->dbvec[icoeff][0]*x[j][2];
|
||||
snavj[icoeff+5*nperdim] -= snaptr[tid]->dbvec[icoeff][0]*x[j][1];
|
||||
snavi[icoeff] += snaptr->dbvec[icoeff][0]*xtmp;
|
||||
snavi[icoeff+nperdim] += snaptr->dbvec[icoeff][1]*ytmp;
|
||||
snavi[icoeff+2*nperdim] += snaptr->dbvec[icoeff][2]*ztmp;
|
||||
snavi[icoeff+3*nperdim] += snaptr->dbvec[icoeff][1]*ztmp;
|
||||
snavi[icoeff+4*nperdim] += snaptr->dbvec[icoeff][0]*ztmp;
|
||||
snavi[icoeff+5*nperdim] += snaptr->dbvec[icoeff][0]*ytmp;
|
||||
snavj[icoeff] -= snaptr->dbvec[icoeff][0]*x[j][0];
|
||||
snavj[icoeff+nperdim] -= snaptr->dbvec[icoeff][1]*x[j][1];
|
||||
snavj[icoeff+2*nperdim] -= snaptr->dbvec[icoeff][2]*x[j][2];
|
||||
snavj[icoeff+3*nperdim] -= snaptr->dbvec[icoeff][1]*x[j][2];
|
||||
snavj[icoeff+4*nperdim] -= snaptr->dbvec[icoeff][0]*x[j][2];
|
||||
snavj[icoeff+5*nperdim] -= snaptr->dbvec[icoeff][0]*x[j][1];
|
||||
}
|
||||
|
||||
if (quadraticflag) {
|
||||
|
@ -321,10 +298,10 @@ void ComputeSNAVAtom::compute_peratom()
|
|||
snavj += quadraticoffset;
|
||||
int ncount = 0;
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
|
||||
double bi = snaptr[tid]->bvec[icoeff];
|
||||
double bix = snaptr[tid]->dbvec[icoeff][0];
|
||||
double biy = snaptr[tid]->dbvec[icoeff][1];
|
||||
double biz = snaptr[tid]->dbvec[icoeff][2];
|
||||
double bi = snaptr->bvec[icoeff];
|
||||
double bix = snaptr->dbvec[icoeff][0];
|
||||
double biy = snaptr->dbvec[icoeff][1];
|
||||
double biz = snaptr->dbvec[icoeff][2];
|
||||
|
||||
// diagonal element of quadratic matrix
|
||||
|
||||
|
@ -348,12 +325,12 @@ void ComputeSNAVAtom::compute_peratom()
|
|||
// upper-triangular elements of quadratic matrix
|
||||
|
||||
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
|
||||
double dbxtmp = bi*snaptr[tid]->dbvec[jcoeff][0]
|
||||
+ bix*snaptr[tid]->bvec[jcoeff];
|
||||
double dbytmp = bi*snaptr[tid]->dbvec[jcoeff][1]
|
||||
+ biy*snaptr[tid]->bvec[jcoeff];
|
||||
double dbztmp = bi*snaptr[tid]->dbvec[jcoeff][2]
|
||||
+ biz*snaptr[tid]->bvec[jcoeff];
|
||||
double dbxtmp = bi*snaptr->dbvec[jcoeff][0]
|
||||
+ bix*snaptr->bvec[jcoeff];
|
||||
double dbytmp = bi*snaptr->dbvec[jcoeff][1]
|
||||
+ biy*snaptr->bvec[jcoeff];
|
||||
double dbztmp = bi*snaptr->dbvec[jcoeff][2]
|
||||
+ biz*snaptr->bvec[jcoeff];
|
||||
snavi[ncount] += dbxtmp*xtmp;
|
||||
snavi[ncount+nperdim] += dbytmp*ytmp;
|
||||
snavi[ncount+2*nperdim] += dbztmp*ztmp;
|
||||
|
@ -419,6 +396,5 @@ double ComputeSNAVAtom::memory_usage()
|
|||
bytes += njmax*sizeof(int);
|
||||
bytes += 6*nperdim*atom->ntypes;
|
||||
if (quadraticflag) bytes += 6*nperdim*atom->ntypes;
|
||||
bytes += snaptr[0]->memory_usage()*comm->nthreads;
|
||||
return bytes;
|
||||
}
|
||||
|
|
|
@ -44,9 +44,8 @@ class ComputeSNAVAtom : public Compute {
|
|||
double rcutfac;
|
||||
double *radelem;
|
||||
double *wjelem;
|
||||
class SNA** snaptr;
|
||||
class SNA* snaptr;
|
||||
int quadraticflag;
|
||||
int nthreads;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -1,16 +0,0 @@
|
|||
|
||||
#ifndef LMP_OPENMP_SNAP_H
|
||||
#define LMP_OPENMP_SNAP_H
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#include <omp.h>
|
||||
#else
|
||||
enum omp_sched_t {omp_sched_static, omp_sched_dynamic, omp_sched_guided, omp_sched_auto};
|
||||
inline int omp_get_thread_num() { return 0;}
|
||||
inline int omp_set_num_threads(int num_threads) {return 1;}
|
||||
/* inline int __sync_fetch_and_add(int* ptr, int value) {int tmp = *ptr; ptr[0]+=value; return tmp;} */
|
||||
inline void omp_set_schedule(omp_sched_t schedule,int modifier=1) {}
|
||||
inline int omp_in_parallel() {return 0;}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -23,7 +23,6 @@
|
|||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "sna.h"
|
||||
#include "openmp_snap.h"
|
||||
#include "domain.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
|
@ -55,51 +54,6 @@ PairSNAP::PairSNAP(LAMMPS *lmp) : Pair(lmp)
|
|||
coeffelem = NULL;
|
||||
|
||||
nmax = 0;
|
||||
nthreads = 1;
|
||||
|
||||
schedule_user = 0;
|
||||
schedule_time_guided = -1;
|
||||
schedule_time_dynamic = -1;
|
||||
ncalls_neigh =-1;
|
||||
|
||||
ilistmask_max = 0;
|
||||
ilistmask = NULL;
|
||||
ghostinum = 0;
|
||||
ghostilist_max = 0;
|
||||
ghostilist = NULL;
|
||||
ghostnumneigh_max = 0;
|
||||
ghostnumneigh = NULL;
|
||||
ghostneighs = NULL;
|
||||
ghostfirstneigh = NULL;
|
||||
ghostneighs_total = 0;
|
||||
ghostneighs_max = 0;
|
||||
|
||||
i_max = 0;
|
||||
i_neighmax = 0;
|
||||
i_numpairs = 0;
|
||||
i_rij = NULL;
|
||||
i_inside = NULL;
|
||||
i_wj = NULL;
|
||||
i_rcutij = NULL;
|
||||
i_ninside = NULL;
|
||||
i_pairs = NULL;
|
||||
i_uarraytot_r = NULL;
|
||||
i_uarraytot_i = NULL;
|
||||
i_zarray_r = NULL;
|
||||
i_zarray_i = NULL;
|
||||
|
||||
use_shared_arrays = 0;
|
||||
|
||||
#ifdef TIMING_INFO
|
||||
timers[0] = 0;
|
||||
timers[1] = 0;
|
||||
timers[2] = 0;
|
||||
timers[3] = 0;
|
||||
#endif
|
||||
|
||||
// Need to set this because restart not handled by PairHybrid
|
||||
|
||||
sna = NULL;
|
||||
|
||||
beta_max = 0;
|
||||
beta = NULL;
|
||||
|
@ -123,35 +77,7 @@ PairSNAP::~PairSNAP()
|
|||
memory->destroy(beta);
|
||||
memory->destroy(bispectrum);
|
||||
|
||||
// Need to set this because restart not handled by PairHybrid
|
||||
|
||||
if (sna) {
|
||||
|
||||
#ifdef TIMING_INFO
|
||||
double time[5];
|
||||
double timeave[5];
|
||||
double timeave_mpi[5];
|
||||
double timemax_mpi[5];
|
||||
|
||||
for (int i = 0; i < 5; i++) {
|
||||
time[i] = 0;
|
||||
timeave[i] = 0;
|
||||
for (int tid = 0; tid<nthreads; tid++) {
|
||||
if (sna[tid]->timers[i]>time[i])
|
||||
time[i] = sna[tid]->timers[i];
|
||||
timeave[i] += sna[tid]->timers[i];
|
||||
}
|
||||
timeave[i] /= nthreads;
|
||||
}
|
||||
MPI_Reduce(timeave, timeave_mpi, 5, MPI_DOUBLE, MPI_SUM, 0, world);
|
||||
MPI_Reduce(time, timemax_mpi, 5, MPI_DOUBLE, MPI_MAX, 0, world);
|
||||
#endif
|
||||
|
||||
for (int tid = 0; tid<nthreads; tid++)
|
||||
delete sna[tid];
|
||||
delete [] sna;
|
||||
|
||||
}
|
||||
delete snaptr;
|
||||
|
||||
if (allocated) {
|
||||
memory->destroy(setflag);
|
||||
|
@ -161,22 +87,11 @@ PairSNAP::~PairSNAP()
|
|||
|
||||
}
|
||||
|
||||
void PairSNAP::compute(int eflag, int vflag)
|
||||
{
|
||||
// if (use_optimized)
|
||||
// compute_optimized(eflag, vflag);
|
||||
// else
|
||||
|
||||
// hard-code compute_regular()
|
||||
|
||||
compute_regular(eflag, vflag);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
This version is a straightforward implementation
|
||||
---------------------------------------------------------------------- */
|
||||
|
||||
void PairSNAP::compute_regular(int eflag, int vflag)
|
||||
void PairSNAP::compute(int eflag, int vflag)
|
||||
{
|
||||
int i,j,jnum,ninside;
|
||||
double delx,dely,delz,evdwl,rsq;
|
||||
|
@ -191,7 +106,6 @@ void PairSNAP::compute_regular(int eflag, int vflag)
|
|||
int *type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
int newton_pair = force->newton_pair;
|
||||
class SNA* snaptr = sna[0];
|
||||
|
||||
if (beta_max < list->inum) {
|
||||
memory->grow(beta,list->inum,ncoeff,"PairSNAP:beta");
|
||||
|
@ -315,573 +229,6 @@ void PairSNAP::compute_regular(int eflag, int vflag)
|
|||
if (vflag_fdotr) virial_fdotr_compute();
|
||||
}
|
||||
|
||||
inline int PairSNAP::equal(double* x,double* y)
|
||||
{
|
||||
double dist2 =
|
||||
(x[0]-y[0])*(x[0]-y[0]) +
|
||||
(x[1]-y[1])*(x[1]-y[1]) +
|
||||
(x[2]-y[2])*(x[2]-y[2]);
|
||||
if (dist2 < 1e-20) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
inline double PairSNAP::dist2(double* x,double* y)
|
||||
{
|
||||
return
|
||||
(x[0]-y[0])*(x[0]-y[0]) +
|
||||
(x[1]-y[1])*(x[1]-y[1]) +
|
||||
(x[2]-y[2])*(x[2]-y[2]);
|
||||
}
|
||||
|
||||
// return extra communication cutoff
|
||||
// extra_cutoff = max(subdomain_length)
|
||||
|
||||
double PairSNAP::extra_cutoff()
|
||||
{
|
||||
double sublo[3],subhi[3];
|
||||
|
||||
if (domain->triclinic == 0) {
|
||||
for (int dim = 0 ; dim < 3 ; dim++) {
|
||||
sublo[dim] = domain->sublo[dim];
|
||||
subhi[dim] = domain->subhi[dim];
|
||||
}
|
||||
} else {
|
||||
domain->lamda2x(domain->sublo_lamda,sublo);
|
||||
domain->lamda2x(domain->subhi_lamda,subhi);
|
||||
}
|
||||
|
||||
double sub_size[3];
|
||||
for (int dim = 0; dim < 3; dim++)
|
||||
sub_size[dim] = subhi[dim] - sublo[dim];
|
||||
|
||||
double max_sub_size = 0;
|
||||
for (int dim = 0; dim < 3; dim++)
|
||||
max_sub_size = MAX(max_sub_size,sub_size[dim]);
|
||||
|
||||
// note: for triclinic, probably need something different
|
||||
// see Comm::setup()
|
||||
|
||||
return max_sub_size;
|
||||
}
|
||||
|
||||
// micro load_balancer: each MPI process will
|
||||
// check with each of its 26 neighbors,
|
||||
// whether an imbalance exists in the number
|
||||
// of atoms to calculate forces for.
|
||||
// If it does it will set ilistmask of one of
|
||||
// its local atoms to zero, and send its Tag
|
||||
// to the neighbor process. The neighboring process
|
||||
// will check its ghost list for the
|
||||
// ghost atom with the same Tag which is closest
|
||||
// to its domain center, and build a
|
||||
// neighborlist for this ghost atom. For this to work,
|
||||
// the communication cutoff has to be
|
||||
// as large as the neighbor cutoff +
|
||||
// maximum subdomain length.
|
||||
|
||||
// Note that at most one atom is exchanged per processor pair.
|
||||
|
||||
// Also note that the local atom assignment
|
||||
// doesn't change. This load balancer will cause
|
||||
// some ghost atoms to have full neighborlists
|
||||
// which are unique to PairSNAP.
|
||||
// They are not part of the generally accessible neighborlist.
|
||||
// At the same time corresponding local atoms on
|
||||
// other MPI processes will not be
|
||||
// included in the force computation since
|
||||
// their ilistmask is 0. This does not effect
|
||||
// any other classes which might
|
||||
// access the same general neighborlist.
|
||||
// Reverse communication (newton on) of forces is required.
|
||||
|
||||
// Currently the load balancer does two passes,
|
||||
// since its exchanging atoms upstream and downstream.
|
||||
|
||||
void PairSNAP::load_balance()
|
||||
{
|
||||
double sublo[3],subhi[3];
|
||||
if (domain->triclinic == 0) {
|
||||
double* sublotmp = domain->sublo;
|
||||
double* subhitmp = domain->subhi;
|
||||
for (int dim = 0 ; dim<3 ; dim++) {
|
||||
sublo[dim]=sublotmp[dim];
|
||||
subhi[dim]=subhitmp[dim];
|
||||
}
|
||||
} else {
|
||||
double* sublotmp = domain->sublo_lamda;
|
||||
double* subhitmp = domain->subhi_lamda;
|
||||
domain->lamda2x(sublotmp,sublo);
|
||||
domain->lamda2x(subhitmp,subhi);
|
||||
}
|
||||
|
||||
//if (list->inum==0) list->grow(atom->nmax);
|
||||
|
||||
int nlocal = ghostinum;
|
||||
for (int i=0; i < list->inum; i++)
|
||||
if (ilistmask[i]) nlocal++;
|
||||
int ***grid2proc = comm->grid2proc;
|
||||
int* procgrid = comm->procgrid;
|
||||
|
||||
int nlocal_up,nlocal_down;
|
||||
MPI_Request request;
|
||||
|
||||
double sub_mid[3];
|
||||
for (int dim=0; dim<3; dim++)
|
||||
sub_mid[dim] = (subhi[dim] + sublo[dim])/2;
|
||||
|
||||
if (comm->cutghostuser <
|
||||
neighbor->cutneighmax+extra_cutoff())
|
||||
error->all(FLERR,"Communication cutoff too small for SNAP micro load balancing");
|
||||
|
||||
int nrecv = ghostinum;
|
||||
int totalsend = 0;
|
||||
int nsend = 0;
|
||||
int depth = 1;
|
||||
|
||||
for (int dx = -depth; dx < depth+1; dx++)
|
||||
for (int dy = -depth; dy < depth+1; dy++)
|
||||
for (int dz = -depth; dz < depth+1; dz++) {
|
||||
|
||||
if (dx == dy && dy == dz && dz == 0) continue;
|
||||
|
||||
int sendloc[3] = {comm->myloc[0],
|
||||
comm->myloc[1], comm->myloc[2]
|
||||
};
|
||||
sendloc[0] += dx;
|
||||
sendloc[1] += dy;
|
||||
sendloc[2] += dz;
|
||||
for (int dim = 0; dim < 3; dim++)
|
||||
if (sendloc[dim] >= procgrid[dim])
|
||||
sendloc[dim] = sendloc[dim] - procgrid[dim];
|
||||
for (int dim = 0; dim < 3; dim++)
|
||||
if (sendloc[dim] < 0)
|
||||
sendloc[dim] = procgrid[dim] + sendloc[dim];
|
||||
int recvloc[3] = {comm->myloc[0],
|
||||
comm->myloc[1], comm->myloc[2]
|
||||
};
|
||||
recvloc[0] -= dx;
|
||||
recvloc[1] -= dy;
|
||||
recvloc[2] -= dz;
|
||||
for (int dim = 0; dim < 3; dim++)
|
||||
if (recvloc[dim] < 0)
|
||||
recvloc[dim] = procgrid[dim] + recvloc[dim];
|
||||
for (int dim = 0; dim < 3; dim++)
|
||||
if (recvloc[dim] >= procgrid[dim])
|
||||
recvloc[dim] = recvloc[dim] - procgrid[dim];
|
||||
|
||||
int sendproc = grid2proc[sendloc[0]][sendloc[1]][sendloc[2]];
|
||||
int recvproc = grid2proc[recvloc[0]][recvloc[1]][recvloc[2]];
|
||||
|
||||
// two stage process, first upstream movement, then downstream
|
||||
|
||||
MPI_Sendrecv(&nlocal,1,MPI_INT,sendproc,0,
|
||||
&nlocal_up,1,MPI_INT,recvproc,0,world,MPI_STATUS_IGNORE);
|
||||
MPI_Sendrecv(&nlocal,1,MPI_INT,recvproc,0,
|
||||
&nlocal_down,1,MPI_INT,sendproc,0,world,MPI_STATUS_IGNORE);
|
||||
nsend = 0;
|
||||
|
||||
// send upstream
|
||||
|
||||
if (nlocal > nlocal_up+1) {
|
||||
|
||||
int i = totalsend++;
|
||||
while(i < list->inum && ilistmask[i] == 0)
|
||||
i = totalsend++;
|
||||
|
||||
if (i < list->inum)
|
||||
MPI_Isend(&atom->tag[i],1,MPI_INT,recvproc,0,world,&request);
|
||||
else {
|
||||
int j = -1;
|
||||
MPI_Isend(&j,1,MPI_INT,recvproc,0,world,&request);
|
||||
}
|
||||
|
||||
if (i < list->inum) {
|
||||
for (int j = 0; j < list->inum; j++)
|
||||
if (list->ilist[j] == i)
|
||||
ilistmask[j] = 0;
|
||||
nsend = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// recv downstream
|
||||
|
||||
if (nlocal < nlocal_down-1) {
|
||||
nlocal++;
|
||||
int get_tag = -1;
|
||||
MPI_Recv(&get_tag,1,MPI_INT,sendproc,0,world,MPI_STATUS_IGNORE);
|
||||
|
||||
// if get_tag -1 the other process didnt have local atoms to send
|
||||
|
||||
if (get_tag >= 0) {
|
||||
if (ghostinum >= ghostilist_max) {
|
||||
memory->grow(ghostilist,ghostinum+10,
|
||||
"PairSnap::ghostilist");
|
||||
ghostilist_max = ghostinum+10;
|
||||
}
|
||||
if (atom->nlocal + atom->nghost >= ghostnumneigh_max) {
|
||||
ghostnumneigh_max = atom->nlocal+atom->nghost+100;
|
||||
memory->grow(ghostnumneigh,ghostnumneigh_max,
|
||||
"PairSnap::ghostnumneigh");
|
||||
memory->grow(ghostfirstneigh,ghostnumneigh_max,
|
||||
"PairSnap::ghostfirstneigh");
|
||||
}
|
||||
|
||||
// find closest ghost image of the transfered particle
|
||||
|
||||
double mindist = 1e200;
|
||||
int closestghost = -1;
|
||||
for (int j = 0; j < atom->nlocal + atom->nghost; j++)
|
||||
if (atom->tag[j] == get_tag)
|
||||
if (dist2(sub_mid, atom->x[j]) < mindist) {
|
||||
closestghost = j;
|
||||
mindist = dist2(sub_mid, atom->x[j]);
|
||||
}
|
||||
|
||||
// build neighborlist for this particular
|
||||
// ghost atom, and add it to list->ilist
|
||||
|
||||
if (ghostneighs_max - ghostneighs_total <
|
||||
neighbor->oneatom) {
|
||||
memory->grow(ghostneighs,
|
||||
ghostneighs_total + neighbor->oneatom,
|
||||
"PairSnap::ghostneighs");
|
||||
ghostneighs_max = ghostneighs_total + neighbor->oneatom;
|
||||
}
|
||||
|
||||
int j = closestghost;
|
||||
|
||||
ghostilist[ghostinum] = j;
|
||||
ghostnumneigh[j] = 0;
|
||||
ghostfirstneigh[j] = ghostneighs_total;
|
||||
|
||||
ghostinum++;
|
||||
int* jlist = ghostneighs + ghostfirstneigh[j];
|
||||
|
||||
// find all neighbors by looping
|
||||
// over all local and ghost atoms
|
||||
|
||||
for (int k = 0; k < atom->nlocal + atom->nghost; k++)
|
||||
if (dist2(atom->x[j],atom->x[k]) <
|
||||
neighbor->cutneighmax*neighbor->cutneighmax) {
|
||||
jlist[ghostnumneigh[j]] = k;
|
||||
ghostnumneigh[j]++;
|
||||
ghostneighs_total++;
|
||||
}
|
||||
}
|
||||
|
||||
if (get_tag >= 0) nrecv++;
|
||||
}
|
||||
|
||||
// decrease nlocal later, so that it is the
|
||||
// initial number both for receiving and sending
|
||||
|
||||
if (nsend) nlocal--;
|
||||
|
||||
// second pass through the grid
|
||||
|
||||
MPI_Sendrecv(&nlocal,1,MPI_INT,sendproc,0,
|
||||
&nlocal_up,1,MPI_INT,recvproc,0,world,MPI_STATUS_IGNORE);
|
||||
MPI_Sendrecv(&nlocal,1,MPI_INT,recvproc,0,
|
||||
&nlocal_down,1,MPI_INT,sendproc,0,world,MPI_STATUS_IGNORE);
|
||||
|
||||
// send downstream
|
||||
|
||||
nsend=0;
|
||||
if (nlocal > nlocal_down+1) {
|
||||
int i = totalsend++;
|
||||
while(i < list->inum && ilistmask[i]==0) i = totalsend++;
|
||||
|
||||
if (i < list->inum)
|
||||
MPI_Isend(&atom->tag[i],1,MPI_INT,sendproc,0,world,&request);
|
||||
else {
|
||||
int j =- 1;
|
||||
MPI_Isend(&j,1,MPI_INT,sendproc,0,world,&request);
|
||||
}
|
||||
|
||||
if (i < list->inum) {
|
||||
for (int j=0; j<list->inum; j++)
|
||||
if (list->ilist[j] == i) ilistmask[j] = 0;
|
||||
nsend = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// receive upstream
|
||||
|
||||
if (nlocal < nlocal_up-1) {
|
||||
nlocal++;
|
||||
int get_tag = -1;
|
||||
|
||||
MPI_Recv(&get_tag,1,MPI_INT,recvproc,0,world,MPI_STATUS_IGNORE);
|
||||
|
||||
if (get_tag >= 0) {
|
||||
if (ghostinum >= ghostilist_max) {
|
||||
memory->grow(ghostilist,ghostinum+10,
|
||||
"PairSnap::ghostilist");
|
||||
ghostilist_max = ghostinum+10;
|
||||
}
|
||||
if (atom->nlocal + atom->nghost >= ghostnumneigh_max) {
|
||||
ghostnumneigh_max = atom->nlocal + atom->nghost + 100;
|
||||
memory->grow(ghostnumneigh,ghostnumneigh_max,
|
||||
"PairSnap::ghostnumneigh");
|
||||
memory->grow(ghostfirstneigh,ghostnumneigh_max,
|
||||
"PairSnap::ghostfirstneigh");
|
||||
}
|
||||
|
||||
// find closest ghost image of the transfered particle
|
||||
|
||||
double mindist = 1e200;
|
||||
int closestghost = -1;
|
||||
for (int j = 0; j < atom->nlocal + atom->nghost; j++)
|
||||
if (atom->tag[j] == get_tag)
|
||||
if (dist2(sub_mid,atom->x[j])<mindist) {
|
||||
closestghost = j;
|
||||
mindist = dist2(sub_mid,atom->x[j]);
|
||||
}
|
||||
|
||||
// build neighborlist for this particular ghost atom
|
||||
|
||||
if (ghostneighs_max-ghostneighs_total < neighbor->oneatom) {
|
||||
memory->grow(ghostneighs,ghostneighs_total + neighbor->oneatom,
|
||||
"PairSnap::ghostneighs");
|
||||
ghostneighs_max = ghostneighs_total + neighbor->oneatom;
|
||||
}
|
||||
|
||||
int j = closestghost;
|
||||
|
||||
ghostilist[ghostinum] = j;
|
||||
ghostnumneigh[j] = 0;
|
||||
ghostfirstneigh[j] = ghostneighs_total;
|
||||
|
||||
ghostinum++;
|
||||
int* jlist = ghostneighs + ghostfirstneigh[j];
|
||||
|
||||
for (int k = 0; k < atom->nlocal + atom->nghost; k++)
|
||||
if (dist2(atom->x[j],atom->x[k]) <
|
||||
neighbor->cutneighmax*neighbor->cutneighmax) {
|
||||
jlist[ghostnumneigh[j]] = k;
|
||||
ghostnumneigh[j]++;
|
||||
ghostneighs_total++;
|
||||
}
|
||||
}
|
||||
|
||||
if (get_tag >= 0) nrecv++;
|
||||
}
|
||||
if (nsend) nlocal--;
|
||||
}
|
||||
}
|
||||
|
||||
void PairSNAP::set_sna_to_shared(int snaid,int i)
|
||||
{
|
||||
sna[snaid]->rij = i_rij[i];
|
||||
sna[snaid]->inside = i_inside[i];
|
||||
sna[snaid]->wj = i_wj[i];
|
||||
sna[snaid]->rcutij = i_rcutij[i];
|
||||
sna[snaid]->zarray_r = i_zarray_r[i];
|
||||
sna[snaid]->zarray_i = i_zarray_i[i];
|
||||
sna[snaid]->uarraytot_r = i_uarraytot_r[i];
|
||||
sna[snaid]->uarraytot_i = i_uarraytot_i[i];
|
||||
}
|
||||
|
||||
void PairSNAP::build_per_atom_arrays()
|
||||
{
|
||||
|
||||
#ifdef TIMING_INFO
|
||||
clock_gettime(CLOCK_REALTIME,&starttime);
|
||||
#endif
|
||||
|
||||
int count = 0;
|
||||
int neighmax = 0;
|
||||
for (int ii = 0; ii < list->inum; ii++)
|
||||
if ((do_load_balance <= 0) || ilistmask[ii]) {
|
||||
neighmax=MAX(neighmax,list->numneigh[list->ilist[ii]]);
|
||||
++count;
|
||||
}
|
||||
for (int ii = 0; ii < ghostinum; ii++) {
|
||||
neighmax=MAX(neighmax,ghostnumneigh[ghostilist[ii]]);
|
||||
++count;
|
||||
}
|
||||
|
||||
if (i_max < count || i_neighmax < neighmax) {
|
||||
int i_maxt = MAX(count,i_max);
|
||||
i_neighmax = MAX(neighmax,i_neighmax);
|
||||
memory->destroy(i_rij);
|
||||
memory->destroy(i_inside);
|
||||
memory->destroy(i_wj);
|
||||
memory->destroy(i_rcutij);
|
||||
memory->destroy(i_ninside);
|
||||
memory->destroy(i_pairs);
|
||||
memory->create(i_rij,i_maxt,i_neighmax,3,"PairSNAP::i_rij");
|
||||
memory->create(i_inside,i_maxt,i_neighmax,"PairSNAP::i_inside");
|
||||
memory->create(i_wj,i_maxt,i_neighmax,"PairSNAP::i_wj");
|
||||
memory->create(i_rcutij,i_maxt,i_neighmax,"PairSNAP::i_rcutij");
|
||||
memory->create(i_ninside,i_maxt,"PairSNAP::i_ninside");
|
||||
memory->create(i_pairs,i_maxt*i_neighmax,4,"PairSNAP::i_pairs");
|
||||
}
|
||||
|
||||
if (i_max < count) {
|
||||
int jdim = sna[0]->twojmax+1;
|
||||
memory->destroy(i_uarraytot_r);
|
||||
memory->destroy(i_uarraytot_i);
|
||||
memory->create(i_uarraytot_r,count,jdim,jdim,jdim,
|
||||
"PairSNAP::i_uarraytot_r");
|
||||
memory->create(i_uarraytot_i,count,jdim,jdim,jdim,
|
||||
"PairSNAP::i_uarraytot_i");
|
||||
if (i_zarray_r != NULL)
|
||||
for (int i = 0; i < i_max; i++) {
|
||||
memory->destroy(i_zarray_r[i]);
|
||||
memory->destroy(i_zarray_i[i]);
|
||||
}
|
||||
|
||||
delete [] i_zarray_r;
|
||||
delete [] i_zarray_i;
|
||||
i_zarray_r = new double*****[count];
|
||||
i_zarray_i = new double*****[count];
|
||||
for (int i = 0; i < count; i++) {
|
||||
memory->create(i_zarray_r[i],jdim,jdim,jdim,jdim,jdim,
|
||||
"PairSNAP::i_zarray_r");
|
||||
memory->create(i_zarray_i[i],jdim,jdim,jdim,jdim,jdim,
|
||||
"PairSNAP::i_zarray_i");
|
||||
}
|
||||
}
|
||||
|
||||
if (i_max < count)
|
||||
i_max = count;
|
||||
|
||||
count = 0;
|
||||
i_numpairs = 0;
|
||||
for (int ii = 0; ii < list->inum; ii++) {
|
||||
if ((do_load_balance <= 0) || ilistmask[ii]) {
|
||||
int i = list->ilist[ii];
|
||||
int jnum = list->numneigh[i];
|
||||
int* jlist = list->firstneigh[i];
|
||||
const double xtmp = atom->x[i][0];
|
||||
const double ytmp = atom->x[i][1];
|
||||
const double ztmp = atom->x[i][2];
|
||||
const int itype = atom->type[i];
|
||||
const int ielem = map[itype];
|
||||
const double radi = radelem[ielem];
|
||||
int ninside = 0;
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
int j = jlist[jj];
|
||||
j &= NEIGHMASK;
|
||||
const double delx = atom->x[j][0] - xtmp;
|
||||
const double dely = atom->x[j][1] - ytmp;
|
||||
const double delz = atom->x[j][2] - ztmp;
|
||||
const double rsq = delx*delx + dely*dely + delz*delz;
|
||||
int jtype = atom->type[j];
|
||||
int jelem = map[jtype];
|
||||
|
||||
i_pairs[i_numpairs][0] = i;
|
||||
i_pairs[i_numpairs][1] = jj;
|
||||
i_pairs[i_numpairs][2] = -1;
|
||||
i_pairs[i_numpairs][3] = count;
|
||||
if (rsq < cutsq[itype][jtype]&&rsq>1e-20) {
|
||||
i_rij[count][ninside][0] = delx;
|
||||
i_rij[count][ninside][1] = dely;
|
||||
i_rij[count][ninside][2] = delz;
|
||||
i_inside[count][ninside] = j;
|
||||
i_wj[count][ninside] = wjelem[jelem];
|
||||
i_rcutij[count][ninside] = (radi + radelem[jelem])*rcutfac;
|
||||
|
||||
// update index list with inside index
|
||||
i_pairs[i_numpairs][2] = ninside++;
|
||||
}
|
||||
i_numpairs++;
|
||||
}
|
||||
i_ninside[count] = ninside;
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
for (int ii = 0; ii < ghostinum; ii++) {
|
||||
int i = ghostilist[ii];
|
||||
int jnum = ghostnumneigh[i];
|
||||
int* jlist = ghostneighs+ghostfirstneigh[i];
|
||||
const double xtmp = atom->x[i][0];
|
||||
const double ytmp = atom->x[i][1];
|
||||
const double ztmp = atom->x[i][2];
|
||||
const int itype = atom->type[i];
|
||||
const int ielem = map[itype];
|
||||
const double radi = radelem[ielem];
|
||||
int ninside = 0;
|
||||
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
int j = jlist[jj];
|
||||
j &= NEIGHMASK;
|
||||
const double delx = atom->x[j][0] - xtmp;
|
||||
const double dely = atom->x[j][1] - ytmp;
|
||||
const double delz = atom->x[j][2] - ztmp;
|
||||
const double rsq = delx*delx + dely*dely + delz*delz;
|
||||
int jtype = atom->type[j];
|
||||
int jelem = map[jtype];
|
||||
|
||||
i_pairs[i_numpairs][0] = i;
|
||||
i_pairs[i_numpairs][1] = jj;
|
||||
i_pairs[i_numpairs][2] = -1;
|
||||
i_pairs[i_numpairs][3] = count;
|
||||
if (rsq < cutsq[itype][jtype]&&rsq>1e-20) {
|
||||
i_rij[count][ninside][0] = delx;
|
||||
i_rij[count][ninside][1] = dely;
|
||||
i_rij[count][ninside][2] = delz;
|
||||
i_inside[count][ninside] = j;
|
||||
i_wj[count][ninside] = wjelem[jelem];
|
||||
i_rcutij[count][ninside] = (radi + radelem[jelem])*rcutfac;
|
||||
// update index list with inside index
|
||||
i_pairs[i_numpairs][2] = ninside++;
|
||||
}
|
||||
i_numpairs++;
|
||||
}
|
||||
i_ninside[count] = ninside;
|
||||
count++;
|
||||
}
|
||||
#ifdef TIMING_INFO
|
||||
clock_gettime(CLOCK_REALTIME,&endtime);
|
||||
timers[0]+=(endtime.tv_sec-starttime.tv_sec+1.0*
|
||||
(endtime.tv_nsec-starttime.tv_nsec)/1000000000);
|
||||
#endif
|
||||
#ifdef TIMING_INFO
|
||||
clock_gettime(CLOCK_REALTIME,&starttime);
|
||||
#endif
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel for shared(count) default(none)
|
||||
#endif
|
||||
for (int ii=0; ii < count; ii++) {
|
||||
int tid = omp_get_thread_num();
|
||||
set_sna_to_shared(tid,ii);
|
||||
//sna[tid]->compute_ui(i_ninside[ii]);
|
||||
#ifdef TIMING_INFO
|
||||
clock_gettime(CLOCK_REALTIME,&starttime);
|
||||
#endif
|
||||
sna[tid]->compute_ui_omp(i_ninside[ii],MAX(int(nthreads/count),1));
|
||||
#ifdef TIMING_INFO
|
||||
clock_gettime(CLOCK_REALTIME,&endtime);
|
||||
sna[tid]->timers[0]+=(endtime.tv_sec-starttime.tv_sec+1.0*
|
||||
(endtime.tv_nsec-starttime.tv_nsec)/1000000000);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef TIMING_INFO
|
||||
clock_gettime(CLOCK_REALTIME,&starttime);
|
||||
#endif
|
||||
for (int ii=0; ii < count; ii++) {
|
||||
int tid = 0;//omp_get_thread_num();
|
||||
set_sna_to_shared(tid,ii);
|
||||
sna[tid]->compute_zi_omp(MAX(int(nthreads/count),1));
|
||||
}
|
||||
#ifdef TIMING_INFO
|
||||
clock_gettime(CLOCK_REALTIME,&endtime);
|
||||
sna[0]->timers[1]+=(endtime.tv_sec-starttime.tv_sec+1.0*
|
||||
(endtime.tv_nsec-starttime.tv_nsec)/1000000000);
|
||||
#endif
|
||||
|
||||
#ifdef TIMING_INFO
|
||||
clock_gettime(CLOCK_REALTIME,&endtime);
|
||||
timers[1]+=(endtime.tv_sec-starttime.tv_sec+1.0*
|
||||
(endtime.tv_nsec-starttime.tv_nsec)/1000000000);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute beta
|
||||
------------------------------------------------------------------------- */
|
||||
|
@ -914,7 +261,6 @@ void PairSNAP::compute_bispectrum()
|
|||
|
||||
double **x = atom->x;
|
||||
int *type = atom->type;
|
||||
class SNA* snaptr = sna[0];
|
||||
|
||||
for (int ii = 0; ii < list->inum; ii++) {
|
||||
i = list->ilist[ii];
|
||||
|
@ -991,96 +337,8 @@ void PairSNAP::allocate()
|
|||
|
||||
void PairSNAP::settings(int narg, char **arg)
|
||||
{
|
||||
|
||||
// set default values for optional arguments
|
||||
|
||||
nthreads = -1;
|
||||
use_shared_arrays=-1;
|
||||
do_load_balance = 0;
|
||||
use_optimized = 1;
|
||||
|
||||
// optional arguments
|
||||
|
||||
for (int i=0; i < narg; i++) {
|
||||
if (i+2>narg) error->all(FLERR,"Illegal pair_style command");
|
||||
if (strcmp(arg[i],"nthreads")==0) {
|
||||
nthreads=force->inumeric(FLERR,arg[++i]);
|
||||
#if defined(LMP_USER_OMP)
|
||||
error->all(FLERR,"Must set number of threads via package omp command");
|
||||
#else
|
||||
omp_set_num_threads(nthreads);
|
||||
comm->nthreads=nthreads;
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
if (strcmp(arg[i],"optimized")==0) {
|
||||
use_optimized=force->inumeric(FLERR,arg[++i]);
|
||||
continue;
|
||||
}
|
||||
if (strcmp(arg[i],"shared")==0) {
|
||||
use_shared_arrays=force->inumeric(FLERR,arg[++i]);
|
||||
continue;
|
||||
}
|
||||
if (strcmp(arg[i],"loadbalance")==0) {
|
||||
do_load_balance = force->inumeric(FLERR,arg[++i]);
|
||||
if (do_load_balance) {
|
||||
double mincutoff = extra_cutoff() +
|
||||
rcutmax + neighbor->skin;
|
||||
if (comm->cutghostuser < mincutoff) {
|
||||
char buffer[255];
|
||||
|
||||
//apparently mincutoff is 0 after sprintf command ?????
|
||||
|
||||
double tmp = mincutoff + 0.1;
|
||||
sprintf(buffer, "Communication cutoff is too small "
|
||||
"for SNAP micro load balancing, increased to %lf",
|
||||
mincutoff+0.1);
|
||||
if (comm->me==0)
|
||||
error->warning(FLERR,buffer);
|
||||
|
||||
comm->cutghostuser = tmp;
|
||||
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (strcmp(arg[i],"schedule")==0) {
|
||||
i++;
|
||||
if (strcmp(arg[i],"static")==0)
|
||||
schedule_user = 1;
|
||||
if (strcmp(arg[i],"dynamic")==0)
|
||||
schedule_user = 2;
|
||||
if (strcmp(arg[i],"guided")==0)
|
||||
schedule_user = 3;
|
||||
if (strcmp(arg[i],"auto")==0)
|
||||
schedule_user = 4;
|
||||
if (strcmp(arg[i],"determine")==0)
|
||||
schedule_user = 5;
|
||||
if (schedule_user == 0)
|
||||
error->all(FLERR,"Illegal pair_style command");
|
||||
continue;
|
||||
}
|
||||
for (int i=0; i < narg; i++)
|
||||
error->all(FLERR,"Illegal pair_style command");
|
||||
}
|
||||
|
||||
if (nthreads < 0)
|
||||
nthreads = comm->nthreads;
|
||||
|
||||
if (use_shared_arrays < 0) {
|
||||
if (nthreads > 1 && atom->nlocal <= 2*nthreads)
|
||||
use_shared_arrays = 1;
|
||||
else use_shared_arrays = 0;
|
||||
}
|
||||
|
||||
// check if running non-optimized code with
|
||||
// optimization flags set
|
||||
|
||||
if (!use_optimized)
|
||||
if (nthreads > 1 ||
|
||||
use_shared_arrays ||
|
||||
do_load_balance ||
|
||||
schedule_user)
|
||||
error->all(FLERR,"Illegal pair_style command");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
@ -1170,26 +428,14 @@ void PairSNAP::coeff(int narg, char **arg)
|
|||
|
||||
if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
|
||||
|
||||
sna = new SNA*[nthreads];
|
||||
snaptr = new SNA(lmp,rfac0,twojmax,
|
||||
diagonalstyle,
|
||||
rmin0,switchflag,bzeroflag);
|
||||
snaptr->grow_rij(nmax);
|
||||
|
||||
// allocate memory for per OpenMP thread data which
|
||||
// is wrapped into the sna class
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none)
|
||||
#endif
|
||||
{
|
||||
int tid = omp_get_thread_num();
|
||||
sna[tid] = new SNA(lmp,rfac0,twojmax,
|
||||
diagonalstyle,use_shared_arrays,
|
||||
rmin0,switchflag,bzeroflag);
|
||||
if (!use_shared_arrays)
|
||||
sna[tid]->grow_rij(nmax);
|
||||
}
|
||||
|
||||
if (ncoeff != sna[0]->ncoeff) {
|
||||
if (ncoeff != snaptr->ncoeff) {
|
||||
if (comm->me == 0)
|
||||
printf("ncoeff = %d snancoeff = %d \n",ncoeff,sna[0]->ncoeff);
|
||||
printf("ncoeff = %d snancoeff = %d \n",ncoeff,snaptr->ncoeff);
|
||||
error->all(FLERR,"Incorrect SNAP parameter file");
|
||||
}
|
||||
|
||||
|
@ -1216,13 +462,7 @@ void PairSNAP::init_style()
|
|||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none)
|
||||
#endif
|
||||
{
|
||||
int tid = omp_get_thread_num();
|
||||
sna[tid]->init();
|
||||
}
|
||||
snaptr->init();
|
||||
|
||||
}
|
||||
|
||||
|
@ -1370,6 +610,8 @@ void PairSNAP::read_files(char *coefffilename, char *paramfilename)
|
|||
}
|
||||
}
|
||||
|
||||
if (comm->me == 0) fclose(fpcoeff);
|
||||
|
||||
// set flags for required keywords
|
||||
|
||||
rcutfacflag = 0;
|
||||
|
@ -1471,7 +713,6 @@ double PairSNAP::memory_usage()
|
|||
bytes += nmax*sizeof(int);
|
||||
bytes += (2*ncoeffall)*sizeof(double);
|
||||
bytes += (ncoeff*3)*sizeof(double);
|
||||
bytes += sna[0]->memory_usage()*nthreads;
|
||||
return bytes;
|
||||
}
|
||||
|
||||
|
|
|
@ -29,8 +29,6 @@ public:
|
|||
PairSNAP(class LAMMPS *);
|
||||
~PairSNAP();
|
||||
virtual void compute(int, int);
|
||||
void compute_regular(int, int);
|
||||
void compute_optimized(int, int);
|
||||
void settings(int, char **);
|
||||
virtual void coeff(int, char **);
|
||||
virtual void init_style();
|
||||
|
@ -43,59 +41,16 @@ public:
|
|||
protected:
|
||||
int ncoeffq, ncoeffall;
|
||||
double **bvec, ***dbvec;
|
||||
class SNA** sna;
|
||||
class SNA* snaptr;
|
||||
int nmax;
|
||||
int nthreads;
|
||||
virtual void allocate();
|
||||
void read_files(char *, char *);
|
||||
inline int equal(double* x,double* y);
|
||||
inline double dist2(double* x,double* y);
|
||||
double extra_cutoff();
|
||||
void load_balance();
|
||||
void set_sna_to_shared(int snaid,int i);
|
||||
void build_per_atom_arrays();
|
||||
|
||||
void compute_beta();
|
||||
void compute_bispectrum();
|
||||
|
||||
int schedule_user;
|
||||
double schedule_time_guided;
|
||||
double schedule_time_dynamic;
|
||||
|
||||
int ncalls_neigh;
|
||||
int do_load_balance;
|
||||
int ilistmask_max;
|
||||
int* ilistmask;
|
||||
int ghostinum;
|
||||
int ghostilist_max;
|
||||
int* ghostilist;
|
||||
int ghostnumneigh_max;
|
||||
int* ghostnumneigh;
|
||||
int* ghostneighs;
|
||||
int* ghostfirstneigh;
|
||||
int ghostneighs_total;
|
||||
int ghostneighs_max;
|
||||
|
||||
int use_optimized;
|
||||
int use_shared_arrays;
|
||||
|
||||
int i_max;
|
||||
int i_neighmax;
|
||||
int i_numpairs;
|
||||
int **i_pairs;
|
||||
double ***i_rij;
|
||||
int **i_inside;
|
||||
double **i_wj;
|
||||
double **i_rcutij;
|
||||
int *i_ninside;
|
||||
double ****i_uarraytot_r, ****i_uarraytot_i;
|
||||
double ******i_zarray_r, ******i_zarray_i;
|
||||
|
||||
#ifdef TIMING_INFO
|
||||
// timespec starttime, endtime;
|
||||
double timers[4];
|
||||
#endif
|
||||
|
||||
double rcutmax; // max cutoff for all elements
|
||||
int nelements; // # of unique elements
|
||||
char **elements; // names of unique elements
|
||||
|
@ -130,15 +85,6 @@ Self-explanatory. Check the input script syntax and compare to the
|
|||
documentation for the command. You can use -echo screen as a
|
||||
command-line option when running LAMMPS to see the offending line.
|
||||
|
||||
E: Must set number of threads via package omp command
|
||||
|
||||
Because you are using the USER-OMP package, set the number of threads
|
||||
via its settings, not by the pair_style snap nthreads setting.
|
||||
|
||||
W: Communication cutoff is too small for SNAP micro load balancing, increased to %lf
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Incorrect args for pair coefficients
|
||||
|
||||
Self-explanatory. Check the input script or data file.
|
||||
|
|
748
src/SNAP/sna.cpp
748
src/SNAP/sna.cpp
|
@ -21,7 +21,6 @@
|
|||
#include "math_extra.h"
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include "openmp_snap.h"
|
||||
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
|
@ -114,12 +113,11 @@ using namespace MathConst;
|
|||
------------------------------------------------------------------------- */
|
||||
|
||||
SNA::SNA(LAMMPS* lmp, double rfac0_in,
|
||||
int twojmax_in, int diagonalstyle_in, int use_shared_arrays_in,
|
||||
int twojmax_in, int diagonalstyle_in,
|
||||
double rmin0_in, int switch_flag_in, int bzero_flag_in) : Pointers(lmp)
|
||||
{
|
||||
wself = 1.0;
|
||||
|
||||
use_shared_arrays = use_shared_arrays_in;
|
||||
rfac0 = rfac0_in;
|
||||
rmin0 = rmin0_in;
|
||||
switch_flag = switch_flag_in;
|
||||
|
@ -141,7 +139,8 @@ SNA::SNA(LAMMPS* lmp, double rfac0_in,
|
|||
wj = NULL;
|
||||
rcutij = NULL;
|
||||
nmax = 0;
|
||||
idxj = NULL;
|
||||
idxz = NULL;
|
||||
idxb= NULL;
|
||||
|
||||
if (bzero_flag) {
|
||||
double www = wself*wself*wself;
|
||||
|
@ -149,133 +148,178 @@ SNA::SNA(LAMMPS* lmp, double rfac0_in,
|
|||
bzero[j] = www*(j+1);
|
||||
}
|
||||
|
||||
#ifdef TIMING_INFO
|
||||
timers = new double[20];
|
||||
for(int i = 0; i < 20; i++) timers[i] = 0;
|
||||
print = 0;
|
||||
counter = 0;
|
||||
#endif
|
||||
|
||||
build_indexlist();
|
||||
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
SNA::~SNA()
|
||||
{
|
||||
if(!use_shared_arrays) {
|
||||
destroy_twojmax_arrays();
|
||||
memory->destroy(rij);
|
||||
memory->destroy(inside);
|
||||
memory->destroy(wj);
|
||||
memory->destroy(rcutij);
|
||||
memory->destroy(bvec);
|
||||
memory->destroy(dbvec);
|
||||
}
|
||||
delete[] idxj;
|
||||
destroy_twojmax_arrays();
|
||||
memory->destroy(rij);
|
||||
memory->destroy(inside);
|
||||
memory->destroy(wj);
|
||||
memory->destroy(rcutij);
|
||||
memory->destroy(bvec);
|
||||
memory->destroy(dbvec);
|
||||
delete[] idxz;
|
||||
delete[] idxb;
|
||||
}
|
||||
|
||||
void SNA::build_indexlist()
|
||||
{
|
||||
if(diagonalstyle == 0) {
|
||||
int idxj_count = 0;
|
||||
if(diagonalstyle != 3)
|
||||
error->all(FLERR, "diagonal_style must be 3\n");
|
||||
|
||||
for(int j1 = 0; j1 <= twojmax; j1++)
|
||||
for(int j2 = 0; j2 <= j1; j2++)
|
||||
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2)
|
||||
idxj_count++;
|
||||
// index list for cglist
|
||||
|
||||
// indexList can be changed here
|
||||
int jdim = twojmax + 1;
|
||||
memory->create(idxcg_block, jdim, jdim, jdim,
|
||||
"sna:idxcg_block");
|
||||
|
||||
idxj = new SNA_LOOPINDICES[idxj_count];
|
||||
idxj_max = idxj_count;
|
||||
int idxcg_count = 0;
|
||||
for(int j1 = 0; j1 <= twojmax; j1++)
|
||||
for(int j2 = 0; j2 <= j1; j2++)
|
||||
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) {
|
||||
idxcg_block[j1][j2][j] = idxcg_count;
|
||||
for (int m1 = 0; m1 <= j1; m1++)
|
||||
for (int m2 = 0; m2 <= j2; m2++)
|
||||
idxcg_count++;
|
||||
}
|
||||
idxcg_max = idxcg_count;
|
||||
|
||||
idxj_count = 0;
|
||||
// index list for uarray
|
||||
// need to include both halves
|
||||
|
||||
for(int j1 = 0; j1 <= twojmax; j1++)
|
||||
for(int j2 = 0; j2 <= j1; j2++)
|
||||
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) {
|
||||
idxj[idxj_count].j1 = j1;
|
||||
idxj[idxj_count].j2 = j2;
|
||||
idxj[idxj_count].j = j;
|
||||
idxj_count++;
|
||||
memory->create(idxu_block, jdim,
|
||||
"sna:idxu_block");
|
||||
|
||||
int idxu_count = 0;
|
||||
|
||||
for(int j = 0; j <= twojmax; j++) {
|
||||
idxu_block[j] = idxu_count;
|
||||
for(int mb = 0; mb <= j; mb++)
|
||||
for(int ma = 0; ma <= j; ma++)
|
||||
idxu_count++;
|
||||
}
|
||||
idxu_max = idxu_count;
|
||||
|
||||
// index list for beta and B
|
||||
|
||||
int idxb_count = 0;
|
||||
for(int j1 = 0; j1 <= twojmax; j1++)
|
||||
for(int j2 = 0; j2 <= j1; j2++)
|
||||
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2)
|
||||
if (j >= j1) idxb_count++;
|
||||
|
||||
idxb_max = idxb_count;
|
||||
idxb = new SNA_BINDICES[idxb_max];
|
||||
|
||||
idxb_count = 0;
|
||||
for(int j1 = 0; j1 <= twojmax; j1++)
|
||||
for(int j2 = 0; j2 <= j1; j2++)
|
||||
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2)
|
||||
if (j >= j1) {
|
||||
idxb[idxb_count].j1 = j1;
|
||||
idxb[idxb_count].j2 = j2;
|
||||
idxb[idxb_count].j = j;
|
||||
idxb_count++;
|
||||
}
|
||||
}
|
||||
|
||||
if(diagonalstyle == 1) {
|
||||
int idxj_count = 0;
|
||||
// reverse index list for beta and b
|
||||
|
||||
for(int j1 = 0; j1 <= twojmax; j1++)
|
||||
for(int j = 0; j <= MIN(twojmax, 2 * j1); j += 2) {
|
||||
idxj_count++;
|
||||
memory->create(idxb_block, jdim, jdim, jdim,
|
||||
"sna:idxb_block");
|
||||
idxb_count = 0;
|
||||
for(int j1 = 0; j1 <= twojmax; j1++)
|
||||
for(int j2 = 0; j2 <= j1; j2++)
|
||||
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) {
|
||||
if (j < j1) continue;
|
||||
idxb_block[j1][j2][j] = idxb_count;
|
||||
idxb_count++;
|
||||
}
|
||||
|
||||
// indexList can be changed here
|
||||
// index list for zlist
|
||||
|
||||
idxj = new SNA_LOOPINDICES[idxj_count];
|
||||
idxj_max = idxj_count;
|
||||
int idxz_count = 0;
|
||||
|
||||
idxj_count = 0;
|
||||
for(int j1 = 0; j1 <= twojmax; j1++)
|
||||
for(int j2 = 0; j2 <= j1; j2++)
|
||||
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2)
|
||||
for (int mb = 0; 2*mb <= j; mb++)
|
||||
for (int ma = 0; ma <= j; ma++)
|
||||
idxz_count++;
|
||||
|
||||
idxz_max = idxz_count;
|
||||
idxz = new SNA_ZINDICES[idxz_max];
|
||||
|
||||
memory->create(idxz_block, jdim, jdim, jdim,
|
||||
"sna:idxz_block");
|
||||
|
||||
idxz_count = 0;
|
||||
for(int j1 = 0; j1 <= twojmax; j1++)
|
||||
for(int j2 = 0; j2 <= j1; j2++)
|
||||
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) {
|
||||
idxz_block[j1][j2][j] = idxz_count;
|
||||
|
||||
for(int j1 = 0; j1 <= twojmax; j1++)
|
||||
for(int j = 0; j <= MIN(twojmax, 2 * j1); j += 2) {
|
||||
idxj[idxj_count].j1 = j1;
|
||||
idxj[idxj_count].j2 = j1;
|
||||
idxj[idxj_count].j = j;
|
||||
idxj_count++;
|
||||
}
|
||||
}
|
||||
// find right beta[jjb] entry
|
||||
// multiply and divide by j+1 factors
|
||||
// account for multiplicity of 1, 2, or 3
|
||||
|
||||
if(diagonalstyle == 2) {
|
||||
int idxj_count = 0;
|
||||
// CODE HORROR!! Need to figure this out later
|
||||
double betaj = 1.0;
|
||||
// #ifdef USE_YDIRECT_ZLIST
|
||||
// double betaj;
|
||||
// if (j >= j1) {
|
||||
// const int jjb = idxb_block[j1][j2][j];
|
||||
// if (j1 == j) {
|
||||
// if (j2 == j) betaj = 3*beta[jjb];
|
||||
// else betaj = 2*beta[jjb];
|
||||
// } else betaj = beta[jjb];
|
||||
// } else if (j >= j2) {
|
||||
// const int jjb = idxb_block[j][j2][j1];
|
||||
// if (j2 == j) betaj = 2*beta[jjb]*(j1+1)/(j+1.0);
|
||||
// else betaj = beta[jjb]*(j1+1)/(j+1.0);
|
||||
// } else {
|
||||
// const int jjb = idxb_block[j2][j][j1];
|
||||
// betaj = beta[jjb]*(j1+1)/(j+1.0);
|
||||
// }
|
||||
// #else
|
||||
// double betaj;
|
||||
// if (j >= j1) {
|
||||
// const int jjb = idxb_block[j1][j2][j];
|
||||
// betaj = beta[jjb];
|
||||
// } else if (j >= j2) {
|
||||
// const int jjb = idxb_block[j][j2][j1];
|
||||
// betaj = beta[jjb]*(j1+1)/(j+1.0);
|
||||
// } else {
|
||||
// const int jjb = idxb_block[j2][j][j1];
|
||||
// betaj = beta[jjb]*(j1+1)/(j+1.0);
|
||||
// }
|
||||
// #endif
|
||||
|
||||
for(int j1 = 0; j1 <= twojmax; j1++) {
|
||||
idxj_count++;
|
||||
}
|
||||
for (int mb = 0; 2*mb <= j; mb++)
|
||||
for (int ma = 0; ma <= j; ma++) {
|
||||
idxz[idxz_count].j1 = j1;
|
||||
idxz[idxz_count].j2 = j2;
|
||||
idxz[idxz_count].j = j;
|
||||
idxz[idxz_count].ma1min = MAX(0, (2 * ma - j - j2 + j1) / 2);
|
||||
idxz[idxz_count].ma2max = (2 * ma - j - (2 * idxz[idxz_count].ma1min - j1) + j2) / 2;
|
||||
idxz[idxz_count].na = MIN(j1, (2 * ma - j + j2 + j1) / 2) - idxz[idxz_count].ma1min + 1;
|
||||
idxz[idxz_count].mb1min = MAX(0, (2 * mb - j - j2 + j1) / 2);
|
||||
idxz[idxz_count].mb2max = (2 * mb - j - (2 * idxz[idxz_count].mb1min - j1) + j2) / 2;
|
||||
idxz[idxz_count].nb = MIN(j1, (2 * mb - j + j2 + j1) / 2) - idxz[idxz_count].mb1min + 1;
|
||||
|
||||
// indexList can be changed here
|
||||
// apply to z(j1,j2,j,ma,mb) to unique element of y(j)
|
||||
// find right beta[jjb] and y_list[jju] entries
|
||||
|
||||
idxj = new SNA_LOOPINDICES[idxj_count];
|
||||
idxj_max = idxj_count;
|
||||
const int jju = idxu_block[j] + (j+1)*mb + ma;
|
||||
idxz[idxz_count].jju = jju;
|
||||
idxz[idxz_count].betaj = betaj;
|
||||
|
||||
idxj_count = 0;
|
||||
|
||||
for(int j1 = 0; j1 <= twojmax; j1++) {
|
||||
idxj[idxj_count].j1 = j1;
|
||||
idxj[idxj_count].j2 = j1;
|
||||
idxj[idxj_count].j = j1;
|
||||
idxj_count++;
|
||||
}
|
||||
}
|
||||
|
||||
if(diagonalstyle == 3) {
|
||||
int idxj_count = 0;
|
||||
|
||||
for(int j1 = 0; j1 <= twojmax; j1++)
|
||||
for(int j2 = 0; j2 <= j1; j2++)
|
||||
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2)
|
||||
if (j >= j1) idxj_count++;
|
||||
|
||||
// indexList can be changed here
|
||||
|
||||
idxj = new SNA_LOOPINDICES[idxj_count];
|
||||
idxj_max = idxj_count;
|
||||
|
||||
idxj_count = 0;
|
||||
|
||||
for(int j1 = 0; j1 <= twojmax; j1++)
|
||||
for(int j2 = 0; j2 <= j1; j2++)
|
||||
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2)
|
||||
if (j >= j1) {
|
||||
idxj[idxj_count].j1 = j1;
|
||||
idxj[idxj_count].j2 = j2;
|
||||
idxj[idxj_count].j = j;
|
||||
idxj_count++;
|
||||
idxz_count++;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
|
@ -292,16 +336,14 @@ void SNA::grow_rij(int newnmax)
|
|||
|
||||
nmax = newnmax;
|
||||
|
||||
if(!use_shared_arrays) {
|
||||
memory->destroy(rij);
|
||||
memory->destroy(inside);
|
||||
memory->destroy(wj);
|
||||
memory->destroy(rcutij);
|
||||
memory->create(rij, nmax, 3, "pair:rij");
|
||||
memory->create(inside, nmax, "pair:inside");
|
||||
memory->create(wj, nmax, "pair:wj");
|
||||
memory->create(rcutij, nmax, "pair:rcutij");
|
||||
}
|
||||
memory->destroy(rij);
|
||||
memory->destroy(inside);
|
||||
memory->destroy(wj);
|
||||
memory->destroy(rcutij);
|
||||
memory->create(rij, nmax, 3, "pair:rij");
|
||||
memory->create(inside, nmax, "pair:inside");
|
||||
memory->create(wj, nmax, "pair:wj");
|
||||
memory->create(rcutij, nmax, "pair:rcutij");
|
||||
}
|
||||
/* ----------------------------------------------------------------------
|
||||
compute Ui by summing over neighbors j
|
||||
|
@ -320,10 +362,6 @@ void SNA::compute_ui(int jnum)
|
|||
zero_uarraytot();
|
||||
addself_uarraytot(wself);
|
||||
|
||||
#ifdef TIMING_INFO
|
||||
clock_gettime(CLOCK_REALTIME, &starttime);
|
||||
#endif
|
||||
|
||||
for(int j = 0; j < jnum; j++) {
|
||||
x = rij[j][0];
|
||||
y = rij[j][1];
|
||||
|
@ -339,48 +377,6 @@ void SNA::compute_ui(int jnum)
|
|||
add_uarraytot(r, wj[j], rcutij[j]);
|
||||
}
|
||||
|
||||
#ifdef TIMING_INFO
|
||||
clock_gettime(CLOCK_REALTIME, &endtime);
|
||||
timers[0] += (endtime.tv_sec - starttime.tv_sec + 1.0 *
|
||||
(endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
void SNA::compute_ui_omp(int jnum, int sub_threads)
|
||||
{
|
||||
double rsq, r, x, y, z, z0, theta0;
|
||||
|
||||
// utot(j,ma,mb) = 0 for all j,ma,ma
|
||||
// utot(j,ma,ma) = 1 for all j,ma
|
||||
// for j in neighbors of i:
|
||||
// compute r0 = (x,y,z,z0)
|
||||
// utot(j,ma,mb) += u(r0;j,ma,mb) for all j,ma,mb
|
||||
|
||||
zero_uarraytot();
|
||||
addself_uarraytot(wself);
|
||||
|
||||
for(int j = 0; j < jnum; j++) {
|
||||
x = rij[j][0];
|
||||
y = rij[j][1];
|
||||
z = rij[j][2];
|
||||
rsq = x * x + y * y + z * z;
|
||||
r = sqrt(rsq);
|
||||
theta0 = (r - rmin0) * rfac0 * MY_PI / (rcutij[j] - rmin0);
|
||||
// theta0 = (r - rmin0) * rscale0;
|
||||
z0 = r / tan(theta0);
|
||||
omp_set_num_threads(sub_threads);
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel shared(x,y,z,z0,r,sub_threads) default(none)
|
||||
#endif
|
||||
{
|
||||
compute_uarray_omp(x, y, z, z0, r, sub_threads);
|
||||
}
|
||||
add_uarraytot(r, wj[j], rcutij[j]);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
@ -389,24 +385,6 @@ void SNA::compute_ui_omp(int jnum, int sub_threads)
|
|||
|
||||
void SNA::compute_zi()
|
||||
{
|
||||
// for j1 = 0,...,twojmax
|
||||
// for j2 = 0,twojmax
|
||||
// for j = |j1-j2|,Min(twojmax,j1+j2),2
|
||||
// for ma = 0,...,j
|
||||
// for mb = 0,...,jmid
|
||||
// z(j1,j2,j,ma,mb) = 0
|
||||
// for ma1 = Max(0,ma+(j1-j2-j)/2),Min(j1,ma+(j1+j2-j)/2)
|
||||
// sumb1 = 0
|
||||
// ma2 = ma-ma1+(j1+j2-j)/2;
|
||||
// for mb1 = Max(0,mb+(j1-j2-j)/2),Min(j1,mb+(j1+j2-j)/2)
|
||||
// mb2 = mb-mb1+(j1+j2-j)/2;
|
||||
// sumb1 += cg(j1,mb1,j2,mb2,j) *
|
||||
// u(j1,ma1,mb1) * u(j2,ma2,mb2)
|
||||
// z(j1,j2,j,ma,mb) += sumb1*cg(j1,ma1,j2,ma2,j)
|
||||
|
||||
#ifdef TIMING_INFO
|
||||
clock_gettime(CLOCK_REALTIME, &starttime);
|
||||
#endif
|
||||
|
||||
// compute_dbidrj() requires full j1/j2/j chunk of z elements
|
||||
// use zarray j1/j2 symmetry
|
||||
|
@ -449,84 +427,13 @@ void SNA::compute_zi()
|
|||
} // end loop over j
|
||||
} // end loop over j1, j2
|
||||
|
||||
#ifdef TIMING_INFO
|
||||
clock_gettime(CLOCK_REALTIME, &endtime);
|
||||
timers[1] += (endtime.tv_sec - starttime.tv_sec + 1.0 *
|
||||
(endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
|
||||
#endif
|
||||
}
|
||||
|
||||
void SNA::compute_zi_omp(int sub_threads)
|
||||
{
|
||||
// for j1 = 0,...,twojmax
|
||||
// for j2 = 0,twojmax
|
||||
// for j = |j1-j2|,Min(twojmax,j1+j2),2
|
||||
// for ma = 0,...,j
|
||||
// for mb = 0,...,j
|
||||
// z(j1,j2,j,ma,mb) = 0
|
||||
// for ma1 = Max(0,ma+(j1-j2-j)/2),Min(j1,ma+(j1+j2-j)/2)
|
||||
// sumb1 = 0
|
||||
// ma2 = ma-ma1+(j1+j2-j)/2;
|
||||
// for mb1 = Max(0,mb+(j1-j2-j)/2),Min(j1,mb+(j1+j2-j)/2)
|
||||
// mb2 = mb-mb1+(j1+j2-j)/2;
|
||||
// sumb1 += cg(j1,mb1,j2,mb2,j) *
|
||||
// u(j1,ma1,mb1) * u(j2,ma2,mb2)
|
||||
// z(j1,j2,j,ma,mb) += sumb1*cg(j1,ma1,j2,ma2,j)
|
||||
|
||||
if(omp_in_parallel())
|
||||
omp_set_num_threads(sub_threads);
|
||||
|
||||
// compute_dbidrj() requires full j1/j2/j chunk of z elements
|
||||
// use zarray j1/j2 symmetry
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel for schedule(auto) default(none)
|
||||
#endif
|
||||
for(int j1 = 0; j1 <= twojmax; j1++)
|
||||
for(int j2 = 0; j2 <= j1; j2++)
|
||||
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) {
|
||||
|
||||
double sumb1_r, sumb1_i;
|
||||
int ma2, mb2;
|
||||
|
||||
for(int ma = 0; ma <= j; ma++)
|
||||
for(int mb = 0; mb <= j; mb++) {
|
||||
zarray_r[j1][j2][j][ma][mb] = 0.0;
|
||||
zarray_i[j1][j2][j][ma][mb] = 0.0;
|
||||
|
||||
for(int ma1 = MAX(0, (2 * ma - j - j2 + j1) / 2);
|
||||
ma1 <= MIN(j1, (2 * ma - j + j2 + j1) / 2); ma1++) {
|
||||
sumb1_r = 0.0;
|
||||
sumb1_i = 0.0;
|
||||
|
||||
ma2 = (2 * ma - j - (2 * ma1 - j1) + j2) / 2;
|
||||
|
||||
for(int mb1 = MAX(0, (2 * mb - j - j2 + j1) / 2);
|
||||
mb1 <= MIN(j1, (2 * mb - j + j2 + j1) / 2); mb1++) {
|
||||
|
||||
mb2 = (2 * mb - j - (2 * mb1 - j1) + j2) / 2;
|
||||
sumb1_r += cgarray[j1][j2][j][mb1][mb2] *
|
||||
(uarraytot_r[j1][ma1][mb1] * uarraytot_r[j2][ma2][mb2] -
|
||||
uarraytot_i[j1][ma1][mb1] * uarraytot_i[j2][ma2][mb2]);
|
||||
sumb1_i += cgarray[j1][j2][j][mb1][mb2] *
|
||||
(uarraytot_r[j1][ma1][mb1] * uarraytot_i[j2][ma2][mb2] +
|
||||
uarraytot_i[j1][ma1][mb1] * uarraytot_r[j2][ma2][mb2]);
|
||||
}
|
||||
|
||||
zarray_r[j1][j2][j][ma][mb] +=
|
||||
sumb1_r * cgarray[j1][j2][j][ma1][ma2];
|
||||
zarray_i[j1][j2][j][ma][mb] +=
|
||||
sumb1_i * cgarray[j1][j2][j][ma1][ma2];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute Yi by summing over products of beta and Zi
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void SNA::compute_yi(double* beta)
|
||||
void SNA::compute_yi(const double* beta)
|
||||
{
|
||||
int j;
|
||||
int idxz_count;
|
||||
|
@ -540,18 +447,18 @@ void SNA::compute_yi(double* beta)
|
|||
} // end loop over ma, mb
|
||||
} // end loop over j
|
||||
|
||||
for(int JJ = 0; JJ < idxj_max; JJ++) {
|
||||
const int j1 = idxj[JJ].j1;
|
||||
const int j2 = idxj[JJ].j2;
|
||||
const int j3 = idxj[JJ].j;
|
||||
for(int jjb = 0; jjb < idxb_max; jjb++) {
|
||||
const int j1 = idxb[jjb].j1;
|
||||
const int j2 = idxb[jjb].j2;
|
||||
const int j3 = idxb[jjb].j;
|
||||
|
||||
j = j3;
|
||||
jjjzarray_r = zarray_r[j1][j2][j3];
|
||||
jjjzarray_i = zarray_i[j1][j2][j3];
|
||||
for(int mb = 0; 2*mb <= j; mb++)
|
||||
for(int ma = 0; ma <= j; ma++) {
|
||||
yarray_r[j][ma][mb] += beta[JJ]*jjjzarray_r[ma][mb];
|
||||
yarray_i[j][ma][mb] += beta[JJ]*jjjzarray_i[ma][mb];
|
||||
yarray_r[j][ma][mb] += beta[jjb]*jjjzarray_r[ma][mb];
|
||||
yarray_i[j][ma][mb] += beta[jjb]*jjjzarray_i[ma][mb];
|
||||
} // end loop over ma, mb
|
||||
|
||||
j = j1;
|
||||
|
@ -560,8 +467,8 @@ void SNA::compute_yi(double* beta)
|
|||
double j1fac = (j3+1)/(j+1.0);
|
||||
for(int mb = 0; 2*mb <= j; mb++)
|
||||
for(int ma = 0; ma <= j; ma++) {
|
||||
yarray_r[j][ma][mb] += beta[JJ]*jjjzarray_r[ma][mb]*j1fac;
|
||||
yarray_i[j][ma][mb] += beta[JJ]*jjjzarray_i[ma][mb]*j1fac;
|
||||
yarray_r[j][ma][mb] += beta[jjb]*jjjzarray_r[ma][mb]*j1fac;
|
||||
yarray_i[j][ma][mb] += beta[jjb]*jjjzarray_i[ma][mb]*j1fac;
|
||||
} // end loop over ma, mb
|
||||
|
||||
j = j2;
|
||||
|
@ -570,8 +477,8 @@ void SNA::compute_yi(double* beta)
|
|||
double j2fac = (j3+1)/(j+1.0);
|
||||
for(int mb = 0; 2*mb <= j; mb++)
|
||||
for(int ma = 0; ma <= j; ma++) {
|
||||
yarray_r[j][ma][mb] += beta[JJ]*jjjzarray_r[ma][mb]*j2fac;
|
||||
yarray_i[j][ma][mb] += beta[JJ]*jjjzarray_i[ma][mb]*j2fac;
|
||||
yarray_r[j][ma][mb] += beta[jjb]*jjjzarray_r[ma][mb]*j2fac;
|
||||
yarray_i[j][ma][mb] += beta[jjb]*jjjzarray_i[ma][mb]*j2fac;
|
||||
} // end loop over ma, mb
|
||||
|
||||
} // end loop over jjb
|
||||
|
@ -655,10 +562,6 @@ void SNA::compute_bi()
|
|||
// b(j1,j2,j) +=
|
||||
// 2*Conj(u(j,ma,mb))*z(j1,j2,j,ma,mb)
|
||||
|
||||
#ifdef TIMING_INFO
|
||||
clock_gettime(CLOCK_REALTIME, &starttime);
|
||||
#endif
|
||||
|
||||
for(int j1 = 0; j1 <= twojmax; j1++)
|
||||
for(int j2 = 0; j2 <= j1; j2++) {
|
||||
for(int j = abs(j1 - j2);
|
||||
|
@ -691,12 +594,6 @@ void SNA::compute_bi()
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef TIMING_INFO
|
||||
clock_gettime(CLOCK_REALTIME, &endtime);
|
||||
timers[2] += (endtime.tv_sec - starttime.tv_sec + 1.0 *
|
||||
(endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
@ -760,164 +657,8 @@ void SNA::compute_duidrj(double* rij, double wj, double rcut)
|
|||
z0 = r * cs / sn;
|
||||
dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq;
|
||||
|
||||
#ifdef TIMING_INFO
|
||||
clock_gettime(CLOCK_REALTIME, &starttime);
|
||||
#endif
|
||||
|
||||
compute_duarray(x, y, z, z0, r, dz0dr, wj, rcut);
|
||||
|
||||
#ifdef TIMING_INFO
|
||||
clock_gettime(CLOCK_REALTIME, &endtime);
|
||||
timers[3] += (endtime.tv_sec - starttime.tv_sec + 1.0 *
|
||||
(endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
calculate derivative of Bi w.r.t. atom j
|
||||
variant using indexlist for j1,j2,j
|
||||
variant not using symmetry relation
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void SNA::compute_dbidrj_nonsymm()
|
||||
{
|
||||
// for j1 = 0,...,twojmax
|
||||
// for j2 = 0,twojmax
|
||||
// for j = |j1-j2|,Min(twojmax,j1+j2),2
|
||||
// dbdr(j1,j2,j) = 0
|
||||
// for ma = 0,...,j
|
||||
// for mb = 0,...,j
|
||||
// dzdr = 0
|
||||
// for ma1 = Max(0,ma+(j1-j2-j)/2),Min(j1,ma+(j1+j2-j)/2)
|
||||
// sumb1 = 0
|
||||
// ma2 = ma-ma1+(j1+j2-j)/2;
|
||||
// for mb1 = Max(0,mb+(j1-j2-j)/2),Min(j1,mb+(j1+j2-j)/2)
|
||||
// mb2 = mb-mb1+(j1+j2-j)/2;
|
||||
// sumb1 += cg(j1,mb1,j2,mb2,j) *
|
||||
// (dudr(j1,ma1,mb1) * u(j2,ma2,mb2) +
|
||||
// u(j1,ma1,mb1) * dudr(j2,ma2,mb2))
|
||||
// dzdr += sumb1*cg(j1,ma1,j2,ma2,j)
|
||||
// dbdr(j1,j2,j) +=
|
||||
// Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb) +
|
||||
// Conj(u(j,ma,mb))*dzdr
|
||||
|
||||
double* dbdr;
|
||||
double* dudr_r, *dudr_i;
|
||||
double sumb1_r[3], sumb1_i[3], dzdr_r[3], dzdr_i[3];
|
||||
int ma2;
|
||||
|
||||
#ifdef TIMING_INFO
|
||||
clock_gettime(CLOCK_REALTIME, &starttime);
|
||||
#endif
|
||||
|
||||
for(int JJ = 0; JJ < idxj_max; JJ++) {
|
||||
const int j1 = idxj[JJ].j1;
|
||||
const int j2 = idxj[JJ].j2;
|
||||
const int j = idxj[JJ].j;
|
||||
|
||||
dbdr = dbarray[j1][j2][j];
|
||||
dbdr[0] = 0.0;
|
||||
dbdr[1] = 0.0;
|
||||
dbdr[2] = 0.0;
|
||||
|
||||
double** *j1duarray_r = duarray_r[j1];
|
||||
double** *j2duarray_r = duarray_r[j2];
|
||||
double** *j1duarray_i = duarray_i[j1];
|
||||
double** *j2duarray_i = duarray_i[j2];
|
||||
double** j1uarraytot_r = uarraytot_r[j1];
|
||||
double** j2uarraytot_r = uarraytot_r[j2];
|
||||
double** j1uarraytot_i = uarraytot_i[j1];
|
||||
double** j2uarraytot_i = uarraytot_i[j2];
|
||||
double** j1j2jcgarray = cgarray[j1][j2][j];
|
||||
|
||||
for(int ma = 0; ma <= j; ma++)
|
||||
for(int mb = 0; mb <= j; mb++) {
|
||||
dzdr_r[0] = 0.0;
|
||||
dzdr_r[1] = 0.0;
|
||||
dzdr_r[2] = 0.0;
|
||||
dzdr_i[0] = 0.0;
|
||||
dzdr_i[1] = 0.0;
|
||||
dzdr_i[2] = 0.0;
|
||||
|
||||
const int max_mb1 = MIN(j1, (2 * mb - j + j2 + j1) / 2) + 1;
|
||||
const int max_ma1 = MIN(j1, (2 * ma - j + j2 + j1) / 2) + 1;
|
||||
|
||||
for(int ma1 = MAX(0, (2 * ma - j - j2 + j1) / 2);
|
||||
ma1 < max_ma1; ma1++) {
|
||||
|
||||
ma2 = (2 * ma - j - (2 * ma1 - j1) + j2) / 2;
|
||||
sumb1_r[0] = 0.0;
|
||||
sumb1_r[1] = 0.0;
|
||||
sumb1_r[2] = 0.0;
|
||||
sumb1_i[0] = 0.0;
|
||||
sumb1_i[1] = 0.0;
|
||||
sumb1_i[2] = 0.0;
|
||||
|
||||
//inside loop 54 operations (mul and add)
|
||||
for(int mb1 = MAX(0, (2 * mb - j - j2 + j1) / 2),
|
||||
mb2 = mb + (j1 + j2 - j) / 2 - mb1;
|
||||
mb1 < max_mb1; mb1++, mb2--) {
|
||||
|
||||
double* dudr1_r, *dudr1_i, *dudr2_r, *dudr2_i;
|
||||
|
||||
dudr1_r = j1duarray_r[ma1][mb1];
|
||||
dudr2_r = j2duarray_r[ma2][mb2];
|
||||
dudr1_i = j1duarray_i[ma1][mb1];
|
||||
dudr2_i = j2duarray_i[ma2][mb2];
|
||||
|
||||
const double cga_mb1mb2 = j1j2jcgarray[mb1][mb2];
|
||||
const double uat_r_ma2mb2 = cga_mb1mb2 * j2uarraytot_r[ma2][mb2];
|
||||
const double uat_r_ma1mb1 = cga_mb1mb2 * j1uarraytot_r[ma1][mb1];
|
||||
const double uat_i_ma2mb2 = cga_mb1mb2 * j2uarraytot_i[ma2][mb2];
|
||||
const double uat_i_ma1mb1 = cga_mb1mb2 * j1uarraytot_i[ma1][mb1];
|
||||
|
||||
for(int k = 0; k < 3; k++) {
|
||||
sumb1_r[k] += dudr1_r[k] * uat_r_ma2mb2;
|
||||
sumb1_r[k] -= dudr1_i[k] * uat_i_ma2mb2;
|
||||
sumb1_i[k] += dudr1_r[k] * uat_i_ma2mb2;
|
||||
sumb1_i[k] += dudr1_i[k] * uat_r_ma2mb2;
|
||||
|
||||
sumb1_r[k] += dudr2_r[k] * uat_r_ma1mb1;
|
||||
sumb1_r[k] -= dudr2_i[k] * uat_i_ma1mb1;
|
||||
sumb1_i[k] += dudr2_r[k] * uat_i_ma1mb1;
|
||||
sumb1_i[k] += dudr2_i[k] * uat_r_ma1mb1;
|
||||
}
|
||||
} // end loop over mb1,mb2
|
||||
|
||||
// dzdr += sumb1*cg(j1,ma1,j2,ma2,j)
|
||||
|
||||
dzdr_r[0] += sumb1_r[0] * j1j2jcgarray[ma1][ma2];
|
||||
dzdr_r[1] += sumb1_r[1] * j1j2jcgarray[ma1][ma2];
|
||||
dzdr_r[2] += sumb1_r[2] * j1j2jcgarray[ma1][ma2];
|
||||
dzdr_i[0] += sumb1_i[0] * j1j2jcgarray[ma1][ma2];
|
||||
dzdr_i[1] += sumb1_i[1] * j1j2jcgarray[ma1][ma2];
|
||||
dzdr_i[2] += sumb1_i[2] * j1j2jcgarray[ma1][ma2];
|
||||
} // end loop over ma1,ma2
|
||||
|
||||
// dbdr(j1,j2,j) +=
|
||||
// Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb) +
|
||||
// Conj(u(j,ma,mb))*dzdr
|
||||
|
||||
dudr_r = duarray_r[j][ma][mb];
|
||||
dudr_i = duarray_i[j][ma][mb];
|
||||
|
||||
for(int k = 0; k < 3; k++)
|
||||
dbdr[k] +=
|
||||
(dudr_r[k] * zarray_r[j1][j2][j][ma][mb] +
|
||||
dudr_i[k] * zarray_i[j1][j2][j][ma][mb]) +
|
||||
(uarraytot_r[j][ma][mb] * dzdr_r[k] +
|
||||
uarraytot_i[j][ma][mb] * dzdr_i[k]);
|
||||
} //end loop over ma mb
|
||||
|
||||
} //end loop over j1 j2 j
|
||||
|
||||
#ifdef TIMING_INFO
|
||||
clock_gettime(CLOCK_REALTIME, &endtime);
|
||||
timers[4] += (endtime.tv_sec - starttime.tv_sec + 1.0 *
|
||||
(endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
@ -958,14 +699,10 @@ void SNA::compute_dbidrj()
|
|||
double jjjmambzarray_r;
|
||||
double jjjmambzarray_i;
|
||||
|
||||
#ifdef TIMING_INFO
|
||||
clock_gettime(CLOCK_REALTIME, &starttime);
|
||||
#endif
|
||||
|
||||
for(int JJ = 0; JJ < idxj_max; JJ++) {
|
||||
const int j1 = idxj[JJ].j1;
|
||||
const int j2 = idxj[JJ].j2;
|
||||
const int j = idxj[JJ].j;
|
||||
for(int jjb = 0; jjb < idxb_max; jjb++) {
|
||||
const int j1 = idxb[jjb].j1;
|
||||
const int j2 = idxb[jjb].j2;
|
||||
const int j = idxb[jjb].j;
|
||||
|
||||
dbdr = dbarray[j1][j2][j];
|
||||
dbdr[0] = 0.0;
|
||||
|
@ -1149,12 +886,6 @@ void SNA::compute_dbidrj()
|
|||
|
||||
} //end loop over j1 j2 j
|
||||
|
||||
#ifdef TIMING_INFO
|
||||
clock_gettime(CLOCK_REALTIME, &endtime);
|
||||
timers[4] += (endtime.tv_sec - starttime.tv_sec + 1.0 *
|
||||
(endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
@ -1251,27 +982,6 @@ void SNA::add_uarraytot(double r, double wj, double rcut)
|
|||
}
|
||||
}
|
||||
|
||||
void SNA::add_uarraytot_omp(double r, double wj, double rcut)
|
||||
{
|
||||
double sfac;
|
||||
|
||||
sfac = compute_sfac(r, rcut);
|
||||
|
||||
sfac *= wj;
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp for
|
||||
#endif
|
||||
for (int j = 0; j <= twojmax; j++)
|
||||
for (int ma = 0; ma <= j; ma++)
|
||||
for (int mb = 0; mb <= j; mb++) {
|
||||
uarraytot_r[j][ma][mb] +=
|
||||
sfac * uarray_r[j][ma][mb];
|
||||
uarraytot_i[j][ma][mb] +=
|
||||
sfac * uarray_i[j][ma][mb];
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute Wigner U-functions for one neighbor
|
||||
------------------------------------------------------------------------- */
|
||||
|
@ -1348,88 +1058,6 @@ void SNA::compute_uarray(double x, double y, double z,
|
|||
}
|
||||
}
|
||||
|
||||
void SNA::compute_uarray_omp(double x, double y, double z,
|
||||
double z0, double r, int /*sub_threads*/)
|
||||
{
|
||||
double r0inv;
|
||||
double a_r, b_r, a_i, b_i;
|
||||
double rootpq;
|
||||
|
||||
// compute Cayley-Klein parameters for unit quaternion
|
||||
|
||||
r0inv = 1.0 / sqrt(r * r + z0 * z0);
|
||||
a_r = r0inv * z0;
|
||||
a_i = -r0inv * z;
|
||||
b_r = r0inv * y;
|
||||
b_i = -r0inv * x;
|
||||
|
||||
// VMK Section 4.8.2
|
||||
|
||||
uarray_r[0][0][0] = 1.0;
|
||||
uarray_i[0][0][0] = 0.0;
|
||||
|
||||
for (int j = 1; j <= twojmax; j++) {
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp for
|
||||
#endif
|
||||
for (int mb = 0; mb < j; mb++) {
|
||||
uarray_r[j][0][mb] = 0.0;
|
||||
uarray_i[j][0][mb] = 0.0;
|
||||
|
||||
for (int ma = 0; ma < j; ma++) {
|
||||
rootpq = rootpqarray[j - ma][j - mb];
|
||||
uarray_r[j][ma][mb] +=
|
||||
rootpq *
|
||||
(a_r * uarray_r[j - 1][ma][mb] +
|
||||
a_i * uarray_i[j - 1][ma][mb]);
|
||||
uarray_i[j][ma][mb] +=
|
||||
rootpq *
|
||||
(a_r * uarray_i[j - 1][ma][mb] -
|
||||
a_i * uarray_r[j - 1][ma][mb]);
|
||||
|
||||
rootpq = rootpqarray[ma + 1][j - mb];
|
||||
uarray_r[j][ma + 1][mb] =
|
||||
-rootpq *
|
||||
(b_r * uarray_r[j - 1][ma][mb] +
|
||||
b_i * uarray_i[j - 1][ma][mb]);
|
||||
uarray_i[j][ma + 1][mb] =
|
||||
-rootpq *
|
||||
(b_r * uarray_i[j - 1][ma][mb] -
|
||||
b_i * uarray_r[j - 1][ma][mb]);
|
||||
}
|
||||
}
|
||||
|
||||
int mb = j;
|
||||
uarray_r[j][0][mb] = 0.0;
|
||||
uarray_i[j][0][mb] = 0.0;
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp for
|
||||
#endif
|
||||
for (int ma = 0; ma < j; ma++) {
|
||||
rootpq = rootpqarray[j - ma][mb];
|
||||
uarray_r[j][ma][mb] +=
|
||||
rootpq *
|
||||
(b_r * uarray_r[j - 1][ma][mb - 1] -
|
||||
b_i * uarray_i[j - 1][ma][mb - 1]);
|
||||
uarray_i[j][ma][mb] +=
|
||||
rootpq *
|
||||
(b_r * uarray_i[j - 1][ma][mb - 1] +
|
||||
b_i * uarray_r[j - 1][ma][mb - 1]);
|
||||
|
||||
rootpq = rootpqarray[ma + 1][mb];
|
||||
uarray_r[j][ma + 1][mb] =
|
||||
rootpq *
|
||||
(a_r * uarray_r[j - 1][ma][mb - 1] -
|
||||
a_i * uarray_i[j - 1][ma][mb - 1]);
|
||||
uarray_i[j][ma + 1][mb] =
|
||||
rootpq *
|
||||
(a_r * uarray_i[j - 1][ma][mb - 1] +
|
||||
a_i * uarray_r[j - 1][ma][mb - 1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute derivatives of Wigner U-functions for one neighbor
|
||||
see comments in compute_uarray()
|
||||
|
@ -1644,20 +1272,18 @@ void SNA::create_twojmax_arrays()
|
|||
bzero = NULL;
|
||||
|
||||
|
||||
if(!use_shared_arrays) {
|
||||
memory->create(uarraytot_r, jdim, jdim, jdim,
|
||||
"sna:uarraytot");
|
||||
memory->create(zarray_r, jdim, jdim, jdim, jdim, jdim,
|
||||
"sna:zarray");
|
||||
memory->create(uarraytot_i, jdim, jdim, jdim,
|
||||
"sna:uarraytot");
|
||||
memory->create(zarray_i, jdim, jdim, jdim, jdim, jdim,
|
||||
"sna:zarray");
|
||||
memory->create(yarray_r, jdim, jdim, jdim,
|
||||
"sna:yarray");
|
||||
memory->create(yarray_i, jdim, jdim, jdim,
|
||||
"sna:yarray");
|
||||
}
|
||||
memory->create(uarraytot_r, jdim, jdim, jdim,
|
||||
"sna:uarraytot");
|
||||
memory->create(zarray_r, jdim, jdim, jdim, jdim, jdim,
|
||||
"sna:zarray");
|
||||
memory->create(uarraytot_i, jdim, jdim, jdim,
|
||||
"sna:uarraytot");
|
||||
memory->create(zarray_i, jdim, jdim, jdim, jdim, jdim,
|
||||
"sna:zarray");
|
||||
memory->create(yarray_r, jdim, jdim, jdim,
|
||||
"sna:yarray");
|
||||
memory->create(yarray_i, jdim, jdim, jdim,
|
||||
"sna:yarray");
|
||||
|
||||
}
|
||||
|
||||
|
@ -1680,14 +1306,12 @@ void SNA::destroy_twojmax_arrays()
|
|||
if (bzero_flag)
|
||||
memory->destroy(bzero);
|
||||
|
||||
if(!use_shared_arrays) {
|
||||
memory->destroy(uarraytot_r);
|
||||
memory->destroy(zarray_r);
|
||||
memory->destroy(uarraytot_i);
|
||||
memory->destroy(zarray_i);
|
||||
memory->destroy(yarray_r);
|
||||
memory->destroy(yarray_i);
|
||||
}
|
||||
memory->destroy(uarraytot_r);
|
||||
memory->destroy(zarray_r);
|
||||
memory->destroy(uarraytot_i);
|
||||
memory->destroy(zarray_i);
|
||||
memory->destroy(yarray_r);
|
||||
memory->destroy(yarray_i);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
|
|
@ -24,14 +24,19 @@
|
|||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
struct SNA_LOOPINDICES {
|
||||
struct SNA_ZINDICES {
|
||||
int j1, j2, j, ma1min, ma2max, mb1min, mb2max, na, nb, jju;
|
||||
double betaj;
|
||||
};
|
||||
|
||||
struct SNA_BINDICES {
|
||||
int j1, j2, j;
|
||||
};
|
||||
|
||||
class SNA : protected Pointers {
|
||||
|
||||
public:
|
||||
SNA(LAMMPS*, double, int, int, int, double, int, int);
|
||||
SNA(LAMMPS*, double, int, int, double, int, int);
|
||||
|
||||
SNA(LAMMPS* lmp) : Pointers(lmp) {};
|
||||
~SNA();
|
||||
|
@ -44,10 +49,8 @@ public:
|
|||
// functions for bispectrum coefficients
|
||||
|
||||
void compute_ui(int);
|
||||
void compute_ui_omp(int, int);
|
||||
void compute_zi();
|
||||
void compute_zi_omp(int);
|
||||
void compute_yi(double*);
|
||||
void compute_yi(const double*);
|
||||
void compute_bi();
|
||||
void copy_bi2bvec();
|
||||
|
||||
|
@ -56,20 +59,10 @@ public:
|
|||
void compute_duidrj(double*, double, double);
|
||||
void compute_dbidrj();
|
||||
void compute_deidrj(double*);
|
||||
void compute_dbidrj_nonsymm();
|
||||
void copy_dbi2dbvec();
|
||||
double compute_sfac(double, double);
|
||||
double compute_dsfac(double, double);
|
||||
|
||||
#ifdef TIMING_INFO
|
||||
double* timers;
|
||||
timespec starttime, endtime;
|
||||
int print;
|
||||
int counter;
|
||||
#endif
|
||||
|
||||
//per sna class instance for OMP use
|
||||
|
||||
double* bvec, ** dbvec;
|
||||
double** rij;
|
||||
int* inside;
|
||||
|
@ -83,16 +76,17 @@ public:
|
|||
double*** uarraytot_r, *** uarraytot_i;
|
||||
double***** zarray_r, ***** zarray_i;
|
||||
double*** yarray_r, *** yarray_i;
|
||||
double*** uarraytot_r_b, *** uarraytot_i_b;
|
||||
double***** zarray_r_b, ***** zarray_i_b;
|
||||
double*** uarray_r, *** uarray_i;
|
||||
|
||||
private:
|
||||
double rmin0, rfac0;
|
||||
|
||||
//use indexlist instead of loops, constructor generates these
|
||||
SNA_LOOPINDICES* idxj;
|
||||
int idxj_max;
|
||||
// use indexlist instead of loops, constructor generates these
|
||||
|
||||
SNA_ZINDICES* idxz;
|
||||
SNA_BINDICES* idxb;
|
||||
int idxcg_max, idxu_max, idxz_max, idxb_max;
|
||||
|
||||
// data for bispectrum coefficients
|
||||
|
||||
double***** cgarray;
|
||||
|
@ -104,6 +98,21 @@ private:
|
|||
double**** duarray_r, **** duarray_i;
|
||||
double**** dbarray;
|
||||
|
||||
double* cglist;
|
||||
int*** idxcg_block;
|
||||
|
||||
double* ulisttot_r, * ulisttot_i;
|
||||
double* ulist_r, * ulist_i;
|
||||
int* idxu_block;
|
||||
|
||||
double* zlist_r, * zlist_i;
|
||||
int*** idxz_block;
|
||||
|
||||
int*** idxb_block;
|
||||
|
||||
double** dulist_r, ** dulist_i;
|
||||
double* ylist_r, * ylist_i;
|
||||
|
||||
static const int nmaxfactorial = 167;
|
||||
static const double nfac_table[];
|
||||
double factorial(int);
|
||||
|
@ -118,22 +127,13 @@ private:
|
|||
void zero_uarraytot();
|
||||
void addself_uarraytot(double);
|
||||
void add_uarraytot(double, double, double);
|
||||
void add_uarraytot_omp(double, double, double);
|
||||
void compute_uarray(double, double, double,
|
||||
double, double);
|
||||
void compute_uarray_omp(double, double, double,
|
||||
double, double, int);
|
||||
double deltacg(int, int, int);
|
||||
int compute_ncoeff();
|
||||
void compute_duarray(double, double, double,
|
||||
double, double, double, double, double);
|
||||
|
||||
// if number of atoms are small use per atom arrays
|
||||
// for twojmax arrays, rij, inside, bvec
|
||||
// this will increase the memory footprint considerably,
|
||||
// but allows parallel filling and reuse of these arrays
|
||||
int use_shared_arrays;
|
||||
|
||||
// Sets the style for the switching function
|
||||
// 0 = none
|
||||
// 1 = cosine
|
||||
|
|
Loading…
Reference in New Issue