Added compact arrays, removed unused openmp stuff

This commit is contained in:
Aidan Thompson 2019-06-03 19:50:40 -06:00
parent 803e0631c5
commit 960a975e2a
11 changed files with 332 additions and 1614 deletions

View File

@ -25,7 +25,6 @@
#include "comm.h"
#include "memory.h"
#include "error.h"
#include "openmp_snap.h"
using namespace LAMMPS_NS;
@ -115,20 +114,10 @@ ComputeSNAAtom::ComputeSNAAtom(LAMMPS *lmp, int narg, char **arg) :
} else error->all(FLERR,"Illegal compute sna/atom command");
}
nthreads = comm->nthreads;
snaptr = new SNA*[nthreads];
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(lmp,rfac0,twojmax,rmin0,switchflag,bzeroflag)
#endif
{
int tid = omp_get_thread_num();
snaptr = new SNA(lmp,rfac0,twojmax,diagonalstyle,
rmin0,switchflag,bzeroflag);
// always unset use_shared_arrays since it does not work with computes
snaptr[tid] = new SNA(lmp,rfac0,twojmax,diagonalstyle,
0 /*use_shared_arrays*/, rmin0,switchflag,bzeroflag);
}
ncoeff = snaptr[0]->ncoeff;
ncoeff = snaptr->ncoeff;
size_peratom_cols = ncoeff;
if (quadraticflag) size_peratom_cols += (ncoeff*(ncoeff+1))/2;
peratom_flag = 1;
@ -136,7 +125,6 @@ ComputeSNAAtom::ComputeSNAAtom(LAMMPS *lmp, int narg, char **arg) :
nmax = 0;
njmax = 0;
sna = NULL;
}
/* ---------------------------------------------------------------------- */
@ -147,9 +135,7 @@ ComputeSNAAtom::~ComputeSNAAtom()
memory->destroy(radelem);
memory->destroy(wjelem);
memory->destroy(cutsq);
for (int tid = 0; tid<nthreads; tid++)
delete snaptr[tid];
delete [] snaptr;
delete snaptr;
}
/* ---------------------------------------------------------------------- */
@ -176,13 +162,7 @@ void ComputeSNAAtom::init()
if (strcmp(modify->compute[i]->style,"sna/atom") == 0) count++;
if (count > 1 && comm->me == 0)
error->warning(FLERR,"More than one compute sna/atom");
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
int tid = omp_get_thread_num();
snaptr[tid]->init();
}
snaptr->init();
}
/* ---------------------------------------------------------------------- */
@ -223,11 +203,7 @@ void ComputeSNAAtom::compute_peratom()
double** const x = atom->x;
const int* const mask = atom->mask;
#if defined(_OPENMP)
#pragma omp parallel for default(none)
#endif
for (int ii = 0; ii < inum; ii++) {
const int tid = omp_get_thread_num();
const int i = ilist[ii];
if (mask[i] & groupbit) {
@ -241,7 +217,7 @@ void ComputeSNAAtom::compute_peratom()
// insure rij, inside, and typej are of size jnum
snaptr[tid]->grow_rij(jnum);
snaptr->grow_rij(jnum);
// rij[][3] = displacements between atom I and those neighbors
// inside = indices of neighbors of I within cutoff
@ -258,26 +234,26 @@ void ComputeSNAAtom::compute_peratom()
const double rsq = delx*delx + dely*dely + delz*delz;
int jtype = type[j];
if (rsq < cutsq[itype][jtype] && rsq>1e-20) {
snaptr[tid]->rij[ninside][0] = delx;
snaptr[tid]->rij[ninside][1] = dely;
snaptr[tid]->rij[ninside][2] = delz;
snaptr[tid]->inside[ninside] = j;
snaptr[tid]->wj[ninside] = wjelem[jtype];
snaptr[tid]->rcutij[ninside] = (radi+radelem[jtype])*rcutfac;
snaptr->rij[ninside][0] = delx;
snaptr->rij[ninside][1] = dely;
snaptr->rij[ninside][2] = delz;
snaptr->inside[ninside] = j;
snaptr->wj[ninside] = wjelem[jtype];
snaptr->rcutij[ninside] = (radi+radelem[jtype])*rcutfac;
ninside++;
}
}
snaptr[tid]->compute_ui(ninside);
snaptr[tid]->compute_zi();
snaptr[tid]->compute_bi();
snaptr[tid]->copy_bi2bvec();
snaptr->compute_ui(ninside);
snaptr->compute_zi();
snaptr->compute_bi();
snaptr->copy_bi2bvec();
for (int icoeff = 0; icoeff < ncoeff; icoeff++)
sna[i][icoeff] = snaptr[tid]->bvec[icoeff];
sna[i][icoeff] = snaptr->bvec[icoeff];
if (quadraticflag) {
int ncount = ncoeff;
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
double bi = snaptr[tid]->bvec[icoeff];
double bi = snaptr->bvec[icoeff];
// diagonal element of quadratic matrix
@ -286,7 +262,7 @@ void ComputeSNAAtom::compute_peratom()
// upper-triangular elements of quadratic matrix
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++)
sna[i][ncount++] = bi*snaptr[tid]->bvec[jcoeff];
sna[i][ncount++] = bi*snaptr->bvec[jcoeff];
}
}
} else {
@ -305,7 +281,6 @@ double ComputeSNAAtom::memory_usage()
double bytes = nmax*size_peratom_cols * sizeof(double);
bytes += 3*njmax*sizeof(double);
bytes += njmax*sizeof(int);
bytes += snaptr[0]->memory_usage()*comm->nthreads;
return bytes;
}

View File

@ -42,10 +42,9 @@ class ComputeSNAAtom : public Compute {
double rcutfac;
double *radelem;
double *wjelem;
class SNA** snaptr;
class SNA* snaptr;
double cutmax;
int quadraticflag;
int nthreads;
};
}

View File

@ -25,7 +25,6 @@
#include "comm.h"
#include "memory.h"
#include "error.h"
#include "openmp_snap.h"
using namespace LAMMPS_NS;
@ -113,20 +112,10 @@ ComputeSNADAtom::ComputeSNADAtom(LAMMPS *lmp, int narg, char **arg) :
} else error->all(FLERR,"Illegal compute snad/atom command");
}
nthreads = comm->nthreads;
snaptr = new SNA*[nthreads];
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(lmp,rfac0,twojmax,rmin0,switchflag,bzeroflag)
#endif
{
int tid = omp_get_thread_num();
snaptr = new SNA(lmp,rfac0,twojmax,diagonalstyle,
rmin0,switchflag,bzeroflag);
// always unset use_shared_arrays since it does not work with computes
snaptr[tid] = new SNA(lmp,rfac0,twojmax,diagonalstyle,
0 /*use_shared_arrays*/, rmin0,switchflag,bzeroflag);
}
ncoeff = snaptr[0]->ncoeff;
ncoeff = snaptr->ncoeff;
nperdim = ncoeff;
if (quadraticflag) nperdim += (ncoeff*(ncoeff+1))/2;
yoffset = nperdim;
@ -138,7 +127,6 @@ ComputeSNADAtom::ComputeSNADAtom(LAMMPS *lmp, int narg, char **arg) :
nmax = 0;
njmax = 0;
snad = NULL;
}
/* ---------------------------------------------------------------------- */
@ -149,9 +137,7 @@ ComputeSNADAtom::~ComputeSNADAtom()
memory->destroy(radelem);
memory->destroy(wjelem);
memory->destroy(cutsq);
for (int tid = 0; tid<nthreads; tid++)
delete snaptr[tid];
delete [] snaptr;
delete snaptr;
}
/* ---------------------------------------------------------------------- */
@ -178,13 +164,7 @@ void ComputeSNADAtom::init()
if (strcmp(modify->compute[i]->style,"snad/atom") == 0) count++;
if (count > 1 && comm->me == 0)
error->warning(FLERR,"More than one compute snad/atom");
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
int tid = omp_get_thread_num();
snaptr[tid]->init();
}
snaptr->init();
}
/* ---------------------------------------------------------------------- */
@ -235,11 +215,7 @@ void ComputeSNADAtom::compute_peratom()
double** const x = atom->x;
const int* const mask = atom->mask;
#if defined(_OPENMP)
#pragma omp parallel for default(none)
#endif
for (int ii = 0; ii < inum; ii++) {
const int tid = omp_get_thread_num();
const int i = ilist[ii];
if (mask[i] & groupbit) {
@ -258,7 +234,7 @@ void ComputeSNADAtom::compute_peratom()
// insure rij, inside, and typej are of size jnum
snaptr[tid]->grow_rij(jnum);
snaptr->grow_rij(jnum);
// rij[][3] = displacements between atom I and those neighbors
// inside = indices of neighbors of I within cutoff
@ -276,30 +252,30 @@ void ComputeSNADAtom::compute_peratom()
const double rsq = delx*delx + dely*dely + delz*delz;
int jtype = type[j];
if (rsq < cutsq[itype][jtype]&&rsq>1e-20) {
snaptr[tid]->rij[ninside][0] = delx;
snaptr[tid]->rij[ninside][1] = dely;
snaptr[tid]->rij[ninside][2] = delz;
snaptr[tid]->inside[ninside] = j;
snaptr[tid]->wj[ninside] = wjelem[jtype];
snaptr[tid]->rcutij[ninside] = (radi+radelem[jtype])*rcutfac;
snaptr->rij[ninside][0] = delx;
snaptr->rij[ninside][1] = dely;
snaptr->rij[ninside][2] = delz;
snaptr->inside[ninside] = j;
snaptr->wj[ninside] = wjelem[jtype];
snaptr->rcutij[ninside] = (radi+radelem[jtype])*rcutfac;
ninside++;
}
}
snaptr[tid]->compute_ui(ninside);
snaptr[tid]->compute_zi();
snaptr->compute_ui(ninside);
snaptr->compute_zi();
if (quadraticflag) {
snaptr[tid]->compute_bi();
snaptr[tid]->copy_bi2bvec();
snaptr->compute_bi();
snaptr->copy_bi2bvec();
}
for (int jj = 0; jj < ninside; jj++) {
const int j = snaptr[tid]->inside[jj];
snaptr[tid]->compute_duidrj(snaptr[tid]->rij[jj],
snaptr[tid]->wj[jj],
snaptr[tid]->rcutij[jj]);
snaptr[tid]->compute_dbidrj();
snaptr[tid]->copy_dbi2dbvec();
const int j = snaptr->inside[jj];
snaptr->compute_duidrj(snaptr->rij[jj],
snaptr->wj[jj],
snaptr->rcutij[jj]);
snaptr->compute_dbidrj();
snaptr->copy_dbi2dbvec();
// Accumulate -dBi/dRi, -dBi/dRj
@ -307,12 +283,12 @@ void ComputeSNADAtom::compute_peratom()
double *snadj = snad[j]+typeoffset;
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
snadi[icoeff] += snaptr[tid]->dbvec[icoeff][0];
snadi[icoeff+yoffset] += snaptr[tid]->dbvec[icoeff][1];
snadi[icoeff+zoffset] += snaptr[tid]->dbvec[icoeff][2];
snadj[icoeff] -= snaptr[tid]->dbvec[icoeff][0];
snadj[icoeff+yoffset] -= snaptr[tid]->dbvec[icoeff][1];
snadj[icoeff+zoffset] -= snaptr[tid]->dbvec[icoeff][2];
snadi[icoeff] += snaptr->dbvec[icoeff][0];
snadi[icoeff+yoffset] += snaptr->dbvec[icoeff][1];
snadi[icoeff+zoffset] += snaptr->dbvec[icoeff][2];
snadj[icoeff] -= snaptr->dbvec[icoeff][0];
snadj[icoeff+yoffset] -= snaptr->dbvec[icoeff][1];
snadj[icoeff+zoffset] -= snaptr->dbvec[icoeff][2];
}
if (quadraticflag) {
@ -321,10 +297,10 @@ void ComputeSNADAtom::compute_peratom()
snadj += quadraticoffset;
int ncount = 0;
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
double bi = snaptr[tid]->bvec[icoeff];
double bix = snaptr[tid]->dbvec[icoeff][0];
double biy = snaptr[tid]->dbvec[icoeff][1];
double biz = snaptr[tid]->dbvec[icoeff][2];
double bi = snaptr->bvec[icoeff];
double bix = snaptr->dbvec[icoeff][0];
double biy = snaptr->dbvec[icoeff][1];
double biz = snaptr->dbvec[icoeff][2];
// diagonal elements of quadratic matrix
@ -343,12 +319,12 @@ void ComputeSNADAtom::compute_peratom()
// upper-triangular elements of quadratic matrix
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
double dbxtmp = bi*snaptr[tid]->dbvec[jcoeff][0]
+ bix*snaptr[tid]->bvec[jcoeff];
double dbytmp = bi*snaptr[tid]->dbvec[jcoeff][1]
+ biy*snaptr[tid]->bvec[jcoeff];
double dbztmp = bi*snaptr[tid]->dbvec[jcoeff][2]
+ biz*snaptr[tid]->bvec[jcoeff];
double dbxtmp = bi*snaptr->dbvec[jcoeff][0]
+ bix*snaptr->bvec[jcoeff];
double dbytmp = bi*snaptr->dbvec[jcoeff][1]
+ biy*snaptr->bvec[jcoeff];
double dbztmp = bi*snaptr->dbvec[jcoeff][2]
+ biz*snaptr->bvec[jcoeff];
snadi[ncount] += dbxtmp;
snadi[ncount+yoffset] += dbytmp;
@ -408,6 +384,5 @@ double ComputeSNADAtom::memory_usage()
bytes += 3*njmax*sizeof(double);
bytes += njmax*sizeof(int);
bytes += 3*nperdim*atom->ntypes;
bytes += snaptr[0]->memory_usage()*comm->nthreads;
return bytes;
}

View File

@ -44,10 +44,9 @@ class ComputeSNADAtom : public Compute {
double rcutfac;
double *radelem;
double *wjelem;
class SNA** snaptr;
class SNA* snaptr;
double cutmax;
int quadraticflag;
int nthreads;
};
}

View File

@ -25,7 +25,6 @@
#include "comm.h"
#include "memory.h"
#include "error.h"
#include "openmp_snap.h"
using namespace LAMMPS_NS;
@ -109,20 +108,10 @@ ComputeSNAVAtom::ComputeSNAVAtom(LAMMPS *lmp, int narg, char **arg) :
} else error->all(FLERR,"Illegal compute snav/atom command");
}
nthreads = comm->nthreads;
snaptr = new SNA*[nthreads];
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(lmp,rfac0,twojmax,rmin0,switchflag,bzeroflag)
#endif
{
int tid = omp_get_thread_num();
snaptr = new SNA(lmp,rfac0,twojmax,diagonalstyle,
rmin0,switchflag,bzeroflag);
// always unset use_shared_arrays since it does not work with computes
snaptr[tid] = new SNA(lmp,rfac0,twojmax,diagonalstyle,
0 /*use_shared_arrays*/, rmin0,switchflag,bzeroflag);
}
ncoeff = snaptr[0]->ncoeff;
ncoeff = snaptr->ncoeff;
nperdim = ncoeff;
if (quadraticflag) nperdim += (ncoeff*(ncoeff+1))/2;
size_peratom_cols = 6*nperdim*atom->ntypes;
@ -144,9 +133,7 @@ ComputeSNAVAtom::~ComputeSNAVAtom()
memory->destroy(wjelem);
memory->destroy(cutsq);
for (int tid = 0; tid<nthreads; tid++)
delete snaptr[tid];
delete [] snaptr;
delete snaptr;
}
/* ---------------------------------------------------------------------- */
@ -174,13 +161,7 @@ void ComputeSNAVAtom::init()
if (strcmp(modify->compute[i]->style,"snav/atom") == 0) count++;
if (count > 1 && comm->me == 0)
error->warning(FLERR,"More than one compute snav/atom");
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
int tid = omp_get_thread_num();
snaptr[tid]->init();
}
snaptr->init();
}
/* ---------------------------------------------------------------------- */
@ -230,11 +211,7 @@ void ComputeSNAVAtom::compute_peratom()
double** const x = atom->x;
const int* const mask = atom->mask;
#if defined(_OPENMP)
#pragma omp parallel for default(none)
#endif
for (int ii = 0; ii < inum; ii++) {
const int tid = omp_get_thread_num();
const int i = ilist[ii];
if (mask[i] & groupbit) {
@ -251,7 +228,7 @@ void ComputeSNAVAtom::compute_peratom()
// insure rij, inside, and typej are of size jnum
snaptr[tid]->grow_rij(jnum);
snaptr->grow_rij(jnum);
// rij[][3] = displacements between atom I and those neighbors
// inside = indices of neighbors of I within cutoff
@ -269,31 +246,31 @@ void ComputeSNAVAtom::compute_peratom()
const double rsq = delx*delx + dely*dely + delz*delz;
int jtype = type[j];
if (rsq < cutsq[itype][jtype]&&rsq>1e-20) {
snaptr[tid]->rij[ninside][0] = delx;
snaptr[tid]->rij[ninside][1] = dely;
snaptr[tid]->rij[ninside][2] = delz;
snaptr[tid]->inside[ninside] = j;
snaptr[tid]->wj[ninside] = wjelem[jtype];
snaptr[tid]->rcutij[ninside] = (radi+radelem[jtype])*rcutfac;
snaptr->rij[ninside][0] = delx;
snaptr->rij[ninside][1] = dely;
snaptr->rij[ninside][2] = delz;
snaptr->inside[ninside] = j;
snaptr->wj[ninside] = wjelem[jtype];
snaptr->rcutij[ninside] = (radi+radelem[jtype])*rcutfac;
ninside++;
}
}
snaptr[tid]->compute_ui(ninside);
snaptr[tid]->compute_zi();
snaptr->compute_ui(ninside);
snaptr->compute_zi();
if (quadraticflag) {
snaptr[tid]->compute_bi();
snaptr[tid]->copy_bi2bvec();
snaptr->compute_bi();
snaptr->copy_bi2bvec();
}
for (int jj = 0; jj < ninside; jj++) {
const int j = snaptr[tid]->inside[jj];
const int j = snaptr->inside[jj];
snaptr[tid]->compute_duidrj(snaptr[tid]->rij[jj],
snaptr[tid]->wj[jj],
snaptr[tid]->rcutij[jj]);
snaptr[tid]->compute_dbidrj();
snaptr[tid]->copy_dbi2dbvec();
snaptr->compute_duidrj(snaptr->rij[jj],
snaptr->wj[jj],
snaptr->rcutij[jj]);
snaptr->compute_dbidrj();
snaptr->copy_dbi2dbvec();
// Accumulate -dBi/dRi*Ri, -dBi/dRj*Rj
@ -301,18 +278,18 @@ void ComputeSNAVAtom::compute_peratom()
double *snavj = snav[j]+typeoffset;
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
snavi[icoeff] += snaptr[tid]->dbvec[icoeff][0]*xtmp;
snavi[icoeff+nperdim] += snaptr[tid]->dbvec[icoeff][1]*ytmp;
snavi[icoeff+2*nperdim] += snaptr[tid]->dbvec[icoeff][2]*ztmp;
snavi[icoeff+3*nperdim] += snaptr[tid]->dbvec[icoeff][1]*ztmp;
snavi[icoeff+4*nperdim] += snaptr[tid]->dbvec[icoeff][0]*ztmp;
snavi[icoeff+5*nperdim] += snaptr[tid]->dbvec[icoeff][0]*ytmp;
snavj[icoeff] -= snaptr[tid]->dbvec[icoeff][0]*x[j][0];
snavj[icoeff+nperdim] -= snaptr[tid]->dbvec[icoeff][1]*x[j][1];
snavj[icoeff+2*nperdim] -= snaptr[tid]->dbvec[icoeff][2]*x[j][2];
snavj[icoeff+3*nperdim] -= snaptr[tid]->dbvec[icoeff][1]*x[j][2];
snavj[icoeff+4*nperdim] -= snaptr[tid]->dbvec[icoeff][0]*x[j][2];
snavj[icoeff+5*nperdim] -= snaptr[tid]->dbvec[icoeff][0]*x[j][1];
snavi[icoeff] += snaptr->dbvec[icoeff][0]*xtmp;
snavi[icoeff+nperdim] += snaptr->dbvec[icoeff][1]*ytmp;
snavi[icoeff+2*nperdim] += snaptr->dbvec[icoeff][2]*ztmp;
snavi[icoeff+3*nperdim] += snaptr->dbvec[icoeff][1]*ztmp;
snavi[icoeff+4*nperdim] += snaptr->dbvec[icoeff][0]*ztmp;
snavi[icoeff+5*nperdim] += snaptr->dbvec[icoeff][0]*ytmp;
snavj[icoeff] -= snaptr->dbvec[icoeff][0]*x[j][0];
snavj[icoeff+nperdim] -= snaptr->dbvec[icoeff][1]*x[j][1];
snavj[icoeff+2*nperdim] -= snaptr->dbvec[icoeff][2]*x[j][2];
snavj[icoeff+3*nperdim] -= snaptr->dbvec[icoeff][1]*x[j][2];
snavj[icoeff+4*nperdim] -= snaptr->dbvec[icoeff][0]*x[j][2];
snavj[icoeff+5*nperdim] -= snaptr->dbvec[icoeff][0]*x[j][1];
}
if (quadraticflag) {
@ -321,10 +298,10 @@ void ComputeSNAVAtom::compute_peratom()
snavj += quadraticoffset;
int ncount = 0;
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
double bi = snaptr[tid]->bvec[icoeff];
double bix = snaptr[tid]->dbvec[icoeff][0];
double biy = snaptr[tid]->dbvec[icoeff][1];
double biz = snaptr[tid]->dbvec[icoeff][2];
double bi = snaptr->bvec[icoeff];
double bix = snaptr->dbvec[icoeff][0];
double biy = snaptr->dbvec[icoeff][1];
double biz = snaptr->dbvec[icoeff][2];
// diagonal element of quadratic matrix
@ -348,12 +325,12 @@ void ComputeSNAVAtom::compute_peratom()
// upper-triangular elements of quadratic matrix
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
double dbxtmp = bi*snaptr[tid]->dbvec[jcoeff][0]
+ bix*snaptr[tid]->bvec[jcoeff];
double dbytmp = bi*snaptr[tid]->dbvec[jcoeff][1]
+ biy*snaptr[tid]->bvec[jcoeff];
double dbztmp = bi*snaptr[tid]->dbvec[jcoeff][2]
+ biz*snaptr[tid]->bvec[jcoeff];
double dbxtmp = bi*snaptr->dbvec[jcoeff][0]
+ bix*snaptr->bvec[jcoeff];
double dbytmp = bi*snaptr->dbvec[jcoeff][1]
+ biy*snaptr->bvec[jcoeff];
double dbztmp = bi*snaptr->dbvec[jcoeff][2]
+ biz*snaptr->bvec[jcoeff];
snavi[ncount] += dbxtmp*xtmp;
snavi[ncount+nperdim] += dbytmp*ytmp;
snavi[ncount+2*nperdim] += dbztmp*ztmp;
@ -419,6 +396,5 @@ double ComputeSNAVAtom::memory_usage()
bytes += njmax*sizeof(int);
bytes += 6*nperdim*atom->ntypes;
if (quadraticflag) bytes += 6*nperdim*atom->ntypes;
bytes += snaptr[0]->memory_usage()*comm->nthreads;
return bytes;
}

View File

@ -44,9 +44,8 @@ class ComputeSNAVAtom : public Compute {
double rcutfac;
double *radelem;
double *wjelem;
class SNA** snaptr;
class SNA* snaptr;
int quadraticflag;
int nthreads;
};
}

View File

@ -1,16 +0,0 @@
#ifndef LMP_OPENMP_SNAP_H
#define LMP_OPENMP_SNAP_H
#if defined(_OPENMP)
#include <omp.h>
#else
enum omp_sched_t {omp_sched_static, omp_sched_dynamic, omp_sched_guided, omp_sched_auto};
inline int omp_get_thread_num() { return 0;}
inline int omp_set_num_threads(int num_threads) {return 1;}
/* inline int __sync_fetch_and_add(int* ptr, int value) {int tmp = *ptr; ptr[0]+=value; return tmp;} */
inline void omp_set_schedule(omp_sched_t schedule,int modifier=1) {}
inline int omp_in_parallel() {return 0;}
#endif
#endif

View File

@ -23,7 +23,6 @@
#include "neigh_list.h"
#include "neigh_request.h"
#include "sna.h"
#include "openmp_snap.h"
#include "domain.h"
#include "memory.h"
#include "error.h"
@ -55,51 +54,6 @@ PairSNAP::PairSNAP(LAMMPS *lmp) : Pair(lmp)
coeffelem = NULL;
nmax = 0;
nthreads = 1;
schedule_user = 0;
schedule_time_guided = -1;
schedule_time_dynamic = -1;
ncalls_neigh =-1;
ilistmask_max = 0;
ilistmask = NULL;
ghostinum = 0;
ghostilist_max = 0;
ghostilist = NULL;
ghostnumneigh_max = 0;
ghostnumneigh = NULL;
ghostneighs = NULL;
ghostfirstneigh = NULL;
ghostneighs_total = 0;
ghostneighs_max = 0;
i_max = 0;
i_neighmax = 0;
i_numpairs = 0;
i_rij = NULL;
i_inside = NULL;
i_wj = NULL;
i_rcutij = NULL;
i_ninside = NULL;
i_pairs = NULL;
i_uarraytot_r = NULL;
i_uarraytot_i = NULL;
i_zarray_r = NULL;
i_zarray_i = NULL;
use_shared_arrays = 0;
#ifdef TIMING_INFO
timers[0] = 0;
timers[1] = 0;
timers[2] = 0;
timers[3] = 0;
#endif
// Need to set this because restart not handled by PairHybrid
sna = NULL;
beta_max = 0;
beta = NULL;
@ -123,35 +77,7 @@ PairSNAP::~PairSNAP()
memory->destroy(beta);
memory->destroy(bispectrum);
// Need to set this because restart not handled by PairHybrid
if (sna) {
#ifdef TIMING_INFO
double time[5];
double timeave[5];
double timeave_mpi[5];
double timemax_mpi[5];
for (int i = 0; i < 5; i++) {
time[i] = 0;
timeave[i] = 0;
for (int tid = 0; tid<nthreads; tid++) {
if (sna[tid]->timers[i]>time[i])
time[i] = sna[tid]->timers[i];
timeave[i] += sna[tid]->timers[i];
}
timeave[i] /= nthreads;
}
MPI_Reduce(timeave, timeave_mpi, 5, MPI_DOUBLE, MPI_SUM, 0, world);
MPI_Reduce(time, timemax_mpi, 5, MPI_DOUBLE, MPI_MAX, 0, world);
#endif
for (int tid = 0; tid<nthreads; tid++)
delete sna[tid];
delete [] sna;
}
delete snaptr;
if (allocated) {
memory->destroy(setflag);
@ -161,22 +87,11 @@ PairSNAP::~PairSNAP()
}
void PairSNAP::compute(int eflag, int vflag)
{
// if (use_optimized)
// compute_optimized(eflag, vflag);
// else
// hard-code compute_regular()
compute_regular(eflag, vflag);
}
/* ----------------------------------------------------------------------
This version is a straightforward implementation
---------------------------------------------------------------------- */
void PairSNAP::compute_regular(int eflag, int vflag)
void PairSNAP::compute(int eflag, int vflag)
{
int i,j,jnum,ninside;
double delx,dely,delz,evdwl,rsq;
@ -191,7 +106,6 @@ void PairSNAP::compute_regular(int eflag, int vflag)
int *type = atom->type;
int nlocal = atom->nlocal;
int newton_pair = force->newton_pair;
class SNA* snaptr = sna[0];
if (beta_max < list->inum) {
memory->grow(beta,list->inum,ncoeff,"PairSNAP:beta");
@ -315,573 +229,6 @@ void PairSNAP::compute_regular(int eflag, int vflag)
if (vflag_fdotr) virial_fdotr_compute();
}
inline int PairSNAP::equal(double* x,double* y)
{
double dist2 =
(x[0]-y[0])*(x[0]-y[0]) +
(x[1]-y[1])*(x[1]-y[1]) +
(x[2]-y[2])*(x[2]-y[2]);
if (dist2 < 1e-20) return 1;
return 0;
}
inline double PairSNAP::dist2(double* x,double* y)
{
return
(x[0]-y[0])*(x[0]-y[0]) +
(x[1]-y[1])*(x[1]-y[1]) +
(x[2]-y[2])*(x[2]-y[2]);
}
// return extra communication cutoff
// extra_cutoff = max(subdomain_length)
double PairSNAP::extra_cutoff()
{
double sublo[3],subhi[3];
if (domain->triclinic == 0) {
for (int dim = 0 ; dim < 3 ; dim++) {
sublo[dim] = domain->sublo[dim];
subhi[dim] = domain->subhi[dim];
}
} else {
domain->lamda2x(domain->sublo_lamda,sublo);
domain->lamda2x(domain->subhi_lamda,subhi);
}
double sub_size[3];
for (int dim = 0; dim < 3; dim++)
sub_size[dim] = subhi[dim] - sublo[dim];
double max_sub_size = 0;
for (int dim = 0; dim < 3; dim++)
max_sub_size = MAX(max_sub_size,sub_size[dim]);
// note: for triclinic, probably need something different
// see Comm::setup()
return max_sub_size;
}
// micro load_balancer: each MPI process will
// check with each of its 26 neighbors,
// whether an imbalance exists in the number
// of atoms to calculate forces for.
// If it does it will set ilistmask of one of
// its local atoms to zero, and send its Tag
// to the neighbor process. The neighboring process
// will check its ghost list for the
// ghost atom with the same Tag which is closest
// to its domain center, and build a
// neighborlist for this ghost atom. For this to work,
// the communication cutoff has to be
// as large as the neighbor cutoff +
// maximum subdomain length.
// Note that at most one atom is exchanged per processor pair.
// Also note that the local atom assignment
// doesn't change. This load balancer will cause
// some ghost atoms to have full neighborlists
// which are unique to PairSNAP.
// They are not part of the generally accessible neighborlist.
// At the same time corresponding local atoms on
// other MPI processes will not be
// included in the force computation since
// their ilistmask is 0. This does not effect
// any other classes which might
// access the same general neighborlist.
// Reverse communication (newton on) of forces is required.
// Currently the load balancer does two passes,
// since its exchanging atoms upstream and downstream.
void PairSNAP::load_balance()
{
double sublo[3],subhi[3];
if (domain->triclinic == 0) {
double* sublotmp = domain->sublo;
double* subhitmp = domain->subhi;
for (int dim = 0 ; dim<3 ; dim++) {
sublo[dim]=sublotmp[dim];
subhi[dim]=subhitmp[dim];
}
} else {
double* sublotmp = domain->sublo_lamda;
double* subhitmp = domain->subhi_lamda;
domain->lamda2x(sublotmp,sublo);
domain->lamda2x(subhitmp,subhi);
}
//if (list->inum==0) list->grow(atom->nmax);
int nlocal = ghostinum;
for (int i=0; i < list->inum; i++)
if (ilistmask[i]) nlocal++;
int ***grid2proc = comm->grid2proc;
int* procgrid = comm->procgrid;
int nlocal_up,nlocal_down;
MPI_Request request;
double sub_mid[3];
for (int dim=0; dim<3; dim++)
sub_mid[dim] = (subhi[dim] + sublo[dim])/2;
if (comm->cutghostuser <
neighbor->cutneighmax+extra_cutoff())
error->all(FLERR,"Communication cutoff too small for SNAP micro load balancing");
int nrecv = ghostinum;
int totalsend = 0;
int nsend = 0;
int depth = 1;
for (int dx = -depth; dx < depth+1; dx++)
for (int dy = -depth; dy < depth+1; dy++)
for (int dz = -depth; dz < depth+1; dz++) {
if (dx == dy && dy == dz && dz == 0) continue;
int sendloc[3] = {comm->myloc[0],
comm->myloc[1], comm->myloc[2]
};
sendloc[0] += dx;
sendloc[1] += dy;
sendloc[2] += dz;
for (int dim = 0; dim < 3; dim++)
if (sendloc[dim] >= procgrid[dim])
sendloc[dim] = sendloc[dim] - procgrid[dim];
for (int dim = 0; dim < 3; dim++)
if (sendloc[dim] < 0)
sendloc[dim] = procgrid[dim] + sendloc[dim];
int recvloc[3] = {comm->myloc[0],
comm->myloc[1], comm->myloc[2]
};
recvloc[0] -= dx;
recvloc[1] -= dy;
recvloc[2] -= dz;
for (int dim = 0; dim < 3; dim++)
if (recvloc[dim] < 0)
recvloc[dim] = procgrid[dim] + recvloc[dim];
for (int dim = 0; dim < 3; dim++)
if (recvloc[dim] >= procgrid[dim])
recvloc[dim] = recvloc[dim] - procgrid[dim];
int sendproc = grid2proc[sendloc[0]][sendloc[1]][sendloc[2]];
int recvproc = grid2proc[recvloc[0]][recvloc[1]][recvloc[2]];
// two stage process, first upstream movement, then downstream
MPI_Sendrecv(&nlocal,1,MPI_INT,sendproc,0,
&nlocal_up,1,MPI_INT,recvproc,0,world,MPI_STATUS_IGNORE);
MPI_Sendrecv(&nlocal,1,MPI_INT,recvproc,0,
&nlocal_down,1,MPI_INT,sendproc,0,world,MPI_STATUS_IGNORE);
nsend = 0;
// send upstream
if (nlocal > nlocal_up+1) {
int i = totalsend++;
while(i < list->inum && ilistmask[i] == 0)
i = totalsend++;
if (i < list->inum)
MPI_Isend(&atom->tag[i],1,MPI_INT,recvproc,0,world,&request);
else {
int j = -1;
MPI_Isend(&j,1,MPI_INT,recvproc,0,world,&request);
}
if (i < list->inum) {
for (int j = 0; j < list->inum; j++)
if (list->ilist[j] == i)
ilistmask[j] = 0;
nsend = 1;
}
}
// recv downstream
if (nlocal < nlocal_down-1) {
nlocal++;
int get_tag = -1;
MPI_Recv(&get_tag,1,MPI_INT,sendproc,0,world,MPI_STATUS_IGNORE);
// if get_tag -1 the other process didnt have local atoms to send
if (get_tag >= 0) {
if (ghostinum >= ghostilist_max) {
memory->grow(ghostilist,ghostinum+10,
"PairSnap::ghostilist");
ghostilist_max = ghostinum+10;
}
if (atom->nlocal + atom->nghost >= ghostnumneigh_max) {
ghostnumneigh_max = atom->nlocal+atom->nghost+100;
memory->grow(ghostnumneigh,ghostnumneigh_max,
"PairSnap::ghostnumneigh");
memory->grow(ghostfirstneigh,ghostnumneigh_max,
"PairSnap::ghostfirstneigh");
}
// find closest ghost image of the transfered particle
double mindist = 1e200;
int closestghost = -1;
for (int j = 0; j < atom->nlocal + atom->nghost; j++)
if (atom->tag[j] == get_tag)
if (dist2(sub_mid, atom->x[j]) < mindist) {
closestghost = j;
mindist = dist2(sub_mid, atom->x[j]);
}
// build neighborlist for this particular
// ghost atom, and add it to list->ilist
if (ghostneighs_max - ghostneighs_total <
neighbor->oneatom) {
memory->grow(ghostneighs,
ghostneighs_total + neighbor->oneatom,
"PairSnap::ghostneighs");
ghostneighs_max = ghostneighs_total + neighbor->oneatom;
}
int j = closestghost;
ghostilist[ghostinum] = j;
ghostnumneigh[j] = 0;
ghostfirstneigh[j] = ghostneighs_total;
ghostinum++;
int* jlist = ghostneighs + ghostfirstneigh[j];
// find all neighbors by looping
// over all local and ghost atoms
for (int k = 0; k < atom->nlocal + atom->nghost; k++)
if (dist2(atom->x[j],atom->x[k]) <
neighbor->cutneighmax*neighbor->cutneighmax) {
jlist[ghostnumneigh[j]] = k;
ghostnumneigh[j]++;
ghostneighs_total++;
}
}
if (get_tag >= 0) nrecv++;
}
// decrease nlocal later, so that it is the
// initial number both for receiving and sending
if (nsend) nlocal--;
// second pass through the grid
MPI_Sendrecv(&nlocal,1,MPI_INT,sendproc,0,
&nlocal_up,1,MPI_INT,recvproc,0,world,MPI_STATUS_IGNORE);
MPI_Sendrecv(&nlocal,1,MPI_INT,recvproc,0,
&nlocal_down,1,MPI_INT,sendproc,0,world,MPI_STATUS_IGNORE);
// send downstream
nsend=0;
if (nlocal > nlocal_down+1) {
int i = totalsend++;
while(i < list->inum && ilistmask[i]==0) i = totalsend++;
if (i < list->inum)
MPI_Isend(&atom->tag[i],1,MPI_INT,sendproc,0,world,&request);
else {
int j =- 1;
MPI_Isend(&j,1,MPI_INT,sendproc,0,world,&request);
}
if (i < list->inum) {
for (int j=0; j<list->inum; j++)
if (list->ilist[j] == i) ilistmask[j] = 0;
nsend = 1;
}
}
// receive upstream
if (nlocal < nlocal_up-1) {
nlocal++;
int get_tag = -1;
MPI_Recv(&get_tag,1,MPI_INT,recvproc,0,world,MPI_STATUS_IGNORE);
if (get_tag >= 0) {
if (ghostinum >= ghostilist_max) {
memory->grow(ghostilist,ghostinum+10,
"PairSnap::ghostilist");
ghostilist_max = ghostinum+10;
}
if (atom->nlocal + atom->nghost >= ghostnumneigh_max) {
ghostnumneigh_max = atom->nlocal + atom->nghost + 100;
memory->grow(ghostnumneigh,ghostnumneigh_max,
"PairSnap::ghostnumneigh");
memory->grow(ghostfirstneigh,ghostnumneigh_max,
"PairSnap::ghostfirstneigh");
}
// find closest ghost image of the transfered particle
double mindist = 1e200;
int closestghost = -1;
for (int j = 0; j < atom->nlocal + atom->nghost; j++)
if (atom->tag[j] == get_tag)
if (dist2(sub_mid,atom->x[j])<mindist) {
closestghost = j;
mindist = dist2(sub_mid,atom->x[j]);
}
// build neighborlist for this particular ghost atom
if (ghostneighs_max-ghostneighs_total < neighbor->oneatom) {
memory->grow(ghostneighs,ghostneighs_total + neighbor->oneatom,
"PairSnap::ghostneighs");
ghostneighs_max = ghostneighs_total + neighbor->oneatom;
}
int j = closestghost;
ghostilist[ghostinum] = j;
ghostnumneigh[j] = 0;
ghostfirstneigh[j] = ghostneighs_total;
ghostinum++;
int* jlist = ghostneighs + ghostfirstneigh[j];
for (int k = 0; k < atom->nlocal + atom->nghost; k++)
if (dist2(atom->x[j],atom->x[k]) <
neighbor->cutneighmax*neighbor->cutneighmax) {
jlist[ghostnumneigh[j]] = k;
ghostnumneigh[j]++;
ghostneighs_total++;
}
}
if (get_tag >= 0) nrecv++;
}
if (nsend) nlocal--;
}
}
void PairSNAP::set_sna_to_shared(int snaid,int i)
{
sna[snaid]->rij = i_rij[i];
sna[snaid]->inside = i_inside[i];
sna[snaid]->wj = i_wj[i];
sna[snaid]->rcutij = i_rcutij[i];
sna[snaid]->zarray_r = i_zarray_r[i];
sna[snaid]->zarray_i = i_zarray_i[i];
sna[snaid]->uarraytot_r = i_uarraytot_r[i];
sna[snaid]->uarraytot_i = i_uarraytot_i[i];
}
void PairSNAP::build_per_atom_arrays()
{
#ifdef TIMING_INFO
clock_gettime(CLOCK_REALTIME,&starttime);
#endif
int count = 0;
int neighmax = 0;
for (int ii = 0; ii < list->inum; ii++)
if ((do_load_balance <= 0) || ilistmask[ii]) {
neighmax=MAX(neighmax,list->numneigh[list->ilist[ii]]);
++count;
}
for (int ii = 0; ii < ghostinum; ii++) {
neighmax=MAX(neighmax,ghostnumneigh[ghostilist[ii]]);
++count;
}
if (i_max < count || i_neighmax < neighmax) {
int i_maxt = MAX(count,i_max);
i_neighmax = MAX(neighmax,i_neighmax);
memory->destroy(i_rij);
memory->destroy(i_inside);
memory->destroy(i_wj);
memory->destroy(i_rcutij);
memory->destroy(i_ninside);
memory->destroy(i_pairs);
memory->create(i_rij,i_maxt,i_neighmax,3,"PairSNAP::i_rij");
memory->create(i_inside,i_maxt,i_neighmax,"PairSNAP::i_inside");
memory->create(i_wj,i_maxt,i_neighmax,"PairSNAP::i_wj");
memory->create(i_rcutij,i_maxt,i_neighmax,"PairSNAP::i_rcutij");
memory->create(i_ninside,i_maxt,"PairSNAP::i_ninside");
memory->create(i_pairs,i_maxt*i_neighmax,4,"PairSNAP::i_pairs");
}
if (i_max < count) {
int jdim = sna[0]->twojmax+1;
memory->destroy(i_uarraytot_r);
memory->destroy(i_uarraytot_i);
memory->create(i_uarraytot_r,count,jdim,jdim,jdim,
"PairSNAP::i_uarraytot_r");
memory->create(i_uarraytot_i,count,jdim,jdim,jdim,
"PairSNAP::i_uarraytot_i");
if (i_zarray_r != NULL)
for (int i = 0; i < i_max; i++) {
memory->destroy(i_zarray_r[i]);
memory->destroy(i_zarray_i[i]);
}
delete [] i_zarray_r;
delete [] i_zarray_i;
i_zarray_r = new double*****[count];
i_zarray_i = new double*****[count];
for (int i = 0; i < count; i++) {
memory->create(i_zarray_r[i],jdim,jdim,jdim,jdim,jdim,
"PairSNAP::i_zarray_r");
memory->create(i_zarray_i[i],jdim,jdim,jdim,jdim,jdim,
"PairSNAP::i_zarray_i");
}
}
if (i_max < count)
i_max = count;
count = 0;
i_numpairs = 0;
for (int ii = 0; ii < list->inum; ii++) {
if ((do_load_balance <= 0) || ilistmask[ii]) {
int i = list->ilist[ii];
int jnum = list->numneigh[i];
int* jlist = list->firstneigh[i];
const double xtmp = atom->x[i][0];
const double ytmp = atom->x[i][1];
const double ztmp = atom->x[i][2];
const int itype = atom->type[i];
const int ielem = map[itype];
const double radi = radelem[ielem];
int ninside = 0;
for (int jj = 0; jj < jnum; jj++) {
int j = jlist[jj];
j &= NEIGHMASK;
const double delx = atom->x[j][0] - xtmp;
const double dely = atom->x[j][1] - ytmp;
const double delz = atom->x[j][2] - ztmp;
const double rsq = delx*delx + dely*dely + delz*delz;
int jtype = atom->type[j];
int jelem = map[jtype];
i_pairs[i_numpairs][0] = i;
i_pairs[i_numpairs][1] = jj;
i_pairs[i_numpairs][2] = -1;
i_pairs[i_numpairs][3] = count;
if (rsq < cutsq[itype][jtype]&&rsq>1e-20) {
i_rij[count][ninside][0] = delx;
i_rij[count][ninside][1] = dely;
i_rij[count][ninside][2] = delz;
i_inside[count][ninside] = j;
i_wj[count][ninside] = wjelem[jelem];
i_rcutij[count][ninside] = (radi + radelem[jelem])*rcutfac;
// update index list with inside index
i_pairs[i_numpairs][2] = ninside++;
}
i_numpairs++;
}
i_ninside[count] = ninside;
count++;
}
}
for (int ii = 0; ii < ghostinum; ii++) {
int i = ghostilist[ii];
int jnum = ghostnumneigh[i];
int* jlist = ghostneighs+ghostfirstneigh[i];
const double xtmp = atom->x[i][0];
const double ytmp = atom->x[i][1];
const double ztmp = atom->x[i][2];
const int itype = atom->type[i];
const int ielem = map[itype];
const double radi = radelem[ielem];
int ninside = 0;
for (int jj = 0; jj < jnum; jj++) {
int j = jlist[jj];
j &= NEIGHMASK;
const double delx = atom->x[j][0] - xtmp;
const double dely = atom->x[j][1] - ytmp;
const double delz = atom->x[j][2] - ztmp;
const double rsq = delx*delx + dely*dely + delz*delz;
int jtype = atom->type[j];
int jelem = map[jtype];
i_pairs[i_numpairs][0] = i;
i_pairs[i_numpairs][1] = jj;
i_pairs[i_numpairs][2] = -1;
i_pairs[i_numpairs][3] = count;
if (rsq < cutsq[itype][jtype]&&rsq>1e-20) {
i_rij[count][ninside][0] = delx;
i_rij[count][ninside][1] = dely;
i_rij[count][ninside][2] = delz;
i_inside[count][ninside] = j;
i_wj[count][ninside] = wjelem[jelem];
i_rcutij[count][ninside] = (radi + radelem[jelem])*rcutfac;
// update index list with inside index
i_pairs[i_numpairs][2] = ninside++;
}
i_numpairs++;
}
i_ninside[count] = ninside;
count++;
}
#ifdef TIMING_INFO
clock_gettime(CLOCK_REALTIME,&endtime);
timers[0]+=(endtime.tv_sec-starttime.tv_sec+1.0*
(endtime.tv_nsec-starttime.tv_nsec)/1000000000);
#endif
#ifdef TIMING_INFO
clock_gettime(CLOCK_REALTIME,&starttime);
#endif
#if defined(_OPENMP)
#pragma omp parallel for shared(count) default(none)
#endif
for (int ii=0; ii < count; ii++) {
int tid = omp_get_thread_num();
set_sna_to_shared(tid,ii);
//sna[tid]->compute_ui(i_ninside[ii]);
#ifdef TIMING_INFO
clock_gettime(CLOCK_REALTIME,&starttime);
#endif
sna[tid]->compute_ui_omp(i_ninside[ii],MAX(int(nthreads/count),1));
#ifdef TIMING_INFO
clock_gettime(CLOCK_REALTIME,&endtime);
sna[tid]->timers[0]+=(endtime.tv_sec-starttime.tv_sec+1.0*
(endtime.tv_nsec-starttime.tv_nsec)/1000000000);
#endif
}
#ifdef TIMING_INFO
clock_gettime(CLOCK_REALTIME,&starttime);
#endif
for (int ii=0; ii < count; ii++) {
int tid = 0;//omp_get_thread_num();
set_sna_to_shared(tid,ii);
sna[tid]->compute_zi_omp(MAX(int(nthreads/count),1));
}
#ifdef TIMING_INFO
clock_gettime(CLOCK_REALTIME,&endtime);
sna[0]->timers[1]+=(endtime.tv_sec-starttime.tv_sec+1.0*
(endtime.tv_nsec-starttime.tv_nsec)/1000000000);
#endif
#ifdef TIMING_INFO
clock_gettime(CLOCK_REALTIME,&endtime);
timers[1]+=(endtime.tv_sec-starttime.tv_sec+1.0*
(endtime.tv_nsec-starttime.tv_nsec)/1000000000);
#endif
}
/* ----------------------------------------------------------------------
compute beta
------------------------------------------------------------------------- */
@ -914,7 +261,6 @@ void PairSNAP::compute_bispectrum()
double **x = atom->x;
int *type = atom->type;
class SNA* snaptr = sna[0];
for (int ii = 0; ii < list->inum; ii++) {
i = list->ilist[ii];
@ -991,96 +337,8 @@ void PairSNAP::allocate()
void PairSNAP::settings(int narg, char **arg)
{
// set default values for optional arguments
nthreads = -1;
use_shared_arrays=-1;
do_load_balance = 0;
use_optimized = 1;
// optional arguments
for (int i=0; i < narg; i++) {
if (i+2>narg) error->all(FLERR,"Illegal pair_style command");
if (strcmp(arg[i],"nthreads")==0) {
nthreads=force->inumeric(FLERR,arg[++i]);
#if defined(LMP_USER_OMP)
error->all(FLERR,"Must set number of threads via package omp command");
#else
omp_set_num_threads(nthreads);
comm->nthreads=nthreads;
#endif
continue;
}
if (strcmp(arg[i],"optimized")==0) {
use_optimized=force->inumeric(FLERR,arg[++i]);
continue;
}
if (strcmp(arg[i],"shared")==0) {
use_shared_arrays=force->inumeric(FLERR,arg[++i]);
continue;
}
if (strcmp(arg[i],"loadbalance")==0) {
do_load_balance = force->inumeric(FLERR,arg[++i]);
if (do_load_balance) {
double mincutoff = extra_cutoff() +
rcutmax + neighbor->skin;
if (comm->cutghostuser < mincutoff) {
char buffer[255];
//apparently mincutoff is 0 after sprintf command ?????
double tmp = mincutoff + 0.1;
sprintf(buffer, "Communication cutoff is too small "
"for SNAP micro load balancing, increased to %lf",
mincutoff+0.1);
if (comm->me==0)
error->warning(FLERR,buffer);
comm->cutghostuser = tmp;
}
}
continue;
}
if (strcmp(arg[i],"schedule")==0) {
i++;
if (strcmp(arg[i],"static")==0)
schedule_user = 1;
if (strcmp(arg[i],"dynamic")==0)
schedule_user = 2;
if (strcmp(arg[i],"guided")==0)
schedule_user = 3;
if (strcmp(arg[i],"auto")==0)
schedule_user = 4;
if (strcmp(arg[i],"determine")==0)
schedule_user = 5;
if (schedule_user == 0)
error->all(FLERR,"Illegal pair_style command");
continue;
}
for (int i=0; i < narg; i++)
error->all(FLERR,"Illegal pair_style command");
}
if (nthreads < 0)
nthreads = comm->nthreads;
if (use_shared_arrays < 0) {
if (nthreads > 1 && atom->nlocal <= 2*nthreads)
use_shared_arrays = 1;
else use_shared_arrays = 0;
}
// check if running non-optimized code with
// optimization flags set
if (!use_optimized)
if (nthreads > 1 ||
use_shared_arrays ||
do_load_balance ||
schedule_user)
error->all(FLERR,"Illegal pair_style command");
}
/* ----------------------------------------------------------------------
@ -1170,26 +428,14 @@ void PairSNAP::coeff(int narg, char **arg)
if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
sna = new SNA*[nthreads];
snaptr = new SNA(lmp,rfac0,twojmax,
diagonalstyle,
rmin0,switchflag,bzeroflag);
snaptr->grow_rij(nmax);
// allocate memory for per OpenMP thread data which
// is wrapped into the sna class
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
int tid = omp_get_thread_num();
sna[tid] = new SNA(lmp,rfac0,twojmax,
diagonalstyle,use_shared_arrays,
rmin0,switchflag,bzeroflag);
if (!use_shared_arrays)
sna[tid]->grow_rij(nmax);
}
if (ncoeff != sna[0]->ncoeff) {
if (ncoeff != snaptr->ncoeff) {
if (comm->me == 0)
printf("ncoeff = %d snancoeff = %d \n",ncoeff,sna[0]->ncoeff);
printf("ncoeff = %d snancoeff = %d \n",ncoeff,snaptr->ncoeff);
error->all(FLERR,"Incorrect SNAP parameter file");
}
@ -1216,13 +462,7 @@ void PairSNAP::init_style()
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
int tid = omp_get_thread_num();
sna[tid]->init();
}
snaptr->init();
}
@ -1370,6 +610,8 @@ void PairSNAP::read_files(char *coefffilename, char *paramfilename)
}
}
if (comm->me == 0) fclose(fpcoeff);
// set flags for required keywords
rcutfacflag = 0;
@ -1471,7 +713,6 @@ double PairSNAP::memory_usage()
bytes += nmax*sizeof(int);
bytes += (2*ncoeffall)*sizeof(double);
bytes += (ncoeff*3)*sizeof(double);
bytes += sna[0]->memory_usage()*nthreads;
return bytes;
}

View File

@ -29,8 +29,6 @@ public:
PairSNAP(class LAMMPS *);
~PairSNAP();
virtual void compute(int, int);
void compute_regular(int, int);
void compute_optimized(int, int);
void settings(int, char **);
virtual void coeff(int, char **);
virtual void init_style();
@ -43,59 +41,16 @@ public:
protected:
int ncoeffq, ncoeffall;
double **bvec, ***dbvec;
class SNA** sna;
class SNA* snaptr;
int nmax;
int nthreads;
virtual void allocate();
void read_files(char *, char *);
inline int equal(double* x,double* y);
inline double dist2(double* x,double* y);
double extra_cutoff();
void load_balance();
void set_sna_to_shared(int snaid,int i);
void build_per_atom_arrays();
void compute_beta();
void compute_bispectrum();
int schedule_user;
double schedule_time_guided;
double schedule_time_dynamic;
int ncalls_neigh;
int do_load_balance;
int ilistmask_max;
int* ilistmask;
int ghostinum;
int ghostilist_max;
int* ghostilist;
int ghostnumneigh_max;
int* ghostnumneigh;
int* ghostneighs;
int* ghostfirstneigh;
int ghostneighs_total;
int ghostneighs_max;
int use_optimized;
int use_shared_arrays;
int i_max;
int i_neighmax;
int i_numpairs;
int **i_pairs;
double ***i_rij;
int **i_inside;
double **i_wj;
double **i_rcutij;
int *i_ninside;
double ****i_uarraytot_r, ****i_uarraytot_i;
double ******i_zarray_r, ******i_zarray_i;
#ifdef TIMING_INFO
// timespec starttime, endtime;
double timers[4];
#endif
double rcutmax; // max cutoff for all elements
int nelements; // # of unique elements
char **elements; // names of unique elements
@ -130,15 +85,6 @@ Self-explanatory. Check the input script syntax and compare to the
documentation for the command. You can use -echo screen as a
command-line option when running LAMMPS to see the offending line.
E: Must set number of threads via package omp command
Because you are using the USER-OMP package, set the number of threads
via its settings, not by the pair_style snap nthreads setting.
W: Communication cutoff is too small for SNAP micro load balancing, increased to %lf
Self-explanatory.
E: Incorrect args for pair coefficients
Self-explanatory. Check the input script or data file.

View File

@ -21,7 +21,6 @@
#include "math_extra.h"
#include <cstring>
#include <cstdlib>
#include "openmp_snap.h"
#include "memory.h"
#include "error.h"
@ -114,12 +113,11 @@ using namespace MathConst;
------------------------------------------------------------------------- */
SNA::SNA(LAMMPS* lmp, double rfac0_in,
int twojmax_in, int diagonalstyle_in, int use_shared_arrays_in,
int twojmax_in, int diagonalstyle_in,
double rmin0_in, int switch_flag_in, int bzero_flag_in) : Pointers(lmp)
{
wself = 1.0;
use_shared_arrays = use_shared_arrays_in;
rfac0 = rfac0_in;
rmin0 = rmin0_in;
switch_flag = switch_flag_in;
@ -141,7 +139,8 @@ SNA::SNA(LAMMPS* lmp, double rfac0_in,
wj = NULL;
rcutij = NULL;
nmax = 0;
idxj = NULL;
idxz = NULL;
idxb= NULL;
if (bzero_flag) {
double www = wself*wself*wself;
@ -149,133 +148,178 @@ SNA::SNA(LAMMPS* lmp, double rfac0_in,
bzero[j] = www*(j+1);
}
#ifdef TIMING_INFO
timers = new double[20];
for(int i = 0; i < 20; i++) timers[i] = 0;
print = 0;
counter = 0;
#endif
build_indexlist();
}
/* ---------------------------------------------------------------------- */
SNA::~SNA()
{
if(!use_shared_arrays) {
destroy_twojmax_arrays();
memory->destroy(rij);
memory->destroy(inside);
memory->destroy(wj);
memory->destroy(rcutij);
memory->destroy(bvec);
memory->destroy(dbvec);
}
delete[] idxj;
destroy_twojmax_arrays();
memory->destroy(rij);
memory->destroy(inside);
memory->destroy(wj);
memory->destroy(rcutij);
memory->destroy(bvec);
memory->destroy(dbvec);
delete[] idxz;
delete[] idxb;
}
void SNA::build_indexlist()
{
if(diagonalstyle == 0) {
int idxj_count = 0;
if(diagonalstyle != 3)
error->all(FLERR, "diagonal_style must be 3\n");
for(int j1 = 0; j1 <= twojmax; j1++)
for(int j2 = 0; j2 <= j1; j2++)
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2)
idxj_count++;
// index list for cglist
// indexList can be changed here
int jdim = twojmax + 1;
memory->create(idxcg_block, jdim, jdim, jdim,
"sna:idxcg_block");
idxj = new SNA_LOOPINDICES[idxj_count];
idxj_max = idxj_count;
int idxcg_count = 0;
for(int j1 = 0; j1 <= twojmax; j1++)
for(int j2 = 0; j2 <= j1; j2++)
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) {
idxcg_block[j1][j2][j] = idxcg_count;
for (int m1 = 0; m1 <= j1; m1++)
for (int m2 = 0; m2 <= j2; m2++)
idxcg_count++;
}
idxcg_max = idxcg_count;
idxj_count = 0;
// index list for uarray
// need to include both halves
for(int j1 = 0; j1 <= twojmax; j1++)
for(int j2 = 0; j2 <= j1; j2++)
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) {
idxj[idxj_count].j1 = j1;
idxj[idxj_count].j2 = j2;
idxj[idxj_count].j = j;
idxj_count++;
memory->create(idxu_block, jdim,
"sna:idxu_block");
int idxu_count = 0;
for(int j = 0; j <= twojmax; j++) {
idxu_block[j] = idxu_count;
for(int mb = 0; mb <= j; mb++)
for(int ma = 0; ma <= j; ma++)
idxu_count++;
}
idxu_max = idxu_count;
// index list for beta and B
int idxb_count = 0;
for(int j1 = 0; j1 <= twojmax; j1++)
for(int j2 = 0; j2 <= j1; j2++)
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2)
if (j >= j1) idxb_count++;
idxb_max = idxb_count;
idxb = new SNA_BINDICES[idxb_max];
idxb_count = 0;
for(int j1 = 0; j1 <= twojmax; j1++)
for(int j2 = 0; j2 <= j1; j2++)
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2)
if (j >= j1) {
idxb[idxb_count].j1 = j1;
idxb[idxb_count].j2 = j2;
idxb[idxb_count].j = j;
idxb_count++;
}
}
if(diagonalstyle == 1) {
int idxj_count = 0;
// reverse index list for beta and b
for(int j1 = 0; j1 <= twojmax; j1++)
for(int j = 0; j <= MIN(twojmax, 2 * j1); j += 2) {
idxj_count++;
memory->create(idxb_block, jdim, jdim, jdim,
"sna:idxb_block");
idxb_count = 0;
for(int j1 = 0; j1 <= twojmax; j1++)
for(int j2 = 0; j2 <= j1; j2++)
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) {
if (j < j1) continue;
idxb_block[j1][j2][j] = idxb_count;
idxb_count++;
}
// indexList can be changed here
// index list for zlist
idxj = new SNA_LOOPINDICES[idxj_count];
idxj_max = idxj_count;
int idxz_count = 0;
idxj_count = 0;
for(int j1 = 0; j1 <= twojmax; j1++)
for(int j2 = 0; j2 <= j1; j2++)
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2)
for (int mb = 0; 2*mb <= j; mb++)
for (int ma = 0; ma <= j; ma++)
idxz_count++;
idxz_max = idxz_count;
idxz = new SNA_ZINDICES[idxz_max];
memory->create(idxz_block, jdim, jdim, jdim,
"sna:idxz_block");
idxz_count = 0;
for(int j1 = 0; j1 <= twojmax; j1++)
for(int j2 = 0; j2 <= j1; j2++)
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) {
idxz_block[j1][j2][j] = idxz_count;
for(int j1 = 0; j1 <= twojmax; j1++)
for(int j = 0; j <= MIN(twojmax, 2 * j1); j += 2) {
idxj[idxj_count].j1 = j1;
idxj[idxj_count].j2 = j1;
idxj[idxj_count].j = j;
idxj_count++;
}
}
// find right beta[jjb] entry
// multiply and divide by j+1 factors
// account for multiplicity of 1, 2, or 3
if(diagonalstyle == 2) {
int idxj_count = 0;
// CODE HORROR!! Need to figure this out later
double betaj = 1.0;
// #ifdef USE_YDIRECT_ZLIST
// double betaj;
// if (j >= j1) {
// const int jjb = idxb_block[j1][j2][j];
// if (j1 == j) {
// if (j2 == j) betaj = 3*beta[jjb];
// else betaj = 2*beta[jjb];
// } else betaj = beta[jjb];
// } else if (j >= j2) {
// const int jjb = idxb_block[j][j2][j1];
// if (j2 == j) betaj = 2*beta[jjb]*(j1+1)/(j+1.0);
// else betaj = beta[jjb]*(j1+1)/(j+1.0);
// } else {
// const int jjb = idxb_block[j2][j][j1];
// betaj = beta[jjb]*(j1+1)/(j+1.0);
// }
// #else
// double betaj;
// if (j >= j1) {
// const int jjb = idxb_block[j1][j2][j];
// betaj = beta[jjb];
// } else if (j >= j2) {
// const int jjb = idxb_block[j][j2][j1];
// betaj = beta[jjb]*(j1+1)/(j+1.0);
// } else {
// const int jjb = idxb_block[j2][j][j1];
// betaj = beta[jjb]*(j1+1)/(j+1.0);
// }
// #endif
for(int j1 = 0; j1 <= twojmax; j1++) {
idxj_count++;
}
for (int mb = 0; 2*mb <= j; mb++)
for (int ma = 0; ma <= j; ma++) {
idxz[idxz_count].j1 = j1;
idxz[idxz_count].j2 = j2;
idxz[idxz_count].j = j;
idxz[idxz_count].ma1min = MAX(0, (2 * ma - j - j2 + j1) / 2);
idxz[idxz_count].ma2max = (2 * ma - j - (2 * idxz[idxz_count].ma1min - j1) + j2) / 2;
idxz[idxz_count].na = MIN(j1, (2 * ma - j + j2 + j1) / 2) - idxz[idxz_count].ma1min + 1;
idxz[idxz_count].mb1min = MAX(0, (2 * mb - j - j2 + j1) / 2);
idxz[idxz_count].mb2max = (2 * mb - j - (2 * idxz[idxz_count].mb1min - j1) + j2) / 2;
idxz[idxz_count].nb = MIN(j1, (2 * mb - j + j2 + j1) / 2) - idxz[idxz_count].mb1min + 1;
// indexList can be changed here
// apply to z(j1,j2,j,ma,mb) to unique element of y(j)
// find right beta[jjb] and y_list[jju] entries
idxj = new SNA_LOOPINDICES[idxj_count];
idxj_max = idxj_count;
const int jju = idxu_block[j] + (j+1)*mb + ma;
idxz[idxz_count].jju = jju;
idxz[idxz_count].betaj = betaj;
idxj_count = 0;
for(int j1 = 0; j1 <= twojmax; j1++) {
idxj[idxj_count].j1 = j1;
idxj[idxj_count].j2 = j1;
idxj[idxj_count].j = j1;
idxj_count++;
}
}
if(diagonalstyle == 3) {
int idxj_count = 0;
for(int j1 = 0; j1 <= twojmax; j1++)
for(int j2 = 0; j2 <= j1; j2++)
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2)
if (j >= j1) idxj_count++;
// indexList can be changed here
idxj = new SNA_LOOPINDICES[idxj_count];
idxj_max = idxj_count;
idxj_count = 0;
for(int j1 = 0; j1 <= twojmax; j1++)
for(int j2 = 0; j2 <= j1; j2++)
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2)
if (j >= j1) {
idxj[idxj_count].j1 = j1;
idxj[idxj_count].j2 = j2;
idxj[idxj_count].j = j;
idxj_count++;
idxz_count++;
}
}
}
}
/* ---------------------------------------------------------------------- */
@ -292,16 +336,14 @@ void SNA::grow_rij(int newnmax)
nmax = newnmax;
if(!use_shared_arrays) {
memory->destroy(rij);
memory->destroy(inside);
memory->destroy(wj);
memory->destroy(rcutij);
memory->create(rij, nmax, 3, "pair:rij");
memory->create(inside, nmax, "pair:inside");
memory->create(wj, nmax, "pair:wj");
memory->create(rcutij, nmax, "pair:rcutij");
}
memory->destroy(rij);
memory->destroy(inside);
memory->destroy(wj);
memory->destroy(rcutij);
memory->create(rij, nmax, 3, "pair:rij");
memory->create(inside, nmax, "pair:inside");
memory->create(wj, nmax, "pair:wj");
memory->create(rcutij, nmax, "pair:rcutij");
}
/* ----------------------------------------------------------------------
compute Ui by summing over neighbors j
@ -320,10 +362,6 @@ void SNA::compute_ui(int jnum)
zero_uarraytot();
addself_uarraytot(wself);
#ifdef TIMING_INFO
clock_gettime(CLOCK_REALTIME, &starttime);
#endif
for(int j = 0; j < jnum; j++) {
x = rij[j][0];
y = rij[j][1];
@ -339,48 +377,6 @@ void SNA::compute_ui(int jnum)
add_uarraytot(r, wj[j], rcutij[j]);
}
#ifdef TIMING_INFO
clock_gettime(CLOCK_REALTIME, &endtime);
timers[0] += (endtime.tv_sec - starttime.tv_sec + 1.0 *
(endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
#endif
}
void SNA::compute_ui_omp(int jnum, int sub_threads)
{
double rsq, r, x, y, z, z0, theta0;
// utot(j,ma,mb) = 0 for all j,ma,ma
// utot(j,ma,ma) = 1 for all j,ma
// for j in neighbors of i:
// compute r0 = (x,y,z,z0)
// utot(j,ma,mb) += u(r0;j,ma,mb) for all j,ma,mb
zero_uarraytot();
addself_uarraytot(wself);
for(int j = 0; j < jnum; j++) {
x = rij[j][0];
y = rij[j][1];
z = rij[j][2];
rsq = x * x + y * y + z * z;
r = sqrt(rsq);
theta0 = (r - rmin0) * rfac0 * MY_PI / (rcutij[j] - rmin0);
// theta0 = (r - rmin0) * rscale0;
z0 = r / tan(theta0);
omp_set_num_threads(sub_threads);
#if defined(_OPENMP)
#pragma omp parallel shared(x,y,z,z0,r,sub_threads) default(none)
#endif
{
compute_uarray_omp(x, y, z, z0, r, sub_threads);
}
add_uarraytot(r, wj[j], rcutij[j]);
}
}
/* ----------------------------------------------------------------------
@ -389,24 +385,6 @@ void SNA::compute_ui_omp(int jnum, int sub_threads)
void SNA::compute_zi()
{
// for j1 = 0,...,twojmax
// for j2 = 0,twojmax
// for j = |j1-j2|,Min(twojmax,j1+j2),2
// for ma = 0,...,j
// for mb = 0,...,jmid
// z(j1,j2,j,ma,mb) = 0
// for ma1 = Max(0,ma+(j1-j2-j)/2),Min(j1,ma+(j1+j2-j)/2)
// sumb1 = 0
// ma2 = ma-ma1+(j1+j2-j)/2;
// for mb1 = Max(0,mb+(j1-j2-j)/2),Min(j1,mb+(j1+j2-j)/2)
// mb2 = mb-mb1+(j1+j2-j)/2;
// sumb1 += cg(j1,mb1,j2,mb2,j) *
// u(j1,ma1,mb1) * u(j2,ma2,mb2)
// z(j1,j2,j,ma,mb) += sumb1*cg(j1,ma1,j2,ma2,j)
#ifdef TIMING_INFO
clock_gettime(CLOCK_REALTIME, &starttime);
#endif
// compute_dbidrj() requires full j1/j2/j chunk of z elements
// use zarray j1/j2 symmetry
@ -449,84 +427,13 @@ void SNA::compute_zi()
} // end loop over j
} // end loop over j1, j2
#ifdef TIMING_INFO
clock_gettime(CLOCK_REALTIME, &endtime);
timers[1] += (endtime.tv_sec - starttime.tv_sec + 1.0 *
(endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
#endif
}
void SNA::compute_zi_omp(int sub_threads)
{
// for j1 = 0,...,twojmax
// for j2 = 0,twojmax
// for j = |j1-j2|,Min(twojmax,j1+j2),2
// for ma = 0,...,j
// for mb = 0,...,j
// z(j1,j2,j,ma,mb) = 0
// for ma1 = Max(0,ma+(j1-j2-j)/2),Min(j1,ma+(j1+j2-j)/2)
// sumb1 = 0
// ma2 = ma-ma1+(j1+j2-j)/2;
// for mb1 = Max(0,mb+(j1-j2-j)/2),Min(j1,mb+(j1+j2-j)/2)
// mb2 = mb-mb1+(j1+j2-j)/2;
// sumb1 += cg(j1,mb1,j2,mb2,j) *
// u(j1,ma1,mb1) * u(j2,ma2,mb2)
// z(j1,j2,j,ma,mb) += sumb1*cg(j1,ma1,j2,ma2,j)
if(omp_in_parallel())
omp_set_num_threads(sub_threads);
// compute_dbidrj() requires full j1/j2/j chunk of z elements
// use zarray j1/j2 symmetry
#if defined(_OPENMP)
#pragma omp parallel for schedule(auto) default(none)
#endif
for(int j1 = 0; j1 <= twojmax; j1++)
for(int j2 = 0; j2 <= j1; j2++)
for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) {
double sumb1_r, sumb1_i;
int ma2, mb2;
for(int ma = 0; ma <= j; ma++)
for(int mb = 0; mb <= j; mb++) {
zarray_r[j1][j2][j][ma][mb] = 0.0;
zarray_i[j1][j2][j][ma][mb] = 0.0;
for(int ma1 = MAX(0, (2 * ma - j - j2 + j1) / 2);
ma1 <= MIN(j1, (2 * ma - j + j2 + j1) / 2); ma1++) {
sumb1_r = 0.0;
sumb1_i = 0.0;
ma2 = (2 * ma - j - (2 * ma1 - j1) + j2) / 2;
for(int mb1 = MAX(0, (2 * mb - j - j2 + j1) / 2);
mb1 <= MIN(j1, (2 * mb - j + j2 + j1) / 2); mb1++) {
mb2 = (2 * mb - j - (2 * mb1 - j1) + j2) / 2;
sumb1_r += cgarray[j1][j2][j][mb1][mb2] *
(uarraytot_r[j1][ma1][mb1] * uarraytot_r[j2][ma2][mb2] -
uarraytot_i[j1][ma1][mb1] * uarraytot_i[j2][ma2][mb2]);
sumb1_i += cgarray[j1][j2][j][mb1][mb2] *
(uarraytot_r[j1][ma1][mb1] * uarraytot_i[j2][ma2][mb2] +
uarraytot_i[j1][ma1][mb1] * uarraytot_r[j2][ma2][mb2]);
}
zarray_r[j1][j2][j][ma][mb] +=
sumb1_r * cgarray[j1][j2][j][ma1][ma2];
zarray_i[j1][j2][j][ma][mb] +=
sumb1_i * cgarray[j1][j2][j][ma1][ma2];
}
}
}
}
/* ----------------------------------------------------------------------
compute Yi by summing over products of beta and Zi
------------------------------------------------------------------------- */
void SNA::compute_yi(double* beta)
void SNA::compute_yi(const double* beta)
{
int j;
int idxz_count;
@ -540,18 +447,18 @@ void SNA::compute_yi(double* beta)
} // end loop over ma, mb
} // end loop over j
for(int JJ = 0; JJ < idxj_max; JJ++) {
const int j1 = idxj[JJ].j1;
const int j2 = idxj[JJ].j2;
const int j3 = idxj[JJ].j;
for(int jjb = 0; jjb < idxb_max; jjb++) {
const int j1 = idxb[jjb].j1;
const int j2 = idxb[jjb].j2;
const int j3 = idxb[jjb].j;
j = j3;
jjjzarray_r = zarray_r[j1][j2][j3];
jjjzarray_i = zarray_i[j1][j2][j3];
for(int mb = 0; 2*mb <= j; mb++)
for(int ma = 0; ma <= j; ma++) {
yarray_r[j][ma][mb] += beta[JJ]*jjjzarray_r[ma][mb];
yarray_i[j][ma][mb] += beta[JJ]*jjjzarray_i[ma][mb];
yarray_r[j][ma][mb] += beta[jjb]*jjjzarray_r[ma][mb];
yarray_i[j][ma][mb] += beta[jjb]*jjjzarray_i[ma][mb];
} // end loop over ma, mb
j = j1;
@ -560,8 +467,8 @@ void SNA::compute_yi(double* beta)
double j1fac = (j3+1)/(j+1.0);
for(int mb = 0; 2*mb <= j; mb++)
for(int ma = 0; ma <= j; ma++) {
yarray_r[j][ma][mb] += beta[JJ]*jjjzarray_r[ma][mb]*j1fac;
yarray_i[j][ma][mb] += beta[JJ]*jjjzarray_i[ma][mb]*j1fac;
yarray_r[j][ma][mb] += beta[jjb]*jjjzarray_r[ma][mb]*j1fac;
yarray_i[j][ma][mb] += beta[jjb]*jjjzarray_i[ma][mb]*j1fac;
} // end loop over ma, mb
j = j2;
@ -570,8 +477,8 @@ void SNA::compute_yi(double* beta)
double j2fac = (j3+1)/(j+1.0);
for(int mb = 0; 2*mb <= j; mb++)
for(int ma = 0; ma <= j; ma++) {
yarray_r[j][ma][mb] += beta[JJ]*jjjzarray_r[ma][mb]*j2fac;
yarray_i[j][ma][mb] += beta[JJ]*jjjzarray_i[ma][mb]*j2fac;
yarray_r[j][ma][mb] += beta[jjb]*jjjzarray_r[ma][mb]*j2fac;
yarray_i[j][ma][mb] += beta[jjb]*jjjzarray_i[ma][mb]*j2fac;
} // end loop over ma, mb
} // end loop over jjb
@ -655,10 +562,6 @@ void SNA::compute_bi()
// b(j1,j2,j) +=
// 2*Conj(u(j,ma,mb))*z(j1,j2,j,ma,mb)
#ifdef TIMING_INFO
clock_gettime(CLOCK_REALTIME, &starttime);
#endif
for(int j1 = 0; j1 <= twojmax; j1++)
for(int j2 = 0; j2 <= j1; j2++) {
for(int j = abs(j1 - j2);
@ -691,12 +594,6 @@ void SNA::compute_bi()
}
}
#ifdef TIMING_INFO
clock_gettime(CLOCK_REALTIME, &endtime);
timers[2] += (endtime.tv_sec - starttime.tv_sec + 1.0 *
(endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
#endif
}
/* ----------------------------------------------------------------------
@ -760,164 +657,8 @@ void SNA::compute_duidrj(double* rij, double wj, double rcut)
z0 = r * cs / sn;
dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq;
#ifdef TIMING_INFO
clock_gettime(CLOCK_REALTIME, &starttime);
#endif
compute_duarray(x, y, z, z0, r, dz0dr, wj, rcut);
#ifdef TIMING_INFO
clock_gettime(CLOCK_REALTIME, &endtime);
timers[3] += (endtime.tv_sec - starttime.tv_sec + 1.0 *
(endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
#endif
}
/* ----------------------------------------------------------------------
calculate derivative of Bi w.r.t. atom j
variant using indexlist for j1,j2,j
variant not using symmetry relation
------------------------------------------------------------------------- */
void SNA::compute_dbidrj_nonsymm()
{
// for j1 = 0,...,twojmax
// for j2 = 0,twojmax
// for j = |j1-j2|,Min(twojmax,j1+j2),2
// dbdr(j1,j2,j) = 0
// for ma = 0,...,j
// for mb = 0,...,j
// dzdr = 0
// for ma1 = Max(0,ma+(j1-j2-j)/2),Min(j1,ma+(j1+j2-j)/2)
// sumb1 = 0
// ma2 = ma-ma1+(j1+j2-j)/2;
// for mb1 = Max(0,mb+(j1-j2-j)/2),Min(j1,mb+(j1+j2-j)/2)
// mb2 = mb-mb1+(j1+j2-j)/2;
// sumb1 += cg(j1,mb1,j2,mb2,j) *
// (dudr(j1,ma1,mb1) * u(j2,ma2,mb2) +
// u(j1,ma1,mb1) * dudr(j2,ma2,mb2))
// dzdr += sumb1*cg(j1,ma1,j2,ma2,j)
// dbdr(j1,j2,j) +=
// Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb) +
// Conj(u(j,ma,mb))*dzdr
double* dbdr;
double* dudr_r, *dudr_i;
double sumb1_r[3], sumb1_i[3], dzdr_r[3], dzdr_i[3];
int ma2;
#ifdef TIMING_INFO
clock_gettime(CLOCK_REALTIME, &starttime);
#endif
for(int JJ = 0; JJ < idxj_max; JJ++) {
const int j1 = idxj[JJ].j1;
const int j2 = idxj[JJ].j2;
const int j = idxj[JJ].j;
dbdr = dbarray[j1][j2][j];
dbdr[0] = 0.0;
dbdr[1] = 0.0;
dbdr[2] = 0.0;
double** *j1duarray_r = duarray_r[j1];
double** *j2duarray_r = duarray_r[j2];
double** *j1duarray_i = duarray_i[j1];
double** *j2duarray_i = duarray_i[j2];
double** j1uarraytot_r = uarraytot_r[j1];
double** j2uarraytot_r = uarraytot_r[j2];
double** j1uarraytot_i = uarraytot_i[j1];
double** j2uarraytot_i = uarraytot_i[j2];
double** j1j2jcgarray = cgarray[j1][j2][j];
for(int ma = 0; ma <= j; ma++)
for(int mb = 0; mb <= j; mb++) {
dzdr_r[0] = 0.0;
dzdr_r[1] = 0.0;
dzdr_r[2] = 0.0;
dzdr_i[0] = 0.0;
dzdr_i[1] = 0.0;
dzdr_i[2] = 0.0;
const int max_mb1 = MIN(j1, (2 * mb - j + j2 + j1) / 2) + 1;
const int max_ma1 = MIN(j1, (2 * ma - j + j2 + j1) / 2) + 1;
for(int ma1 = MAX(0, (2 * ma - j - j2 + j1) / 2);
ma1 < max_ma1; ma1++) {
ma2 = (2 * ma - j - (2 * ma1 - j1) + j2) / 2;
sumb1_r[0] = 0.0;
sumb1_r[1] = 0.0;
sumb1_r[2] = 0.0;
sumb1_i[0] = 0.0;
sumb1_i[1] = 0.0;
sumb1_i[2] = 0.0;
//inside loop 54 operations (mul and add)
for(int mb1 = MAX(0, (2 * mb - j - j2 + j1) / 2),
mb2 = mb + (j1 + j2 - j) / 2 - mb1;
mb1 < max_mb1; mb1++, mb2--) {
double* dudr1_r, *dudr1_i, *dudr2_r, *dudr2_i;
dudr1_r = j1duarray_r[ma1][mb1];
dudr2_r = j2duarray_r[ma2][mb2];
dudr1_i = j1duarray_i[ma1][mb1];
dudr2_i = j2duarray_i[ma2][mb2];
const double cga_mb1mb2 = j1j2jcgarray[mb1][mb2];
const double uat_r_ma2mb2 = cga_mb1mb2 * j2uarraytot_r[ma2][mb2];
const double uat_r_ma1mb1 = cga_mb1mb2 * j1uarraytot_r[ma1][mb1];
const double uat_i_ma2mb2 = cga_mb1mb2 * j2uarraytot_i[ma2][mb2];
const double uat_i_ma1mb1 = cga_mb1mb2 * j1uarraytot_i[ma1][mb1];
for(int k = 0; k < 3; k++) {
sumb1_r[k] += dudr1_r[k] * uat_r_ma2mb2;
sumb1_r[k] -= dudr1_i[k] * uat_i_ma2mb2;
sumb1_i[k] += dudr1_r[k] * uat_i_ma2mb2;
sumb1_i[k] += dudr1_i[k] * uat_r_ma2mb2;
sumb1_r[k] += dudr2_r[k] * uat_r_ma1mb1;
sumb1_r[k] -= dudr2_i[k] * uat_i_ma1mb1;
sumb1_i[k] += dudr2_r[k] * uat_i_ma1mb1;
sumb1_i[k] += dudr2_i[k] * uat_r_ma1mb1;
}
} // end loop over mb1,mb2
// dzdr += sumb1*cg(j1,ma1,j2,ma2,j)
dzdr_r[0] += sumb1_r[0] * j1j2jcgarray[ma1][ma2];
dzdr_r[1] += sumb1_r[1] * j1j2jcgarray[ma1][ma2];
dzdr_r[2] += sumb1_r[2] * j1j2jcgarray[ma1][ma2];
dzdr_i[0] += sumb1_i[0] * j1j2jcgarray[ma1][ma2];
dzdr_i[1] += sumb1_i[1] * j1j2jcgarray[ma1][ma2];
dzdr_i[2] += sumb1_i[2] * j1j2jcgarray[ma1][ma2];
} // end loop over ma1,ma2
// dbdr(j1,j2,j) +=
// Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb) +
// Conj(u(j,ma,mb))*dzdr
dudr_r = duarray_r[j][ma][mb];
dudr_i = duarray_i[j][ma][mb];
for(int k = 0; k < 3; k++)
dbdr[k] +=
(dudr_r[k] * zarray_r[j1][j2][j][ma][mb] +
dudr_i[k] * zarray_i[j1][j2][j][ma][mb]) +
(uarraytot_r[j][ma][mb] * dzdr_r[k] +
uarraytot_i[j][ma][mb] * dzdr_i[k]);
} //end loop over ma mb
} //end loop over j1 j2 j
#ifdef TIMING_INFO
clock_gettime(CLOCK_REALTIME, &endtime);
timers[4] += (endtime.tv_sec - starttime.tv_sec + 1.0 *
(endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
#endif
}
/* ----------------------------------------------------------------------
@ -958,14 +699,10 @@ void SNA::compute_dbidrj()
double jjjmambzarray_r;
double jjjmambzarray_i;
#ifdef TIMING_INFO
clock_gettime(CLOCK_REALTIME, &starttime);
#endif
for(int JJ = 0; JJ < idxj_max; JJ++) {
const int j1 = idxj[JJ].j1;
const int j2 = idxj[JJ].j2;
const int j = idxj[JJ].j;
for(int jjb = 0; jjb < idxb_max; jjb++) {
const int j1 = idxb[jjb].j1;
const int j2 = idxb[jjb].j2;
const int j = idxb[jjb].j;
dbdr = dbarray[j1][j2][j];
dbdr[0] = 0.0;
@ -1149,12 +886,6 @@ void SNA::compute_dbidrj()
} //end loop over j1 j2 j
#ifdef TIMING_INFO
clock_gettime(CLOCK_REALTIME, &endtime);
timers[4] += (endtime.tv_sec - starttime.tv_sec + 1.0 *
(endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
#endif
}
/* ----------------------------------------------------------------------
@ -1251,27 +982,6 @@ void SNA::add_uarraytot(double r, double wj, double rcut)
}
}
void SNA::add_uarraytot_omp(double r, double wj, double rcut)
{
double sfac;
sfac = compute_sfac(r, rcut);
sfac *= wj;
#if defined(_OPENMP)
#pragma omp for
#endif
for (int j = 0; j <= twojmax; j++)
for (int ma = 0; ma <= j; ma++)
for (int mb = 0; mb <= j; mb++) {
uarraytot_r[j][ma][mb] +=
sfac * uarray_r[j][ma][mb];
uarraytot_i[j][ma][mb] +=
sfac * uarray_i[j][ma][mb];
}
}
/* ----------------------------------------------------------------------
compute Wigner U-functions for one neighbor
------------------------------------------------------------------------- */
@ -1348,88 +1058,6 @@ void SNA::compute_uarray(double x, double y, double z,
}
}
void SNA::compute_uarray_omp(double x, double y, double z,
double z0, double r, int /*sub_threads*/)
{
double r0inv;
double a_r, b_r, a_i, b_i;
double rootpq;
// compute Cayley-Klein parameters for unit quaternion
r0inv = 1.0 / sqrt(r * r + z0 * z0);
a_r = r0inv * z0;
a_i = -r0inv * z;
b_r = r0inv * y;
b_i = -r0inv * x;
// VMK Section 4.8.2
uarray_r[0][0][0] = 1.0;
uarray_i[0][0][0] = 0.0;
for (int j = 1; j <= twojmax; j++) {
#if defined(_OPENMP)
#pragma omp for
#endif
for (int mb = 0; mb < j; mb++) {
uarray_r[j][0][mb] = 0.0;
uarray_i[j][0][mb] = 0.0;
for (int ma = 0; ma < j; ma++) {
rootpq = rootpqarray[j - ma][j - mb];
uarray_r[j][ma][mb] +=
rootpq *
(a_r * uarray_r[j - 1][ma][mb] +
a_i * uarray_i[j - 1][ma][mb]);
uarray_i[j][ma][mb] +=
rootpq *
(a_r * uarray_i[j - 1][ma][mb] -
a_i * uarray_r[j - 1][ma][mb]);
rootpq = rootpqarray[ma + 1][j - mb];
uarray_r[j][ma + 1][mb] =
-rootpq *
(b_r * uarray_r[j - 1][ma][mb] +
b_i * uarray_i[j - 1][ma][mb]);
uarray_i[j][ma + 1][mb] =
-rootpq *
(b_r * uarray_i[j - 1][ma][mb] -
b_i * uarray_r[j - 1][ma][mb]);
}
}
int mb = j;
uarray_r[j][0][mb] = 0.0;
uarray_i[j][0][mb] = 0.0;
#if defined(_OPENMP)
#pragma omp for
#endif
for (int ma = 0; ma < j; ma++) {
rootpq = rootpqarray[j - ma][mb];
uarray_r[j][ma][mb] +=
rootpq *
(b_r * uarray_r[j - 1][ma][mb - 1] -
b_i * uarray_i[j - 1][ma][mb - 1]);
uarray_i[j][ma][mb] +=
rootpq *
(b_r * uarray_i[j - 1][ma][mb - 1] +
b_i * uarray_r[j - 1][ma][mb - 1]);
rootpq = rootpqarray[ma + 1][mb];
uarray_r[j][ma + 1][mb] =
rootpq *
(a_r * uarray_r[j - 1][ma][mb - 1] -
a_i * uarray_i[j - 1][ma][mb - 1]);
uarray_i[j][ma + 1][mb] =
rootpq *
(a_r * uarray_i[j - 1][ma][mb - 1] +
a_i * uarray_r[j - 1][ma][mb - 1]);
}
}
}
/* ----------------------------------------------------------------------
compute derivatives of Wigner U-functions for one neighbor
see comments in compute_uarray()
@ -1644,20 +1272,18 @@ void SNA::create_twojmax_arrays()
bzero = NULL;
if(!use_shared_arrays) {
memory->create(uarraytot_r, jdim, jdim, jdim,
"sna:uarraytot");
memory->create(zarray_r, jdim, jdim, jdim, jdim, jdim,
"sna:zarray");
memory->create(uarraytot_i, jdim, jdim, jdim,
"sna:uarraytot");
memory->create(zarray_i, jdim, jdim, jdim, jdim, jdim,
"sna:zarray");
memory->create(yarray_r, jdim, jdim, jdim,
"sna:yarray");
memory->create(yarray_i, jdim, jdim, jdim,
"sna:yarray");
}
memory->create(uarraytot_r, jdim, jdim, jdim,
"sna:uarraytot");
memory->create(zarray_r, jdim, jdim, jdim, jdim, jdim,
"sna:zarray");
memory->create(uarraytot_i, jdim, jdim, jdim,
"sna:uarraytot");
memory->create(zarray_i, jdim, jdim, jdim, jdim, jdim,
"sna:zarray");
memory->create(yarray_r, jdim, jdim, jdim,
"sna:yarray");
memory->create(yarray_i, jdim, jdim, jdim,
"sna:yarray");
}
@ -1680,14 +1306,12 @@ void SNA::destroy_twojmax_arrays()
if (bzero_flag)
memory->destroy(bzero);
if(!use_shared_arrays) {
memory->destroy(uarraytot_r);
memory->destroy(zarray_r);
memory->destroy(uarraytot_i);
memory->destroy(zarray_i);
memory->destroy(yarray_r);
memory->destroy(yarray_i);
}
memory->destroy(uarraytot_r);
memory->destroy(zarray_r);
memory->destroy(uarraytot_i);
memory->destroy(zarray_i);
memory->destroy(yarray_r);
memory->destroy(yarray_i);
}
/* ----------------------------------------------------------------------

View File

@ -24,14 +24,19 @@
namespace LAMMPS_NS {
struct SNA_LOOPINDICES {
struct SNA_ZINDICES {
int j1, j2, j, ma1min, ma2max, mb1min, mb2max, na, nb, jju;
double betaj;
};
struct SNA_BINDICES {
int j1, j2, j;
};
class SNA : protected Pointers {
public:
SNA(LAMMPS*, double, int, int, int, double, int, int);
SNA(LAMMPS*, double, int, int, double, int, int);
SNA(LAMMPS* lmp) : Pointers(lmp) {};
~SNA();
@ -44,10 +49,8 @@ public:
// functions for bispectrum coefficients
void compute_ui(int);
void compute_ui_omp(int, int);
void compute_zi();
void compute_zi_omp(int);
void compute_yi(double*);
void compute_yi(const double*);
void compute_bi();
void copy_bi2bvec();
@ -56,20 +59,10 @@ public:
void compute_duidrj(double*, double, double);
void compute_dbidrj();
void compute_deidrj(double*);
void compute_dbidrj_nonsymm();
void copy_dbi2dbvec();
double compute_sfac(double, double);
double compute_dsfac(double, double);
#ifdef TIMING_INFO
double* timers;
timespec starttime, endtime;
int print;
int counter;
#endif
//per sna class instance for OMP use
double* bvec, ** dbvec;
double** rij;
int* inside;
@ -83,16 +76,17 @@ public:
double*** uarraytot_r, *** uarraytot_i;
double***** zarray_r, ***** zarray_i;
double*** yarray_r, *** yarray_i;
double*** uarraytot_r_b, *** uarraytot_i_b;
double***** zarray_r_b, ***** zarray_i_b;
double*** uarray_r, *** uarray_i;
private:
double rmin0, rfac0;
//use indexlist instead of loops, constructor generates these
SNA_LOOPINDICES* idxj;
int idxj_max;
// use indexlist instead of loops, constructor generates these
SNA_ZINDICES* idxz;
SNA_BINDICES* idxb;
int idxcg_max, idxu_max, idxz_max, idxb_max;
// data for bispectrum coefficients
double***** cgarray;
@ -104,6 +98,21 @@ private:
double**** duarray_r, **** duarray_i;
double**** dbarray;
double* cglist;
int*** idxcg_block;
double* ulisttot_r, * ulisttot_i;
double* ulist_r, * ulist_i;
int* idxu_block;
double* zlist_r, * zlist_i;
int*** idxz_block;
int*** idxb_block;
double** dulist_r, ** dulist_i;
double* ylist_r, * ylist_i;
static const int nmaxfactorial = 167;
static const double nfac_table[];
double factorial(int);
@ -118,22 +127,13 @@ private:
void zero_uarraytot();
void addself_uarraytot(double);
void add_uarraytot(double, double, double);
void add_uarraytot_omp(double, double, double);
void compute_uarray(double, double, double,
double, double);
void compute_uarray_omp(double, double, double,
double, double, int);
double deltacg(int, int, int);
int compute_ncoeff();
void compute_duarray(double, double, double,
double, double, double, double, double);
// if number of atoms are small use per atom arrays
// for twojmax arrays, rij, inside, bvec
// this will increase the memory footprint considerably,
// but allows parallel filling and reuse of these arrays
int use_shared_arrays;
// Sets the style for the switching function
// 0 = none
// 1 = cosine