From a243be2dc9c25e7fc175d899b2f2d796e9cb7bd6 Mon Sep 17 00:00:00 2001 From: Aidan Thompson Date: Sun, 21 Apr 2019 22:10:03 -0600 Subject: [PATCH 01/21] Added bare-bones yarray algorithm, 2x speedup --- src/SNAP/pair_snap.cpp | 84 ++++++++++++++++------------ src/SNAP/sna.cpp | 124 +++++++++++++++++++++++++++++++++++++++++ src/SNAP/sna.h | 3 + 3 files changed, 174 insertions(+), 37 deletions(-) diff --git a/src/SNAP/pair_snap.cpp b/src/SNAP/pair_snap.cpp index 8b547e6e73..73faaa71f7 100644 --- a/src/SNAP/pair_snap.cpp +++ b/src/SNAP/pair_snap.cpp @@ -158,9 +158,12 @@ PairSNAP::~PairSNAP() void PairSNAP::compute(int eflag, int vflag) { - if (use_optimized) - compute_optimized(eflag, vflag); - else +// if (use_optimized) +// compute_optimized(eflag, vflag); +// else + +// hard-code compute_regular() + compute_regular(eflag, vflag); } @@ -248,51 +251,58 @@ void PairSNAP::compute_regular(int eflag, int vflag) double* coeffi = coeffelem[ielem]; + // omit beta0 from beta vector + + double* beta = coeffi+1; + snaptr->compute_yi(beta); + for (int jj = 0; jj < ninside; jj++) { int j = snaptr->inside[jj]; snaptr->compute_duidrj(snaptr->rij[jj], snaptr->wj[jj],snaptr->rcutij[jj]); - snaptr->compute_dbidrj(); - snaptr->copy_dbi2dbvec(); +// snaptr->compute_dbidrj(); +// snaptr->copy_dbi2dbvec(); - fij[0] = 0.0; - fij[1] = 0.0; - fij[2] = 0.0; +// fij[0] = 0.0; +// fij[1] = 0.0; +// fij[2] = 0.0; - // linear contributions +// // linear contributions - for (int k = 1; k <= ncoeff; k++) { - double bgb = coeffi[k]; - fij[0] += bgb*snaptr->dbvec[k-1][0]; - fij[1] += bgb*snaptr->dbvec[k-1][1]; - fij[2] += bgb*snaptr->dbvec[k-1][2]; - } +// for (int k = 1; k <= ncoeff; k++) { +// double bgb = coeffi[k]; +// fij[0] += bgb*snaptr->dbvec[k-1][0]; +// fij[1] += bgb*snaptr->dbvec[k-1][1]; +// fij[2] += bgb*snaptr->dbvec[k-1][2]; +// } - // quadratic contributions +// // quadratic contributions - if (quadraticflag) { - int k = ncoeff+1; - for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - 
double bveci = snaptr->bvec[icoeff]; - double fack = coeffi[k]*bveci; - double* dbveci = snaptr->dbvec[icoeff]; - fij[0] += fack*dbveci[0]; - fij[1] += fack*dbveci[1]; - fij[2] += fack*dbveci[2]; - k++; - for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { - double facki = coeffi[k]*bveci; - double fackj = coeffi[k]*snaptr->bvec[jcoeff]; - double* dbvecj = snaptr->dbvec[jcoeff]; +// if (quadraticflag) { +// int k = ncoeff+1; +// for (int icoeff = 0; icoeff < ncoeff; icoeff++) { +// double bveci = snaptr->bvec[icoeff]; +// double fack = coeffi[k]*bveci; +// double* dbveci = snaptr->dbvec[icoeff]; +// fij[0] += fack*dbveci[0]; +// fij[1] += fack*dbveci[1]; +// fij[2] += fack*dbveci[2]; +// k++; +// for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { +// double facki = coeffi[k]*bveci; +// double fackj = coeffi[k]*snaptr->bvec[jcoeff]; +// double* dbvecj = snaptr->dbvec[jcoeff]; - fij[0] += facki*dbvecj[0]+fackj*dbveci[0]; - fij[1] += facki*dbvecj[1]+fackj*dbveci[1]; - fij[2] += facki*dbvecj[2]+fackj*dbveci[2]; - k++; - } - } - } +// fij[0] += facki*dbvecj[0]+fackj*dbveci[0]; +// fij[1] += facki*dbvecj[1]+fackj*dbveci[1]; +// fij[2] += facki*dbvecj[2]+fackj*dbveci[2]; +// k++; +// } +// } +// } + + snaptr->compute_deidrj(fij); f[i][0] += fij[0]; f[i][1] += fij[1]; diff --git a/src/SNAP/sna.cpp b/src/SNAP/sna.cpp index 7ed1bc1e23..d30d94dc9d 100644 --- a/src/SNAP/sna.cpp +++ b/src/SNAP/sna.cpp @@ -522,6 +522,124 @@ void SNA::compute_zi_omp(int sub_threads) } } +/* ---------------------------------------------------------------------- + compute Yi by summing over products of beta and Zi +------------------------------------------------------------------------- */ + +void SNA::compute_yi(double* beta) +{ + int j; + int idxz_count; + double **jjjzarray_r, **jjjzarray_i; + + for(int j = 0; j <= twojmax; j++) { + for(int mb = 0; 2*mb <= j; mb++) + for(int ma = 0; ma <= j; ma++) { + yarray_r[j][ma][mb] = 0.0; + yarray_i[j][ma][mb] = 0.0; + } // end loop over ma, mb 
+ } // end loop over j + + for(int JJ = 0; JJ < idxj_max; JJ++) { + const int j1 = idxj[JJ].j1; + const int j2 = idxj[JJ].j2; + const int j3 = idxj[JJ].j; + + j = j3; + jjjzarray_r = zarray_r[j1][j2][j3]; + jjjzarray_i = zarray_i[j1][j2][j3]; + for(int mb = 0; 2*mb <= j; mb++) + for(int ma = 0; ma <= j; ma++) { + yarray_r[j][ma][mb] += beta[JJ]*jjjzarray_r[ma][mb]; + yarray_i[j][ma][mb] += beta[JJ]*jjjzarray_i[ma][mb]; + } // end loop over ma, mb + + j = j1; + jjjzarray_r = zarray_r[j3][j2][j1]; + jjjzarray_i = zarray_i[j3][j2][j1]; + double j1fac = (j3+1)/(j+1.0); + for(int mb = 0; 2*mb <= j; mb++) + for(int ma = 0; ma <= j; ma++) { + yarray_r[j][ma][mb] += beta[JJ]*jjjzarray_r[ma][mb]*j1fac; + yarray_i[j][ma][mb] += beta[JJ]*jjjzarray_i[ma][mb]*j1fac; + } // end loop over ma, mb + + j = j2; + jjjzarray_r = zarray_r[j3][j1][j2]; + jjjzarray_i = zarray_i[j3][j1][j2]; + double j2fac = (j3+1)/(j+1.0); + for(int mb = 0; 2*mb <= j; mb++) + for(int ma = 0; ma <= j; ma++) { + yarray_r[j][ma][mb] += beta[JJ]*jjjzarray_r[ma][mb]*j2fac; + yarray_i[j][ma][mb] += beta[JJ]*jjjzarray_i[ma][mb]*j2fac; + } // end loop over ma, mb + + } // end loop over jjb + +} + +/* ---------------------------------------------------------------------- + compute dEidRj +------------------------------------------------------------------------- */ + +void SNA::compute_deidrj(double* dedr) +{ + + for(int k = 0; k < 3; k++) + dedr[k] = 0.0; + + for(int j = 0; j <= twojmax; j++) { + + for(int mb = 0; 2*mb < j; mb++) + for(int ma = 0; ma <= j; ma++) { + + double* dudr_r = duarray_r[j][ma][mb]; + double* dudr_i = duarray_i[j][ma][mb]; + double jjjmambyarray_r = yarray_r[j][ma][mb]; + double jjjmambyarray_i = yarray_i[j][ma][mb]; + for(int k = 0; k < 3; k++) + dedr[k] += + dudr_r[k] * jjjmambyarray_r + + dudr_i[k] * jjjmambyarray_i; + + } //end loop over ma mb + + // For j even, handle middle column + + if (j%2 == 0) { + + int mb = j/2; + for(int ma = 0; ma < mb; ma++) { + double* dudr_r = 
duarray_r[j][ma][mb]; + double* dudr_i = duarray_i[j][ma][mb]; + double jjjmambyarray_r = yarray_r[j][ma][mb]; + double jjjmambyarray_i = yarray_i[j][ma][mb]; + for(int k = 0; k < 3; k++) + dedr[k] += + dudr_r[k] * jjjmambyarray_r + + dudr_i[k] * jjjmambyarray_i; + + } + + int ma = mb; + double* dudr_r = duarray_r[j][ma][mb]; + double* dudr_i = duarray_i[j][ma][mb]; + double jjjmambyarray_r = yarray_r[j][ma][mb]; + double jjjmambyarray_i = yarray_i[j][ma][mb]; + for(int k = 0; k < 3; k++) + dedr[k] += + (dudr_r[k] * jjjmambyarray_r + + dudr_i[k] * jjjmambyarray_i)*0.5; + + } // end if jeven + + } // End loop over j + + for(int k = 0; k < 3; k++) + dedr[k] *= 2.0; + +} + /* ---------------------------------------------------------------------- compute Bi by summing conj(Ui)*Zi ------------------------------------------------------------------------- */ @@ -1535,6 +1653,10 @@ void SNA::create_twojmax_arrays() "sna:uarraytot"); memory->create(zarray_i, jdim, jdim, jdim, jdim, jdim, "sna:zarray"); + memory->create(yarray_r, jdim, jdim, jdim, + "sna:yarray"); + memory->create(yarray_i, jdim, jdim, jdim, + "sna:yarray"); } } @@ -1563,6 +1685,8 @@ void SNA::destroy_twojmax_arrays() memory->destroy(zarray_r); memory->destroy(uarraytot_i); memory->destroy(zarray_i); + memory->destroy(yarray_r); + memory->destroy(yarray_i); } } diff --git a/src/SNAP/sna.h b/src/SNAP/sna.h index d05ad0fb84..2c90da1d30 100644 --- a/src/SNAP/sna.h +++ b/src/SNAP/sna.h @@ -47,6 +47,7 @@ public: void compute_ui_omp(int, int); void compute_zi(); void compute_zi_omp(int); + void compute_yi(double*); void compute_bi(); void copy_bi2bvec(); @@ -54,6 +55,7 @@ public: void compute_duidrj(double*, double, double); void compute_dbidrj(); + void compute_deidrj(double*); void compute_dbidrj_nonsymm(); void copy_dbi2dbvec(); double compute_sfac(double, double); @@ -80,6 +82,7 @@ public: int twojmax, diagonalstyle; double*** uarraytot_r, *** uarraytot_i; double***** zarray_r, ***** zarray_i; + double*** 
yarray_r, *** yarray_i; double*** uarraytot_r_b, *** uarraytot_i_b; double***** zarray_r_b, ***** zarray_i_b; double*** uarray_r, *** uarray_i; From 6d84bd6138fedd3738979e791244ccc4f38b6606 Mon Sep 17 00:00:00 2001 From: Aidan Thompson Date: Fri, 10 May 2019 10:34:01 -0600 Subject: [PATCH 02/21] Added compute_beta() --- src/SNAP/pair_snap.cpp | 43 +++++++++++++++++++++--------------------- src/SNAP/pair_snap.h | 3 +++ src/SNAP/sna.cpp | 8 ++++---- 3 files changed, 29 insertions(+), 25 deletions(-) diff --git a/src/SNAP/pair_snap.cpp b/src/SNAP/pair_snap.cpp index 73faaa71f7..268e5663da 100644 --- a/src/SNAP/pair_snap.cpp +++ b/src/SNAP/pair_snap.cpp @@ -116,6 +116,7 @@ PairSNAP::~PairSNAP() memory->destroy(radelem); memory->destroy(wjelem); memory->destroy(coeffelem); + memory->destroy(beta); } // Need to set this because restart not handled by PairHybrid @@ -246,14 +247,15 @@ void PairSNAP::compute_regular(int eflag, int vflag) } // for neighbors of I within cutoff: - // compute dUi/drj and dBi/drj - // Fij = dEi/dRj = -dEi/dRi => add to Fi, subtract from Fj + // compute Fij = dEi/dRj = -dEi/dRi + // add to Fi, subtract from Fj - double* coeffi = coeffelem[ielem]; + // compute dE_i/dB_i = beta_i - // omit beta0 from beta vector + compute_betai(ielem); + + // compute beta_i*Z_i = Y_i - double* beta = coeffi+1; snaptr->compute_yi(beta); for (int jj = 0; jj < ninside; jj++) { @@ -261,22 +263,6 @@ void PairSNAP::compute_regular(int eflag, int vflag) snaptr->compute_duidrj(snaptr->rij[jj], snaptr->wj[jj],snaptr->rcutij[jj]); -// snaptr->compute_dbidrj(); -// snaptr->copy_dbi2dbvec(); - -// fij[0] = 0.0; -// fij[1] = 0.0; -// fij[2] = 0.0; - -// // linear contributions - -// for (int k = 1; k <= ncoeff; k++) { -// double bgb = coeffi[k]; -// fij[0] += bgb*snaptr->dbvec[k-1][0]; -// fij[1] += bgb*snaptr->dbvec[k-1][1]; -// fij[2] += bgb*snaptr->dbvec[k-1][2]; -// } - // // quadratic contributions // if (quadraticflag) { @@ -326,6 +312,7 @@ void 
PairSNAP::compute_regular(int eflag, int vflag) // evdwl = energy of atom I, sum over coeffs_k * Bi_k + double* coeffi = coeffelem[ielem]; evdwl = coeffi[0]; if (!quadraticflag) { snaptr->compute_bi(); @@ -1306,6 +1293,18 @@ void PairSNAP::build_per_atom_arrays() #endif } +/* ---------------------------------------------------------------------- + compute beta_i +------------------------------------------------------------------------- */ + +void PairSNAP::compute_betai(int ielem) +{ + double* coeffi = coeffelem[ielem]; + + for (int k = 1; k <= ncoeff; k++) + beta[k-1] = coeffi[k]; +} + /* ---------------------------------------------------------------------- allocate all arrays ------------------------------------------------------------------------- */ @@ -1434,6 +1433,7 @@ void PairSNAP::coeff(int narg, char **arg) memory->destroy(radelem); memory->destroy(wjelem); memory->destroy(coeffelem); + memory->destroy(beta); } char* type1 = arg[0]; @@ -1631,6 +1631,7 @@ void PairSNAP::read_files(char *coefffilename, char *paramfilename) memory->create(radelem,nelements,"pair:radelem"); memory->create(wjelem,nelements,"pair:wjelem"); memory->create(coeffelem,nelements,ncoeffall,"pair:coeffelem"); + memory->create(beta,ncoeffall,"pair:beta"); // Loop over nelements blocks in the SNAP coefficient file diff --git a/src/SNAP/pair_snap.h b/src/SNAP/pair_snap.h index 1fa065755c..33d1fb8bc9 100644 --- a/src/SNAP/pair_snap.h +++ b/src/SNAP/pair_snap.h @@ -55,6 +55,8 @@ protected: void set_sna_to_shared(int snaid,int i); void build_per_atom_arrays(); + void compute_betai(int); + int schedule_user; double schedule_time_guided; double schedule_time_dynamic; @@ -99,6 +101,7 @@ protected: double *radelem; // element radii double *wjelem; // elements weights double **coeffelem; // element bispectrum coefficients + double* beta; // beta for current atom int *map; // mapping from atom types to elements int twojmax, diagonalstyle, switchflag, bzeroflag; double rfac0, rmin0, wj1, wj2; 
diff --git a/src/SNAP/sna.cpp b/src/SNAP/sna.cpp index d30d94dc9d..b729e4d0d6 100644 --- a/src/SNAP/sna.cpp +++ b/src/SNAP/sna.cpp @@ -1653,10 +1653,10 @@ void SNA::create_twojmax_arrays() "sna:uarraytot"); memory->create(zarray_i, jdim, jdim, jdim, jdim, jdim, "sna:zarray"); - memory->create(yarray_r, jdim, jdim, jdim, - "sna:yarray"); - memory->create(yarray_i, jdim, jdim, jdim, - "sna:yarray"); + memory->create(yarray_r, jdim, jdim, jdim, + "sna:yarray"); + memory->create(yarray_i, jdim, jdim, jdim, + "sna:yarray"); } } From a1f421cd5400de9cb0c486cd7b32e936f09c9eb0 Mon Sep 17 00:00:00 2001 From: Aidan Thompson Date: Sat, 11 May 2019 12:41:54 -0600 Subject: [PATCH 03/21] Moved compute_beta outside of main force loop --- src/SNAP/pair_snap.cpp | 37 +++++++++++++++++++++++++------------ src/SNAP/pair_snap.h | 5 +++-- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/src/SNAP/pair_snap.cpp b/src/SNAP/pair_snap.cpp index 268e5663da..0bf367b5dc 100644 --- a/src/SNAP/pair_snap.cpp +++ b/src/SNAP/pair_snap.cpp @@ -86,7 +86,7 @@ PairSNAP::PairSNAP(LAMMPS *lmp) : Pair(lmp) i_uarraytot_r = NULL; i_uarraytot_i = NULL; i_zarray_r = NULL; - i_zarray_i =NULL; + i_zarray_i = NULL; use_shared_arrays = 0; @@ -101,6 +101,7 @@ PairSNAP::PairSNAP(LAMMPS *lmp) : Pair(lmp) sna = NULL; + beta_max = 0; } /* ---------------------------------------------------------------------- */ @@ -189,6 +190,15 @@ void PairSNAP::compute_regular(int eflag, int vflag) int newton_pair = force->newton_pair; class SNA* snaptr = sna[0]; + if (beta_max < list->inum) { + memory->grow(beta,list->inum,ncoeff,"PairSNAP:beta"); + beta_max = list->inum; + } + + // compute dE_i/dB_i = beta_i for all i in list + + compute_beta(); + numneigh = list->numneigh; firstneigh = list->firstneigh; @@ -250,13 +260,9 @@ void PairSNAP::compute_regular(int eflag, int vflag) // compute Fij = dEi/dRj = -dEi/dRi // add to Fi, subtract from Fj - // compute dE_i/dB_i = beta_i - - compute_betai(ielem); - // compute 
beta_i*Z_i = Y_i - snaptr->compute_yi(beta); + snaptr->compute_yi(beta[ii]); for (int jj = 0; jj < ninside; jj++) { int j = snaptr->inside[jj]; @@ -1294,15 +1300,23 @@ void PairSNAP::build_per_atom_arrays() } /* ---------------------------------------------------------------------- - compute beta_i + compute beta ------------------------------------------------------------------------- */ -void PairSNAP::compute_betai(int ielem) +void PairSNAP::compute_beta() { - double* coeffi = coeffelem[ielem]; + int i; + int *type = atom->type; - for (int k = 1; k <= ncoeff; k++) - beta[k-1] = coeffi[k]; + for (int ii = 0; ii < list->inum; ii++) { + i = list->ilist[ii]; + const int itype = type[i]; + const int ielem = map[itype]; + double* coeffi = coeffelem[ielem]; + + for (int k = 1; k <= ncoeff; k++) + beta[ii][k-1] = coeffi[k]; + } } /* ---------------------------------------------------------------------- @@ -1631,7 +1645,6 @@ void PairSNAP::read_files(char *coefffilename, char *paramfilename) memory->create(radelem,nelements,"pair:radelem"); memory->create(wjelem,nelements,"pair:wjelem"); memory->create(coeffelem,nelements,ncoeffall,"pair:coeffelem"); - memory->create(beta,ncoeffall,"pair:beta"); // Loop over nelements blocks in the SNAP coefficient file diff --git a/src/SNAP/pair_snap.h b/src/SNAP/pair_snap.h index 33d1fb8bc9..94d21162e2 100644 --- a/src/SNAP/pair_snap.h +++ b/src/SNAP/pair_snap.h @@ -55,7 +55,7 @@ protected: void set_sna_to_shared(int snaid,int i); void build_per_atom_arrays(); - void compute_betai(int); + void compute_beta(); int schedule_user; double schedule_time_guided; @@ -101,11 +101,12 @@ protected: double *radelem; // element radii double *wjelem; // elements weights double **coeffelem; // element bispectrum coefficients - double* beta; // beta for current atom + double** beta; // betas for all atoms in list int *map; // mapping from atom types to elements int twojmax, diagonalstyle, switchflag, bzeroflag; double rfac0, rmin0, wj1, wj2; int 
rcutfacflag, twojmaxflag; // flags for required parameters + int beta_max; // length of beta }; } From e13c661f774ea40f39b042f46d540c703fb88c5d Mon Sep 17 00:00:00 2001 From: Aidan Thompson Date: Sat, 11 May 2019 12:54:18 -0600 Subject: [PATCH 04/21] Added placeholder for neural network SNAP potential --- src/SNAP/pair_nn_snap.cpp | 1824 +++++++++++++++++++++++++++++++++++++ src/SNAP/pair_nn_snap.h | 184 ++++ 2 files changed, 2008 insertions(+) create mode 100644 src/SNAP/pair_nn_snap.cpp create mode 100644 src/SNAP/pair_nn_snap.h diff --git a/src/SNAP/pair_nn_snap.cpp b/src/SNAP/pair_nn_snap.cpp new file mode 100644 index 0000000000..e90f6d6b1b --- /dev/null +++ b/src/SNAP/pair_nn_snap.cpp @@ -0,0 +1,1824 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include +#include +#include +#include "pair_nn_snap.h" +#include "atom.h" +#include "atom_vec.h" +#include "force.h" +#include "comm.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "sna.h" +#include "openmp_snap.h" +#include "domain.h" +#include "memory.h" +#include "error.h" + +#include + +using namespace LAMMPS_NS; + +#define MAXLINE 1024 +#define MAXWORD 3 + +// Outstanding issues with quadratic term +// 1. 
there seems to a problem with compute_optimized energy calc +// it does not match compute_regular, even when quadratic coeffs = 0 + +/* ---------------------------------------------------------------------- */ + +PairNNSNAP::PairNNSNAP(LAMMPS *lmp) : Pair(lmp) +{ + single_enable = 0; + restartinfo = 0; + one_coeff = 1; + manybody_flag = 1; + + nelements = 0; + elements = NULL; + radelem = NULL; + wjelem = NULL; + coeffelem = NULL; + + nmax = 0; + nthreads = 1; + + schedule_user = 0; + schedule_time_guided = -1; + schedule_time_dynamic = -1; + ncalls_neigh =-1; + + ilistmask_max = 0; + ilistmask = NULL; + ghostinum = 0; + ghostilist_max = 0; + ghostilist = NULL; + ghostnumneigh_max = 0; + ghostnumneigh = NULL; + ghostneighs = NULL; + ghostfirstneigh = NULL; + ghostneighs_total = 0; + ghostneighs_max = 0; + + i_max = 0; + i_neighmax = 0; + i_numpairs = 0; + i_rij = NULL; + i_inside = NULL; + i_wj = NULL; + i_rcutij = NULL; + i_ninside = NULL; + i_pairs = NULL; + i_uarraytot_r = NULL; + i_uarraytot_i = NULL; + i_zarray_r = NULL; + i_zarray_i = NULL; + + use_shared_arrays = 0; + +#ifdef TIMING_INFO + timers[0] = 0; + timers[1] = 0; + timers[2] = 0; + timers[3] = 0; +#endif + + // Need to set this because restart not handled by PairHybrid + + sna = NULL; + + beta_max = 0; +} + +/* ---------------------------------------------------------------------- */ + +PairNNSNAP::~PairNNSNAP() +{ + if (copymode) return; + + if (nelements) { + for (int i = 0; i < nelements; i++) + delete[] elements[i]; + delete[] elements; + memory->destroy(radelem); + memory->destroy(wjelem); + memory->destroy(coeffelem); + memory->destroy(beta); + } + + // Need to set this because restart not handled by PairHybrid + + if (sna) { + +#ifdef TIMING_INFO + double time[5]; + double timeave[5]; + double timeave_mpi[5]; + double timemax_mpi[5]; + + for (int i = 0; i < 5; i++) { + time[i] = 0; + timeave[i] = 0; + for (int tid = 0; tidtimers[i]>time[i]) + time[i] = sna[tid]->timers[i]; + timeave[i] += 
sna[tid]->timers[i]; + } + timeave[i] /= nthreads; + } + MPI_Reduce(timeave, timeave_mpi, 5, MPI_DOUBLE, MPI_SUM, 0, world); + MPI_Reduce(time, timemax_mpi, 5, MPI_DOUBLE, MPI_MAX, 0, world); +#endif + + for (int tid = 0; tiddestroy(setflag); + memory->destroy(cutsq); + memory->destroy(map); + } + +} + +void PairNNSNAP::compute(int eflag, int vflag) +{ +// if (use_optimized) +// compute_optimized(eflag, vflag); +// else + +// hard-code compute_regular() + + compute_regular(eflag, vflag); +} + +/* ---------------------------------------------------------------------- + This version is a straightforward implementation + ---------------------------------------------------------------------- */ + +void PairNNSNAP::compute_regular(int eflag, int vflag) +{ + int i,j,jnum,ninside; + double delx,dely,delz,evdwl,rsq; + double fij[3]; + int *jlist,*numneigh,**firstneigh; + evdwl = 0.0; + + ev_init(eflag,vflag); + + double **x = atom->x; + double **f = atom->f; + int *type = atom->type; + int nlocal = atom->nlocal; + int newton_pair = force->newton_pair; + class SNA* snaptr = sna[0]; + + if (beta_max < list->inum) { + memory->grow(beta,list->inum,ncoeff,"PairNNSNAP:beta"); + beta_max = list->inum; + } + + // compute dE_i/dB_i = beta_i for all i in list + + compute_beta(); + + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + for (int ii = 0; ii < list->inum; ii++) { + i = list->ilist[ii]; + + const double xtmp = x[i][0]; + const double ytmp = x[i][1]; + const double ztmp = x[i][2]; + const int itype = type[i]; + const int ielem = map[itype]; + const double radi = radelem[ielem]; + + jlist = firstneigh[i]; + jnum = numneigh[i]; + + // insure rij, inside, wj, and rcutij are of size jnum + + snaptr->grow_rij(jnum); + + // rij[][3] = displacements between atom I and those neighbors + // inside = indices of neighbors of I within cutoff + // wj = weights for neighbors of I within cutoff + // rcutij = cutoffs for neighbors of I within cutoff + // note Rij sign 
convention => dU/dRij = dU/dRj = -dU/dRi + + ninside = 0; + for (int jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + delx = x[j][0] - xtmp; + dely = x[j][1] - ytmp; + delz = x[j][2] - ztmp; + rsq = delx*delx + dely*dely + delz*delz; + int jtype = type[j]; + int jelem = map[jtype]; + + if (rsq < cutsq[itype][jtype]&&rsq>1e-20) { + snaptr->rij[ninside][0] = delx; + snaptr->rij[ninside][1] = dely; + snaptr->rij[ninside][2] = delz; + snaptr->inside[ninside] = j; + snaptr->wj[ninside] = wjelem[jelem]; + snaptr->rcutij[ninside] = (radi + radelem[jelem])*rcutfac; + ninside++; + } + } + + // compute Ui, Zi, and Bi for atom I + + snaptr->compute_ui(ninside); + snaptr->compute_zi(); + if (quadraticflag) { + snaptr->compute_bi(); + snaptr->copy_bi2bvec(); + } + + // for neighbors of I within cutoff: + // compute Fij = dEi/dRj = -dEi/dRi + // add to Fi, subtract from Fj + + // compute beta_i*Z_i = Y_i + + snaptr->compute_yi(beta[ii]); + + for (int jj = 0; jj < ninside; jj++) { + int j = snaptr->inside[jj]; + snaptr->compute_duidrj(snaptr->rij[jj], + snaptr->wj[jj],snaptr->rcutij[jj]); + +// // quadratic contributions + +// if (quadraticflag) { +// int k = ncoeff+1; +// for (int icoeff = 0; icoeff < ncoeff; icoeff++) { +// double bveci = snaptr->bvec[icoeff]; +// double fack = coeffi[k]*bveci; +// double* dbveci = snaptr->dbvec[icoeff]; +// fij[0] += fack*dbveci[0]; +// fij[1] += fack*dbveci[1]; +// fij[2] += fack*dbveci[2]; +// k++; +// for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { +// double facki = coeffi[k]*bveci; +// double fackj = coeffi[k]*snaptr->bvec[jcoeff]; +// double* dbvecj = snaptr->dbvec[jcoeff]; + +// fij[0] += facki*dbvecj[0]+fackj*dbveci[0]; +// fij[1] += facki*dbvecj[1]+fackj*dbveci[1]; +// fij[2] += facki*dbvecj[2]+fackj*dbveci[2]; +// k++; +// } +// } +// } + + snaptr->compute_deidrj(fij); + + f[i][0] += fij[0]; + f[i][1] += fij[1]; + f[i][2] += fij[2]; + f[j][0] -= fij[0]; + f[j][1] -= fij[1]; + f[j][2] -= fij[2]; + + // tally 
per-atom virial contribution + + if (vflag) + ev_tally_xyz(i,j,nlocal,newton_pair,0.0,0.0, + fij[0],fij[1],fij[2], + -snaptr->rij[jj][0],-snaptr->rij[jj][1], + -snaptr->rij[jj][2]); + } + + // tally energy contribution + + if (eflag) { + + // evdwl = energy of atom I, sum over coeffs_k * Bi_k + + double* coeffi = coeffelem[ielem]; + evdwl = coeffi[0]; + if (!quadraticflag) { + snaptr->compute_bi(); + snaptr->copy_bi2bvec(); + } + + // E = beta.B + 0.5*B^t.alpha.B + // coeff[k] = beta[k-1] or + // coeff[k] = alpha_ii or + // coeff[k] = alpha_ij = alpha_ji, j != i + + // linear contributions + + for (int k = 1; k <= ncoeff; k++) + evdwl += coeffi[k]*snaptr->bvec[k-1]; + + // quadratic contributions + + if (quadraticflag) { + int k = ncoeff+1; + for (int icoeff = 0; icoeff < ncoeff; icoeff++) { + double bveci = snaptr->bvec[icoeff]; + evdwl += 0.5*coeffi[k++]*bveci*bveci; + for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { + evdwl += coeffi[k++]*bveci*snaptr->bvec[jcoeff]; + } + } + } + ev_tally_full(i,2.0*evdwl,0.0,0.0,0.0,0.0,0.0); + } + + } + + if (vflag_fdotr) virial_fdotr_compute(); +} + + +/* ---------------------------------------------------------------------- + This version is optimized for threading, micro-load balancing + ---------------------------------------------------------------------- */ + +void PairNNSNAP::compute_optimized(int eflag, int vflag) +{ + // if reneighboring took place do load_balance if requested + if (do_load_balance > 0 && + (neighbor->ncalls != ncalls_neigh)) { + ghostinum = 0; + // reset local ghost neighbor lists + ncalls_neigh = neighbor->ncalls; + if (ilistmask_max < list->inum) { + memory->grow(ilistmask,list->inum,"PairSnap::ilistmask"); + ilistmask_max = list->inum; + } + for (int i = 0; i < list->inum; i++) + ilistmask[i] = 1; + + //multiple passes for loadbalancing + for (int i = 0; i < do_load_balance; i++) + load_balance(); + } + + int numpairs = 0; + for (int ii = 0; ii < list->inum; ii++) { + if ((do_load_balance 
<= 0) || ilistmask[ii]) { + int i = list->ilist[ii]; + int jnum = list->numneigh[i]; + numpairs += jnum; + } + } + + if (do_load_balance) + for (int ii = 0; ii < ghostinum; ii++) { + int i = ghostilist[ii]; + int jnum = ghostnumneigh[i]; + numpairs += jnum; + } + + // optimized schedule setting + + int time_dynamic = 0; + int time_guided = 0; + + if (schedule_user == 0) schedule_user = 4; + + switch (schedule_user) { + case 1: + omp_set_schedule(omp_sched_static,1); + break; + case 2: + omp_set_schedule(omp_sched_dynamic,1); + break; + case 3: + omp_set_schedule(omp_sched_guided,2); + break; + case 4: + omp_set_schedule(omp_sched_auto,0); + break; + case 5: + if (numpairs < 8*nthreads) omp_set_schedule(omp_sched_dynamic,1); + else if (schedule_time_guided < 0.0) { + omp_set_schedule(omp_sched_guided,2); + if (!eflag && !vflag) time_guided = 1; + } else if (schedule_time_dynamic<0.0) { + omp_set_schedule(omp_sched_dynamic,1); + if (!eflag && !vflag) time_dynamic = 1; + } else if (schedule_time_guidedcreate(pairs_tid_unique,numpairs,4,"numpairs"); + pairs = pairs_tid_unique; + } + + if (!use_shared_arrays) { + numpairs = 0; + for (int ii = 0; ii < list->inum; ii++) { + if ((do_load_balance <= 0) || ilistmask[ii]) { + int i = list->ilist[ii]; + int jnum = list->numneigh[i]; + for (int jj = 0; jjx; + double **f = atom->f; + int *type = atom->type; + int nlocal = atom->nlocal; + int newton_pair = force->newton_pair; + + numneigh = list->numneigh; + firstneigh = list->firstneigh; + +#ifdef TIMING_INFO + // only update micro timers after setup + static int count=0; + if (count<2) { + sna[tid]->timers[0] = 0; + sna[tid]->timers[1] = 0; + sna[tid]->timers[2] = 0; + sna[tid]->timers[3] = 0; + sna[tid]->timers[4] = 0; + } + count++; +#endif + + // did thread start working on interactions of new atom + int iold = -1; + + double starttime, endtime; + if (time_dynamic || time_guided) + starttime = MPI_Wtime(); + +#if defined(_OPENMP) +#pragma omp for schedule(runtime) +#endif + 
for (int iijj = 0; iijj < numpairs; iijj++) { + int i = 0; + if (use_shared_arrays) { + i = i_pairs[iijj][0]; + if (iold != i) { + set_sna_to_shared(tid,i_pairs[iijj][3]); + ielem = map[type[i]]; + } + iold = i; + } else { + i = pairs[iijj][0]; + if (iold != i) { + iold = i; + const double xtmp = x[i][0]; + const double ytmp = x[i][1]; + const double ztmp = x[i][2]; + const int itype = type[i]; + ielem = map[itype]; + const double radi = radelem[ielem]; + + if (i < nlocal) { + jlist = firstneigh[i]; + jnum = numneigh[i]; + } else { + jlist = ghostneighs+ghostfirstneigh[i]; + jnum = ghostnumneigh[i]; + } + + // insure rij, inside, wj, and rcutij are of size jnum + + sna[tid]->grow_rij(jnum); + + // rij[][3] = displacements between atom I and those neighbors + // inside = indices of neighbors of I within cutoff + // wj = weights of neighbors of I within cutoff + // rcutij = cutoffs of neighbors of I within cutoff + // note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi + + ninside = 0; + for (jj = 0; jj < jnum; jj++) { + int j = jlist[jj]; + j &= NEIGHMASK; + delx = x[j][0] - xtmp; //unitialised + dely = x[j][1] - ytmp; + delz = x[j][2] - ztmp; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + int jelem = map[jtype]; + + if (rsq < cutsq[itype][jtype]&&rsq>1e-20) { //unitialised + sna[tid]->rij[ninside][0] = delx; + sna[tid]->rij[ninside][1] = dely; + sna[tid]->rij[ninside][2] = delz; + sna[tid]->inside[ninside] = j; + sna[tid]->wj[ninside] = wjelem[jelem]; + sna[tid]->rcutij[ninside] = (radi + radelem[jelem])*rcutfac; + ninside++; + + // update index list with inside index + pairs[iijj + (jj - pairs[iijj][1])][2] = + ninside-1; //unitialised + } + } + + // compute Ui and Zi for atom I + + sna[tid]->compute_ui(ninside); //unitialised + sna[tid]->compute_zi(); + } + } + if (quadraticflag) { + sna[tid]->compute_bi(); + sna[tid]->copy_bi2bvec(); + } + + // for neighbors of I within cutoff: + // compute dUi/drj and dBi/drj + // Fij = dEi/dRj = -dEi/dRi => 
add to Fi, subtract from Fj + + // entry into loop if inside index is set + + double* coeffi = coeffelem[ielem]; + + if (pairs[iijj][2] >= 0) { + jj = pairs[iijj][2]; + int j = sna[tid]->inside[jj]; + sna[tid]->compute_duidrj(sna[tid]->rij[jj], + sna[tid]->wj[jj],sna[tid]->rcutij[jj]); + + sna[tid]->compute_dbidrj(); + sna[tid]->copy_dbi2dbvec(); + + fij[0] = 0.0; + fij[1] = 0.0; + fij[2] = 0.0; + + // linear contributions + + for (k = 1; k <= ncoeff; k++) { + double bgb = coeffi[k]; + fij[0] += bgb*sna[tid]->dbvec[k-1][0]; + fij[1] += bgb*sna[tid]->dbvec[k-1][1]; + fij[2] += bgb*sna[tid]->dbvec[k-1][2]; + } + + // quadratic contributions + + if (quadraticflag) { + int k = ncoeff+1; + for (int icoeff = 0; icoeff < ncoeff; icoeff++) { + double bveci = sna[tid]->bvec[icoeff]; + double fack = coeffi[k]*bveci; + double* dbveci = sna[tid]->dbvec[icoeff]; + fij[0] += fack*sna[tid]->dbvec[icoeff][0]; + fij[1] += fack*sna[tid]->dbvec[icoeff][1]; + fij[2] += fack*sna[tid]->dbvec[icoeff][2]; + k++; + for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { + double facki = coeffi[k]*bveci; + double fackj = coeffi[k]*sna[tid]->bvec[jcoeff]; + double* dbvecj = sna[tid]->dbvec[jcoeff]; + fij[0] += facki*dbvecj[0]+fackj*dbveci[0]; + fij[1] += facki*dbvecj[1]+fackj*dbveci[1]; + fij[2] += facki*dbvecj[2]+fackj*dbveci[2]; + k++; + } + } + } + +#if defined(_OPENMP) +#pragma omp critical +#endif + { + f[i][0] += fij[0]; + f[i][1] += fij[1]; + f[i][2] += fij[2]; + f[j][0] -= fij[0]; + f[j][1] -= fij[1]; + f[j][2] -= fij[2]; + + // tally per-atom virial contribution + + if (vflag) + ev_tally_xyz(i,j,nlocal,newton_pair,0.0,0.0, + fij[0],fij[1],fij[2], + -sna[tid]->rij[jj][0],-sna[tid]->rij[jj][1], + -sna[tid]->rij[jj][2]); + } + } + + // evdwl = energy of atom I, sum over coeffs_k * Bi_k + // only call this for first pair of each atom i + // if atom has no pairs, eatom=0, which is wrong + + if (eflag&&pairs[iijj][1] == 0) { + evdwl = coeffi[0]; + + if (!quadraticflag) { + 
sna[tid]->compute_bi(); + sna[tid]->copy_bi2bvec(); + } + + // E = beta.B + 0.5*B^t.alpha.B + // coeff[k] = beta[k-1] or + // coeff[k] = alpha_ii or + // coeff[k] = alpha_ij = alpha_ji, j != i + + // linear contributions + + for (int k = 1; k <= ncoeff; k++) + evdwl += coeffi[k]*sna[tid]->bvec[k-1]; + + // quadratic contributions + + if (quadraticflag) { + int k = ncoeff+1; + for (int icoeff = 0; icoeff < ncoeff; icoeff++) { + double bveci = sna[tid]->bvec[icoeff]; + evdwl += 0.5*coeffi[k++]*bveci*bveci; + for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { + evdwl += coeffi[k++]*bveci*sna[tid]->bvec[jcoeff]; + } + } + } + +#if defined(_OPENMP) +#pragma omp critical +#endif + ev_tally_full(i,2.0*evdwl,0.0,0.0,0.0,0.0,0.0); + } + + } + if (time_dynamic || time_guided) + endtime = MPI_Wtime(); + if (time_dynamic) schedule_time_dynamic = endtime - starttime; + if (time_guided) schedule_time_guided = endtime - starttime; + if (!use_shared_arrays) memory->destroy(pairs); + + }// end of pragma omp parallel + + if (vflag_fdotr) virial_fdotr_compute(); + +} + +inline int PairNNSNAP::equal(double* x,double* y) +{ + double dist2 = + (x[0]-y[0])*(x[0]-y[0]) + + (x[1]-y[1])*(x[1]-y[1]) + + (x[2]-y[2])*(x[2]-y[2]); + if (dist2 < 1e-20) return 1; + return 0; +} + +inline double PairNNSNAP::dist2(double* x,double* y) +{ + return + (x[0]-y[0])*(x[0]-y[0]) + + (x[1]-y[1])*(x[1]-y[1]) + + (x[2]-y[2])*(x[2]-y[2]); +} + +// return extra communication cutoff +// extra_cutoff = max(subdomain_length) + +double PairNNSNAP::extra_cutoff() +{ + double sublo[3],subhi[3]; + + if (domain->triclinic == 0) { + for (int dim = 0 ; dim < 3 ; dim++) { + sublo[dim] = domain->sublo[dim]; + subhi[dim] = domain->subhi[dim]; + } + } else { + domain->lamda2x(domain->sublo_lamda,sublo); + domain->lamda2x(domain->subhi_lamda,subhi); + } + + double sub_size[3]; + for (int dim = 0; dim < 3; dim++) + sub_size[dim] = subhi[dim] - sublo[dim]; + + double max_sub_size = 0; + for (int dim = 0; dim < 3; dim++) 
+ max_sub_size = MAX(max_sub_size,sub_size[dim]); + + // note: for triclinic, probably need something different + // see Comm::setup() + + return max_sub_size; +} + +// micro load_balancer: each MPI process will +// check with each of its 26 neighbors, +// whether an imbalance exists in the number +// of atoms to calculate forces for. +// If it does, it will set ilistmask of one of +// its local atoms to zero, and send its Tag +// to the neighbor process. The neighboring process +// will check its ghost list for the +// ghost atom with the same Tag which is closest +// to its domain center, and build a +// neighborlist for this ghost atom. For this to work, +// the communication cutoff has to be at least +// as large as the neighbor cutoff + +// maximum subdomain length. + +// Note that at most one atom is exchanged per processor pair. + +// Also note that the local atom assignment +// doesn't change. This load balancer will cause +// some ghost atoms to have full neighborlists +// which are unique to PairNNSNAP. +// They are not part of the generally accessible neighborlist. +// At the same time corresponding local atoms on +// other MPI processes will not be +// included in the force computation since +// their ilistmask is 0. This does not affect +// any other classes which might +// access the same general neighborlist. +// Reverse communication (newton on) of forces is required. + +// Currently the load balancer does two passes, +// since it is exchanging atoms upstream and downstream. 
+ +void PairNNSNAP::load_balance() +{ + double sublo[3],subhi[3]; + if (domain->triclinic == 0) { + double* sublotmp = domain->sublo; + double* subhitmp = domain->subhi; + for (int dim = 0 ; dim<3 ; dim++) { + sublo[dim]=sublotmp[dim]; + subhi[dim]=subhitmp[dim]; + } + } else { + double* sublotmp = domain->sublo_lamda; + double* subhitmp = domain->subhi_lamda; + domain->lamda2x(sublotmp,sublo); + domain->lamda2x(subhitmp,subhi); + } + + //if (list->inum==0) list->grow(atom->nmax); + + int nlocal = ghostinum; + for (int i=0; i < list->inum; i++) + if (ilistmask[i]) nlocal++; + int ***grid2proc = comm->grid2proc; + int* procgrid = comm->procgrid; + + int nlocal_up,nlocal_down; + MPI_Request request; + + double sub_mid[3]; + for (int dim=0; dim<3; dim++) + sub_mid[dim] = (subhi[dim] + sublo[dim])/2; + + if (comm->cutghostuser < + neighbor->cutneighmax+extra_cutoff()) + error->all(FLERR,"Communication cutoff too small for SNAP micro load balancing"); + + int nrecv = ghostinum; + int totalsend = 0; + int nsend = 0; + int depth = 1; + + for (int dx = -depth; dx < depth+1; dx++) + for (int dy = -depth; dy < depth+1; dy++) + for (int dz = -depth; dz < depth+1; dz++) { + + if (dx == dy && dy == dz && dz == 0) continue; + + int sendloc[3] = {comm->myloc[0], + comm->myloc[1], comm->myloc[2] + }; + sendloc[0] += dx; + sendloc[1] += dy; + sendloc[2] += dz; + for (int dim = 0; dim < 3; dim++) + if (sendloc[dim] >= procgrid[dim]) + sendloc[dim] = sendloc[dim] - procgrid[dim]; + for (int dim = 0; dim < 3; dim++) + if (sendloc[dim] < 0) + sendloc[dim] = procgrid[dim] + sendloc[dim]; + int recvloc[3] = {comm->myloc[0], + comm->myloc[1], comm->myloc[2] + }; + recvloc[0] -= dx; + recvloc[1] -= dy; + recvloc[2] -= dz; + for (int dim = 0; dim < 3; dim++) + if (recvloc[dim] < 0) + recvloc[dim] = procgrid[dim] + recvloc[dim]; + for (int dim = 0; dim < 3; dim++) + if (recvloc[dim] >= procgrid[dim]) + recvloc[dim] = recvloc[dim] - procgrid[dim]; + + int sendproc = 
grid2proc[sendloc[0]][sendloc[1]][sendloc[2]]; + int recvproc = grid2proc[recvloc[0]][recvloc[1]][recvloc[2]]; + + // two stage process, first upstream movement, then downstream + + MPI_Sendrecv(&nlocal,1,MPI_INT,sendproc,0, + &nlocal_up,1,MPI_INT,recvproc,0,world,MPI_STATUS_IGNORE); + MPI_Sendrecv(&nlocal,1,MPI_INT,recvproc,0, + &nlocal_down,1,MPI_INT,sendproc,0,world,MPI_STATUS_IGNORE); + nsend = 0; + + // send upstream + + if (nlocal > nlocal_up+1) { + + int i = totalsend++; + while(i < list->inum && ilistmask[i] == 0) + i = totalsend++; + + if (i < list->inum) + MPI_Isend(&atom->tag[i],1,MPI_INT,recvproc,0,world,&request); + else { + int j = -1; + MPI_Isend(&j,1,MPI_INT,recvproc,0,world,&request); + } + + if (i < list->inum) { + for (int j = 0; j < list->inum; j++) + if (list->ilist[j] == i) + ilistmask[j] = 0; + nsend = 1; + } + } + + // recv downstream + + if (nlocal < nlocal_down-1) { + nlocal++; + int get_tag = -1; + MPI_Recv(&get_tag,1,MPI_INT,sendproc,0,world,MPI_STATUS_IGNORE); + + // if get_tag -1 the other process didnt have local atoms to send + + if (get_tag >= 0) { + if (ghostinum >= ghostilist_max) { + memory->grow(ghostilist,ghostinum+10, + "PairSnap::ghostilist"); + ghostilist_max = ghostinum+10; + } + if (atom->nlocal + atom->nghost >= ghostnumneigh_max) { + ghostnumneigh_max = atom->nlocal+atom->nghost+100; + memory->grow(ghostnumneigh,ghostnumneigh_max, + "PairSnap::ghostnumneigh"); + memory->grow(ghostfirstneigh,ghostnumneigh_max, + "PairSnap::ghostfirstneigh"); + } + + // find closest ghost image of the transfered particle + + double mindist = 1e200; + int closestghost = -1; + for (int j = 0; j < atom->nlocal + atom->nghost; j++) + if (atom->tag[j] == get_tag) + if (dist2(sub_mid, atom->x[j]) < mindist) { + closestghost = j; + mindist = dist2(sub_mid, atom->x[j]); + } + + // build neighborlist for this particular + // ghost atom, and add it to list->ilist + + if (ghostneighs_max - ghostneighs_total < + neighbor->oneatom) { + 
memory->grow(ghostneighs, + ghostneighs_total + neighbor->oneatom, + "PairSnap::ghostneighs"); + ghostneighs_max = ghostneighs_total + neighbor->oneatom; + } + + int j = closestghost; + + ghostilist[ghostinum] = j; + ghostnumneigh[j] = 0; + ghostfirstneigh[j] = ghostneighs_total; + + ghostinum++; + int* jlist = ghostneighs + ghostfirstneigh[j]; + + // find all neighbors by looping + // over all local and ghost atoms + + for (int k = 0; k < atom->nlocal + atom->nghost; k++) + if (dist2(atom->x[j],atom->x[k]) < + neighbor->cutneighmax*neighbor->cutneighmax) { + jlist[ghostnumneigh[j]] = k; + ghostnumneigh[j]++; + ghostneighs_total++; + } + } + + if (get_tag >= 0) nrecv++; + } + + // decrease nlocal later, so that it is the + // initial number both for receiving and sending + + if (nsend) nlocal--; + + // second pass through the grid + + MPI_Sendrecv(&nlocal,1,MPI_INT,sendproc,0, + &nlocal_up,1,MPI_INT,recvproc,0,world,MPI_STATUS_IGNORE); + MPI_Sendrecv(&nlocal,1,MPI_INT,recvproc,0, + &nlocal_down,1,MPI_INT,sendproc,0,world,MPI_STATUS_IGNORE); + + // send downstream + + nsend=0; + if (nlocal > nlocal_down+1) { + int i = totalsend++; + while(i < list->inum && ilistmask[i]==0) i = totalsend++; + + if (i < list->inum) + MPI_Isend(&atom->tag[i],1,MPI_INT,sendproc,0,world,&request); + else { + int j = -1; + MPI_Isend(&j,1,MPI_INT,sendproc,0,world,&request); + } + + // clear the mask of every ilist entry that refers to the atom just sent + + if (i < list->inum) { + for (int j = 0; j < list->inum; j++) + if (list->ilist[j] == i) ilistmask[j] = 0; + nsend = 1; + } + } + + // receive upstream + + if (nlocal < nlocal_up-1) { + nlocal++; + int get_tag = -1; + + MPI_Recv(&get_tag,1,MPI_INT,recvproc,0,world,MPI_STATUS_IGNORE); + + if (get_tag >= 0) { + if (ghostinum >= ghostilist_max) { + memory->grow(ghostilist,ghostinum+10, + "PairSnap::ghostilist"); + ghostilist_max = ghostinum+10; + } + if (atom->nlocal + atom->nghost >= ghostnumneigh_max) { + ghostnumneigh_max = atom->nlocal + atom->nghost + 100; + memory->grow(ghostnumneigh,ghostnumneigh_max, + 
"PairSnap::ghostnumneigh"); + memory->grow(ghostfirstneigh,ghostnumneigh_max, + "PairSnap::ghostfirstneigh"); + } + + // find closest ghost image of the transferred particle + + double mindist = 1e200; + int closestghost = -1; + for (int j = 0; j < atom->nlocal + atom->nghost; j++) + if (atom->tag[j] == get_tag) + if (dist2(sub_mid,atom->x[j]) < mindist) { + closestghost = j; + mindist = dist2(sub_mid,atom->x[j]); + } + + // build neighborlist for this particular ghost atom + + if (ghostneighs_max-ghostneighs_total < neighbor->oneatom) { + memory->grow(ghostneighs,ghostneighs_total + neighbor->oneatom, + "PairSnap::ghostneighs"); + ghostneighs_max = ghostneighs_total + neighbor->oneatom; + } + + int j = closestghost; + + ghostilist[ghostinum] = j; + ghostnumneigh[j] = 0; + ghostfirstneigh[j] = ghostneighs_total; + + ghostinum++; + int* jlist = ghostneighs + ghostfirstneigh[j]; + + for (int k = 0; k < atom->nlocal + atom->nghost; k++) + if (dist2(atom->x[j],atom->x[k]) < + neighbor->cutneighmax*neighbor->cutneighmax) { + jlist[ghostnumneigh[j]] = k; + ghostnumneigh[j]++; + ghostneighs_total++; + } + } + + if (get_tag >= 0) nrecv++; + } + if (nsend) nlocal--; + } +} + +void PairNNSNAP::set_sna_to_shared(int snaid,int i) +{ + sna[snaid]->rij = i_rij[i]; + sna[snaid]->inside = i_inside[i]; + sna[snaid]->wj = i_wj[i]; + sna[snaid]->rcutij = i_rcutij[i]; + sna[snaid]->zarray_r = i_zarray_r[i]; + sna[snaid]->zarray_i = i_zarray_i[i]; + sna[snaid]->uarraytot_r = i_uarraytot_r[i]; + sna[snaid]->uarraytot_i = i_uarraytot_i[i]; +} + +void PairNNSNAP::build_per_atom_arrays() +{ + +#ifdef TIMING_INFO + clock_gettime(CLOCK_REALTIME,&starttime); +#endif + + int count = 0; + int neighmax = 0; + for (int ii = 0; ii < list->inum; ii++) + if ((do_load_balance <= 0) || ilistmask[ii]) { + neighmax=MAX(neighmax,list->numneigh[list->ilist[ii]]); + ++count; + } + for (int ii = 0; ii < ghostinum; ii++) { + neighmax=MAX(neighmax,ghostnumneigh[ghostilist[ii]]); + ++count; + } + + if (i_max < count || i_neighmax < neighmax) { + int i_maxt = MAX(count,i_max); 
+ i_neighmax = MAX(neighmax,i_neighmax); + memory->destroy(i_rij); + memory->destroy(i_inside); + memory->destroy(i_wj); + memory->destroy(i_rcutij); + memory->destroy(i_ninside); + memory->destroy(i_pairs); + memory->create(i_rij,i_maxt,i_neighmax,3,"PairNNSNAP::i_rij"); + memory->create(i_inside,i_maxt,i_neighmax,"PairNNSNAP::i_inside"); + memory->create(i_wj,i_maxt,i_neighmax,"PairNNSNAP::i_wj"); + memory->create(i_rcutij,i_maxt,i_neighmax,"PairNNSNAP::i_rcutij"); + memory->create(i_ninside,i_maxt,"PairNNSNAP::i_ninside"); + memory->create(i_pairs,i_maxt*i_neighmax,4,"PairNNSNAP::i_pairs"); + } + + if (i_max < count) { + int jdim = sna[0]->twojmax+1; + memory->destroy(i_uarraytot_r); + memory->destroy(i_uarraytot_i); + memory->create(i_uarraytot_r,count,jdim,jdim,jdim, + "PairNNSNAP::i_uarraytot_r"); + memory->create(i_uarraytot_i,count,jdim,jdim,jdim, + "PairNNSNAP::i_uarraytot_i"); + if (i_zarray_r != NULL) + for (int i = 0; i < i_max; i++) { + memory->destroy(i_zarray_r[i]); + memory->destroy(i_zarray_i[i]); + } + + delete [] i_zarray_r; + delete [] i_zarray_i; + i_zarray_r = new double*****[count]; + i_zarray_i = new double*****[count]; + for (int i = 0; i < count; i++) { + memory->create(i_zarray_r[i],jdim,jdim,jdim,jdim,jdim, + "PairNNSNAP::i_zarray_r"); + memory->create(i_zarray_i[i],jdim,jdim,jdim,jdim,jdim, + "PairNNSNAP::i_zarray_i"); + } + } + + if (i_max < count) + i_max = count; + + count = 0; + i_numpairs = 0; + for (int ii = 0; ii < list->inum; ii++) { + if ((do_load_balance <= 0) || ilistmask[ii]) { + int i = list->ilist[ii]; + int jnum = list->numneigh[i]; + int* jlist = list->firstneigh[i]; + const double xtmp = atom->x[i][0]; + const double ytmp = atom->x[i][1]; + const double ztmp = atom->x[i][2]; + const int itype = atom->type[i]; + const int ielem = map[itype]; + const double radi = radelem[ielem]; + int ninside = 0; + for (int jj = 0; jj < jnum; jj++) { + int j = jlist[jj]; + j &= NEIGHMASK; + const double delx = atom->x[j][0] - xtmp; + 
const double dely = atom->x[j][1] - ytmp; + const double delz = atom->x[j][2] - ztmp; + const double rsq = delx*delx + dely*dely + delz*delz; + int jtype = atom->type[j]; + int jelem = map[jtype]; + + i_pairs[i_numpairs][0] = i; + i_pairs[i_numpairs][1] = jj; + i_pairs[i_numpairs][2] = -1; + i_pairs[i_numpairs][3] = count; + if (rsq < cutsq[itype][jtype]&&rsq>1e-20) { + i_rij[count][ninside][0] = delx; + i_rij[count][ninside][1] = dely; + i_rij[count][ninside][2] = delz; + i_inside[count][ninside] = j; + i_wj[count][ninside] = wjelem[jelem]; + i_rcutij[count][ninside] = (radi + radelem[jelem])*rcutfac; + + // update index list with inside index + i_pairs[i_numpairs][2] = ninside++; + } + i_numpairs++; + } + i_ninside[count] = ninside; + count++; + } + } + + for (int ii = 0; ii < ghostinum; ii++) { + int i = ghostilist[ii]; + int jnum = ghostnumneigh[i]; + int* jlist = ghostneighs+ghostfirstneigh[i]; + const double xtmp = atom->x[i][0]; + const double ytmp = atom->x[i][1]; + const double ztmp = atom->x[i][2]; + const int itype = atom->type[i]; + const int ielem = map[itype]; + const double radi = radelem[ielem]; + int ninside = 0; + + for (int jj = 0; jj < jnum; jj++) { + int j = jlist[jj]; + j &= NEIGHMASK; + const double delx = atom->x[j][0] - xtmp; + const double dely = atom->x[j][1] - ytmp; + const double delz = atom->x[j][2] - ztmp; + const double rsq = delx*delx + dely*dely + delz*delz; + int jtype = atom->type[j]; + int jelem = map[jtype]; + + i_pairs[i_numpairs][0] = i; + i_pairs[i_numpairs][1] = jj; + i_pairs[i_numpairs][2] = -1; + i_pairs[i_numpairs][3] = count; + if (rsq < cutsq[itype][jtype]&&rsq>1e-20) { + i_rij[count][ninside][0] = delx; + i_rij[count][ninside][1] = dely; + i_rij[count][ninside][2] = delz; + i_inside[count][ninside] = j; + i_wj[count][ninside] = wjelem[jelem]; + i_rcutij[count][ninside] = (radi + radelem[jelem])*rcutfac; + // update index list with inside index + i_pairs[i_numpairs][2] = ninside++; + } + i_numpairs++; + } + 
i_ninside[count] = ninside; + count++; + } +#ifdef TIMING_INFO + clock_gettime(CLOCK_REALTIME,&endtime); + timers[0]+=(endtime.tv_sec-starttime.tv_sec+1.0* + (endtime.tv_nsec-starttime.tv_nsec)/1000000000); +#endif +#ifdef TIMING_INFO + clock_gettime(CLOCK_REALTIME,&starttime); +#endif + +#if defined(_OPENMP) +#pragma omp parallel for shared(count) default(none) +#endif + for (int ii=0; ii < count; ii++) { + int tid = omp_get_thread_num(); + set_sna_to_shared(tid,ii); + //sna[tid]->compute_ui(i_ninside[ii]); +#ifdef TIMING_INFO + clock_gettime(CLOCK_REALTIME,&starttime); +#endif + sna[tid]->compute_ui_omp(i_ninside[ii],MAX(int(nthreads/count),1)); +#ifdef TIMING_INFO + clock_gettime(CLOCK_REALTIME,&endtime); + sna[tid]->timers[0]+=(endtime.tv_sec-starttime.tv_sec+1.0* + (endtime.tv_nsec-starttime.tv_nsec)/1000000000); +#endif + } + +#ifdef TIMING_INFO + clock_gettime(CLOCK_REALTIME,&starttime); +#endif + for (int ii=0; ii < count; ii++) { + int tid = 0;//omp_get_thread_num(); + set_sna_to_shared(tid,ii); + sna[tid]->compute_zi_omp(MAX(int(nthreads/count),1)); + } +#ifdef TIMING_INFO + clock_gettime(CLOCK_REALTIME,&endtime); + sna[0]->timers[1]+=(endtime.tv_sec-starttime.tv_sec+1.0* + (endtime.tv_nsec-starttime.tv_nsec)/1000000000); +#endif + +#ifdef TIMING_INFO + clock_gettime(CLOCK_REALTIME,&endtime); + timers[1]+=(endtime.tv_sec-starttime.tv_sec+1.0* + (endtime.tv_nsec-starttime.tv_nsec)/1000000000); +#endif +} + +/* ---------------------------------------------------------------------- + compute beta +------------------------------------------------------------------------- */ + +void PairNNSNAP::compute_beta() +{ + int i; + int *type = atom->type; + + for (int ii = 0; ii < list->inum; ii++) { + i = list->ilist[ii]; + const int itype = type[i]; + const int ielem = map[itype]; + double* coeffi = coeffelem[ielem]; + + for (int k = 1; k <= ncoeff; k++) + beta[ii][k-1] = coeffi[k]; + } +} + +/* ---------------------------------------------------------------------- + 
allocate all arrays +------------------------------------------------------------------------- */ + +void PairNNSNAP::allocate() +{ + allocated = 1; + int n = atom->ntypes; + + memory->create(setflag,n+1,n+1,"pair:setflag"); + memory->create(cutsq,n+1,n+1,"pair:cutsq"); + memory->create(map,n+1,"pair:map"); +} + +/* ---------------------------------------------------------------------- + global settings +------------------------------------------------------------------------- */ + +void PairNNSNAP::settings(int narg, char **arg) +{ + + // set default values for optional arguments + + nthreads = -1; + use_shared_arrays=-1; + do_load_balance = 0; + use_optimized = 1; + + // optional arguments + + for (int i=0; i < narg; i++) { + if (i+2>narg) error->all(FLERR,"Illegal pair_style command"); + if (strcmp(arg[i],"nthreads")==0) { + nthreads=force->inumeric(FLERR,arg[++i]); +#if defined(LMP_USER_OMP) + error->all(FLERR,"Must set number of threads via package omp command"); +#else + omp_set_num_threads(nthreads); + comm->nthreads=nthreads; +#endif + continue; + } + if (strcmp(arg[i],"optimized")==0) { + use_optimized=force->inumeric(FLERR,arg[++i]); + continue; + } + if (strcmp(arg[i],"shared")==0) { + use_shared_arrays=force->inumeric(FLERR,arg[++i]); + continue; + } + if (strcmp(arg[i],"loadbalance")==0) { + do_load_balance = force->inumeric(FLERR,arg[++i]); + if (do_load_balance) { + double mincutoff = extra_cutoff() + + rcutmax + neighbor->skin; + if (comm->cutghostuser < mincutoff) { + char buffer[255]; + + //apparently mincutoff is 0 after sprintf command ????? 
+ + double tmp = mincutoff + 0.1; + sprintf(buffer, "Communication cutoff is too small " + "for SNAP micro load balancing, increased to %lf", + mincutoff+0.1); + if (comm->me==0) + error->warning(FLERR,buffer); + + comm->cutghostuser = tmp; + + } + } + continue; + } + if (strcmp(arg[i],"schedule")==0) { + i++; + if (strcmp(arg[i],"static")==0) + schedule_user = 1; + if (strcmp(arg[i],"dynamic")==0) + schedule_user = 2; + if (strcmp(arg[i],"guided")==0) + schedule_user = 3; + if (strcmp(arg[i],"auto")==0) + schedule_user = 4; + if (strcmp(arg[i],"determine")==0) + schedule_user = 5; + if (schedule_user == 0) + error->all(FLERR,"Illegal pair_style command"); + continue; + } + error->all(FLERR,"Illegal pair_style command"); + } + + if (nthreads < 0) + nthreads = comm->nthreads; + + if (use_shared_arrays < 0) { + if (nthreads > 1 && atom->nlocal <= 2*nthreads) + use_shared_arrays = 1; + else use_shared_arrays = 0; + } + + // check if running non-optimized code with + // optimization flags set + + if (!use_optimized) + if (nthreads > 1 || + use_shared_arrays || + do_load_balance || + schedule_user) + error->all(FLERR,"Illegal pair_style command"); +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs +------------------------------------------------------------------------- */ + +void PairNNSNAP::coeff(int narg, char **arg) +{ + if (narg < 5) error->all(FLERR,"Incorrect args for pair coefficients"); + if (!allocated) allocate(); + + if (nelements) { + for (int i = 0; i < nelements; i++) + delete[] elements[i]; + delete[] elements; + memory->destroy(radelem); + memory->destroy(wjelem); + memory->destroy(coeffelem); + memory->destroy(beta); + } + + char* type1 = arg[0]; + char* type2 = arg[1]; + char* coefffilename = arg[2]; + char* paramfilename = arg[3]; + char** elemtypes = &arg[4]; + + // insure I,J args are * * + + if (strcmp(type1,"*") != 0 || strcmp(type2,"*") != 0) + error->all(FLERR,"Incorrect args 
for pair coefficients"); + + // read snapcoeff and snapparam files + + read_files(coefffilename,paramfilename); + + if (!quadraticflag) + ncoeff = ncoeffall - 1; + else { + + // ncoeffall should be (ncoeff+2)*(ncoeff+1)/2 + // so, ncoeff = floor(sqrt(2*ncoeffall))-1 + + ncoeff = sqrt(2*ncoeffall)-1; + ncoeffq = (ncoeff*(ncoeff+1))/2; + int ntmp = 1+ncoeff+ncoeffq; + if (ntmp != ncoeffall) { + printf("ncoeffall = %d ntmp = %d ncoeff = %d \n",ncoeffall,ntmp,ncoeff); + error->all(FLERR,"Incorrect SNAP coeff file"); + } + } + + // read args that map atom types to SNAP elements + // map[i] = which element the Ith atom type is, -1 if not mapped + // map[0] is not used + + for (int i = 1; i <= atom->ntypes; i++) { + char* elemname = elemtypes[i-1]; + int jelem; + for (jelem = 0; jelem < nelements; jelem++) + if (strcmp(elemname,elements[jelem]) == 0) + break; + + if (jelem < nelements) + map[i] = jelem; + else if (strcmp(elemname,"NULL") == 0) map[i] = -1; + else error->all(FLERR,"Incorrect args for pair coefficients"); + } + + // clear setflag since coeff() called once with I,J = * * + + int n = atom->ntypes; + for (int i = 1; i <= n; i++) + for (int j = i; j <= n; j++) + setflag[i][j] = 0; + + // set setflag i,j for type pairs where both are mapped to elements + + int count = 0; + for (int i = 1; i <= n; i++) + for (int j = i; j <= n; j++) + if (map[i] >= 0 && map[j] >= 0) { + setflag[i][j] = 1; + count++; + } + + if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); + + sna = new SNA*[nthreads]; + + // allocate memory for per OpenMP thread data which + // is wrapped into the sna class + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + int tid = omp_get_thread_num(); + sna[tid] = new SNA(lmp,rfac0,twojmax, + diagonalstyle,use_shared_arrays, + rmin0,switchflag,bzeroflag); + if (!use_shared_arrays) + sna[tid]->grow_rij(nmax); + } + + if (ncoeff != sna[0]->ncoeff) { + if (comm->me == 0) + printf("ncoeff = %d snancoeff = %d 
\n",ncoeff,sna[0]->ncoeff); + error->all(FLERR,"Incorrect SNAP parameter file"); + } + + // Calculate maximum cutoff for all elements + + rcutmax = 0.0; + for (int ielem = 0; ielem < nelements; ielem++) + rcutmax = MAX(2.0*radelem[ielem]*rcutfac,rcutmax); + +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +void PairNNSNAP::init_style() +{ + if (force->newton_pair == 0) + error->all(FLERR,"Pair style SNAP requires newton pair on"); + + // need a full neighbor list + + int irequest = neighbor->request(this,instance_me); + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + int tid = omp_get_thread_num(); + sna[tid]->init(); + } + +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ + +double PairNNSNAP::init_one(int i, int j) +{ + if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set"); + return (radelem[map[i]] + + radelem[map[j]])*rcutfac; +} + +/* ---------------------------------------------------------------------- */ + +void PairNNSNAP::read_files(char *coefffilename, char *paramfilename) +{ + + // open SNAP coefficient file on proc 0 + + FILE *fpcoeff; + if (comm->me == 0) { + fpcoeff = force->open_potential(coefffilename); + if (fpcoeff == NULL) { + char str[128]; + snprintf(str,128,"Cannot open SNAP coefficient file %s",coefffilename); + error->one(FLERR,str); + } + } + + char line[MAXLINE],*ptr; + int eof = 0; + + int n; + int nwords = 0; + while (nwords == 0) { + if (comm->me == 0) { + ptr = fgets(line,MAXLINE,fpcoeff); + if (ptr == NULL) { + eof = 1; + fclose(fpcoeff); + } else n = strlen(line) + 1; + } + MPI_Bcast(&eof,1,MPI_INT,0,world); + 
if (eof) break; + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + + // strip comment, skip line if blank + + if ((ptr = strchr(line,'#'))) *ptr = '\0'; + nwords = atom->count_words(line); + } + if (nwords != 2) + error->all(FLERR,"Incorrect format in SNAP coefficient file"); + + // words = ptrs to all words in line + // strip single and double quotes from words + + char* words[MAXWORD]; + int iword = 0; + words[iword] = strtok(line,"' \t\n\r\f"); + iword = 1; + words[iword] = strtok(NULL,"' \t\n\r\f"); + + nelements = atoi(words[0]); + ncoeffall = atoi(words[1]); + + // set up element lists + + elements = new char*[nelements]; + memory->create(radelem,nelements,"pair:radelem"); + memory->create(wjelem,nelements,"pair:wjelem"); + memory->create(coeffelem,nelements,ncoeffall,"pair:coeffelem"); + + // Loop over nelements blocks in the SNAP coefficient file + + for (int ielem = 0; ielem < nelements; ielem++) { + + if (comm->me == 0) { + ptr = fgets(line,MAXLINE,fpcoeff); + if (ptr == NULL) { + eof = 1; + fclose(fpcoeff); + } else n = strlen(line) + 1; + } + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) + error->all(FLERR,"Incorrect format in SNAP coefficient file"); + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + + nwords = atom->count_words(line); + if (nwords != 3) + error->all(FLERR,"Incorrect format in SNAP coefficient file"); + + iword = 0; + words[iword] = strtok(line,"' \t\n\r\f"); + iword = 1; + words[iword] = strtok(NULL,"' \t\n\r\f"); + iword = 2; + words[iword] = strtok(NULL,"' \t\n\r\f"); + + char* elemtmp = words[0]; + int n = strlen(elemtmp) + 1; + elements[ielem] = new char[n]; + strcpy(elements[ielem],elemtmp); + + radelem[ielem] = atof(words[1]); + wjelem[ielem] = atof(words[2]); + + + if (comm->me == 0) { + if (screen) fprintf(screen,"SNAP Element = %s, Radius %g, Weight %g \n", + elements[ielem], radelem[ielem], wjelem[ielem]); + if (logfile) fprintf(logfile,"SNAP Element = %s, Radius %g, Weight %g 
\n", + elements[ielem], radelem[ielem], wjelem[ielem]); + } + + for (int icoeff = 0; icoeff < ncoeffall; icoeff++) { + if (comm->me == 0) { + ptr = fgets(line,MAXLINE,fpcoeff); + if (ptr == NULL) { + eof = 1; + fclose(fpcoeff); + } else n = strlen(line) + 1; + } + + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) + error->all(FLERR,"Incorrect format in SNAP coefficient file"); + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + + nwords = atom->count_words(line); + if (nwords != 1) + error->all(FLERR,"Incorrect format in SNAP coefficient file"); + + iword = 0; + words[iword] = strtok(line,"' \t\n\r\f"); + + coeffelem[ielem][icoeff] = atof(words[0]); + + } + } + + // set flags for required keywords + + rcutfacflag = 0; + twojmaxflag = 0; + + // Set defaults for optional keywords + + rfac0 = 0.99363; + rmin0 = 0.0; + diagonalstyle = 3; + switchflag = 1; + bzeroflag = 1; + quadraticflag = 0; + + // open SNAP parameter file on proc 0 + + FILE *fpparam; + if (comm->me == 0) { + fpparam = force->open_potential(paramfilename); + if (fpparam == NULL) { + char str[128]; + snprintf(str,128,"Cannot open SNAP parameter file %s",paramfilename); + error->one(FLERR,str); + } + } + + eof = 0; + while (1) { + if (comm->me == 0) { + ptr = fgets(line,MAXLINE,fpparam); + if (ptr == NULL) { + eof = 1; + fclose(fpparam); + } else n = strlen(line) + 1; + } + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) break; + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + + // strip comment, skip line if blank + + if ((ptr = strchr(line,'#'))) *ptr = '\0'; + nwords = atom->count_words(line); + if (nwords == 0) continue; + + if (nwords != 2) + error->all(FLERR,"Incorrect format in SNAP parameter file"); + + // words = ptrs to all words in line + // strip single and double quotes from words + + char* keywd = strtok(line,"' \t\n\r\f"); + char* keyval = strtok(NULL,"' \t\n\r\f"); + + if (comm->me == 0) { + if (screen) fprintf(screen,"SNAP keyword %s %s 
\n",keywd,keyval); + if (logfile) fprintf(logfile,"SNAP keyword %s %s \n",keywd,keyval); + } + + if (strcmp(keywd,"rcutfac") == 0) { + rcutfac = atof(keyval); + rcutfacflag = 1; + } else if (strcmp(keywd,"twojmax") == 0) { + twojmax = atoi(keyval); + twojmaxflag = 1; + } else if (strcmp(keywd,"rfac0") == 0) + rfac0 = atof(keyval); + else if (strcmp(keywd,"rmin0") == 0) + rmin0 = atof(keyval); + else if (strcmp(keywd,"diagonalstyle") == 0) + diagonalstyle = atoi(keyval); + else if (strcmp(keywd,"switchflag") == 0) + switchflag = atoi(keyval); + else if (strcmp(keywd,"bzeroflag") == 0) + bzeroflag = atoi(keyval); + else if (strcmp(keywd,"quadraticflag") == 0) + quadraticflag = atoi(keyval); + else + error->all(FLERR,"Incorrect SNAP parameter file"); + } + + if (rcutfacflag == 0 || twojmaxflag == 0) + error->all(FLERR,"Incorrect SNAP parameter file"); + +} + +/* ---------------------------------------------------------------------- + memory usage +------------------------------------------------------------------------- */ + +double PairNNSNAP::memory_usage() +{ + double bytes = Pair::memory_usage(); + int n = atom->ntypes+1; + bytes += n*n*sizeof(int); + bytes += n*n*sizeof(double); + bytes += 3*nmax*sizeof(double); + bytes += nmax*sizeof(int); + bytes += (2*ncoeffall)*sizeof(double); + bytes += (ncoeff*3)*sizeof(double); + bytes += sna[0]->memory_usage()*nthreads; + return bytes; +} + diff --git a/src/SNAP/pair_nn_snap.h b/src/SNAP/pair_nn_snap.h new file mode 100644 index 0000000000..f77ddee207 --- /dev/null +++ b/src/SNAP/pair_nn_snap.h @@ -0,0 +1,184 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. 
This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(nn/snap,PairNNSNAP) + +#else + +#ifndef LMP_PAIR_NN_SNAP_H +#define LMP_PAIR_NN_SNAP_H + +#include "pair.h" + +namespace LAMMPS_NS { + +class PairNNSNAP : public Pair { +public: + PairNNSNAP(class LAMMPS *); + ~PairNNSNAP(); + virtual void compute(int, int); + void compute_regular(int, int); + void compute_optimized(int, int); + void settings(int, char **); + virtual void coeff(int, char **); + virtual void init_style(); + virtual double init_one(int, int); + virtual double memory_usage(); + + double rcutfac, quadraticflag; // declared public to workaround gcc 4.9 + int ncoeff; // compiler bug, manifest in KOKKOS package + +protected: + int ncoeffq, ncoeffall; + double **bvec, ***dbvec; + class SNA** sna; + int nmax; + int nthreads; + virtual void allocate(); + void read_files(char *, char *); + inline int equal(double* x,double* y); + inline double dist2(double* x,double* y); + double extra_cutoff(); + void load_balance(); + void set_sna_to_shared(int snaid,int i); + void build_per_atom_arrays(); + + void compute_beta(); + + int schedule_user; + double schedule_time_guided; + double schedule_time_dynamic; + + int ncalls_neigh; + int do_load_balance; + int ilistmask_max; + int* ilistmask; + int ghostinum; + int ghostilist_max; + int* ghostilist; + int ghostnumneigh_max; + int* ghostnumneigh; + int* ghostneighs; + int* ghostfirstneigh; + int ghostneighs_total; + int ghostneighs_max; + + int use_optimized; + int use_shared_arrays; + + int i_max; + int i_neighmax; + int i_numpairs; + int **i_pairs; + double ***i_rij; + int **i_inside; + double **i_wj; + double **i_rcutij; + int *i_ninside; + double ****i_uarraytot_r, ****i_uarraytot_i; + double ******i_zarray_r, ******i_zarray_i; + +#ifdef TIMING_INFO + // timespec starttime, 
endtime; + double timers[4]; +#endif + + double rcutmax; // max cutoff for all elements + int nelements; // # of unique elements + char **elements; // names of unique elements + double *radelem; // element radii + double *wjelem; // element weights + double **coeffelem; // element bispectrum coefficients + double** beta; // betas for all atoms in list + int *map; // mapping from atom types to elements + int twojmax, diagonalstyle, switchflag, bzeroflag; + double rfac0, rmin0, wj1, wj2; + int rcutfacflag, twojmaxflag; // flags for required parameters + int beta_max; // length of beta +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Communication cutoff too small for SNAP micro load balancing + +This can happen if you change the neighbor skin after your pair_style +command or if your box dimensions grow during a run. You can set the +cutoff explicitly via the comm_modify cutoff command. + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +E: Must set number of threads via package omp command + +Because you are using the USER-OMP package, set the number of threads +via its settings, not by the pair_style snap nthreads setting. + +W: Communication cutoff is too small for SNAP micro load balancing, increased to %lf + +Self-explanatory. + +E: Incorrect args for pair coefficients + +Self-explanatory. Check the input script or data file. + +E: Incorrect SNAP coeff file + +With quadraticflag set, the number of coefficients per element must equal 1 + N + N*(N+1)/2 for an integer N. + +E: Incorrect SNAP parameter file + +The file cannot be parsed correctly; check its internal syntax. + +E: Pair style SNAP requires newton pair on + +See the newton command. This is a requirement for using the SNAP +potential. + +E: All pair coeffs are not set + +All pair coefficients must be set in the data file or by the +pair_coeff command before running a simulation. 
+ +E: Cannot open SNAP coefficient file %s + +The specified SNAP coefficient file cannot be opened. Check that the +path and name are correct. + +E: Incorrect format in SNAP coefficient file + +Incorrect number of words per line in the coefficient file. + +E: Cannot open SNAP parameter file %s + +The specified SNAP parameter file cannot be opened. Check that the +path and name are correct. + +E: Incorrect format in SNAP parameter file + +Incorrect number of words per line in the parameter file. + +E: Did not find all elements in SNAP coefficient file. + +One or more elements listed in the pair_coeff command were not found in the coefficient file. + +*/ From f2d881470d0edfbe68adc12204ddd1f44621d069 Mon Sep 17 00:00:00 2001 From: Aidan Thompson Date: Sat, 11 May 2019 12:55:11 -0600 Subject: [PATCH 05/21] Added placeholder for neural network SNAP potential --- examples/snap/W.nnsnap | 16 +++++++++++++++ examples/snap/in.nnsnap | 45 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 examples/snap/W.nnsnap create mode 100644 examples/snap/in.nnsnap diff --git a/examples/snap/W.nnsnap b/examples/snap/W.nnsnap new file mode 100644 index 0000000000..93c2bf866a --- /dev/null +++ b/examples/snap/W.nnsnap @@ -0,0 +1,16 @@ +# DATE: 2017-02-20 CONTRIBUTOR: Mitchell Wood mitwood@sandia.gov CITATION: Wood, M. A. and Thompson, A. P. "Quantum-Accurate Molecular Dynamics Potential for Tungsten" arXiv:1702.07042 [physics.comp-ph] +# +# Definition of SNAP+ZBL potential. 
+variable zblcutinner equal 4 +variable zblcutouter equal 4.8 +variable zblz equal 74 + +# Specify hybrid with SNAP and ZBL + +pair_style hybrid/overlay & +zbl ${zblcutinner} ${zblcutouter} & +snap +pair_coeff 1 1 zbl ${zblz} ${zblz} +pair_coeff * * snap W_2940_2017_2.snapcoeff W_2940_2017_2.snapparam W + +#Nomenclature on the snap files are Element_DakotaID_Year_Month diff --git a/examples/snap/in.nnsnap b/examples/snap/in.nnsnap new file mode 100644 index 0000000000..d575757d56 --- /dev/null +++ b/examples/snap/in.nnsnap @@ -0,0 +1,45 @@ +# Demonstrate SNAP Ta potential + +# Initialize simulation + +variable nsteps index 100 +variable nrep equal 4 +variable a equal 3.1803 +units metal + +# generate the box and atom positions using a BCC lattice + +variable nx equal ${nrep} +variable ny equal ${nrep} +variable nz equal ${nrep} + +boundary p p p + +lattice bcc $a +region box block 0 ${nx} 0 ${ny} 0 ${nz} +create_box 1 box +create_atoms 1 box + +mass 1 183.84 + +# choose potential + +include W.nnsnap + +# Setup output + +thermo 10 +thermo_modify norm yes + +# Set up NVE run + +timestep 0.5e-3 +neighbor 1.0 bin +neigh_modify once no every 1 delay 0 check yes + +# Run MD + +velocity all create 300.0 4928459 +fix 1 all nve +run ${nsteps} + From a0cc6b5b59a41b24f646d218551cac1ff47b7566 Mon Sep 17 00:00:00 2001 From: Aidan Thompson Date: Sat, 11 May 2019 14:04:21 -0600 Subject: [PATCH 06/21] Forgot to change pair style to nn/snap --- examples/snap/W.nnsnap | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/snap/W.nnsnap b/examples/snap/W.nnsnap index 93c2bf866a..6ca97a701a 100644 --- a/examples/snap/W.nnsnap +++ b/examples/snap/W.nnsnap @@ -9,8 +9,8 @@ variable zblz equal 74 pair_style hybrid/overlay & zbl ${zblcutinner} ${zblcutouter} & -snap +nn/snap pair_coeff 1 1 zbl ${zblz} ${zblz} -pair_coeff * * snap W_2940_2017_2.snapcoeff W_2940_2017_2.snapparam W +pair_coeff * * nn/snap W_2940_2017_2.snapcoeff W_2940_2017_2.snapparam W #Nomenclature 
on the snap files are Element_DakotaID_Year_Month From 98d9c45ad97be8e2f26a7310d2f0627429dc4a43 Mon Sep 17 00:00:00 2001 From: Aidan Thompson Date: Wed, 15 May 2019 17:18:24 -0600 Subject: [PATCH 07/21] compute_bispectrum --- src/SNAP/pair_snap.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/SNAP/pair_snap.cpp b/src/SNAP/pair_snap.cpp index 0bf367b5dc..4913044369 100644 --- a/src/SNAP/pair_snap.cpp +++ b/src/SNAP/pair_snap.cpp @@ -1319,6 +1319,28 @@ void PairSNAP::compute_beta() } } +/* ---------------------------------------------------------------------- + compute bispectrum +------------------------------------------------------------------------- */ + +void PairSNAP::compute_bispectrum() +{ + int i; + int *type = atom->type; + + for (int ii = 0; ii < list->inum; ii++) { + i = list->ilist[ii]; + const int itype = type[i]; + const int ielem = map[itype]; + double* coeffi = coeffelem[ielem]; + snaptr->compute_bi(); + snaptr->copy_bi2bvec(); + + for (int k = 0; k < ncoeff; k++) + bispectrum[ii][k] = snaptr->bvec[k]; + } +} + /* ---------------------------------------------------------------------- allocate all arrays ------------------------------------------------------------------------- */ From 5b71b3fc57f8cc9bd935202f35db0538006e37e9 Mon Sep 17 00:00:00 2001 From: Aidan Thompson Date: Thu, 16 May 2019 21:51:24 -0600 Subject: [PATCH 08/21] Added bispectrum compute --- src/SNAP/pair_snap.cpp | 480 +++++------------------------------------ src/SNAP/pair_snap.h | 2 + 2 files changed, 56 insertions(+), 426 deletions(-) diff --git a/src/SNAP/pair_snap.cpp b/src/SNAP/pair_snap.cpp index 4913044369..f3b678971d 100644 --- a/src/SNAP/pair_snap.cpp +++ b/src/SNAP/pair_snap.cpp @@ -192,11 +192,13 @@ void PairSNAP::compute_regular(int eflag, int vflag) if (beta_max < list->inum) { memory->grow(beta,list->inum,ncoeff,"PairSNAP:beta"); + memory->grow(bispectrum,list->inum,ncoeff,"PairSNAP:bispectrum"); beta_max = list->inum; } // compute 
dE_i/dB_i = beta_i for all i in list + compute_bispectrum(); compute_beta(); numneigh = list->numneigh; @@ -251,10 +253,6 @@ void PairSNAP::compute_regular(int eflag, int vflag) snaptr->compute_ui(ninside); snaptr->compute_zi(); - if (quadraticflag) { - snaptr->compute_bi(); - snaptr->copy_bi2bvec(); - } // for neighbors of I within cutoff: // compute Fij = dEi/dRj = -dEi/dRi @@ -269,31 +267,6 @@ void PairSNAP::compute_regular(int eflag, int vflag) snaptr->compute_duidrj(snaptr->rij[jj], snaptr->wj[jj],snaptr->rcutij[jj]); -// // quadratic contributions - -// if (quadraticflag) { -// int k = ncoeff+1; -// for (int icoeff = 0; icoeff < ncoeff; icoeff++) { -// double bveci = snaptr->bvec[icoeff]; -// double fack = coeffi[k]*bveci; -// double* dbveci = snaptr->dbvec[icoeff]; -// fij[0] += fack*dbveci[0]; -// fij[1] += fack*dbveci[1]; -// fij[2] += fack*dbveci[2]; -// k++; -// for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { -// double facki = coeffi[k]*bveci; -// double fackj = coeffi[k]*snaptr->bvec[jcoeff]; -// double* dbvecj = snaptr->dbvec[jcoeff]; - -// fij[0] += facki*dbvecj[0]+fackj*dbveci[0]; -// fij[1] += facki*dbvecj[1]+fackj*dbveci[1]; -// fij[2] += facki*dbvecj[2]+fackj*dbveci[2]; -// k++; -// } -// } -// } - snaptr->compute_deidrj(fij); f[i][0] += fij[0]; @@ -320,10 +293,6 @@ void PairSNAP::compute_regular(int eflag, int vflag) double* coeffi = coeffelem[ielem]; evdwl = coeffi[0]; - if (!quadraticflag) { - snaptr->compute_bi(); - snaptr->copy_bi2bvec(); - } // E = beta.B + 0.5*B^t.alpha.B // coeff[k] = beta[k-1] or @@ -332,21 +301,9 @@ void PairSNAP::compute_regular(int eflag, int vflag) // linear contributions - for (int k = 1; k <= ncoeff; k++) - evdwl += coeffi[k]*snaptr->bvec[k-1]; + for (int k = 0; k < ncoeff; k++) + evdwl += beta[ii][k]*bispectrum[ii][k]; - // quadratic contributions - - if (quadraticflag) { - int k = ncoeff+1; - for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - double bveci = snaptr->bvec[icoeff]; - evdwl += 
0.5*coeffi[k++]*bveci*bveci; - for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { - evdwl += coeffi[k++]*bveci*snaptr->bvec[jcoeff]; - } - } - } ev_tally_full(i,2.0*evdwl,0.0,0.0,0.0,0.0,0.0); } @@ -355,383 +312,6 @@ void PairSNAP::compute_regular(int eflag, int vflag) if (vflag_fdotr) virial_fdotr_compute(); } - -/* ---------------------------------------------------------------------- - This version is optimized for threading, micro-load balancing - ---------------------------------------------------------------------- */ - -void PairSNAP::compute_optimized(int eflag, int vflag) -{ - // if reneighboring took place do load_balance if requested - if (do_load_balance > 0 && - (neighbor->ncalls != ncalls_neigh)) { - ghostinum = 0; - // reset local ghost neighbor lists - ncalls_neigh = neighbor->ncalls; - if (ilistmask_max < list->inum) { - memory->grow(ilistmask,list->inum,"PairSnap::ilistmask"); - ilistmask_max = list->inum; - } - for (int i = 0; i < list->inum; i++) - ilistmask[i] = 1; - - //multiple passes for loadbalancing - for (int i = 0; i < do_load_balance; i++) - load_balance(); - } - - int numpairs = 0; - for (int ii = 0; ii < list->inum; ii++) { - if ((do_load_balance <= 0) || ilistmask[ii]) { - int i = list->ilist[ii]; - int jnum = list->numneigh[i]; - numpairs += jnum; - } - } - - if (do_load_balance) - for (int ii = 0; ii < ghostinum; ii++) { - int i = ghostilist[ii]; - int jnum = ghostnumneigh[i]; - numpairs += jnum; - } - - // optimized schedule setting - - int time_dynamic = 0; - int time_guided = 0; - - if (schedule_user == 0) schedule_user = 4; - - switch (schedule_user) { - case 1: - omp_set_schedule(omp_sched_static,1); - break; - case 2: - omp_set_schedule(omp_sched_dynamic,1); - break; - case 3: - omp_set_schedule(omp_sched_guided,2); - break; - case 4: - omp_set_schedule(omp_sched_auto,0); - break; - case 5: - if (numpairs < 8*nthreads) omp_set_schedule(omp_sched_dynamic,1); - else if (schedule_time_guided < 0.0) { - 
omp_set_schedule(omp_sched_guided,2); - if (!eflag && !vflag) time_guided = 1; - } else if (schedule_time_dynamic<0.0) { - omp_set_schedule(omp_sched_dynamic,1); - if (!eflag && !vflag) time_dynamic = 1; - } else if (schedule_time_guidedcreate(pairs_tid_unique,numpairs,4,"numpairs"); - pairs = pairs_tid_unique; - } - - if (!use_shared_arrays) { - numpairs = 0; - for (int ii = 0; ii < list->inum; ii++) { - if ((do_load_balance <= 0) || ilistmask[ii]) { - int i = list->ilist[ii]; - int jnum = list->numneigh[i]; - for (int jj = 0; jjx; - double **f = atom->f; - int *type = atom->type; - int nlocal = atom->nlocal; - int newton_pair = force->newton_pair; - - numneigh = list->numneigh; - firstneigh = list->firstneigh; - -#ifdef TIMING_INFO - // only update micro timers after setup - static int count=0; - if (count<2) { - sna[tid]->timers[0] = 0; - sna[tid]->timers[1] = 0; - sna[tid]->timers[2] = 0; - sna[tid]->timers[3] = 0; - sna[tid]->timers[4] = 0; - } - count++; -#endif - - // did thread start working on interactions of new atom - int iold = -1; - - double starttime, endtime; - if (time_dynamic || time_guided) - starttime = MPI_Wtime(); - -#if defined(_OPENMP) -#pragma omp for schedule(runtime) -#endif - for (int iijj = 0; iijj < numpairs; iijj++) { - int i = 0; - if (use_shared_arrays) { - i = i_pairs[iijj][0]; - if (iold != i) { - set_sna_to_shared(tid,i_pairs[iijj][3]); - ielem = map[type[i]]; - } - iold = i; - } else { - i = pairs[iijj][0]; - if (iold != i) { - iold = i; - const double xtmp = x[i][0]; - const double ytmp = x[i][1]; - const double ztmp = x[i][2]; - const int itype = type[i]; - ielem = map[itype]; - const double radi = radelem[ielem]; - - if (i < nlocal) { - jlist = firstneigh[i]; - jnum = numneigh[i]; - } else { - jlist = ghostneighs+ghostfirstneigh[i]; - jnum = ghostnumneigh[i]; - } - - // insure rij, inside, wj, and rcutij are of size jnum - - sna[tid]->grow_rij(jnum); - - // rij[][3] = displacements between atom I and those neighbors - // 
inside = indices of neighbors of I within cutoff - // wj = weights of neighbors of I within cutoff - // rcutij = cutoffs of neighbors of I within cutoff - // note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi - - ninside = 0; - for (jj = 0; jj < jnum; jj++) { - int j = jlist[jj]; - j &= NEIGHMASK; - delx = x[j][0] - xtmp; //unitialised - dely = x[j][1] - ytmp; - delz = x[j][2] - ztmp; - rsq = delx*delx + dely*dely + delz*delz; - jtype = type[j]; - int jelem = map[jtype]; - - if (rsq < cutsq[itype][jtype]&&rsq>1e-20) { //unitialised - sna[tid]->rij[ninside][0] = delx; - sna[tid]->rij[ninside][1] = dely; - sna[tid]->rij[ninside][2] = delz; - sna[tid]->inside[ninside] = j; - sna[tid]->wj[ninside] = wjelem[jelem]; - sna[tid]->rcutij[ninside] = (radi + radelem[jelem])*rcutfac; - ninside++; - - // update index list with inside index - pairs[iijj + (jj - pairs[iijj][1])][2] = - ninside-1; //unitialised - } - } - - // compute Ui and Zi for atom I - - sna[tid]->compute_ui(ninside); //unitialised - sna[tid]->compute_zi(); - } - } - if (quadraticflag) { - sna[tid]->compute_bi(); - sna[tid]->copy_bi2bvec(); - } - - // for neighbors of I within cutoff: - // compute dUi/drj and dBi/drj - // Fij = dEi/dRj = -dEi/dRi => add to Fi, subtract from Fj - - // entry into loop if inside index is set - - double* coeffi = coeffelem[ielem]; - - if (pairs[iijj][2] >= 0) { - jj = pairs[iijj][2]; - int j = sna[tid]->inside[jj]; - sna[tid]->compute_duidrj(sna[tid]->rij[jj], - sna[tid]->wj[jj],sna[tid]->rcutij[jj]); - - sna[tid]->compute_dbidrj(); - sna[tid]->copy_dbi2dbvec(); - - fij[0] = 0.0; - fij[1] = 0.0; - fij[2] = 0.0; - - // linear contributions - - for (k = 1; k <= ncoeff; k++) { - double bgb = coeffi[k]; - fij[0] += bgb*sna[tid]->dbvec[k-1][0]; - fij[1] += bgb*sna[tid]->dbvec[k-1][1]; - fij[2] += bgb*sna[tid]->dbvec[k-1][2]; - } - - // quadratic contributions - - if (quadraticflag) { - int k = ncoeff+1; - for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - double bveci = 
sna[tid]->bvec[icoeff]; - double fack = coeffi[k]*bveci; - double* dbveci = sna[tid]->dbvec[icoeff]; - fij[0] += fack*sna[tid]->dbvec[icoeff][0]; - fij[1] += fack*sna[tid]->dbvec[icoeff][1]; - fij[2] += fack*sna[tid]->dbvec[icoeff][2]; - k++; - for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { - double facki = coeffi[k]*bveci; - double fackj = coeffi[k]*sna[tid]->bvec[jcoeff]; - double* dbvecj = sna[tid]->dbvec[jcoeff]; - fij[0] += facki*dbvecj[0]+fackj*dbveci[0]; - fij[1] += facki*dbvecj[1]+fackj*dbveci[1]; - fij[2] += facki*dbvecj[2]+fackj*dbveci[2]; - k++; - } - } - } - -#if defined(_OPENMP) -#pragma omp critical -#endif - { - f[i][0] += fij[0]; - f[i][1] += fij[1]; - f[i][2] += fij[2]; - f[j][0] -= fij[0]; - f[j][1] -= fij[1]; - f[j][2] -= fij[2]; - - // tally per-atom virial contribution - - if (vflag) - ev_tally_xyz(i,j,nlocal,newton_pair,0.0,0.0, - fij[0],fij[1],fij[2], - -sna[tid]->rij[jj][0],-sna[tid]->rij[jj][1], - -sna[tid]->rij[jj][2]); - } - } - - // evdwl = energy of atom I, sum over coeffs_k * Bi_k - // only call this for first pair of each atom i - // if atom has no pairs, eatom=0, which is wrong - - if (eflag&&pairs[iijj][1] == 0) { - evdwl = coeffi[0]; - - if (!quadraticflag) { - sna[tid]->compute_bi(); - sna[tid]->copy_bi2bvec(); - } - - // E = beta.B + 0.5*B^t.alpha.B - // coeff[k] = beta[k-1] or - // coeff[k] = alpha_ii or - // coeff[k] = alpha_ij = alpha_ji, j != i - - // linear contributions - - for (int k = 1; k <= ncoeff; k++) - evdwl += coeffi[k]*sna[tid]->bvec[k-1]; - - // quadratic contributions - - if (quadraticflag) { - int k = ncoeff+1; - for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - double bveci = sna[tid]->bvec[icoeff]; - evdwl += 0.5*coeffi[k++]*bveci*bveci; - for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { - evdwl += coeffi[k++]*bveci*sna[tid]->bvec[jcoeff]; - } - } - } - -#if defined(_OPENMP) -#pragma omp critical -#endif - ev_tally_full(i,2.0*evdwl,0.0,0.0,0.0,0.0,0.0); - } - - } - if (time_dynamic || 
time_guided) - endtime = MPI_Wtime(); - if (time_dynamic) schedule_time_dynamic = endtime - starttime; - if (time_guided) schedule_time_guided = endtime - starttime; - if (!use_shared_arrays) memory->destroy(pairs); - - }// end of pragma omp parallel - - if (vflag_fdotr) virial_fdotr_compute(); - -} - inline int PairSNAP::equal(double* x,double* y) { double dist2 = @@ -1325,14 +905,61 @@ void PairSNAP::compute_beta() void PairSNAP::compute_bispectrum() { - int i; + int i,j,jnum,ninside; + double delx,dely,delz,rsq; + int *jlist,*numneigh,**firstneigh; + + double **x = atom->x; int *type = atom->type; + class SNA* snaptr = sna[0]; for (int ii = 0; ii < list->inum; ii++) { i = list->ilist[ii]; + + const double xtmp = x[i][0]; + const double ytmp = x[i][1]; + const double ztmp = x[i][2]; const int itype = type[i]; const int ielem = map[itype]; - double* coeffi = coeffelem[ielem]; + const double radi = radelem[ielem]; + + jlist = list->firstneigh[i]; + jnum = list->numneigh[i]; + + // insure rij, inside, wj, and rcutij are of size jnum + + snaptr->grow_rij(jnum); + + // rij[][3] = displacements between atom I and those neighbors + // inside = indices of neighbors of I within cutoff + // wj = weights for neighbors of I within cutoff + // rcutij = cutoffs for neighbors of I within cutoff + // note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi + + ninside = 0; + for (int jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + delx = x[j][0] - xtmp; + dely = x[j][1] - ytmp; + delz = x[j][2] - ztmp; + rsq = delx*delx + dely*dely + delz*delz; + int jtype = type[j]; + int jelem = map[jtype]; + + if (rsq < cutsq[itype][jtype]&&rsq>1e-20) { + snaptr->rij[ninside][0] = delx; + snaptr->rij[ninside][1] = dely; + snaptr->rij[ninside][2] = delz; + snaptr->inside[ninside] = j; + snaptr->wj[ninside] = wjelem[jelem]; + snaptr->rcutij[ninside] = (radi + radelem[jelem])*rcutfac; + ninside++; + } + } + + snaptr->compute_ui(ninside); + snaptr->compute_zi(); 
snaptr->compute_bi(); snaptr->copy_bi2bvec(); @@ -1470,6 +1097,7 @@ void PairSNAP::coeff(int narg, char **arg) memory->destroy(wjelem); memory->destroy(coeffelem); memory->destroy(beta); + memory->destroy(bispectrum); } char* type1 = arg[0]; diff --git a/src/SNAP/pair_snap.h b/src/SNAP/pair_snap.h index 94d21162e2..1453076b23 100644 --- a/src/SNAP/pair_snap.h +++ b/src/SNAP/pair_snap.h @@ -56,6 +56,7 @@ protected: void build_per_atom_arrays(); void compute_beta(); + void compute_bispectrum(); int schedule_user; double schedule_time_guided; @@ -102,6 +103,7 @@ protected: double *wjelem; // elements weights double **coeffelem; // element bispectrum coefficients double** beta; // betas for all atoms in list + double** bispectrum; // bispectrum components for all atoms in list int *map; // mapping from atom types to elements int twojmax, diagonalstyle, switchflag, bzeroflag; double rfac0, rmin0, wj1, wj2; From 51a6bfd579722cce8bc8f00de63ba08ff37ca9e4 Mon Sep 17 00:00:00 2001 From: Aidan Thompson Date: Thu, 16 May 2019 22:01:45 -0600 Subject: [PATCH 09/21] Added bispectrum compute --- src/SNAP/pair_snap.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/SNAP/pair_snap.cpp b/src/SNAP/pair_snap.cpp index f3b678971d..7c81eb0e8a 100644 --- a/src/SNAP/pair_snap.cpp +++ b/src/SNAP/pair_snap.cpp @@ -102,6 +102,8 @@ PairSNAP::PairSNAP(LAMMPS *lmp) : Pair(lmp) sna = NULL; beta_max = 0; + beta = NULL; + bispectrum = NULL; } /* ---------------------------------------------------------------------- */ From 803e0631c5e51ed5bdacebb6dbec9e5118d36954 Mon Sep 17 00:00:00 2001 From: Aidan Thompson Date: Thu, 16 May 2019 22:11:06 -0600 Subject: [PATCH 10/21] Added bispectrum compute --- src/SNAP/pair_snap.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/SNAP/pair_snap.cpp b/src/SNAP/pair_snap.cpp index 7c81eb0e8a..86e709ba03 100644 --- a/src/SNAP/pair_snap.cpp +++ b/src/SNAP/pair_snap.cpp @@ -119,8 +119,9 @@ PairSNAP::~PairSNAP() 
memory->destroy(radelem); memory->destroy(wjelem); memory->destroy(coeffelem); - memory->destroy(beta); } + memory->destroy(beta); + memory->destroy(bispectrum); // Need to set this because restart not handled by PairHybrid @@ -1098,9 +1099,9 @@ void PairSNAP::coeff(int narg, char **arg) memory->destroy(radelem); memory->destroy(wjelem); memory->destroy(coeffelem); - memory->destroy(beta); - memory->destroy(bispectrum); } + memory->destroy(beta); + memory->destroy(bispectrum); char* type1 = arg[0]; char* type2 = arg[1]; From 960a975e2a6c2125215b4945bc7abb1163a29e91 Mon Sep 17 00:00:00 2001 From: Aidan Thompson Date: Mon, 3 Jun 2019 19:50:40 -0600 Subject: [PATCH 11/21] Added compact arrays, removed unused openmp stuff --- src/SNAP/compute_sna_atom.cpp | 63 +-- src/SNAP/compute_sna_atom.h | 3 +- src/SNAP/compute_snad_atom.cpp | 101 ++--- src/SNAP/compute_snad_atom.h | 3 +- src/SNAP/compute_snav_atom.cpp | 112 ++--- src/SNAP/compute_snav_atom.h | 3 +- src/SNAP/openmp_snap.h | 16 - src/SNAP/pair_snap.cpp | 783 +-------------------------------- src/SNAP/pair_snap.h | 56 +-- src/SNAP/sna.cpp | 748 ++++++++----------------------- src/SNAP/sna.h | 58 +-- 11 files changed, 332 insertions(+), 1614 deletions(-) delete mode 100644 src/SNAP/openmp_snap.h diff --git a/src/SNAP/compute_sna_atom.cpp b/src/SNAP/compute_sna_atom.cpp index 5ca63a7e85..17774143d5 100644 --- a/src/SNAP/compute_sna_atom.cpp +++ b/src/SNAP/compute_sna_atom.cpp @@ -25,7 +25,6 @@ #include "comm.h" #include "memory.h" #include "error.h" -#include "openmp_snap.h" using namespace LAMMPS_NS; @@ -115,20 +114,10 @@ ComputeSNAAtom::ComputeSNAAtom(LAMMPS *lmp, int narg, char **arg) : } else error->all(FLERR,"Illegal compute sna/atom command"); } - nthreads = comm->nthreads; - snaptr = new SNA*[nthreads]; -#if defined(_OPENMP) -#pragma omp parallel default(none) shared(lmp,rfac0,twojmax,rmin0,switchflag,bzeroflag) -#endif - { - int tid = omp_get_thread_num(); + snaptr = new SNA(lmp,rfac0,twojmax,diagonalstyle, + 
rmin0,switchflag,bzeroflag); - // always unset use_shared_arrays since it does not work with computes - snaptr[tid] = new SNA(lmp,rfac0,twojmax,diagonalstyle, - 0 /*use_shared_arrays*/, rmin0,switchflag,bzeroflag); - } - - ncoeff = snaptr[0]->ncoeff; + ncoeff = snaptr->ncoeff; size_peratom_cols = ncoeff; if (quadraticflag) size_peratom_cols += (ncoeff*(ncoeff+1))/2; peratom_flag = 1; @@ -136,7 +125,6 @@ ComputeSNAAtom::ComputeSNAAtom(LAMMPS *lmp, int narg, char **arg) : nmax = 0; njmax = 0; sna = NULL; - } /* ---------------------------------------------------------------------- */ @@ -147,9 +135,7 @@ ComputeSNAAtom::~ComputeSNAAtom() memory->destroy(radelem); memory->destroy(wjelem); memory->destroy(cutsq); - for (int tid = 0; tidcompute[i]->style,"sna/atom") == 0) count++; if (count > 1 && comm->me == 0) error->warning(FLERR,"More than one compute sna/atom"); -#if defined(_OPENMP) -#pragma omp parallel default(none) -#endif - { - int tid = omp_get_thread_num(); - snaptr[tid]->init(); - } + snaptr->init(); } /* ---------------------------------------------------------------------- */ @@ -223,11 +203,7 @@ void ComputeSNAAtom::compute_peratom() double** const x = atom->x; const int* const mask = atom->mask; -#if defined(_OPENMP) -#pragma omp parallel for default(none) -#endif for (int ii = 0; ii < inum; ii++) { - const int tid = omp_get_thread_num(); const int i = ilist[ii]; if (mask[i] & groupbit) { @@ -241,7 +217,7 @@ void ComputeSNAAtom::compute_peratom() // insure rij, inside, and typej are of size jnum - snaptr[tid]->grow_rij(jnum); + snaptr->grow_rij(jnum); // rij[][3] = displacements between atom I and those neighbors // inside = indices of neighbors of I within cutoff @@ -258,26 +234,26 @@ void ComputeSNAAtom::compute_peratom() const double rsq = delx*delx + dely*dely + delz*delz; int jtype = type[j]; if (rsq < cutsq[itype][jtype] && rsq>1e-20) { - snaptr[tid]->rij[ninside][0] = delx; - snaptr[tid]->rij[ninside][1] = dely; - snaptr[tid]->rij[ninside][2] = 
delz; - snaptr[tid]->inside[ninside] = j; - snaptr[tid]->wj[ninside] = wjelem[jtype]; - snaptr[tid]->rcutij[ninside] = (radi+radelem[jtype])*rcutfac; + snaptr->rij[ninside][0] = delx; + snaptr->rij[ninside][1] = dely; + snaptr->rij[ninside][2] = delz; + snaptr->inside[ninside] = j; + snaptr->wj[ninside] = wjelem[jtype]; + snaptr->rcutij[ninside] = (radi+radelem[jtype])*rcutfac; ninside++; } } - snaptr[tid]->compute_ui(ninside); - snaptr[tid]->compute_zi(); - snaptr[tid]->compute_bi(); - snaptr[tid]->copy_bi2bvec(); + snaptr->compute_ui(ninside); + snaptr->compute_zi(); + snaptr->compute_bi(); + snaptr->copy_bi2bvec(); for (int icoeff = 0; icoeff < ncoeff; icoeff++) - sna[i][icoeff] = snaptr[tid]->bvec[icoeff]; + sna[i][icoeff] = snaptr->bvec[icoeff]; if (quadraticflag) { int ncount = ncoeff; for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - double bi = snaptr[tid]->bvec[icoeff]; + double bi = snaptr->bvec[icoeff]; // diagonal element of quadratic matrix @@ -286,7 +262,7 @@ void ComputeSNAAtom::compute_peratom() // upper-triangular elements of quadratic matrix for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) - sna[i][ncount++] = bi*snaptr[tid]->bvec[jcoeff]; + sna[i][ncount++] = bi*snaptr->bvec[jcoeff]; } } } else { @@ -305,7 +281,6 @@ double ComputeSNAAtom::memory_usage() double bytes = nmax*size_peratom_cols * sizeof(double); bytes += 3*njmax*sizeof(double); bytes += njmax*sizeof(int); - bytes += snaptr[0]->memory_usage()*comm->nthreads; return bytes; } diff --git a/src/SNAP/compute_sna_atom.h b/src/SNAP/compute_sna_atom.h index 2f6fb18996..56ffccfa7e 100644 --- a/src/SNAP/compute_sna_atom.h +++ b/src/SNAP/compute_sna_atom.h @@ -42,10 +42,9 @@ class ComputeSNAAtom : public Compute { double rcutfac; double *radelem; double *wjelem; - class SNA** snaptr; + class SNA* snaptr; double cutmax; int quadraticflag; - int nthreads; }; } diff --git a/src/SNAP/compute_snad_atom.cpp b/src/SNAP/compute_snad_atom.cpp index b0395d5317..b356d61d3d 100644 --- 
a/src/SNAP/compute_snad_atom.cpp +++ b/src/SNAP/compute_snad_atom.cpp @@ -25,7 +25,6 @@ #include "comm.h" #include "memory.h" #include "error.h" -#include "openmp_snap.h" using namespace LAMMPS_NS; @@ -113,20 +112,10 @@ ComputeSNADAtom::ComputeSNADAtom(LAMMPS *lmp, int narg, char **arg) : } else error->all(FLERR,"Illegal compute snad/atom command"); } - nthreads = comm->nthreads; - snaptr = new SNA*[nthreads]; -#if defined(_OPENMP) -#pragma omp parallel default(none) shared(lmp,rfac0,twojmax,rmin0,switchflag,bzeroflag) -#endif - { - int tid = omp_get_thread_num(); + snaptr = new SNA(lmp,rfac0,twojmax,diagonalstyle, + rmin0,switchflag,bzeroflag); - // always unset use_shared_arrays since it does not work with computes - snaptr[tid] = new SNA(lmp,rfac0,twojmax,diagonalstyle, - 0 /*use_shared_arrays*/, rmin0,switchflag,bzeroflag); - } - - ncoeff = snaptr[0]->ncoeff; + ncoeff = snaptr->ncoeff; nperdim = ncoeff; if (quadraticflag) nperdim += (ncoeff*(ncoeff+1))/2; yoffset = nperdim; @@ -138,7 +127,6 @@ ComputeSNADAtom::ComputeSNADAtom(LAMMPS *lmp, int narg, char **arg) : nmax = 0; njmax = 0; snad = NULL; - } /* ---------------------------------------------------------------------- */ @@ -149,9 +137,7 @@ ComputeSNADAtom::~ComputeSNADAtom() memory->destroy(radelem); memory->destroy(wjelem); memory->destroy(cutsq); - for (int tid = 0; tidcompute[i]->style,"snad/atom") == 0) count++; if (count > 1 && comm->me == 0) error->warning(FLERR,"More than one compute snad/atom"); -#if defined(_OPENMP) -#pragma omp parallel default(none) -#endif - { - int tid = omp_get_thread_num(); - snaptr[tid]->init(); - } + snaptr->init(); } /* ---------------------------------------------------------------------- */ @@ -235,11 +215,7 @@ void ComputeSNADAtom::compute_peratom() double** const x = atom->x; const int* const mask = atom->mask; -#if defined(_OPENMP) -#pragma omp parallel for default(none) -#endif for (int ii = 0; ii < inum; ii++) { - const int tid = omp_get_thread_num(); const int i = 
ilist[ii]; if (mask[i] & groupbit) { @@ -258,7 +234,7 @@ void ComputeSNADAtom::compute_peratom() // insure rij, inside, and typej are of size jnum - snaptr[tid]->grow_rij(jnum); + snaptr->grow_rij(jnum); // rij[][3] = displacements between atom I and those neighbors // inside = indices of neighbors of I within cutoff @@ -276,30 +252,30 @@ void ComputeSNADAtom::compute_peratom() const double rsq = delx*delx + dely*dely + delz*delz; int jtype = type[j]; if (rsq < cutsq[itype][jtype]&&rsq>1e-20) { - snaptr[tid]->rij[ninside][0] = delx; - snaptr[tid]->rij[ninside][1] = dely; - snaptr[tid]->rij[ninside][2] = delz; - snaptr[tid]->inside[ninside] = j; - snaptr[tid]->wj[ninside] = wjelem[jtype]; - snaptr[tid]->rcutij[ninside] = (radi+radelem[jtype])*rcutfac; + snaptr->rij[ninside][0] = delx; + snaptr->rij[ninside][1] = dely; + snaptr->rij[ninside][2] = delz; + snaptr->inside[ninside] = j; + snaptr->wj[ninside] = wjelem[jtype]; + snaptr->rcutij[ninside] = (radi+radelem[jtype])*rcutfac; ninside++; } } - snaptr[tid]->compute_ui(ninside); - snaptr[tid]->compute_zi(); + snaptr->compute_ui(ninside); + snaptr->compute_zi(); if (quadraticflag) { - snaptr[tid]->compute_bi(); - snaptr[tid]->copy_bi2bvec(); + snaptr->compute_bi(); + snaptr->copy_bi2bvec(); } for (int jj = 0; jj < ninside; jj++) { - const int j = snaptr[tid]->inside[jj]; - snaptr[tid]->compute_duidrj(snaptr[tid]->rij[jj], - snaptr[tid]->wj[jj], - snaptr[tid]->rcutij[jj]); - snaptr[tid]->compute_dbidrj(); - snaptr[tid]->copy_dbi2dbvec(); + const int j = snaptr->inside[jj]; + snaptr->compute_duidrj(snaptr->rij[jj], + snaptr->wj[jj], + snaptr->rcutij[jj]); + snaptr->compute_dbidrj(); + snaptr->copy_dbi2dbvec(); // Accumulate -dBi/dRi, -dBi/dRj @@ -307,12 +283,12 @@ void ComputeSNADAtom::compute_peratom() double *snadj = snad[j]+typeoffset; for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - snadi[icoeff] += snaptr[tid]->dbvec[icoeff][0]; - snadi[icoeff+yoffset] += snaptr[tid]->dbvec[icoeff][1]; - snadi[icoeff+zoffset] += 
snaptr[tid]->dbvec[icoeff][2]; - snadj[icoeff] -= snaptr[tid]->dbvec[icoeff][0]; - snadj[icoeff+yoffset] -= snaptr[tid]->dbvec[icoeff][1]; - snadj[icoeff+zoffset] -= snaptr[tid]->dbvec[icoeff][2]; + snadi[icoeff] += snaptr->dbvec[icoeff][0]; + snadi[icoeff+yoffset] += snaptr->dbvec[icoeff][1]; + snadi[icoeff+zoffset] += snaptr->dbvec[icoeff][2]; + snadj[icoeff] -= snaptr->dbvec[icoeff][0]; + snadj[icoeff+yoffset] -= snaptr->dbvec[icoeff][1]; + snadj[icoeff+zoffset] -= snaptr->dbvec[icoeff][2]; } if (quadraticflag) { @@ -321,10 +297,10 @@ void ComputeSNADAtom::compute_peratom() snadj += quadraticoffset; int ncount = 0; for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - double bi = snaptr[tid]->bvec[icoeff]; - double bix = snaptr[tid]->dbvec[icoeff][0]; - double biy = snaptr[tid]->dbvec[icoeff][1]; - double biz = snaptr[tid]->dbvec[icoeff][2]; + double bi = snaptr->bvec[icoeff]; + double bix = snaptr->dbvec[icoeff][0]; + double biy = snaptr->dbvec[icoeff][1]; + double biz = snaptr->dbvec[icoeff][2]; // diagonal elements of quadratic matrix @@ -343,12 +319,12 @@ void ComputeSNADAtom::compute_peratom() // upper-triangular elements of quadratic matrix for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { - double dbxtmp = bi*snaptr[tid]->dbvec[jcoeff][0] - + bix*snaptr[tid]->bvec[jcoeff]; - double dbytmp = bi*snaptr[tid]->dbvec[jcoeff][1] - + biy*snaptr[tid]->bvec[jcoeff]; - double dbztmp = bi*snaptr[tid]->dbvec[jcoeff][2] - + biz*snaptr[tid]->bvec[jcoeff]; + double dbxtmp = bi*snaptr->dbvec[jcoeff][0] + + bix*snaptr->bvec[jcoeff]; + double dbytmp = bi*snaptr->dbvec[jcoeff][1] + + biy*snaptr->bvec[jcoeff]; + double dbztmp = bi*snaptr->dbvec[jcoeff][2] + + biz*snaptr->bvec[jcoeff]; snadi[ncount] += dbxtmp; snadi[ncount+yoffset] += dbytmp; @@ -408,6 +384,5 @@ double ComputeSNADAtom::memory_usage() bytes += 3*njmax*sizeof(double); bytes += njmax*sizeof(int); bytes += 3*nperdim*atom->ntypes; - bytes += snaptr[0]->memory_usage()*comm->nthreads; return bytes; } diff --git 
a/src/SNAP/compute_snad_atom.h b/src/SNAP/compute_snad_atom.h index 92003a9bc5..1fcf540d7c 100644 --- a/src/SNAP/compute_snad_atom.h +++ b/src/SNAP/compute_snad_atom.h @@ -44,10 +44,9 @@ class ComputeSNADAtom : public Compute { double rcutfac; double *radelem; double *wjelem; - class SNA** snaptr; + class SNA* snaptr; double cutmax; int quadraticflag; - int nthreads; }; } diff --git a/src/SNAP/compute_snav_atom.cpp b/src/SNAP/compute_snav_atom.cpp index b2d555f713..9f9ef7a67d 100644 --- a/src/SNAP/compute_snav_atom.cpp +++ b/src/SNAP/compute_snav_atom.cpp @@ -25,7 +25,6 @@ #include "comm.h" #include "memory.h" #include "error.h" -#include "openmp_snap.h" using namespace LAMMPS_NS; @@ -109,20 +108,10 @@ ComputeSNAVAtom::ComputeSNAVAtom(LAMMPS *lmp, int narg, char **arg) : } else error->all(FLERR,"Illegal compute snav/atom command"); } - nthreads = comm->nthreads; - snaptr = new SNA*[nthreads]; -#if defined(_OPENMP) -#pragma omp parallel default(none) shared(lmp,rfac0,twojmax,rmin0,switchflag,bzeroflag) -#endif - { - int tid = omp_get_thread_num(); + snaptr = new SNA(lmp,rfac0,twojmax,diagonalstyle, + rmin0,switchflag,bzeroflag); - // always unset use_shared_arrays since it does not work with computes - snaptr[tid] = new SNA(lmp,rfac0,twojmax,diagonalstyle, - 0 /*use_shared_arrays*/, rmin0,switchflag,bzeroflag); - } - - ncoeff = snaptr[0]->ncoeff; + ncoeff = snaptr->ncoeff; nperdim = ncoeff; if (quadraticflag) nperdim += (ncoeff*(ncoeff+1))/2; size_peratom_cols = 6*nperdim*atom->ntypes; @@ -144,9 +133,7 @@ ComputeSNAVAtom::~ComputeSNAVAtom() memory->destroy(wjelem); memory->destroy(cutsq); - for (int tid = 0; tidcompute[i]->style,"snav/atom") == 0) count++; if (count > 1 && comm->me == 0) error->warning(FLERR,"More than one compute snav/atom"); -#if defined(_OPENMP) -#pragma omp parallel default(none) -#endif - { - int tid = omp_get_thread_num(); - snaptr[tid]->init(); - } + snaptr->init(); } /* ---------------------------------------------------------------------- 
*/ @@ -230,11 +211,7 @@ void ComputeSNAVAtom::compute_peratom() double** const x = atom->x; const int* const mask = atom->mask; -#if defined(_OPENMP) -#pragma omp parallel for default(none) -#endif for (int ii = 0; ii < inum; ii++) { - const int tid = omp_get_thread_num(); const int i = ilist[ii]; if (mask[i] & groupbit) { @@ -251,7 +228,7 @@ void ComputeSNAVAtom::compute_peratom() // insure rij, inside, and typej are of size jnum - snaptr[tid]->grow_rij(jnum); + snaptr->grow_rij(jnum); // rij[][3] = displacements between atom I and those neighbors // inside = indices of neighbors of I within cutoff @@ -269,31 +246,31 @@ void ComputeSNAVAtom::compute_peratom() const double rsq = delx*delx + dely*dely + delz*delz; int jtype = type[j]; if (rsq < cutsq[itype][jtype]&&rsq>1e-20) { - snaptr[tid]->rij[ninside][0] = delx; - snaptr[tid]->rij[ninside][1] = dely; - snaptr[tid]->rij[ninside][2] = delz; - snaptr[tid]->inside[ninside] = j; - snaptr[tid]->wj[ninside] = wjelem[jtype]; - snaptr[tid]->rcutij[ninside] = (radi+radelem[jtype])*rcutfac; + snaptr->rij[ninside][0] = delx; + snaptr->rij[ninside][1] = dely; + snaptr->rij[ninside][2] = delz; + snaptr->inside[ninside] = j; + snaptr->wj[ninside] = wjelem[jtype]; + snaptr->rcutij[ninside] = (radi+radelem[jtype])*rcutfac; ninside++; } } - snaptr[tid]->compute_ui(ninside); - snaptr[tid]->compute_zi(); + snaptr->compute_ui(ninside); + snaptr->compute_zi(); if (quadraticflag) { - snaptr[tid]->compute_bi(); - snaptr[tid]->copy_bi2bvec(); + snaptr->compute_bi(); + snaptr->copy_bi2bvec(); } for (int jj = 0; jj < ninside; jj++) { - const int j = snaptr[tid]->inside[jj]; + const int j = snaptr->inside[jj]; - snaptr[tid]->compute_duidrj(snaptr[tid]->rij[jj], - snaptr[tid]->wj[jj], - snaptr[tid]->rcutij[jj]); - snaptr[tid]->compute_dbidrj(); - snaptr[tid]->copy_dbi2dbvec(); + snaptr->compute_duidrj(snaptr->rij[jj], + snaptr->wj[jj], + snaptr->rcutij[jj]); + snaptr->compute_dbidrj(); + snaptr->copy_dbi2dbvec(); // Accumulate -dBi/dRi*Ri, 
-dBi/dRj*Rj @@ -301,18 +278,18 @@ void ComputeSNAVAtom::compute_peratom() double *snavj = snav[j]+typeoffset; for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - snavi[icoeff] += snaptr[tid]->dbvec[icoeff][0]*xtmp; - snavi[icoeff+nperdim] += snaptr[tid]->dbvec[icoeff][1]*ytmp; - snavi[icoeff+2*nperdim] += snaptr[tid]->dbvec[icoeff][2]*ztmp; - snavi[icoeff+3*nperdim] += snaptr[tid]->dbvec[icoeff][1]*ztmp; - snavi[icoeff+4*nperdim] += snaptr[tid]->dbvec[icoeff][0]*ztmp; - snavi[icoeff+5*nperdim] += snaptr[tid]->dbvec[icoeff][0]*ytmp; - snavj[icoeff] -= snaptr[tid]->dbvec[icoeff][0]*x[j][0]; - snavj[icoeff+nperdim] -= snaptr[tid]->dbvec[icoeff][1]*x[j][1]; - snavj[icoeff+2*nperdim] -= snaptr[tid]->dbvec[icoeff][2]*x[j][2]; - snavj[icoeff+3*nperdim] -= snaptr[tid]->dbvec[icoeff][1]*x[j][2]; - snavj[icoeff+4*nperdim] -= snaptr[tid]->dbvec[icoeff][0]*x[j][2]; - snavj[icoeff+5*nperdim] -= snaptr[tid]->dbvec[icoeff][0]*x[j][1]; + snavi[icoeff] += snaptr->dbvec[icoeff][0]*xtmp; + snavi[icoeff+nperdim] += snaptr->dbvec[icoeff][1]*ytmp; + snavi[icoeff+2*nperdim] += snaptr->dbvec[icoeff][2]*ztmp; + snavi[icoeff+3*nperdim] += snaptr->dbvec[icoeff][1]*ztmp; + snavi[icoeff+4*nperdim] += snaptr->dbvec[icoeff][0]*ztmp; + snavi[icoeff+5*nperdim] += snaptr->dbvec[icoeff][0]*ytmp; + snavj[icoeff] -= snaptr->dbvec[icoeff][0]*x[j][0]; + snavj[icoeff+nperdim] -= snaptr->dbvec[icoeff][1]*x[j][1]; + snavj[icoeff+2*nperdim] -= snaptr->dbvec[icoeff][2]*x[j][2]; + snavj[icoeff+3*nperdim] -= snaptr->dbvec[icoeff][1]*x[j][2]; + snavj[icoeff+4*nperdim] -= snaptr->dbvec[icoeff][0]*x[j][2]; + snavj[icoeff+5*nperdim] -= snaptr->dbvec[icoeff][0]*x[j][1]; } if (quadraticflag) { @@ -321,10 +298,10 @@ void ComputeSNAVAtom::compute_peratom() snavj += quadraticoffset; int ncount = 0; for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - double bi = snaptr[tid]->bvec[icoeff]; - double bix = snaptr[tid]->dbvec[icoeff][0]; - double biy = snaptr[tid]->dbvec[icoeff][1]; - double biz = 
snaptr[tid]->dbvec[icoeff][2]; + double bi = snaptr->bvec[icoeff]; + double bix = snaptr->dbvec[icoeff][0]; + double biy = snaptr->dbvec[icoeff][1]; + double biz = snaptr->dbvec[icoeff][2]; // diagonal element of quadratic matrix @@ -348,12 +325,12 @@ void ComputeSNAVAtom::compute_peratom() // upper-triangular elements of quadratic matrix for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { - double dbxtmp = bi*snaptr[tid]->dbvec[jcoeff][0] - + bix*snaptr[tid]->bvec[jcoeff]; - double dbytmp = bi*snaptr[tid]->dbvec[jcoeff][1] - + biy*snaptr[tid]->bvec[jcoeff]; - double dbztmp = bi*snaptr[tid]->dbvec[jcoeff][2] - + biz*snaptr[tid]->bvec[jcoeff]; + double dbxtmp = bi*snaptr->dbvec[jcoeff][0] + + bix*snaptr->bvec[jcoeff]; + double dbytmp = bi*snaptr->dbvec[jcoeff][1] + + biy*snaptr->bvec[jcoeff]; + double dbztmp = bi*snaptr->dbvec[jcoeff][2] + + biz*snaptr->bvec[jcoeff]; snavi[ncount] += dbxtmp*xtmp; snavi[ncount+nperdim] += dbytmp*ytmp; snavi[ncount+2*nperdim] += dbztmp*ztmp; @@ -419,6 +396,5 @@ double ComputeSNAVAtom::memory_usage() bytes += njmax*sizeof(int); bytes += 6*nperdim*atom->ntypes; if (quadraticflag) bytes += 6*nperdim*atom->ntypes; - bytes += snaptr[0]->memory_usage()*comm->nthreads; return bytes; } diff --git a/src/SNAP/compute_snav_atom.h b/src/SNAP/compute_snav_atom.h index 9be5e1d389..6bcce346e0 100644 --- a/src/SNAP/compute_snav_atom.h +++ b/src/SNAP/compute_snav_atom.h @@ -44,9 +44,8 @@ class ComputeSNAVAtom : public Compute { double rcutfac; double *radelem; double *wjelem; - class SNA** snaptr; + class SNA* snaptr; int quadraticflag; - int nthreads; }; } diff --git a/src/SNAP/openmp_snap.h b/src/SNAP/openmp_snap.h deleted file mode 100644 index 60a3138c9c..0000000000 --- a/src/SNAP/openmp_snap.h +++ /dev/null @@ -1,16 +0,0 @@ - -#ifndef LMP_OPENMP_SNAP_H -#define LMP_OPENMP_SNAP_H - -#if defined(_OPENMP) -#include -#else -enum omp_sched_t {omp_sched_static, omp_sched_dynamic, omp_sched_guided, omp_sched_auto}; -inline int omp_get_thread_num() 
{ return 0;} -inline int omp_set_num_threads(int num_threads) {return 1;} -/* inline int __sync_fetch_and_add(int* ptr, int value) {int tmp = *ptr; ptr[0]+=value; return tmp;} */ -inline void omp_set_schedule(omp_sched_t schedule,int modifier=1) {} -inline int omp_in_parallel() {return 0;} -#endif - -#endif diff --git a/src/SNAP/pair_snap.cpp b/src/SNAP/pair_snap.cpp index 86e709ba03..ae542e81b4 100644 --- a/src/SNAP/pair_snap.cpp +++ b/src/SNAP/pair_snap.cpp @@ -23,7 +23,6 @@ #include "neigh_list.h" #include "neigh_request.h" #include "sna.h" -#include "openmp_snap.h" #include "domain.h" #include "memory.h" #include "error.h" @@ -55,51 +54,6 @@ PairSNAP::PairSNAP(LAMMPS *lmp) : Pair(lmp) coeffelem = NULL; nmax = 0; - nthreads = 1; - - schedule_user = 0; - schedule_time_guided = -1; - schedule_time_dynamic = -1; - ncalls_neigh =-1; - - ilistmask_max = 0; - ilistmask = NULL; - ghostinum = 0; - ghostilist_max = 0; - ghostilist = NULL; - ghostnumneigh_max = 0; - ghostnumneigh = NULL; - ghostneighs = NULL; - ghostfirstneigh = NULL; - ghostneighs_total = 0; - ghostneighs_max = 0; - - i_max = 0; - i_neighmax = 0; - i_numpairs = 0; - i_rij = NULL; - i_inside = NULL; - i_wj = NULL; - i_rcutij = NULL; - i_ninside = NULL; - i_pairs = NULL; - i_uarraytot_r = NULL; - i_uarraytot_i = NULL; - i_zarray_r = NULL; - i_zarray_i = NULL; - - use_shared_arrays = 0; - -#ifdef TIMING_INFO - timers[0] = 0; - timers[1] = 0; - timers[2] = 0; - timers[3] = 0; -#endif - - // Need to set this because restart not handled by PairHybrid - - sna = NULL; beta_max = 0; beta = NULL; @@ -123,35 +77,7 @@ PairSNAP::~PairSNAP() memory->destroy(beta); memory->destroy(bispectrum); - // Need to set this because restart not handled by PairHybrid - - if (sna) { - -#ifdef TIMING_INFO - double time[5]; - double timeave[5]; - double timeave_mpi[5]; - double timemax_mpi[5]; - - for (int i = 0; i < 5; i++) { - time[i] = 0; - timeave[i] = 0; - for (int tid = 0; tidtimers[i]>time[i]) - time[i] = sna[tid]->timers[i]; 
- timeave[i] += sna[tid]->timers[i]; - } - timeave[i] /= nthreads; - } - MPI_Reduce(timeave, timeave_mpi, 5, MPI_DOUBLE, MPI_SUM, 0, world); - MPI_Reduce(time, timemax_mpi, 5, MPI_DOUBLE, MPI_MAX, 0, world); -#endif - - for (int tid = 0; tiddestroy(setflag); @@ -161,22 +87,11 @@ PairSNAP::~PairSNAP() } -void PairSNAP::compute(int eflag, int vflag) -{ -// if (use_optimized) -// compute_optimized(eflag, vflag); -// else - -// hard-code compute_regular() - - compute_regular(eflag, vflag); -} - /* ---------------------------------------------------------------------- This version is a straightforward implementation ---------------------------------------------------------------------- */ -void PairSNAP::compute_regular(int eflag, int vflag) +void PairSNAP::compute(int eflag, int vflag) { int i,j,jnum,ninside; double delx,dely,delz,evdwl,rsq; @@ -191,7 +106,6 @@ void PairSNAP::compute_regular(int eflag, int vflag) int *type = atom->type; int nlocal = atom->nlocal; int newton_pair = force->newton_pair; - class SNA* snaptr = sna[0]; if (beta_max < list->inum) { memory->grow(beta,list->inum,ncoeff,"PairSNAP:beta"); @@ -315,573 +229,6 @@ void PairSNAP::compute_regular(int eflag, int vflag) if (vflag_fdotr) virial_fdotr_compute(); } -inline int PairSNAP::equal(double* x,double* y) -{ - double dist2 = - (x[0]-y[0])*(x[0]-y[0]) + - (x[1]-y[1])*(x[1]-y[1]) + - (x[2]-y[2])*(x[2]-y[2]); - if (dist2 < 1e-20) return 1; - return 0; -} - -inline double PairSNAP::dist2(double* x,double* y) -{ - return - (x[0]-y[0])*(x[0]-y[0]) + - (x[1]-y[1])*(x[1]-y[1]) + - (x[2]-y[2])*(x[2]-y[2]); -} - -// return extra communication cutoff -// extra_cutoff = max(subdomain_length) - -double PairSNAP::extra_cutoff() -{ - double sublo[3],subhi[3]; - - if (domain->triclinic == 0) { - for (int dim = 0 ; dim < 3 ; dim++) { - sublo[dim] = domain->sublo[dim]; - subhi[dim] = domain->subhi[dim]; - } - } else { - domain->lamda2x(domain->sublo_lamda,sublo); - domain->lamda2x(domain->subhi_lamda,subhi); - } - - 
double sub_size[3]; - for (int dim = 0; dim < 3; dim++) - sub_size[dim] = subhi[dim] - sublo[dim]; - - double max_sub_size = 0; - for (int dim = 0; dim < 3; dim++) - max_sub_size = MAX(max_sub_size,sub_size[dim]); - - // note: for triclinic, probably need something different - // see Comm::setup() - - return max_sub_size; -} - -// micro load_balancer: each MPI process will -// check with each of its 26 neighbors, -// whether an imbalance exists in the number -// of atoms to calculate forces for. -// If it does it will set ilistmask of one of -// its local atoms to zero, and send its Tag -// to the neighbor process. The neighboring process -// will check its ghost list for the -// ghost atom with the same Tag which is closest -// to its domain center, and build a -// neighborlist for this ghost atom. For this to work, -// the communication cutoff has to be -// as large as the neighbor cutoff + -// maximum subdomain length. - -// Note that at most one atom is exchanged per processor pair. - -// Also note that the local atom assignment -// doesn't change. This load balancer will cause -// some ghost atoms to have full neighborlists -// which are unique to PairSNAP. -// They are not part of the generally accessible neighborlist. -// At the same time corresponding local atoms on -// other MPI processes will not be -// included in the force computation since -// their ilistmask is 0. This does not effect -// any other classes which might -// access the same general neighborlist. -// Reverse communication (newton on) of forces is required. - -// Currently the load balancer does two passes, -// since its exchanging atoms upstream and downstream. 
- -void PairSNAP::load_balance() -{ - double sublo[3],subhi[3]; - if (domain->triclinic == 0) { - double* sublotmp = domain->sublo; - double* subhitmp = domain->subhi; - for (int dim = 0 ; dim<3 ; dim++) { - sublo[dim]=sublotmp[dim]; - subhi[dim]=subhitmp[dim]; - } - } else { - double* sublotmp = domain->sublo_lamda; - double* subhitmp = domain->subhi_lamda; - domain->lamda2x(sublotmp,sublo); - domain->lamda2x(subhitmp,subhi); - } - - //if (list->inum==0) list->grow(atom->nmax); - - int nlocal = ghostinum; - for (int i=0; i < list->inum; i++) - if (ilistmask[i]) nlocal++; - int ***grid2proc = comm->grid2proc; - int* procgrid = comm->procgrid; - - int nlocal_up,nlocal_down; - MPI_Request request; - - double sub_mid[3]; - for (int dim=0; dim<3; dim++) - sub_mid[dim] = (subhi[dim] + sublo[dim])/2; - - if (comm->cutghostuser < - neighbor->cutneighmax+extra_cutoff()) - error->all(FLERR,"Communication cutoff too small for SNAP micro load balancing"); - - int nrecv = ghostinum; - int totalsend = 0; - int nsend = 0; - int depth = 1; - - for (int dx = -depth; dx < depth+1; dx++) - for (int dy = -depth; dy < depth+1; dy++) - for (int dz = -depth; dz < depth+1; dz++) { - - if (dx == dy && dy == dz && dz == 0) continue; - - int sendloc[3] = {comm->myloc[0], - comm->myloc[1], comm->myloc[2] - }; - sendloc[0] += dx; - sendloc[1] += dy; - sendloc[2] += dz; - for (int dim = 0; dim < 3; dim++) - if (sendloc[dim] >= procgrid[dim]) - sendloc[dim] = sendloc[dim] - procgrid[dim]; - for (int dim = 0; dim < 3; dim++) - if (sendloc[dim] < 0) - sendloc[dim] = procgrid[dim] + sendloc[dim]; - int recvloc[3] = {comm->myloc[0], - comm->myloc[1], comm->myloc[2] - }; - recvloc[0] -= dx; - recvloc[1] -= dy; - recvloc[2] -= dz; - for (int dim = 0; dim < 3; dim++) - if (recvloc[dim] < 0) - recvloc[dim] = procgrid[dim] + recvloc[dim]; - for (int dim = 0; dim < 3; dim++) - if (recvloc[dim] >= procgrid[dim]) - recvloc[dim] = recvloc[dim] - procgrid[dim]; - - int sendproc = 
grid2proc[sendloc[0]][sendloc[1]][sendloc[2]]; - int recvproc = grid2proc[recvloc[0]][recvloc[1]][recvloc[2]]; - - // two stage process, first upstream movement, then downstream - - MPI_Sendrecv(&nlocal,1,MPI_INT,sendproc,0, - &nlocal_up,1,MPI_INT,recvproc,0,world,MPI_STATUS_IGNORE); - MPI_Sendrecv(&nlocal,1,MPI_INT,recvproc,0, - &nlocal_down,1,MPI_INT,sendproc,0,world,MPI_STATUS_IGNORE); - nsend = 0; - - // send upstream - - if (nlocal > nlocal_up+1) { - - int i = totalsend++; - while(i < list->inum && ilistmask[i] == 0) - i = totalsend++; - - if (i < list->inum) - MPI_Isend(&atom->tag[i],1,MPI_INT,recvproc,0,world,&request); - else { - int j = -1; - MPI_Isend(&j,1,MPI_INT,recvproc,0,world,&request); - } - - if (i < list->inum) { - for (int j = 0; j < list->inum; j++) - if (list->ilist[j] == i) - ilistmask[j] = 0; - nsend = 1; - } - } - - // recv downstream - - if (nlocal < nlocal_down-1) { - nlocal++; - int get_tag = -1; - MPI_Recv(&get_tag,1,MPI_INT,sendproc,0,world,MPI_STATUS_IGNORE); - - // if get_tag -1 the other process didnt have local atoms to send - - if (get_tag >= 0) { - if (ghostinum >= ghostilist_max) { - memory->grow(ghostilist,ghostinum+10, - "PairSnap::ghostilist"); - ghostilist_max = ghostinum+10; - } - if (atom->nlocal + atom->nghost >= ghostnumneigh_max) { - ghostnumneigh_max = atom->nlocal+atom->nghost+100; - memory->grow(ghostnumneigh,ghostnumneigh_max, - "PairSnap::ghostnumneigh"); - memory->grow(ghostfirstneigh,ghostnumneigh_max, - "PairSnap::ghostfirstneigh"); - } - - // find closest ghost image of the transfered particle - - double mindist = 1e200; - int closestghost = -1; - for (int j = 0; j < atom->nlocal + atom->nghost; j++) - if (atom->tag[j] == get_tag) - if (dist2(sub_mid, atom->x[j]) < mindist) { - closestghost = j; - mindist = dist2(sub_mid, atom->x[j]); - } - - // build neighborlist for this particular - // ghost atom, and add it to list->ilist - - if (ghostneighs_max - ghostneighs_total < - neighbor->oneatom) { - 
memory->grow(ghostneighs, - ghostneighs_total + neighbor->oneatom, - "PairSnap::ghostneighs"); - ghostneighs_max = ghostneighs_total + neighbor->oneatom; - } - - int j = closestghost; - - ghostilist[ghostinum] = j; - ghostnumneigh[j] = 0; - ghostfirstneigh[j] = ghostneighs_total; - - ghostinum++; - int* jlist = ghostneighs + ghostfirstneigh[j]; - - // find all neighbors by looping - // over all local and ghost atoms - - for (int k = 0; k < atom->nlocal + atom->nghost; k++) - if (dist2(atom->x[j],atom->x[k]) < - neighbor->cutneighmax*neighbor->cutneighmax) { - jlist[ghostnumneigh[j]] = k; - ghostnumneigh[j]++; - ghostneighs_total++; - } - } - - if (get_tag >= 0) nrecv++; - } - - // decrease nlocal later, so that it is the - // initial number both for receiving and sending - - if (nsend) nlocal--; - - // second pass through the grid - - MPI_Sendrecv(&nlocal,1,MPI_INT,sendproc,0, - &nlocal_up,1,MPI_INT,recvproc,0,world,MPI_STATUS_IGNORE); - MPI_Sendrecv(&nlocal,1,MPI_INT,recvproc,0, - &nlocal_down,1,MPI_INT,sendproc,0,world,MPI_STATUS_IGNORE); - - // send downstream - - nsend=0; - if (nlocal > nlocal_down+1) { - int i = totalsend++; - while(i < list->inum && ilistmask[i]==0) i = totalsend++; - - if (i < list->inum) - MPI_Isend(&atom->tag[i],1,MPI_INT,sendproc,0,world,&request); - else { - int j =- 1; - MPI_Isend(&j,1,MPI_INT,sendproc,0,world,&request); - } - - if (i < list->inum) { - for (int j=0; jinum; j++) - if (list->ilist[j] == i) ilistmask[j] = 0; - nsend = 1; - } - } - - // receive upstream - - if (nlocal < nlocal_up-1) { - nlocal++; - int get_tag = -1; - - MPI_Recv(&get_tag,1,MPI_INT,recvproc,0,world,MPI_STATUS_IGNORE); - - if (get_tag >= 0) { - if (ghostinum >= ghostilist_max) { - memory->grow(ghostilist,ghostinum+10, - "PairSnap::ghostilist"); - ghostilist_max = ghostinum+10; - } - if (atom->nlocal + atom->nghost >= ghostnumneigh_max) { - ghostnumneigh_max = atom->nlocal + atom->nghost + 100; - memory->grow(ghostnumneigh,ghostnumneigh_max, - 
"PairSnap::ghostnumneigh"); - memory->grow(ghostfirstneigh,ghostnumneigh_max, - "PairSnap::ghostfirstneigh"); - } - - // find closest ghost image of the transfered particle - - double mindist = 1e200; - int closestghost = -1; - for (int j = 0; j < atom->nlocal + atom->nghost; j++) - if (atom->tag[j] == get_tag) - if (dist2(sub_mid,atom->x[j])x[j]); - } - - // build neighborlist for this particular ghost atom - - if (ghostneighs_max-ghostneighs_total < neighbor->oneatom) { - memory->grow(ghostneighs,ghostneighs_total + neighbor->oneatom, - "PairSnap::ghostneighs"); - ghostneighs_max = ghostneighs_total + neighbor->oneatom; - } - - int j = closestghost; - - ghostilist[ghostinum] = j; - ghostnumneigh[j] = 0; - ghostfirstneigh[j] = ghostneighs_total; - - ghostinum++; - int* jlist = ghostneighs + ghostfirstneigh[j]; - - for (int k = 0; k < atom->nlocal + atom->nghost; k++) - if (dist2(atom->x[j],atom->x[k]) < - neighbor->cutneighmax*neighbor->cutneighmax) { - jlist[ghostnumneigh[j]] = k; - ghostnumneigh[j]++; - ghostneighs_total++; - } - } - - if (get_tag >= 0) nrecv++; - } - if (nsend) nlocal--; - } -} - -void PairSNAP::set_sna_to_shared(int snaid,int i) -{ - sna[snaid]->rij = i_rij[i]; - sna[snaid]->inside = i_inside[i]; - sna[snaid]->wj = i_wj[i]; - sna[snaid]->rcutij = i_rcutij[i]; - sna[snaid]->zarray_r = i_zarray_r[i]; - sna[snaid]->zarray_i = i_zarray_i[i]; - sna[snaid]->uarraytot_r = i_uarraytot_r[i]; - sna[snaid]->uarraytot_i = i_uarraytot_i[i]; -} - -void PairSNAP::build_per_atom_arrays() -{ - -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME,&starttime); -#endif - - int count = 0; - int neighmax = 0; - for (int ii = 0; ii < list->inum; ii++) - if ((do_load_balance <= 0) || ilistmask[ii]) { - neighmax=MAX(neighmax,list->numneigh[list->ilist[ii]]); - ++count; - } - for (int ii = 0; ii < ghostinum; ii++) { - neighmax=MAX(neighmax,ghostnumneigh[ghostilist[ii]]); - ++count; - } - - if (i_max < count || i_neighmax < neighmax) { - int i_maxt = MAX(count,i_max); - 
i_neighmax = MAX(neighmax,i_neighmax); - memory->destroy(i_rij); - memory->destroy(i_inside); - memory->destroy(i_wj); - memory->destroy(i_rcutij); - memory->destroy(i_ninside); - memory->destroy(i_pairs); - memory->create(i_rij,i_maxt,i_neighmax,3,"PairSNAP::i_rij"); - memory->create(i_inside,i_maxt,i_neighmax,"PairSNAP::i_inside"); - memory->create(i_wj,i_maxt,i_neighmax,"PairSNAP::i_wj"); - memory->create(i_rcutij,i_maxt,i_neighmax,"PairSNAP::i_rcutij"); - memory->create(i_ninside,i_maxt,"PairSNAP::i_ninside"); - memory->create(i_pairs,i_maxt*i_neighmax,4,"PairSNAP::i_pairs"); - } - - if (i_max < count) { - int jdim = sna[0]->twojmax+1; - memory->destroy(i_uarraytot_r); - memory->destroy(i_uarraytot_i); - memory->create(i_uarraytot_r,count,jdim,jdim,jdim, - "PairSNAP::i_uarraytot_r"); - memory->create(i_uarraytot_i,count,jdim,jdim,jdim, - "PairSNAP::i_uarraytot_i"); - if (i_zarray_r != NULL) - for (int i = 0; i < i_max; i++) { - memory->destroy(i_zarray_r[i]); - memory->destroy(i_zarray_i[i]); - } - - delete [] i_zarray_r; - delete [] i_zarray_i; - i_zarray_r = new double*****[count]; - i_zarray_i = new double*****[count]; - for (int i = 0; i < count; i++) { - memory->create(i_zarray_r[i],jdim,jdim,jdim,jdim,jdim, - "PairSNAP::i_zarray_r"); - memory->create(i_zarray_i[i],jdim,jdim,jdim,jdim,jdim, - "PairSNAP::i_zarray_i"); - } - } - - if (i_max < count) - i_max = count; - - count = 0; - i_numpairs = 0; - for (int ii = 0; ii < list->inum; ii++) { - if ((do_load_balance <= 0) || ilistmask[ii]) { - int i = list->ilist[ii]; - int jnum = list->numneigh[i]; - int* jlist = list->firstneigh[i]; - const double xtmp = atom->x[i][0]; - const double ytmp = atom->x[i][1]; - const double ztmp = atom->x[i][2]; - const int itype = atom->type[i]; - const int ielem = map[itype]; - const double radi = radelem[ielem]; - int ninside = 0; - for (int jj = 0; jj < jnum; jj++) { - int j = jlist[jj]; - j &= NEIGHMASK; - const double delx = atom->x[j][0] - xtmp; - const double dely = 
atom->x[j][1] - ytmp; - const double delz = atom->x[j][2] - ztmp; - const double rsq = delx*delx + dely*dely + delz*delz; - int jtype = atom->type[j]; - int jelem = map[jtype]; - - i_pairs[i_numpairs][0] = i; - i_pairs[i_numpairs][1] = jj; - i_pairs[i_numpairs][2] = -1; - i_pairs[i_numpairs][3] = count; - if (rsq < cutsq[itype][jtype]&&rsq>1e-20) { - i_rij[count][ninside][0] = delx; - i_rij[count][ninside][1] = dely; - i_rij[count][ninside][2] = delz; - i_inside[count][ninside] = j; - i_wj[count][ninside] = wjelem[jelem]; - i_rcutij[count][ninside] = (radi + radelem[jelem])*rcutfac; - - // update index list with inside index - i_pairs[i_numpairs][2] = ninside++; - } - i_numpairs++; - } - i_ninside[count] = ninside; - count++; - } - } - - for (int ii = 0; ii < ghostinum; ii++) { - int i = ghostilist[ii]; - int jnum = ghostnumneigh[i]; - int* jlist = ghostneighs+ghostfirstneigh[i]; - const double xtmp = atom->x[i][0]; - const double ytmp = atom->x[i][1]; - const double ztmp = atom->x[i][2]; - const int itype = atom->type[i]; - const int ielem = map[itype]; - const double radi = radelem[ielem]; - int ninside = 0; - - for (int jj = 0; jj < jnum; jj++) { - int j = jlist[jj]; - j &= NEIGHMASK; - const double delx = atom->x[j][0] - xtmp; - const double dely = atom->x[j][1] - ytmp; - const double delz = atom->x[j][2] - ztmp; - const double rsq = delx*delx + dely*dely + delz*delz; - int jtype = atom->type[j]; - int jelem = map[jtype]; - - i_pairs[i_numpairs][0] = i; - i_pairs[i_numpairs][1] = jj; - i_pairs[i_numpairs][2] = -1; - i_pairs[i_numpairs][3] = count; - if (rsq < cutsq[itype][jtype]&&rsq>1e-20) { - i_rij[count][ninside][0] = delx; - i_rij[count][ninside][1] = dely; - i_rij[count][ninside][2] = delz; - i_inside[count][ninside] = j; - i_wj[count][ninside] = wjelem[jelem]; - i_rcutij[count][ninside] = (radi + radelem[jelem])*rcutfac; - // update index list with inside index - i_pairs[i_numpairs][2] = ninside++; - } - i_numpairs++; - } - i_ninside[count] = ninside; - 
count++; - } -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME,&endtime); - timers[0]+=(endtime.tv_sec-starttime.tv_sec+1.0* - (endtime.tv_nsec-starttime.tv_nsec)/1000000000); -#endif -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME,&starttime); -#endif - -#if defined(_OPENMP) -#pragma omp parallel for shared(count) default(none) -#endif - for (int ii=0; ii < count; ii++) { - int tid = omp_get_thread_num(); - set_sna_to_shared(tid,ii); - //sna[tid]->compute_ui(i_ninside[ii]); -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME,&starttime); -#endif - sna[tid]->compute_ui_omp(i_ninside[ii],MAX(int(nthreads/count),1)); -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME,&endtime); - sna[tid]->timers[0]+=(endtime.tv_sec-starttime.tv_sec+1.0* - (endtime.tv_nsec-starttime.tv_nsec)/1000000000); -#endif - } - -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME,&starttime); -#endif - for (int ii=0; ii < count; ii++) { - int tid = 0;//omp_get_thread_num(); - set_sna_to_shared(tid,ii); - sna[tid]->compute_zi_omp(MAX(int(nthreads/count),1)); - } -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME,&endtime); - sna[0]->timers[1]+=(endtime.tv_sec-starttime.tv_sec+1.0* - (endtime.tv_nsec-starttime.tv_nsec)/1000000000); -#endif - -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME,&endtime); - timers[1]+=(endtime.tv_sec-starttime.tv_sec+1.0* - (endtime.tv_nsec-starttime.tv_nsec)/1000000000); -#endif -} - /* ---------------------------------------------------------------------- compute beta ------------------------------------------------------------------------- */ @@ -914,7 +261,6 @@ void PairSNAP::compute_bispectrum() double **x = atom->x; int *type = atom->type; - class SNA* snaptr = sna[0]; for (int ii = 0; ii < list->inum; ii++) { i = list->ilist[ii]; @@ -991,96 +337,8 @@ void PairSNAP::allocate() void PairSNAP::settings(int narg, char **arg) { - - // set default values for optional arguments - - nthreads = -1; - use_shared_arrays=-1; - do_load_balance = 0; - use_optimized = 1; 
- - // optional arguments - - for (int i=0; i < narg; i++) { - if (i+2>narg) error->all(FLERR,"Illegal pair_style command"); - if (strcmp(arg[i],"nthreads")==0) { - nthreads=force->inumeric(FLERR,arg[++i]); -#if defined(LMP_USER_OMP) - error->all(FLERR,"Must set number of threads via package omp command"); -#else - omp_set_num_threads(nthreads); - comm->nthreads=nthreads; -#endif - continue; - } - if (strcmp(arg[i],"optimized")==0) { - use_optimized=force->inumeric(FLERR,arg[++i]); - continue; - } - if (strcmp(arg[i],"shared")==0) { - use_shared_arrays=force->inumeric(FLERR,arg[++i]); - continue; - } - if (strcmp(arg[i],"loadbalance")==0) { - do_load_balance = force->inumeric(FLERR,arg[++i]); - if (do_load_balance) { - double mincutoff = extra_cutoff() + - rcutmax + neighbor->skin; - if (comm->cutghostuser < mincutoff) { - char buffer[255]; - - //apparently mincutoff is 0 after sprintf command ????? - - double tmp = mincutoff + 0.1; - sprintf(buffer, "Communication cutoff is too small " - "for SNAP micro load balancing, increased to %lf", - mincutoff+0.1); - if (comm->me==0) - error->warning(FLERR,buffer); - - comm->cutghostuser = tmp; - - } - } - continue; - } - if (strcmp(arg[i],"schedule")==0) { - i++; - if (strcmp(arg[i],"static")==0) - schedule_user = 1; - if (strcmp(arg[i],"dynamic")==0) - schedule_user = 2; - if (strcmp(arg[i],"guided")==0) - schedule_user = 3; - if (strcmp(arg[i],"auto")==0) - schedule_user = 4; - if (strcmp(arg[i],"determine")==0) - schedule_user = 5; - if (schedule_user == 0) - error->all(FLERR,"Illegal pair_style command"); - continue; - } + for (int i=0; i < narg; i++) error->all(FLERR,"Illegal pair_style command"); - } - - if (nthreads < 0) - nthreads = comm->nthreads; - - if (use_shared_arrays < 0) { - if (nthreads > 1 && atom->nlocal <= 2*nthreads) - use_shared_arrays = 1; - else use_shared_arrays = 0; - } - - // check if running non-optimized code with - // optimization flags set - - if (!use_optimized) - if (nthreads > 1 || - 
use_shared_arrays || - do_load_balance || - schedule_user) - error->all(FLERR,"Illegal pair_style command"); } /* ---------------------------------------------------------------------- @@ -1170,26 +428,14 @@ void PairSNAP::coeff(int narg, char **arg) if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); - sna = new SNA*[nthreads]; + snaptr = new SNA(lmp,rfac0,twojmax, + diagonalstyle, + rmin0,switchflag,bzeroflag); + snaptr->grow_rij(nmax); - // allocate memory for per OpenMP thread data which - // is wrapped into the sna class - -#if defined(_OPENMP) -#pragma omp parallel default(none) -#endif - { - int tid = omp_get_thread_num(); - sna[tid] = new SNA(lmp,rfac0,twojmax, - diagonalstyle,use_shared_arrays, - rmin0,switchflag,bzeroflag); - if (!use_shared_arrays) - sna[tid]->grow_rij(nmax); - } - - if (ncoeff != sna[0]->ncoeff) { + if (ncoeff != snaptr->ncoeff) { if (comm->me == 0) - printf("ncoeff = %d snancoeff = %d \n",ncoeff,sna[0]->ncoeff); + printf("ncoeff = %d snancoeff = %d \n",ncoeff,snaptr->ncoeff); error->all(FLERR,"Incorrect SNAP parameter file"); } @@ -1216,13 +462,7 @@ void PairSNAP::init_style() neighbor->requests[irequest]->half = 0; neighbor->requests[irequest]->full = 1; -#if defined(_OPENMP) -#pragma omp parallel default(none) -#endif - { - int tid = omp_get_thread_num(); - sna[tid]->init(); - } + snaptr->init(); } @@ -1370,6 +610,8 @@ void PairSNAP::read_files(char *coefffilename, char *paramfilename) } } + if (comm->me == 0) fclose(fpcoeff); + // set flags for required keywords rcutfacflag = 0; @@ -1471,7 +713,6 @@ double PairSNAP::memory_usage() bytes += nmax*sizeof(int); bytes += (2*ncoeffall)*sizeof(double); bytes += (ncoeff*3)*sizeof(double); - bytes += sna[0]->memory_usage()*nthreads; return bytes; } diff --git a/src/SNAP/pair_snap.h b/src/SNAP/pair_snap.h index 1453076b23..b5871c1527 100644 --- a/src/SNAP/pair_snap.h +++ b/src/SNAP/pair_snap.h @@ -29,8 +29,6 @@ public: PairSNAP(class LAMMPS *); ~PairSNAP(); virtual void 
compute(int, int); - void compute_regular(int, int); - void compute_optimized(int, int); void settings(int, char **); virtual void coeff(int, char **); virtual void init_style(); @@ -43,59 +41,16 @@ public: protected: int ncoeffq, ncoeffall; double **bvec, ***dbvec; - class SNA** sna; + class SNA* snaptr; int nmax; - int nthreads; virtual void allocate(); void read_files(char *, char *); inline int equal(double* x,double* y); inline double dist2(double* x,double* y); - double extra_cutoff(); - void load_balance(); - void set_sna_to_shared(int snaid,int i); - void build_per_atom_arrays(); void compute_beta(); void compute_bispectrum(); - int schedule_user; - double schedule_time_guided; - double schedule_time_dynamic; - - int ncalls_neigh; - int do_load_balance; - int ilistmask_max; - int* ilistmask; - int ghostinum; - int ghostilist_max; - int* ghostilist; - int ghostnumneigh_max; - int* ghostnumneigh; - int* ghostneighs; - int* ghostfirstneigh; - int ghostneighs_total; - int ghostneighs_max; - - int use_optimized; - int use_shared_arrays; - - int i_max; - int i_neighmax; - int i_numpairs; - int **i_pairs; - double ***i_rij; - int **i_inside; - double **i_wj; - double **i_rcutij; - int *i_ninside; - double ****i_uarraytot_r, ****i_uarraytot_i; - double ******i_zarray_r, ******i_zarray_i; - -#ifdef TIMING_INFO - // timespec starttime, endtime; - double timers[4]; -#endif - double rcutmax; // max cutoff for all elements int nelements; // # of unique elements char **elements; // names of unique elements @@ -130,15 +85,6 @@ Self-explanatory. Check the input script syntax and compare to the documentation for the command. You can use -echo screen as a command-line option when running LAMMPS to see the offending line. -E: Must set number of threads via package omp command - -Because you are using the USER-OMP package, set the number of threads -via its settings, not by the pair_style snap nthreads setting. 
- -W: Communication cutoff is too small for SNAP micro load balancing, increased to %lf - -Self-explanatory. - E: Incorrect args for pair coefficients Self-explanatory. Check the input script or data file. diff --git a/src/SNAP/sna.cpp b/src/SNAP/sna.cpp index b729e4d0d6..b388136caf 100644 --- a/src/SNAP/sna.cpp +++ b/src/SNAP/sna.cpp @@ -21,7 +21,6 @@ #include "math_extra.h" #include #include -#include "openmp_snap.h" #include "memory.h" #include "error.h" @@ -114,12 +113,11 @@ using namespace MathConst; ------------------------------------------------------------------------- */ SNA::SNA(LAMMPS* lmp, double rfac0_in, - int twojmax_in, int diagonalstyle_in, int use_shared_arrays_in, + int twojmax_in, int diagonalstyle_in, double rmin0_in, int switch_flag_in, int bzero_flag_in) : Pointers(lmp) { wself = 1.0; - use_shared_arrays = use_shared_arrays_in; rfac0 = rfac0_in; rmin0 = rmin0_in; switch_flag = switch_flag_in; @@ -141,7 +139,8 @@ SNA::SNA(LAMMPS* lmp, double rfac0_in, wj = NULL; rcutij = NULL; nmax = 0; - idxj = NULL; + idxz = NULL; + idxb= NULL; if (bzero_flag) { double www = wself*wself*wself; @@ -149,133 +148,178 @@ SNA::SNA(LAMMPS* lmp, double rfac0_in, bzero[j] = www*(j+1); } -#ifdef TIMING_INFO - timers = new double[20]; - for(int i = 0; i < 20; i++) timers[i] = 0; - print = 0; - counter = 0; -#endif - build_indexlist(); - - } /* ---------------------------------------------------------------------- */ SNA::~SNA() { - if(!use_shared_arrays) { - destroy_twojmax_arrays(); - memory->destroy(rij); - memory->destroy(inside); - memory->destroy(wj); - memory->destroy(rcutij); - memory->destroy(bvec); - memory->destroy(dbvec); - } - delete[] idxj; + destroy_twojmax_arrays(); + memory->destroy(rij); + memory->destroy(inside); + memory->destroy(wj); + memory->destroy(rcutij); + memory->destroy(bvec); + memory->destroy(dbvec); + delete[] idxz; + delete[] idxb; } void SNA::build_indexlist() { - if(diagonalstyle == 0) { - int idxj_count = 0; + if(diagonalstyle != 3) 
+ error->all(FLERR, "diagonal_style must be 3\n"); - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) - for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) - idxj_count++; + // index list for cglist - // indexList can be changed here + int jdim = twojmax + 1; + memory->create(idxcg_block, jdim, jdim, jdim, + "sna:idxcg_block"); - idxj = new SNA_LOOPINDICES[idxj_count]; - idxj_max = idxj_count; + int idxcg_count = 0; + for(int j1 = 0; j1 <= twojmax; j1++) + for(int j2 = 0; j2 <= j1; j2++) + for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { + idxcg_block[j1][j2][j] = idxcg_count; + for (int m1 = 0; m1 <= j1; m1++) + for (int m2 = 0; m2 <= j2; m2++) + idxcg_count++; + } + idxcg_max = idxcg_count; - idxj_count = 0; + // index list for uarray + // need to include both halves - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) - for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { - idxj[idxj_count].j1 = j1; - idxj[idxj_count].j2 = j2; - idxj[idxj_count].j = j; - idxj_count++; + memory->create(idxu_block, jdim, + "sna:idxu_block"); + + int idxu_count = 0; + + for(int j = 0; j <= twojmax; j++) { + idxu_block[j] = idxu_count; + for(int mb = 0; mb <= j; mb++) + for(int ma = 0; ma <= j; ma++) + idxu_count++; + } + idxu_max = idxu_count; + + // index list for beta and B + + int idxb_count = 0; + for(int j1 = 0; j1 <= twojmax; j1++) + for(int j2 = 0; j2 <= j1; j2++) + for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) + if (j >= j1) idxb_count++; + + idxb_max = idxb_count; + idxb = new SNA_BINDICES[idxb_max]; + + idxb_count = 0; + for(int j1 = 0; j1 <= twojmax; j1++) + for(int j2 = 0; j2 <= j1; j2++) + for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) + if (j >= j1) { + idxb[idxb_count].j1 = j1; + idxb[idxb_count].j2 = j2; + idxb[idxb_count].j = j; + idxb_count++; } - } - if(diagonalstyle == 1) { - int idxj_count = 0; + // reverse index list for beta and b - for(int j1 = 0; j1 
<= twojmax; j1++) - for(int j = 0; j <= MIN(twojmax, 2 * j1); j += 2) { - idxj_count++; + memory->create(idxb_block, jdim, jdim, jdim, + "sna:idxb_block"); + idxb_count = 0; + for(int j1 = 0; j1 <= twojmax; j1++) + for(int j2 = 0; j2 <= j1; j2++) + for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { + if (j < j1) continue; + idxb_block[j1][j2][j] = idxb_count; + idxb_count++; } - // indexList can be changed here + // index list for zlist - idxj = new SNA_LOOPINDICES[idxj_count]; - idxj_max = idxj_count; + int idxz_count = 0; - idxj_count = 0; + for(int j1 = 0; j1 <= twojmax; j1++) + for(int j2 = 0; j2 <= j1; j2++) + for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) + for (int mb = 0; 2*mb <= j; mb++) + for (int ma = 0; ma <= j; ma++) + idxz_count++; + + idxz_max = idxz_count; + idxz = new SNA_ZINDICES[idxz_max]; + + memory->create(idxz_block, jdim, jdim, jdim, + "sna:idxz_block"); + + idxz_count = 0; + for(int j1 = 0; j1 <= twojmax; j1++) + for(int j2 = 0; j2 <= j1; j2++) + for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { + idxz_block[j1][j2][j] = idxz_count; - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j = 0; j <= MIN(twojmax, 2 * j1); j += 2) { - idxj[idxj_count].j1 = j1; - idxj[idxj_count].j2 = j1; - idxj[idxj_count].j = j; - idxj_count++; - } - } + // find right beta[jjb] entry + // multiply and divide by j+1 factors + // account for multiplicity of 1, 2, or 3 - if(diagonalstyle == 2) { - int idxj_count = 0; + // CODE HORROR!! 
Need to figure this out later + double betaj = 1.0; +// #ifdef USE_YDIRECT_ZLIST +// double betaj; +// if (j >= j1) { +// const int jjb = idxb_block[j1][j2][j]; +// if (j1 == j) { +// if (j2 == j) betaj = 3*beta[jjb]; +// else betaj = 2*beta[jjb]; +// } else betaj = beta[jjb]; +// } else if (j >= j2) { +// const int jjb = idxb_block[j][j2][j1]; +// if (j2 == j) betaj = 2*beta[jjb]*(j1+1)/(j+1.0); +// else betaj = beta[jjb]*(j1+1)/(j+1.0); +// } else { +// const int jjb = idxb_block[j2][j][j1]; +// betaj = beta[jjb]*(j1+1)/(j+1.0); +// } +// #else +// double betaj; +// if (j >= j1) { +// const int jjb = idxb_block[j1][j2][j]; +// betaj = beta[jjb]; +// } else if (j >= j2) { +// const int jjb = idxb_block[j][j2][j1]; +// betaj = beta[jjb]*(j1+1)/(j+1.0); +// } else { +// const int jjb = idxb_block[j2][j][j1]; +// betaj = beta[jjb]*(j1+1)/(j+1.0); +// } +// #endif - for(int j1 = 0; j1 <= twojmax; j1++) { - idxj_count++; - } + for (int mb = 0; 2*mb <= j; mb++) + for (int ma = 0; ma <= j; ma++) { + idxz[idxz_count].j1 = j1; + idxz[idxz_count].j2 = j2; + idxz[idxz_count].j = j; + idxz[idxz_count].ma1min = MAX(0, (2 * ma - j - j2 + j1) / 2); + idxz[idxz_count].ma2max = (2 * ma - j - (2 * idxz[idxz_count].ma1min - j1) + j2) / 2; + idxz[idxz_count].na = MIN(j1, (2 * ma - j + j2 + j1) / 2) - idxz[idxz_count].ma1min + 1; + idxz[idxz_count].mb1min = MAX(0, (2 * mb - j - j2 + j1) / 2); + idxz[idxz_count].mb2max = (2 * mb - j - (2 * idxz[idxz_count].mb1min - j1) + j2) / 2; + idxz[idxz_count].nb = MIN(j1, (2 * mb - j + j2 + j1) / 2) - idxz[idxz_count].mb1min + 1; - // indexList can be changed here + // apply to z(j1,j2,j,ma,mb) to unique element of y(j) + // find right beta[jjb] and y_list[jju] entries - idxj = new SNA_LOOPINDICES[idxj_count]; - idxj_max = idxj_count; + const int jju = idxu_block[j] + (j+1)*mb + ma; + idxz[idxz_count].jju = jju; + idxz[idxz_count].betaj = betaj; - idxj_count = 0; - - for(int j1 = 0; j1 <= twojmax; j1++) { - idxj[idxj_count].j1 = j1; - 
idxj[idxj_count].j2 = j1; - idxj[idxj_count].j = j1; - idxj_count++; - } - } - - if(diagonalstyle == 3) { - int idxj_count = 0; - - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) - for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) - if (j >= j1) idxj_count++; - - // indexList can be changed here - - idxj = new SNA_LOOPINDICES[idxj_count]; - idxj_max = idxj_count; - - idxj_count = 0; - - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) - for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) - if (j >= j1) { - idxj[idxj_count].j1 = j1; - idxj[idxj_count].j2 = j2; - idxj[idxj_count].j = j; - idxj_count++; + idxz_count++; } - } - + } } /* ---------------------------------------------------------------------- */ @@ -292,16 +336,14 @@ void SNA::grow_rij(int newnmax) nmax = newnmax; - if(!use_shared_arrays) { - memory->destroy(rij); - memory->destroy(inside); - memory->destroy(wj); - memory->destroy(rcutij); - memory->create(rij, nmax, 3, "pair:rij"); - memory->create(inside, nmax, "pair:inside"); - memory->create(wj, nmax, "pair:wj"); - memory->create(rcutij, nmax, "pair:rcutij"); - } + memory->destroy(rij); + memory->destroy(inside); + memory->destroy(wj); + memory->destroy(rcutij); + memory->create(rij, nmax, 3, "pair:rij"); + memory->create(inside, nmax, "pair:inside"); + memory->create(wj, nmax, "pair:wj"); + memory->create(rcutij, nmax, "pair:rcutij"); } /* ---------------------------------------------------------------------- compute Ui by summing over neighbors j @@ -320,10 +362,6 @@ void SNA::compute_ui(int jnum) zero_uarraytot(); addself_uarraytot(wself); -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &starttime); -#endif - for(int j = 0; j < jnum; j++) { x = rij[j][0]; y = rij[j][1]; @@ -339,48 +377,6 @@ void SNA::compute_ui(int jnum) add_uarraytot(r, wj[j], rcutij[j]); } -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &endtime); - timers[0] += (endtime.tv_sec - starttime.tv_sec + 1.0 * 
- (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); -#endif - -} - -void SNA::compute_ui_omp(int jnum, int sub_threads) -{ - double rsq, r, x, y, z, z0, theta0; - - // utot(j,ma,mb) = 0 for all j,ma,ma - // utot(j,ma,ma) = 1 for all j,ma - // for j in neighbors of i: - // compute r0 = (x,y,z,z0) - // utot(j,ma,mb) += u(r0;j,ma,mb) for all j,ma,mb - - zero_uarraytot(); - addself_uarraytot(wself); - - for(int j = 0; j < jnum; j++) { - x = rij[j][0]; - y = rij[j][1]; - z = rij[j][2]; - rsq = x * x + y * y + z * z; - r = sqrt(rsq); - theta0 = (r - rmin0) * rfac0 * MY_PI / (rcutij[j] - rmin0); - // theta0 = (r - rmin0) * rscale0; - z0 = r / tan(theta0); - omp_set_num_threads(sub_threads); - -#if defined(_OPENMP) -#pragma omp parallel shared(x,y,z,z0,r,sub_threads) default(none) -#endif - { - compute_uarray_omp(x, y, z, z0, r, sub_threads); - } - add_uarraytot(r, wj[j], rcutij[j]); - } - - } /* ---------------------------------------------------------------------- @@ -389,24 +385,6 @@ void SNA::compute_ui_omp(int jnum, int sub_threads) void SNA::compute_zi() { - // for j1 = 0,...,twojmax - // for j2 = 0,twojmax - // for j = |j1-j2|,Min(twojmax,j1+j2),2 - // for ma = 0,...,j - // for mb = 0,...,jmid - // z(j1,j2,j,ma,mb) = 0 - // for ma1 = Max(0,ma+(j1-j2-j)/2),Min(j1,ma+(j1+j2-j)/2) - // sumb1 = 0 - // ma2 = ma-ma1+(j1+j2-j)/2; - // for mb1 = Max(0,mb+(j1-j2-j)/2),Min(j1,mb+(j1+j2-j)/2) - // mb2 = mb-mb1+(j1+j2-j)/2; - // sumb1 += cg(j1,mb1,j2,mb2,j) * - // u(j1,ma1,mb1) * u(j2,ma2,mb2) - // z(j1,j2,j,ma,mb) += sumb1*cg(j1,ma1,j2,ma2,j) - -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &starttime); -#endif // compute_dbidrj() requires full j1/j2/j chunk of z elements // use zarray j1/j2 symmetry @@ -449,84 +427,13 @@ void SNA::compute_zi() } // end loop over j } // end loop over j1, j2 -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &endtime); - timers[1] += (endtime.tv_sec - starttime.tv_sec + 1.0 * - (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); 
-#endif -} - -void SNA::compute_zi_omp(int sub_threads) -{ - // for j1 = 0,...,twojmax - // for j2 = 0,twojmax - // for j = |j1-j2|,Min(twojmax,j1+j2),2 - // for ma = 0,...,j - // for mb = 0,...,j - // z(j1,j2,j,ma,mb) = 0 - // for ma1 = Max(0,ma+(j1-j2-j)/2),Min(j1,ma+(j1+j2-j)/2) - // sumb1 = 0 - // ma2 = ma-ma1+(j1+j2-j)/2; - // for mb1 = Max(0,mb+(j1-j2-j)/2),Min(j1,mb+(j1+j2-j)/2) - // mb2 = mb-mb1+(j1+j2-j)/2; - // sumb1 += cg(j1,mb1,j2,mb2,j) * - // u(j1,ma1,mb1) * u(j2,ma2,mb2) - // z(j1,j2,j,ma,mb) += sumb1*cg(j1,ma1,j2,ma2,j) - - if(omp_in_parallel()) - omp_set_num_threads(sub_threads); - - // compute_dbidrj() requires full j1/j2/j chunk of z elements - // use zarray j1/j2 symmetry - -#if defined(_OPENMP) -#pragma omp parallel for schedule(auto) default(none) -#endif - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) - for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { - - double sumb1_r, sumb1_i; - int ma2, mb2; - - for(int ma = 0; ma <= j; ma++) - for(int mb = 0; mb <= j; mb++) { - zarray_r[j1][j2][j][ma][mb] = 0.0; - zarray_i[j1][j2][j][ma][mb] = 0.0; - - for(int ma1 = MAX(0, (2 * ma - j - j2 + j1) / 2); - ma1 <= MIN(j1, (2 * ma - j + j2 + j1) / 2); ma1++) { - sumb1_r = 0.0; - sumb1_i = 0.0; - - ma2 = (2 * ma - j - (2 * ma1 - j1) + j2) / 2; - - for(int mb1 = MAX(0, (2 * mb - j - j2 + j1) / 2); - mb1 <= MIN(j1, (2 * mb - j + j2 + j1) / 2); mb1++) { - - mb2 = (2 * mb - j - (2 * mb1 - j1) + j2) / 2; - sumb1_r += cgarray[j1][j2][j][mb1][mb2] * - (uarraytot_r[j1][ma1][mb1] * uarraytot_r[j2][ma2][mb2] - - uarraytot_i[j1][ma1][mb1] * uarraytot_i[j2][ma2][mb2]); - sumb1_i += cgarray[j1][j2][j][mb1][mb2] * - (uarraytot_r[j1][ma1][mb1] * uarraytot_i[j2][ma2][mb2] + - uarraytot_i[j1][ma1][mb1] * uarraytot_r[j2][ma2][mb2]); - } - - zarray_r[j1][j2][j][ma][mb] += - sumb1_r * cgarray[j1][j2][j][ma1][ma2]; - zarray_i[j1][j2][j][ma][mb] += - sumb1_i * cgarray[j1][j2][j][ma1][ma2]; - } - } - } } /* 
---------------------------------------------------------------------- compute Yi by summing over products of beta and Zi ------------------------------------------------------------------------- */ -void SNA::compute_yi(double* beta) +void SNA::compute_yi(const double* beta) { int j; int idxz_count; @@ -540,18 +447,18 @@ void SNA::compute_yi(double* beta) } // end loop over ma, mb } // end loop over j - for(int JJ = 0; JJ < idxj_max; JJ++) { - const int j1 = idxj[JJ].j1; - const int j2 = idxj[JJ].j2; - const int j3 = idxj[JJ].j; + for(int jjb = 0; jjb < idxb_max; jjb++) { + const int j1 = idxb[jjb].j1; + const int j2 = idxb[jjb].j2; + const int j3 = idxb[jjb].j; j = j3; jjjzarray_r = zarray_r[j1][j2][j3]; jjjzarray_i = zarray_i[j1][j2][j3]; for(int mb = 0; 2*mb <= j; mb++) for(int ma = 0; ma <= j; ma++) { - yarray_r[j][ma][mb] += beta[JJ]*jjjzarray_r[ma][mb]; - yarray_i[j][ma][mb] += beta[JJ]*jjjzarray_i[ma][mb]; + yarray_r[j][ma][mb] += beta[jjb]*jjjzarray_r[ma][mb]; + yarray_i[j][ma][mb] += beta[jjb]*jjjzarray_i[ma][mb]; } // end loop over ma, mb j = j1; @@ -560,8 +467,8 @@ void SNA::compute_yi(double* beta) double j1fac = (j3+1)/(j+1.0); for(int mb = 0; 2*mb <= j; mb++) for(int ma = 0; ma <= j; ma++) { - yarray_r[j][ma][mb] += beta[JJ]*jjjzarray_r[ma][mb]*j1fac; - yarray_i[j][ma][mb] += beta[JJ]*jjjzarray_i[ma][mb]*j1fac; + yarray_r[j][ma][mb] += beta[jjb]*jjjzarray_r[ma][mb]*j1fac; + yarray_i[j][ma][mb] += beta[jjb]*jjjzarray_i[ma][mb]*j1fac; } // end loop over ma, mb j = j2; @@ -570,8 +477,8 @@ void SNA::compute_yi(double* beta) double j2fac = (j3+1)/(j+1.0); for(int mb = 0; 2*mb <= j; mb++) for(int ma = 0; ma <= j; ma++) { - yarray_r[j][ma][mb] += beta[JJ]*jjjzarray_r[ma][mb]*j2fac; - yarray_i[j][ma][mb] += beta[JJ]*jjjzarray_i[ma][mb]*j2fac; + yarray_r[j][ma][mb] += beta[jjb]*jjjzarray_r[ma][mb]*j2fac; + yarray_i[j][ma][mb] += beta[jjb]*jjjzarray_i[ma][mb]*j2fac; } // end loop over ma, mb } // end loop over jjb @@ -655,10 +562,6 @@ void SNA::compute_bi() // 
b(j1,j2,j) += // 2*Conj(u(j,ma,mb))*z(j1,j2,j,ma,mb) -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &starttime); -#endif - for(int j1 = 0; j1 <= twojmax; j1++) for(int j2 = 0; j2 <= j1; j2++) { for(int j = abs(j1 - j2); @@ -691,12 +594,6 @@ void SNA::compute_bi() } } -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &endtime); - timers[2] += (endtime.tv_sec - starttime.tv_sec + 1.0 * - (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); -#endif - } /* ---------------------------------------------------------------------- @@ -760,164 +657,8 @@ void SNA::compute_duidrj(double* rij, double wj, double rcut) z0 = r * cs / sn; dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq; -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &starttime); -#endif - compute_duarray(x, y, z, z0, r, dz0dr, wj, rcut); -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &endtime); - timers[3] += (endtime.tv_sec - starttime.tv_sec + 1.0 * - (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); -#endif - -} - -/* ---------------------------------------------------------------------- - calculate derivative of Bi w.r.t. 
atom j - variant using indexlist for j1,j2,j - variant not using symmetry relation -------------------------------------------------------------------------- */ - -void SNA::compute_dbidrj_nonsymm() -{ - // for j1 = 0,...,twojmax - // for j2 = 0,twojmax - // for j = |j1-j2|,Min(twojmax,j1+j2),2 - // dbdr(j1,j2,j) = 0 - // for ma = 0,...,j - // for mb = 0,...,j - // dzdr = 0 - // for ma1 = Max(0,ma+(j1-j2-j)/2),Min(j1,ma+(j1+j2-j)/2) - // sumb1 = 0 - // ma2 = ma-ma1+(j1+j2-j)/2; - // for mb1 = Max(0,mb+(j1-j2-j)/2),Min(j1,mb+(j1+j2-j)/2) - // mb2 = mb-mb1+(j1+j2-j)/2; - // sumb1 += cg(j1,mb1,j2,mb2,j) * - // (dudr(j1,ma1,mb1) * u(j2,ma2,mb2) + - // u(j1,ma1,mb1) * dudr(j2,ma2,mb2)) - // dzdr += sumb1*cg(j1,ma1,j2,ma2,j) - // dbdr(j1,j2,j) += - // Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb) + - // Conj(u(j,ma,mb))*dzdr - - double* dbdr; - double* dudr_r, *dudr_i; - double sumb1_r[3], sumb1_i[3], dzdr_r[3], dzdr_i[3]; - int ma2; - -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &starttime); -#endif - - for(int JJ = 0; JJ < idxj_max; JJ++) { - const int j1 = idxj[JJ].j1; - const int j2 = idxj[JJ].j2; - const int j = idxj[JJ].j; - - dbdr = dbarray[j1][j2][j]; - dbdr[0] = 0.0; - dbdr[1] = 0.0; - dbdr[2] = 0.0; - - double** *j1duarray_r = duarray_r[j1]; - double** *j2duarray_r = duarray_r[j2]; - double** *j1duarray_i = duarray_i[j1]; - double** *j2duarray_i = duarray_i[j2]; - double** j1uarraytot_r = uarraytot_r[j1]; - double** j2uarraytot_r = uarraytot_r[j2]; - double** j1uarraytot_i = uarraytot_i[j1]; - double** j2uarraytot_i = uarraytot_i[j2]; - double** j1j2jcgarray = cgarray[j1][j2][j]; - - for(int ma = 0; ma <= j; ma++) - for(int mb = 0; mb <= j; mb++) { - dzdr_r[0] = 0.0; - dzdr_r[1] = 0.0; - dzdr_r[2] = 0.0; - dzdr_i[0] = 0.0; - dzdr_i[1] = 0.0; - dzdr_i[2] = 0.0; - - const int max_mb1 = MIN(j1, (2 * mb - j + j2 + j1) / 2) + 1; - const int max_ma1 = MIN(j1, (2 * ma - j + j2 + j1) / 2) + 1; - - for(int ma1 = MAX(0, (2 * ma - j - j2 + j1) / 2); - ma1 < max_ma1; ma1++) 
{ - - ma2 = (2 * ma - j - (2 * ma1 - j1) + j2) / 2; - sumb1_r[0] = 0.0; - sumb1_r[1] = 0.0; - sumb1_r[2] = 0.0; - sumb1_i[0] = 0.0; - sumb1_i[1] = 0.0; - sumb1_i[2] = 0.0; - - //inside loop 54 operations (mul and add) - for(int mb1 = MAX(0, (2 * mb - j - j2 + j1) / 2), - mb2 = mb + (j1 + j2 - j) / 2 - mb1; - mb1 < max_mb1; mb1++, mb2--) { - - double* dudr1_r, *dudr1_i, *dudr2_r, *dudr2_i; - - dudr1_r = j1duarray_r[ma1][mb1]; - dudr2_r = j2duarray_r[ma2][mb2]; - dudr1_i = j1duarray_i[ma1][mb1]; - dudr2_i = j2duarray_i[ma2][mb2]; - - const double cga_mb1mb2 = j1j2jcgarray[mb1][mb2]; - const double uat_r_ma2mb2 = cga_mb1mb2 * j2uarraytot_r[ma2][mb2]; - const double uat_r_ma1mb1 = cga_mb1mb2 * j1uarraytot_r[ma1][mb1]; - const double uat_i_ma2mb2 = cga_mb1mb2 * j2uarraytot_i[ma2][mb2]; - const double uat_i_ma1mb1 = cga_mb1mb2 * j1uarraytot_i[ma1][mb1]; - - for(int k = 0; k < 3; k++) { - sumb1_r[k] += dudr1_r[k] * uat_r_ma2mb2; - sumb1_r[k] -= dudr1_i[k] * uat_i_ma2mb2; - sumb1_i[k] += dudr1_r[k] * uat_i_ma2mb2; - sumb1_i[k] += dudr1_i[k] * uat_r_ma2mb2; - - sumb1_r[k] += dudr2_r[k] * uat_r_ma1mb1; - sumb1_r[k] -= dudr2_i[k] * uat_i_ma1mb1; - sumb1_i[k] += dudr2_r[k] * uat_i_ma1mb1; - sumb1_i[k] += dudr2_i[k] * uat_r_ma1mb1; - } - } // end loop over mb1,mb2 - - // dzdr += sumb1*cg(j1,ma1,j2,ma2,j) - - dzdr_r[0] += sumb1_r[0] * j1j2jcgarray[ma1][ma2]; - dzdr_r[1] += sumb1_r[1] * j1j2jcgarray[ma1][ma2]; - dzdr_r[2] += sumb1_r[2] * j1j2jcgarray[ma1][ma2]; - dzdr_i[0] += sumb1_i[0] * j1j2jcgarray[ma1][ma2]; - dzdr_i[1] += sumb1_i[1] * j1j2jcgarray[ma1][ma2]; - dzdr_i[2] += sumb1_i[2] * j1j2jcgarray[ma1][ma2]; - } // end loop over ma1,ma2 - - // dbdr(j1,j2,j) += - // Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb) + - // Conj(u(j,ma,mb))*dzdr - - dudr_r = duarray_r[j][ma][mb]; - dudr_i = duarray_i[j][ma][mb]; - - for(int k = 0; k < 3; k++) - dbdr[k] += - (dudr_r[k] * zarray_r[j1][j2][j][ma][mb] + - dudr_i[k] * zarray_i[j1][j2][j][ma][mb]) + - (uarraytot_r[j][ma][mb] * dzdr_r[k] + - 
uarraytot_i[j][ma][mb] * dzdr_i[k]); - } //end loop over ma mb - - } //end loop over j1 j2 j - -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &endtime); - timers[4] += (endtime.tv_sec - starttime.tv_sec + 1.0 * - (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); -#endif - } /* ---------------------------------------------------------------------- @@ -958,14 +699,10 @@ void SNA::compute_dbidrj() double jjjmambzarray_r; double jjjmambzarray_i; -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &starttime); -#endif - - for(int JJ = 0; JJ < idxj_max; JJ++) { - const int j1 = idxj[JJ].j1; - const int j2 = idxj[JJ].j2; - const int j = idxj[JJ].j; + for(int jjb = 0; jjb < idxb_max; jjb++) { + const int j1 = idxb[jjb].j1; + const int j2 = idxb[jjb].j2; + const int j = idxb[jjb].j; dbdr = dbarray[j1][j2][j]; dbdr[0] = 0.0; @@ -1149,12 +886,6 @@ void SNA::compute_dbidrj() } //end loop over j1 j2 j -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &endtime); - timers[4] += (endtime.tv_sec - starttime.tv_sec + 1.0 * - (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); -#endif - } /* ---------------------------------------------------------------------- @@ -1251,27 +982,6 @@ void SNA::add_uarraytot(double r, double wj, double rcut) } } -void SNA::add_uarraytot_omp(double r, double wj, double rcut) -{ - double sfac; - - sfac = compute_sfac(r, rcut); - - sfac *= wj; - -#if defined(_OPENMP) -#pragma omp for -#endif - for (int j = 0; j <= twojmax; j++) - for (int ma = 0; ma <= j; ma++) - for (int mb = 0; mb <= j; mb++) { - uarraytot_r[j][ma][mb] += - sfac * uarray_r[j][ma][mb]; - uarraytot_i[j][ma][mb] += - sfac * uarray_i[j][ma][mb]; - } -} - /* ---------------------------------------------------------------------- compute Wigner U-functions for one neighbor ------------------------------------------------------------------------- */ @@ -1348,88 +1058,6 @@ void SNA::compute_uarray(double x, double y, double z, } } -void SNA::compute_uarray_omp(double x, double y, 
double z, - double z0, double r, int /*sub_threads*/) -{ - double r0inv; - double a_r, b_r, a_i, b_i; - double rootpq; - - // compute Cayley-Klein parameters for unit quaternion - - r0inv = 1.0 / sqrt(r * r + z0 * z0); - a_r = r0inv * z0; - a_i = -r0inv * z; - b_r = r0inv * y; - b_i = -r0inv * x; - - // VMK Section 4.8.2 - - uarray_r[0][0][0] = 1.0; - uarray_i[0][0][0] = 0.0; - - for (int j = 1; j <= twojmax; j++) { -#if defined(_OPENMP) -#pragma omp for -#endif - for (int mb = 0; mb < j; mb++) { - uarray_r[j][0][mb] = 0.0; - uarray_i[j][0][mb] = 0.0; - - for (int ma = 0; ma < j; ma++) { - rootpq = rootpqarray[j - ma][j - mb]; - uarray_r[j][ma][mb] += - rootpq * - (a_r * uarray_r[j - 1][ma][mb] + - a_i * uarray_i[j - 1][ma][mb]); - uarray_i[j][ma][mb] += - rootpq * - (a_r * uarray_i[j - 1][ma][mb] - - a_i * uarray_r[j - 1][ma][mb]); - - rootpq = rootpqarray[ma + 1][j - mb]; - uarray_r[j][ma + 1][mb] = - -rootpq * - (b_r * uarray_r[j - 1][ma][mb] + - b_i * uarray_i[j - 1][ma][mb]); - uarray_i[j][ma + 1][mb] = - -rootpq * - (b_r * uarray_i[j - 1][ma][mb] - - b_i * uarray_r[j - 1][ma][mb]); - } - } - - int mb = j; - uarray_r[j][0][mb] = 0.0; - uarray_i[j][0][mb] = 0.0; - -#if defined(_OPENMP) -#pragma omp for -#endif - for (int ma = 0; ma < j; ma++) { - rootpq = rootpqarray[j - ma][mb]; - uarray_r[j][ma][mb] += - rootpq * - (b_r * uarray_r[j - 1][ma][mb - 1] - - b_i * uarray_i[j - 1][ma][mb - 1]); - uarray_i[j][ma][mb] += - rootpq * - (b_r * uarray_i[j - 1][ma][mb - 1] + - b_i * uarray_r[j - 1][ma][mb - 1]); - - rootpq = rootpqarray[ma + 1][mb]; - uarray_r[j][ma + 1][mb] = - rootpq * - (a_r * uarray_r[j - 1][ma][mb - 1] - - a_i * uarray_i[j - 1][ma][mb - 1]); - uarray_i[j][ma + 1][mb] = - rootpq * - (a_r * uarray_i[j - 1][ma][mb - 1] + - a_i * uarray_r[j - 1][ma][mb - 1]); - } - } -} - /* ---------------------------------------------------------------------- compute derivatives of Wigner U-functions for one neighbor see comments in compute_uarray() @@ -1644,20 
+1272,18 @@ void SNA::create_twojmax_arrays() bzero = NULL; - if(!use_shared_arrays) { - memory->create(uarraytot_r, jdim, jdim, jdim, - "sna:uarraytot"); - memory->create(zarray_r, jdim, jdim, jdim, jdim, jdim, - "sna:zarray"); - memory->create(uarraytot_i, jdim, jdim, jdim, - "sna:uarraytot"); - memory->create(zarray_i, jdim, jdim, jdim, jdim, jdim, - "sna:zarray"); - memory->create(yarray_r, jdim, jdim, jdim, - "sna:yarray"); - memory->create(yarray_i, jdim, jdim, jdim, - "sna:yarray"); - } + memory->create(uarraytot_r, jdim, jdim, jdim, + "sna:uarraytot"); + memory->create(zarray_r, jdim, jdim, jdim, jdim, jdim, + "sna:zarray"); + memory->create(uarraytot_i, jdim, jdim, jdim, + "sna:uarraytot"); + memory->create(zarray_i, jdim, jdim, jdim, jdim, jdim, + "sna:zarray"); + memory->create(yarray_r, jdim, jdim, jdim, + "sna:yarray"); + memory->create(yarray_i, jdim, jdim, jdim, + "sna:yarray"); } @@ -1680,14 +1306,12 @@ void SNA::destroy_twojmax_arrays() if (bzero_flag) memory->destroy(bzero); - if(!use_shared_arrays) { - memory->destroy(uarraytot_r); - memory->destroy(zarray_r); - memory->destroy(uarraytot_i); - memory->destroy(zarray_i); - memory->destroy(yarray_r); - memory->destroy(yarray_i); - } + memory->destroy(uarraytot_r); + memory->destroy(zarray_r); + memory->destroy(uarraytot_i); + memory->destroy(zarray_i); + memory->destroy(yarray_r); + memory->destroy(yarray_i); } /* ---------------------------------------------------------------------- diff --git a/src/SNAP/sna.h b/src/SNAP/sna.h index 2c90da1d30..b93b0ac7b0 100644 --- a/src/SNAP/sna.h +++ b/src/SNAP/sna.h @@ -24,14 +24,19 @@ namespace LAMMPS_NS { -struct SNA_LOOPINDICES { +struct SNA_ZINDICES { + int j1, j2, j, ma1min, ma2max, mb1min, mb2max, na, nb, jju; + double betaj; +}; + +struct SNA_BINDICES { int j1, j2, j; }; class SNA : protected Pointers { public: - SNA(LAMMPS*, double, int, int, int, double, int, int); + SNA(LAMMPS*, double, int, int, double, int, int); SNA(LAMMPS* lmp) : Pointers(lmp) 
{}; ~SNA(); @@ -44,10 +49,8 @@ public: // functions for bispectrum coefficients void compute_ui(int); - void compute_ui_omp(int, int); void compute_zi(); - void compute_zi_omp(int); - void compute_yi(double*); + void compute_yi(const double*); void compute_bi(); void copy_bi2bvec(); @@ -56,20 +59,10 @@ public: void compute_duidrj(double*, double, double); void compute_dbidrj(); void compute_deidrj(double*); - void compute_dbidrj_nonsymm(); void copy_dbi2dbvec(); double compute_sfac(double, double); double compute_dsfac(double, double); -#ifdef TIMING_INFO - double* timers; - timespec starttime, endtime; - int print; - int counter; -#endif - - //per sna class instance for OMP use - double* bvec, ** dbvec; double** rij; int* inside; @@ -83,16 +76,17 @@ public: double*** uarraytot_r, *** uarraytot_i; double***** zarray_r, ***** zarray_i; double*** yarray_r, *** yarray_i; - double*** uarraytot_r_b, *** uarraytot_i_b; - double***** zarray_r_b, ***** zarray_i_b; double*** uarray_r, *** uarray_i; private: double rmin0, rfac0; - //use indexlist instead of loops, constructor generates these - SNA_LOOPINDICES* idxj; - int idxj_max; + // use indexlist instead of loops, constructor generates these + + SNA_ZINDICES* idxz; + SNA_BINDICES* idxb; + int idxcg_max, idxu_max, idxz_max, idxb_max; + // data for bispectrum coefficients double***** cgarray; @@ -104,6 +98,21 @@ private: double**** duarray_r, **** duarray_i; double**** dbarray; + double* cglist; + int*** idxcg_block; + + double* ulisttot_r, * ulisttot_i; + double* ulist_r, * ulist_i; + int* idxu_block; + + double* zlist_r, * zlist_i; + int*** idxz_block; + + int*** idxb_block; + + double** dulist_r, ** dulist_i; + double* ylist_r, * ylist_i; + static const int nmaxfactorial = 167; static const double nfac_table[]; double factorial(int); @@ -118,22 +127,13 @@ private: void zero_uarraytot(); void addself_uarraytot(double); void add_uarraytot(double, double, double); - void add_uarraytot_omp(double, double, double); void 
compute_uarray(double, double, double, double, double); - void compute_uarray_omp(double, double, double, - double, double, int); double deltacg(int, int, int); int compute_ncoeff(); void compute_duarray(double, double, double, double, double, double, double, double); - // if number of atoms are small use per atom arrays - // for twojmax arrays, rij, inside, bvec - // this will increase the memory footprint considerably, - // but allows parallel filling and reuse of these arrays - int use_shared_arrays; - // Sets the style for the switching function // 0 = none // 1 = cosine From 0559e155f23330999239847396384115fc698311 Mon Sep 17 00:00:00 2001 From: Aidan Thompson Date: Tue, 11 Jun 2019 18:24:02 -0600 Subject: [PATCH 12/21] Implemented lists instead of multidim arrays --- src/SNAP/compute_sna_atom.cpp | 7 +- src/SNAP/compute_snad_atom.cpp | 34 +- src/SNAP/compute_snav_atom.cpp | 46 +- src/SNAP/pair_snap.cpp | 8 +- src/SNAP/sna.cpp | 1160 ++++++++++++++------------------ src/SNAP/sna.h | 25 +- 6 files changed, 566 insertions(+), 714 deletions(-) diff --git a/src/SNAP/compute_sna_atom.cpp b/src/SNAP/compute_sna_atom.cpp index 17774143d5..fea37faca0 100644 --- a/src/SNAP/compute_sna_atom.cpp +++ b/src/SNAP/compute_sna_atom.cpp @@ -247,13 +247,12 @@ void ComputeSNAAtom::compute_peratom() snaptr->compute_ui(ninside); snaptr->compute_zi(); snaptr->compute_bi(); - snaptr->copy_bi2bvec(); for (int icoeff = 0; icoeff < ncoeff; icoeff++) - sna[i][icoeff] = snaptr->bvec[icoeff]; + sna[i][icoeff] = snaptr->blist[icoeff]; if (quadraticflag) { int ncount = ncoeff; for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - double bi = snaptr->bvec[icoeff]; + double bi = snaptr->blist[icoeff]; // diagonal element of quadratic matrix @@ -262,7 +261,7 @@ void ComputeSNAAtom::compute_peratom() // upper-triangular elements of quadratic matrix for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) - sna[i][ncount++] = bi*snaptr->bvec[jcoeff]; + sna[i][ncount++] = bi*snaptr->blist[jcoeff]; } 
} } else { diff --git a/src/SNAP/compute_snad_atom.cpp b/src/SNAP/compute_snad_atom.cpp index b356d61d3d..156380eccc 100644 --- a/src/SNAP/compute_snad_atom.cpp +++ b/src/SNAP/compute_snad_atom.cpp @@ -266,7 +266,6 @@ void ComputeSNADAtom::compute_peratom() snaptr->compute_zi(); if (quadraticflag) { snaptr->compute_bi(); - snaptr->copy_bi2bvec(); } for (int jj = 0; jj < ninside; jj++) { @@ -275,7 +274,6 @@ void ComputeSNADAtom::compute_peratom() snaptr->wj[jj], snaptr->rcutij[jj]); snaptr->compute_dbidrj(); - snaptr->copy_dbi2dbvec(); // Accumulate -dBi/dRi, -dBi/dRj @@ -283,12 +281,12 @@ void ComputeSNADAtom::compute_peratom() double *snadj = snad[j]+typeoffset; for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - snadi[icoeff] += snaptr->dbvec[icoeff][0]; - snadi[icoeff+yoffset] += snaptr->dbvec[icoeff][1]; - snadi[icoeff+zoffset] += snaptr->dbvec[icoeff][2]; - snadj[icoeff] -= snaptr->dbvec[icoeff][0]; - snadj[icoeff+yoffset] -= snaptr->dbvec[icoeff][1]; - snadj[icoeff+zoffset] -= snaptr->dbvec[icoeff][2]; + snadi[icoeff] += snaptr->dblist[icoeff][0]; + snadi[icoeff+yoffset] += snaptr->dblist[icoeff][1]; + snadi[icoeff+zoffset] += snaptr->dblist[icoeff][2]; + snadj[icoeff] -= snaptr->dblist[icoeff][0]; + snadj[icoeff+yoffset] -= snaptr->dblist[icoeff][1]; + snadj[icoeff+zoffset] -= snaptr->dblist[icoeff][2]; } if (quadraticflag) { @@ -297,10 +295,10 @@ void ComputeSNADAtom::compute_peratom() snadj += quadraticoffset; int ncount = 0; for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - double bi = snaptr->bvec[icoeff]; - double bix = snaptr->dbvec[icoeff][0]; - double biy = snaptr->dbvec[icoeff][1]; - double biz = snaptr->dbvec[icoeff][2]; + double bi = snaptr->blist[icoeff]; + double bix = snaptr->dblist[icoeff][0]; + double biy = snaptr->dblist[icoeff][1]; + double biz = snaptr->dblist[icoeff][2]; // diagonal elements of quadratic matrix @@ -319,12 +317,12 @@ void ComputeSNADAtom::compute_peratom() // upper-triangular elements of quadratic matrix for (int jcoeff 
= icoeff+1; jcoeff < ncoeff; jcoeff++) { - double dbxtmp = bi*snaptr->dbvec[jcoeff][0] - + bix*snaptr->bvec[jcoeff]; - double dbytmp = bi*snaptr->dbvec[jcoeff][1] - + biy*snaptr->bvec[jcoeff]; - double dbztmp = bi*snaptr->dbvec[jcoeff][2] - + biz*snaptr->bvec[jcoeff]; + double dbxtmp = bi*snaptr->dblist[jcoeff][0] + + bix*snaptr->blist[jcoeff]; + double dbytmp = bi*snaptr->dblist[jcoeff][1] + + biy*snaptr->blist[jcoeff]; + double dbztmp = bi*snaptr->dblist[jcoeff][2] + + biz*snaptr->blist[jcoeff]; snadi[ncount] += dbxtmp; snadi[ncount+yoffset] += dbytmp; diff --git a/src/SNAP/compute_snav_atom.cpp b/src/SNAP/compute_snav_atom.cpp index 9f9ef7a67d..6caff0820c 100644 --- a/src/SNAP/compute_snav_atom.cpp +++ b/src/SNAP/compute_snav_atom.cpp @@ -260,7 +260,6 @@ void ComputeSNAVAtom::compute_peratom() snaptr->compute_zi(); if (quadraticflag) { snaptr->compute_bi(); - snaptr->copy_bi2bvec(); } for (int jj = 0; jj < ninside; jj++) { @@ -270,7 +269,6 @@ void ComputeSNAVAtom::compute_peratom() snaptr->wj[jj], snaptr->rcutij[jj]); snaptr->compute_dbidrj(); - snaptr->copy_dbi2dbvec(); // Accumulate -dBi/dRi*Ri, -dBi/dRj*Rj @@ -278,18 +276,18 @@ void ComputeSNAVAtom::compute_peratom() double *snavj = snav[j]+typeoffset; for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - snavi[icoeff] += snaptr->dbvec[icoeff][0]*xtmp; - snavi[icoeff+nperdim] += snaptr->dbvec[icoeff][1]*ytmp; - snavi[icoeff+2*nperdim] += snaptr->dbvec[icoeff][2]*ztmp; - snavi[icoeff+3*nperdim] += snaptr->dbvec[icoeff][1]*ztmp; - snavi[icoeff+4*nperdim] += snaptr->dbvec[icoeff][0]*ztmp; - snavi[icoeff+5*nperdim] += snaptr->dbvec[icoeff][0]*ytmp; - snavj[icoeff] -= snaptr->dbvec[icoeff][0]*x[j][0]; - snavj[icoeff+nperdim] -= snaptr->dbvec[icoeff][1]*x[j][1]; - snavj[icoeff+2*nperdim] -= snaptr->dbvec[icoeff][2]*x[j][2]; - snavj[icoeff+3*nperdim] -= snaptr->dbvec[icoeff][1]*x[j][2]; - snavj[icoeff+4*nperdim] -= snaptr->dbvec[icoeff][0]*x[j][2]; - snavj[icoeff+5*nperdim] -= snaptr->dbvec[icoeff][0]*x[j][1]; + 
snavi[icoeff] += snaptr->dblist[icoeff][0]*xtmp; + snavi[icoeff+nperdim] += snaptr->dblist[icoeff][1]*ytmp; + snavi[icoeff+2*nperdim] += snaptr->dblist[icoeff][2]*ztmp; + snavi[icoeff+3*nperdim] += snaptr->dblist[icoeff][1]*ztmp; + snavi[icoeff+4*nperdim] += snaptr->dblist[icoeff][0]*ztmp; + snavi[icoeff+5*nperdim] += snaptr->dblist[icoeff][0]*ytmp; + snavj[icoeff] -= snaptr->dblist[icoeff][0]*x[j][0]; + snavj[icoeff+nperdim] -= snaptr->dblist[icoeff][1]*x[j][1]; + snavj[icoeff+2*nperdim] -= snaptr->dblist[icoeff][2]*x[j][2]; + snavj[icoeff+3*nperdim] -= snaptr->dblist[icoeff][1]*x[j][2]; + snavj[icoeff+4*nperdim] -= snaptr->dblist[icoeff][0]*x[j][2]; + snavj[icoeff+5*nperdim] -= snaptr->dblist[icoeff][0]*x[j][1]; } if (quadraticflag) { @@ -298,10 +296,10 @@ void ComputeSNAVAtom::compute_peratom() snavj += quadraticoffset; int ncount = 0; for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - double bi = snaptr->bvec[icoeff]; - double bix = snaptr->dbvec[icoeff][0]; - double biy = snaptr->dbvec[icoeff][1]; - double biz = snaptr->dbvec[icoeff][2]; + double bi = snaptr->blist[icoeff]; + double bix = snaptr->dblist[icoeff][0]; + double biy = snaptr->dblist[icoeff][1]; + double biz = snaptr->dblist[icoeff][2]; // diagonal element of quadratic matrix @@ -325,12 +323,12 @@ void ComputeSNAVAtom::compute_peratom() // upper-triangular elements of quadratic matrix for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { - double dbxtmp = bi*snaptr->dbvec[jcoeff][0] - + bix*snaptr->bvec[jcoeff]; - double dbytmp = bi*snaptr->dbvec[jcoeff][1] - + biy*snaptr->bvec[jcoeff]; - double dbztmp = bi*snaptr->dbvec[jcoeff][2] - + biz*snaptr->bvec[jcoeff]; + double dbxtmp = bi*snaptr->dblist[jcoeff][0] + + bix*snaptr->blist[jcoeff]; + double dbytmp = bi*snaptr->dblist[jcoeff][1] + + biy*snaptr->blist[jcoeff]; + double dbztmp = bi*snaptr->dblist[jcoeff][2] + + biz*snaptr->blist[jcoeff]; snavi[ncount] += dbxtmp*xtmp; snavi[ncount+nperdim] += dbytmp*ytmp; snavi[ncount+2*nperdim] += 
dbztmp*ztmp; diff --git a/src/SNAP/pair_snap.cpp b/src/SNAP/pair_snap.cpp index ae542e81b4..6a65f872fd 100644 --- a/src/SNAP/pair_snap.cpp +++ b/src/SNAP/pair_snap.cpp @@ -166,17 +166,14 @@ void PairSNAP::compute(int eflag, int vflag) } } - // compute Ui, Zi, and Bi for atom I + // compute Ui, Yi for atom I snaptr->compute_ui(ninside); - snaptr->compute_zi(); // for neighbors of I within cutoff: // compute Fij = dEi/dRj = -dEi/dRi // add to Fi, subtract from Fj - // compute beta_i*Z_i = Y_i - snaptr->compute_yi(beta[ii]); for (int jj = 0; jj < ninside; jj++) { @@ -310,10 +307,9 @@ void PairSNAP::compute_bispectrum() snaptr->compute_ui(ninside); snaptr->compute_zi(); snaptr->compute_bi(); - snaptr->copy_bi2bvec(); for (int k = 0; k < ncoeff; k++) - bispectrum[ii][k] = snaptr->bvec[k]; + bispectrum[ii][k] = snaptr->blist[k]; } } diff --git a/src/SNAP/sna.cpp b/src/SNAP/sna.cpp index b388136caf..131ac48fdb 100644 --- a/src/SNAP/sna.cpp +++ b/src/SNAP/sna.cpp @@ -128,8 +128,6 @@ SNA::SNA(LAMMPS* lmp, double rfac0_in, ncoeff = compute_ncoeff(); - create_twojmax_arrays(); - bvec = NULL; dbvec = NULL; memory->create(bvec, ncoeff, "pair:bvec"); @@ -142,20 +140,21 @@ SNA::SNA(LAMMPS* lmp, double rfac0_in, idxz = NULL; idxb= NULL; + build_indexlist(); + create_twojmax_arrays(); + if (bzero_flag) { double www = wself*wself*wself; for(int j = 0; j <= twojmax; j++) bzero[j] = www*(j+1); } - build_indexlist(); } /* ---------------------------------------------------------------------- */ SNA::~SNA() { - destroy_twojmax_arrays(); memory->destroy(rij); memory->destroy(inside); memory->destroy(wj); @@ -164,6 +163,7 @@ SNA::~SNA() memory->destroy(dbvec); delete[] idxz; delete[] idxb; + destroy_twojmax_arrays(); } void SNA::build_indexlist() @@ -234,9 +234,10 @@ void SNA::build_indexlist() for(int j1 = 0; j1 <= twojmax; j1++) for(int j2 = 0; j2 <= j1; j2++) for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { - if (j < j1) continue; - idxb_block[j1][j2][j] = idxb_count; - 
idxb_count++; + if (j >= j1) { + idxb_block[j1][j2][j] = idxb_count; + idxb_count++; + } } // index list for zlist @@ -266,38 +267,6 @@ void SNA::build_indexlist() // multiply and divide by j+1 factors // account for multiplicity of 1, 2, or 3 - // CODE HORROR!! Need to figure this out later - double betaj = 1.0; -// #ifdef USE_YDIRECT_ZLIST -// double betaj; -// if (j >= j1) { -// const int jjb = idxb_block[j1][j2][j]; -// if (j1 == j) { -// if (j2 == j) betaj = 3*beta[jjb]; -// else betaj = 2*beta[jjb]; -// } else betaj = beta[jjb]; -// } else if (j >= j2) { -// const int jjb = idxb_block[j][j2][j1]; -// if (j2 == j) betaj = 2*beta[jjb]*(j1+1)/(j+1.0); -// else betaj = beta[jjb]*(j1+1)/(j+1.0); -// } else { -// const int jjb = idxb_block[j2][j][j1]; -// betaj = beta[jjb]*(j1+1)/(j+1.0); -// } -// #else -// double betaj; -// if (j >= j1) { -// const int jjb = idxb_block[j1][j2][j]; -// betaj = beta[jjb]; -// } else if (j >= j2) { -// const int jjb = idxb_block[j][j2][j1]; -// betaj = beta[jjb]*(j1+1)/(j+1.0); -// } else { -// const int jjb = idxb_block[j2][j][j1]; -// betaj = beta[jjb]*(j1+1)/(j+1.0); -// } -// #endif - for (int mb = 0; 2*mb <= j; mb++) for (int ma = 0; ma <= j; ma++) { idxz[idxz_count].j1 = j1; @@ -311,11 +280,9 @@ void SNA::build_indexlist() idxz[idxz_count].nb = MIN(j1, (2 * mb - j + j2 + j1) / 2) - idxz[idxz_count].mb1min + 1; // apply to z(j1,j2,j,ma,mb) to unique element of y(j) - // find right beta[jjb] and y_list[jju] entries const int jju = idxu_block[j] + (j+1)*mb + ma; idxz[idxz_count].jju = jju; - idxz[idxz_count].betaj = betaj; idxz_count++; } @@ -386,105 +353,168 @@ void SNA::compute_ui(int jnum) void SNA::compute_zi() { - // compute_dbidrj() requires full j1/j2/j chunk of z elements - // use zarray j1/j2 symmetry + int ma2, mb2; + for(int jjz = 0; jjz < idxz_max; jjz++) { + const int j1 = idxz[jjz].j1; + const int j2 = idxz[jjz].j2; + const int j = idxz[jjz].j; + const int ma1min = idxz[jjz].ma1min; + const int ma2max = 
idxz[jjz].ma2max; + const int na = idxz[jjz].na; + const int mb1min = idxz[jjz].mb1min; + const int mb2max = idxz[jjz].mb2max; + const int nb = idxz[jjz].nb; - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) { - for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) { - double sumb1_r, sumb1_i; - int ma2, mb2; - for(int mb = 0; 2*mb <= j; mb++) - for(int ma = 0; ma <= j; ma++) { - zarray_r[j1][j2][j][ma][mb] = 0.0; - zarray_i[j1][j2][j][ma][mb] = 0.0; + const double* cgblock = cglist + idxcg_block[j1][j2][j]; - for(int ma1 = MAX(0, (2 * ma - j - j2 + j1) / 2); - ma1 <= MIN(j1, (2 * ma - j + j2 + j1) / 2); ma1++) { - sumb1_r = 0.0; - sumb1_i = 0.0; + zlist_r[jjz] = 0.0; + zlist_i[jjz] = 0.0; - ma2 = (2 * ma - j - (2 * ma1 - j1) + j2) / 2; + int jju1 = idxu_block[j1] + (j1+1)*mb1min; + int jju2 = idxu_block[j2] + (j2+1)*mb2max; + int icgb = mb1min*(j2+1) + mb2max; + for(int ib = 0; ib < nb; ib++) { - for(int mb1 = MAX(0, (2 * mb - j - j2 + j1) / 2); - mb1 <= MIN(j1, (2 * mb - j + j2 + j1) / 2); mb1++) { + double suma1_r = 0.0; + double suma1_i = 0.0; - mb2 = (2 * mb - j - (2 * mb1 - j1) + j2) / 2; - sumb1_r += cgarray[j1][j2][j][mb1][mb2] * - (uarraytot_r[j1][ma1][mb1] * uarraytot_r[j2][ma2][mb2] - - uarraytot_i[j1][ma1][mb1] * uarraytot_i[j2][ma2][mb2]); - sumb1_i += cgarray[j1][j2][j][mb1][mb2] * - (uarraytot_r[j1][ma1][mb1] * uarraytot_i[j2][ma2][mb2] + - uarraytot_i[j1][ma1][mb1] * uarraytot_r[j2][ma2][mb2]); - } // end loop over mb1 + const double* u1_r = &ulisttot_r[jju1]; + const double* u1_i = &ulisttot_i[jju1]; + const double* u2_r = &ulisttot_r[jju2]; + const double* u2_i = &ulisttot_i[jju2]; - zarray_r[j1][j2][j][ma][mb] += - sumb1_r * cgarray[j1][j2][j][ma1][ma2]; - zarray_i[j1][j2][j][ma][mb] += - sumb1_i * cgarray[j1][j2][j][ma1][ma2]; - } // end loop over ma1 - } // end loop over ma, mb - } // end loop over j - } // end loop over j1, j2 + int ma1 = ma1min; + int ma2 = ma2max; + int icga = ma1min*(j2+1) + ma2max; + for(int ia = 
0; ia < na; ia++) { + suma1_r += cgblock[icga] * (u1_r[ma1] * u2_r[ma2] - u1_i[ma1] * u2_i[ma2]); + suma1_i += cgblock[icga] * (u1_r[ma1] * u2_i[ma2] + u1_i[ma1] * u2_r[ma2]); + ma1++; + ma2--; + icga += j2; + } // end loop over ia + zlist_r[jjz] += cgblock[icgb] * suma1_r; + zlist_i[jjz] += cgblock[icgb] * suma1_i; + jju1 += j1+1; + jju2 -= j2+1; + icgb += j2; + } // end loop over ib + +// // apply symmetry factor + +// const double jfac = 1.0/(j+1); +// zlist_r[jjz] *= jfac; +// zlist_i[jjz] *= jfac; + + } // end loop over jjz } /* ---------------------------------------------------------------------- - compute Yi by summing over products of beta and Zi + compute Yi from Ui without storing Zi, looping over ylist ------------------------------------------------------------------------- */ void SNA::compute_yi(const double* beta) { - int j; - int idxz_count; - double **jjjzarray_r, **jjjzarray_i; - for(int j = 0; j <= twojmax; j++) { + int jju = idxu_block[j]; for(int mb = 0; 2*mb <= j; mb++) for(int ma = 0; ma <= j; ma++) { - yarray_r[j][ma][mb] = 0.0; - yarray_i[j][ma][mb] = 0.0; + ylist_r[jju] = 0.0; + ylist_i[jju] = 0.0; + jju++; } // end loop over ma, mb } // end loop over j for(int jjb = 0; jjb < idxb_max; jjb++) { - const int j1 = idxb[jjb].j1; - const int j2 = idxb[jjb].j2; - const int j3 = idxb[jjb].j; + const int j1b = idxb[jjb].j1; + const int j2b = idxb[jjb].j2; + const int j3b = idxb[jjb].j; - j = j3; - jjjzarray_r = zarray_r[j1][j2][j3]; - jjjzarray_i = zarray_i[j1][j2][j3]; - for(int mb = 0; 2*mb <= j; mb++) - for(int ma = 0; ma <= j; ma++) { - yarray_r[j][ma][mb] += beta[jjb]*jjjzarray_r[ma][mb]; - yarray_i[j][ma][mb] += beta[jjb]*jjjzarray_i[ma][mb]; - } // end loop over ma, mb - - j = j1; - jjjzarray_r = zarray_r[j3][j2][j1]; - jjjzarray_i = zarray_i[j3][j2][j1]; - double j1fac = (j3+1)/(j+1.0); - for(int mb = 0; 2*mb <= j; mb++) - for(int ma = 0; ma <= j; ma++) { - yarray_r[j][ma][mb] += beta[jjb]*jjjzarray_r[ma][mb]*j1fac; - yarray_i[j][ma][mb] 
+= beta[jjb]*jjjzarray_i[ma][mb]*j1fac; - } // end loop over ma, mb - - j = j2; - jjjzarray_r = zarray_r[j3][j1][j2]; - jjjzarray_i = zarray_i[j3][j1][j2]; - double j2fac = (j3+1)/(j+1.0); - for(int mb = 0; 2*mb <= j; mb++) - for(int ma = 0; ma <= j; ma++) { - yarray_r[j][ma][mb] += beta[jjb]*jjjzarray_r[ma][mb]*j2fac; - yarray_i[j][ma][mb] += beta[jjb]*jjjzarray_i[ma][mb]*j2fac; - } // end loop over ma, mb + compute_yterm(j1b,j2b,j3b,beta); + compute_yterm(j3b,j2b,j1b,beta); + compute_yterm(j3b,j1b,j2b,beta); } // end loop over jjb } +void SNA::compute_yterm(int j1, int j2, int j, const double* beta) { + double betaj; + + int jju = idxu_block[j]; + int jjz = idxz_block[j1][j2][j]; + + // pick out right beta value + + if (j >= j1) { + const int jjb = idxb_block[j1][j2][j]; + betaj = beta[jjb]; + } else if (j >= j2) { + const int jjb = idxb_block[j][j2][j1]; + betaj = beta[jjb]*(j1+1)/(j+1.0); + } else { + const int jjb = idxb_block[j2][j][j1]; + betaj = beta[jjb]*(j1+1)/(j+1.0); + } + + // can replace this with a single loop over jjz + + for (int mb = 0; 2*mb <= j; mb++) + for (int ma = 0; ma <= j; ma++) { + + const int ma1min = idxz[jjz].ma1min; + const int ma2max = idxz[jjz].ma2max; + const int na = idxz[jjz].na; + const int mb1min = idxz[jjz].mb1min; + const int mb2max = idxz[jjz].mb2max; + const int nb = idxz[jjz].nb; + + const double* cgblock = cglist + idxcg_block[j1][j2][j]; + + double ztmp_r = 0.0; + double ztmp_i = 0.0; + + int jju1 = idxu_block[j1] + (j1+1)*mb1min; + int jju2 = idxu_block[j2] + (j2+1)*mb2max; + int icgb = mb1min*(j2+1) + mb2max; + for(int ib = 0; ib < nb; ib++) { + + double suma1_r = 0.0; + double suma1_i = 0.0; + + const double* u1_r = &ulisttot_r[jju1]; + const double* u1_i = &ulisttot_i[jju1]; + const double* u2_r = &ulisttot_r[jju2]; + const double* u2_i = &ulisttot_i[jju2]; + + int ma1 = ma1min; + int ma2 = ma2max; + int icga = ma1min*(j2+1) + ma2max; + + for(int ia = 0; ia < na; ia++) { + suma1_r += cgblock[icga] * (u1_r[ma1] * 
u2_r[ma2] - u1_i[ma1] * u2_i[ma2]); + suma1_i += cgblock[icga] * (u1_r[ma1] * u2_i[ma2] + u1_i[ma1] * u2_r[ma2]); + ma1++; + ma2--; + icga += j2; + } // end loop over ia + + ztmp_r += cgblock[icgb] * suma1_r; + ztmp_i += cgblock[icgb] * suma1_i; + jju1 += j1+1; + jju2 -= j2+1; + icgb += j2; + } // end loop over ib + + // printf("jju betaj ztmp ylist %d %g %g %d %d %d %d %d\n",jju,betaj,ztmp_r,j1,j2,j,ma,mb); + ylist_r[jju] += betaj*ztmp_r; + ylist_i[jju] += betaj*ztmp_i; + jjz++; + jju++; + } // end loop over ma, mb +} + /* ---------------------------------------------------------------------- compute dEidRj ------------------------------------------------------------------------- */ @@ -496,19 +526,21 @@ void SNA::compute_deidrj(double* dedr) dedr[k] = 0.0; for(int j = 0; j <= twojmax; j++) { + int jju = idxu_block[j]; for(int mb = 0; 2*mb < j; mb++) for(int ma = 0; ma <= j; ma++) { - double* dudr_r = duarray_r[j][ma][mb]; - double* dudr_i = duarray_i[j][ma][mb]; - double jjjmambyarray_r = yarray_r[j][ma][mb]; - double jjjmambyarray_i = yarray_i[j][ma][mb]; + double* dudr_r = dulist_r[jju]; + double* dudr_i = dulist_i[jju]; + double jjjmambyarray_r = ylist_r[jju]; + double jjjmambyarray_i = ylist_i[jju]; + for(int k = 0; k < 3; k++) dedr[k] += dudr_r[k] * jjjmambyarray_r + dudr_i[k] * jjjmambyarray_i; - + jju++; } //end loop over ma mb // For j even, handle middle column @@ -517,30 +549,33 @@ void SNA::compute_deidrj(double* dedr) int mb = j/2; for(int ma = 0; ma < mb; ma++) { - double* dudr_r = duarray_r[j][ma][mb]; - double* dudr_i = duarray_i[j][ma][mb]; - double jjjmambyarray_r = yarray_r[j][ma][mb]; - double jjjmambyarray_i = yarray_i[j][ma][mb]; + double* dudr_r = dulist_r[jju]; + double* dudr_i = dulist_i[jju]; + double jjjmambyarray_r = ylist_r[jju]; + double jjjmambyarray_i = ylist_i[jju]; + for(int k = 0; k < 3; k++) dedr[k] += dudr_r[k] * jjjmambyarray_r + dudr_i[k] * jjjmambyarray_i; - + jju++; } int ma = mb; - double* dudr_r = duarray_r[j][ma][mb]; - 
double* dudr_i = duarray_i[j][ma][mb]; - double jjjmambyarray_r = yarray_r[j][ma][mb]; - double jjjmambyarray_i = yarray_i[j][ma][mb]; + double* dudr_r = dulist_r[jju]; + double* dudr_i = dulist_i[jju]; + double jjjmambyarray_r = ylist_r[jju]; + double jjjmambyarray_i = ylist_i[jju]; + for(int k = 0; k < 3; k++) dedr[k] += (dudr_r[k] * jjjmambyarray_r + dudr_i[k] * jjjmambyarray_i)*0.5; - + jju++; + } // end if jeven - } // End loop over j + } // end loop over j for(int k = 0; k < 3; k++) dedr[k] *= 2.0; @@ -562,103 +597,41 @@ void SNA::compute_bi() // b(j1,j2,j) += // 2*Conj(u(j,ma,mb))*z(j1,j2,j,ma,mb) - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) { - for(int j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) { - barray[j1][j2][j] = 0.0; + for(int jjb = 0; jjb < idxb_max; jjb++) { + const int j1 = idxb[jjb].j1; + const int j2 = idxb[jjb].j2; + const int j = idxb[jjb].j; - for(int mb = 0; 2*mb < j; mb++) - for(int ma = 0; ma <= j; ma++) - barray[j1][j2][j] += - uarraytot_r[j][ma][mb] * zarray_r[j1][j2][j][ma][mb] + - uarraytot_i[j][ma][mb] * zarray_i[j1][j2][j][ma][mb]; + int jjz = idxz_block[j1][j2][j]; + int jju = idxu_block[j]; + double sumzu = 0.0; + for (int mb = 0; 2*mb < j; mb++) + for (int ma = 0; ma <= j; ma++) { + sumzu += ulisttot_r[jju]*zlist_r[jjz] + + ulisttot_i[jju]*zlist_i[jjz]; + jjz++; + jju++; + } // end loop over ma, mb - // For j even, special treatment for middle column + // For j even, handle middle column - if (j%2 == 0) { - int mb = j/2; - for(int ma = 0; ma < mb; ma++) - barray[j1][j2][j] += - uarraytot_r[j][ma][mb] * zarray_r[j1][j2][j][ma][mb] + - uarraytot_i[j][ma][mb] * zarray_i[j1][j2][j][ma][mb]; - int ma = mb; - barray[j1][j2][j] += - (uarraytot_r[j][ma][mb] * zarray_r[j1][j2][j][ma][mb] + - uarraytot_i[j][ma][mb] * zarray_i[j1][j2][j][ma][mb])*0.5; - } - - barray[j1][j2][j] *= 2.0; - if (bzero_flag) - barray[j1][j2][j] -= bzero[j]; + if (j%2 == 0) { + int mb = j/2; + for(int ma = 0; ma < mb; ma++) { 
+ sumzu += ulisttot_r[jju]*zlist_r[jjz] + + ulisttot_i[jju]*zlist_i[jjz]; + jjz++; + jju++; } - } -} - -/* ---------------------------------------------------------------------- - copy Bi array to a vector -------------------------------------------------------------------------- */ - -void SNA::copy_bi2bvec() -{ - int ncount, j1, j2, j; - - ncount = 0; - - for(j1 = 0; j1 <= twojmax; j1++) - if(diagonalstyle == 0) { - for(j2 = 0; j2 <= j1; j2++) - for(j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) { - bvec[ncount] = barray[j1][j2][j]; - ncount++; - } - } else if(diagonalstyle == 1) { - j2 = j1; - for(j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) { - bvec[ncount] = barray[j1][j2][j]; - ncount++; - } - } else if(diagonalstyle == 2) { - j = j2 = j1; - bvec[ncount] = barray[j1][j2][j]; - ncount++; - } else if(diagonalstyle == 3) { - for(j2 = 0; j2 <= j1; j2++) - for(j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) - if (j >= j1) { - bvec[ncount] = barray[j1][j2][j]; - ncount++; - } - } -} - -/* ---------------------------------------------------------------------- - calculate derivative of Ui w.r.t. 
atom j -------------------------------------------------------------------------- */ - -void SNA::compute_duidrj(double* rij, double wj, double rcut) -{ - double rsq, r, x, y, z, z0, theta0, cs, sn; - double dz0dr; - - x = rij[0]; - y = rij[1]; - z = rij[2]; - rsq = x * x + y * y + z * z; - r = sqrt(rsq); - double rscale0 = rfac0 * MY_PI / (rcut - rmin0); - theta0 = (r - rmin0) * rscale0; - cs = cos(theta0); - sn = sin(theta0); - z0 = r * cs / sn; - dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq; - - compute_duarray(x, y, z, z0, r, dz0dr, wj, rcut); + sumzu += 0.5*(ulisttot_r[jju]*zlist_r[jjz] + + ulisttot_i[jju]*zlist_i[jjz]); + jjz++; + jju++; + } // end if jeven + blist[jjb] = 2.0*sumzu; + } } /* ---------------------------------------------------------------------- @@ -698,44 +671,36 @@ void SNA::compute_dbidrj() double** jjjzarray_i; double jjjmambzarray_r; double jjjmambzarray_i; + int jjz, jju; for(int jjb = 0; jjb < idxb_max; jjb++) { const int j1 = idxb[jjb].j1; const int j2 = idxb[jjb].j2; const int j = idxb[jjb].j; - dbdr = dbarray[j1][j2][j]; + dbdr = dblist[jjb]; dbdr[0] = 0.0; dbdr[1] = 0.0; dbdr[2] = 0.0; // Sum terms Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb) + jjz = idxz_block[j1][j2][j]; + jju = idxu_block[j]; + for(int k = 0; k < 3; k++) sumzdu_r[k] = 0.0; - // use zarray j1/j2 symmetry (optional) - - if (j1 >= j2) { - jjjzarray_r = zarray_r[j1][j2][j]; - jjjzarray_i = zarray_i[j1][j2][j]; - } else { - jjjzarray_r = zarray_r[j2][j1][j]; - jjjzarray_i = zarray_i[j2][j1][j]; - } - for(int mb = 0; 2*mb < j; mb++) for(int ma = 0; ma <= j; ma++) { - - dudr_r = duarray_r[j][ma][mb]; - dudr_i = duarray_i[j][ma][mb]; - jjjmambzarray_r = jjjzarray_r[ma][mb]; - jjjmambzarray_i = jjjzarray_i[ma][mb]; + dudr_r = dulist_r[jju]; + dudr_i = dulist_i[jju]; for(int k = 0; k < 3; k++) sumzdu_r[k] += - dudr_r[k] * jjjmambzarray_r + - dudr_i[k] * jjjmambzarray_i; - + dudr_r[k] * zlist_r[jjz] + + dudr_i[k] * zlist_i[jjz]; + jjz++; + jju++; } //end loop over ma mb // 
For j even, handle middle column @@ -743,24 +708,24 @@ void SNA::compute_dbidrj() if (j%2 == 0) { int mb = j/2; for(int ma = 0; ma < mb; ma++) { - dudr_r = duarray_r[j][ma][mb]; - dudr_i = duarray_i[j][ma][mb]; - jjjmambzarray_r = jjjzarray_r[ma][mb]; - jjjmambzarray_i = jjjzarray_i[ma][mb]; + dudr_r = dulist_r[jju]; + dudr_i = dulist_i[jju]; for(int k = 0; k < 3; k++) sumzdu_r[k] += - dudr_r[k] * jjjmambzarray_r + - dudr_i[k] * jjjmambzarray_i; + dudr_r[k] * zlist_r[jjz] + + dudr_i[k] * zlist_i[jjz]; + jjz++; + jju++; } int ma = mb; - dudr_r = duarray_r[j][ma][mb]; - dudr_i = duarray_i[j][ma][mb]; - jjjmambzarray_r = jjjzarray_r[ma][mb]; - jjjmambzarray_i = jjjzarray_i[ma][mb]; + dudr_r = dulist_r[jju]; + dudr_i = dulist_i[jju]; for(int k = 0; k < 3; k++) sumzdu_r[k] += - (dudr_r[k] * jjjmambzarray_r + - dudr_i[k] * jjjmambzarray_i)*0.5; + (dudr_r[k] * zlist_r[jjz] + + dudr_i[k] * zlist_i[jjz])*0.5; + jjz++; + jju++; } // end if jeven for(int k = 0; k < 3; k++) @@ -770,115 +735,97 @@ void SNA::compute_dbidrj() double j1fac = (j+1)/(j1+1.0); + jjz = idxz_block[j][j2][j1]; + jju = idxu_block[j1]; + for(int k = 0; k < 3; k++) sumzdu_r[k] = 0.0; - // use zarray j1/j2 symmetry (optional) - - if (j >= j2) { - jjjzarray_r = zarray_r[j][j2][j1]; - jjjzarray_i = zarray_i[j][j2][j1]; - } else { - jjjzarray_r = zarray_r[j2][j][j1]; - jjjzarray_i = zarray_i[j2][j][j1]; - } - - for(int mb1 = 0; 2*mb1 < j1; mb1++) - for(int ma1 = 0; ma1 <= j1; ma1++) { - - dudr_r = duarray_r[j1][ma1][mb1]; - dudr_i = duarray_i[j1][ma1][mb1]; - jjjmambzarray_r = jjjzarray_r[ma1][mb1]; - jjjmambzarray_i = jjjzarray_i[ma1][mb1]; + for(int mb = 0; 2*mb < j1; mb++) + for(int ma = 0; ma <= j1; ma++) { + dudr_r = dulist_r[jju]; + dudr_i = dulist_i[jju]; for(int k = 0; k < 3; k++) sumzdu_r[k] += - dudr_r[k] * jjjmambzarray_r + - dudr_i[k] * jjjmambzarray_i; - - } //end loop over ma1 mb1 + dudr_r[k] * zlist_r[jjz] + + dudr_i[k] * zlist_i[jjz]; + jjz++; + jju++; + } //end loop over ma mb // For j1 even, 
handle middle column if (j1%2 == 0) { - int mb1 = j1/2; - for(int ma1 = 0; ma1 < mb1; ma1++) { - dudr_r = duarray_r[j1][ma1][mb1]; - dudr_i = duarray_i[j1][ma1][mb1]; - jjjmambzarray_r = jjjzarray_r[ma1][mb1]; - jjjmambzarray_i = jjjzarray_i[ma1][mb1]; + int mb = j1/2; + for(int ma = 0; ma < mb; ma++) { + dudr_r = dulist_r[jju]; + dudr_i = dulist_i[jju]; for(int k = 0; k < 3; k++) sumzdu_r[k] += - dudr_r[k] * jjjmambzarray_r + - dudr_i[k] * jjjmambzarray_i; + dudr_r[k] * zlist_r[jjz] + + dudr_i[k] * zlist_i[jjz]; + jjz++; + jju++; } - int ma1 = mb1; - dudr_r = duarray_r[j1][ma1][mb1]; - dudr_i = duarray_i[j1][ma1][mb1]; - jjjmambzarray_r = jjjzarray_r[ma1][mb1]; - jjjmambzarray_i = jjjzarray_i[ma1][mb1]; + int ma = mb; + dudr_r = dulist_r[jju]; + dudr_i = dulist_i[jju]; for(int k = 0; k < 3; k++) sumzdu_r[k] += - (dudr_r[k] * jjjmambzarray_r + - dudr_i[k] * jjjmambzarray_i)*0.5; + (dudr_r[k] * zlist_r[jjz] + + dudr_i[k] * zlist_i[jjz])*0.5; + jjz++; + jju++; } // end if j1even for(int k = 0; k < 3; k++) dbdr[k] += 2.0*sumzdu_r[k]*j1fac; - // Sum over Conj(dudr(j2,ma2,mb2))*z(j1,j,j2,ma2,mb2) + // Sum over Conj(dudr(j2,ma2,mb2))*z(j,j1,j2,ma2,mb2) double j2fac = (j+1)/(j2+1.0); + jjz = idxz_block[j][j1][j2]; + jju = idxu_block[j2]; + for(int k = 0; k < 3; k++) sumzdu_r[k] = 0.0; - // use zarray j1/j2 symmetry (optional) - - if (j1 >= j) { - jjjzarray_r = zarray_r[j1][j][j2]; - jjjzarray_i = zarray_i[j1][j][j2]; - } else { - jjjzarray_r = zarray_r[j][j1][j2]; - jjjzarray_i = zarray_i[j][j1][j2]; - } - - for(int mb2 = 0; 2*mb2 < j2; mb2++) - for(int ma2 = 0; ma2 <= j2; ma2++) { - - dudr_r = duarray_r[j2][ma2][mb2]; - dudr_i = duarray_i[j2][ma2][mb2]; - jjjmambzarray_r = jjjzarray_r[ma2][mb2]; - jjjmambzarray_i = jjjzarray_i[ma2][mb2]; + for(int mb = 0; 2*mb < j2; mb++) + for(int ma = 0; ma <= j2; ma++) { + dudr_r = dulist_r[jju]; + dudr_i = dulist_i[jju]; for(int k = 0; k < 3; k++) sumzdu_r[k] += - dudr_r[k] * jjjmambzarray_r + - dudr_i[k] * jjjmambzarray_i; - - } 
//end loop over ma2 mb2 + dudr_r[k] * zlist_r[jjz] + + dudr_i[k] * zlist_i[jjz]; + jjz++; + jju++; + } //end loop over ma mb // For j2 even, handle middle column if (j2%2 == 0) { - int mb2 = j2/2; - for(int ma2 = 0; ma2 < mb2; ma2++) { - dudr_r = duarray_r[j2][ma2][mb2]; - dudr_i = duarray_i[j2][ma2][mb2]; - jjjmambzarray_r = jjjzarray_r[ma2][mb2]; - jjjmambzarray_i = jjjzarray_i[ma2][mb2]; + int mb = j2/2; + for(int ma = 0; ma < mb; ma++) { + dudr_r = dulist_r[jju]; + dudr_i = dulist_i[jju]; for(int k = 0; k < 3; k++) sumzdu_r[k] += - dudr_r[k] * jjjmambzarray_r + - dudr_i[k] * jjjmambzarray_i; + dudr_r[k] * zlist_r[jjz] + + dudr_i[k] * zlist_i[jjz]; + jjz++; + jju++; } - int ma2 = mb2; - dudr_r = duarray_r[j2][ma2][mb2]; - dudr_i = duarray_i[j2][ma2][mb2]; - jjjmambzarray_r = jjjzarray_r[ma2][mb2]; - jjjmambzarray_i = jjjzarray_i[ma2][mb2]; + int ma = mb; + dudr_r = dulist_r[jju]; + dudr_i = dulist_i[jju]; for(int k = 0; k < 3; k++) sumzdu_r[k] += - (dudr_r[k] * jjjmambzarray_r + - dudr_i[k] * jjjmambzarray_i)*0.5; + (dudr_r[k] * zlist_r[jjz] + + dudr_i[k] * zlist_i[jjz])*0.5; + jjz++; + jju++; } // end if j2even for(int k = 0; k < 3; k++) @@ -889,75 +836,56 @@ void SNA::compute_dbidrj() } /* ---------------------------------------------------------------------- - copy Bi derivatives into a vector + calculate derivative of Ui w.r.t. 
atom j ------------------------------------------------------------------------- */ -void SNA::copy_dbi2dbvec() +void SNA::compute_duidrj(double* rij, double wj, double rcut) { - int ncount, j1, j2, j; + double rsq, r, x, y, z, z0, theta0, cs, sn; + double dz0dr; - ncount = 0; + x = rij[0]; + y = rij[1]; + z = rij[2]; + rsq = x * x + y * y + z * z; + r = sqrt(rsq); + double rscale0 = rfac0 * MY_PI / (rcut - rmin0); + theta0 = (r - rmin0) * rscale0; + cs = cos(theta0); + sn = sin(theta0); + z0 = r * cs / sn; + dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq; - for(j1 = 0; j1 <= twojmax; j1++) { - if(diagonalstyle == 0) { - for(j2 = 0; j2 <= j1; j2++) - for(j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) { - dbvec[ncount][0] = dbarray[j1][j2][j][0]; - dbvec[ncount][1] = dbarray[j1][j2][j][1]; - dbvec[ncount][2] = dbarray[j1][j2][j][2]; - ncount++; - } - } else if(diagonalstyle == 1) { - j2 = j1; - for(j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) { - dbvec[ncount][0] = dbarray[j1][j2][j][0]; - dbvec[ncount][1] = dbarray[j1][j2][j][1]; - dbvec[ncount][2] = dbarray[j1][j2][j][2]; - ncount++; - } - } else if(diagonalstyle == 2) { - j = j2 = j1; - dbvec[ncount][0] = dbarray[j1][j2][j][0]; - dbvec[ncount][1] = dbarray[j1][j2][j][1]; - dbvec[ncount][2] = dbarray[j1][j2][j][2]; - ncount++; - } else if(diagonalstyle == 3) { - for(j2 = 0; j2 <= j1; j2++) - for(j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) - if (j >= j1) { - dbvec[ncount][0] = dbarray[j1][j2][j][0]; - dbvec[ncount][1] = dbarray[j1][j2][j][1]; - dbvec[ncount][2] = dbarray[j1][j2][j][2]; - ncount++; - } - } - } + compute_duarray(x, y, z, z0, r, dz0dr, wj, rcut); } /* ---------------------------------------------------------------------- */ void SNA::zero_uarraytot() { - for (int j = 0; j <= twojmax; j++) - for (int ma = 0; ma <= j; ma++) - for (int mb = 0; mb <= j; mb++) { - uarraytot_r[j][ma][mb] = 0.0; - uarraytot_i[j][ma][mb] = 0.0; + for (int j = 0; j <= twojmax; j++) { + int 
jju = idxu_block[j]; + for (int mb = 0; mb <= j; mb++) + for (int ma = 0; ma <= j; ma++) { + ulisttot_r[jju] = 0.0; + ulisttot_i[jju] = 0.0; + jju++; } + } } /* ---------------------------------------------------------------------- */ void SNA::addself_uarraytot(double wself_in) { - for (int j = 0; j <= twojmax; j++) + for (int j = 0; j <= twojmax; j++) { + int jju = idxu_block[j]; for (int ma = 0; ma <= j; ma++) { - uarraytot_r[j][ma][ma] = wself_in; - uarraytot_i[j][ma][ma] = 0.0; + ulisttot_r[jju] = wself_in; + ulisttot_i[jju] = 0.0; + jju += j+2; } + } } /* ---------------------------------------------------------------------- @@ -972,14 +900,17 @@ void SNA::add_uarraytot(double r, double wj, double rcut) sfac *= wj; - for (int j = 0; j <= twojmax; j++) - for (int ma = 0; ma <= j; ma++) - for (int mb = 0; mb <= j; mb++) { - uarraytot_r[j][ma][mb] += - sfac * uarray_r[j][ma][mb]; - uarraytot_i[j][ma][mb] += - sfac * uarray_i[j][ma][mb]; + for (int j = 0; j <= twojmax; j++) { + int jju = idxu_block[j]; + for (int mb = 0; mb <= j; mb++) + for (int ma = 0; ma <= j; ma++) { + ulisttot_r[jju] += + sfac * ulist_r[jju]; + ulisttot_i[jju] += + sfac * ulist_i[jju]; + jju++; } + } } /* ---------------------------------------------------------------------- @@ -1003,63 +934,72 @@ void SNA::compute_uarray(double x, double y, double z, // VMK Section 4.8.2 - uarray_r[0][0][0] = 1.0; - uarray_i[0][0][0] = 0.0; + ulist_r[0] = 1.0; + ulist_i[0] = 0.0; for (int j = 1; j <= twojmax; j++) { + int jju = idxu_block[j]; + int jjup = idxu_block[j-1]; // fill in left side of matrix layer from previous layer for (int mb = 0; 2*mb <= j; mb++) { - uarray_r[j][0][mb] = 0.0; - uarray_i[j][0][mb] = 0.0; + ulist_r[jju] = 0.0; + ulist_i[jju] = 0.0; for (int ma = 0; ma < j; ma++) { rootpq = rootpqarray[j - ma][j - mb]; - uarray_r[j][ma][mb] += + ulist_r[jju] += rootpq * - (a_r * uarray_r[j - 1][ma][mb] + - a_i * uarray_i[j - 1][ma][mb]); - uarray_i[j][ma][mb] += + (a_r * ulist_r[jjup] + + a_i * 
ulist_i[jjup]); + ulist_i[jju] += rootpq * - (a_r * uarray_i[j - 1][ma][mb] - - a_i * uarray_r[j - 1][ma][mb]); + (a_r * ulist_i[jjup] - + a_i * ulist_r[jjup]); rootpq = rootpqarray[ma + 1][j - mb]; - uarray_r[j][ma + 1][mb] = + ulist_r[jju+1] = -rootpq * - (b_r * uarray_r[j - 1][ma][mb] + - b_i * uarray_i[j - 1][ma][mb]); - uarray_i[j][ma + 1][mb] = + (b_r * ulist_r[jjup] + + b_i * ulist_i[jjup]); + ulist_i[jju+1] = -rootpq * - (b_r * uarray_i[j - 1][ma][mb] - - b_i * uarray_r[j - 1][ma][mb]); + (b_r * ulist_i[jjup] - + b_i * ulist_r[jjup]); + jju++; + jjup++; } + jju++; } // copy left side to right side with inversion symmetry VMK 4.4(2) // u[ma-j][mb-j] = (-1)^(ma-mb)*Conj([u[ma][mb]) - int mbpar = -1; + jju = idxu_block[j]; + jjup = jju+(j+1)*(j+1)-1; + int mbpar = 1; for (int mb = 0; 2*mb <= j; mb++) { - mbpar = -mbpar; - int mapar = -mbpar; + int mapar = mbpar; for (int ma = 0; ma <= j; ma++) { - mapar = -mapar; if (mapar == 1) { - uarray_r[j][j-ma][j-mb] = uarray_r[j][ma][mb]; - uarray_i[j][j-ma][j-mb] = -uarray_i[j][ma][mb]; + ulist_r[jjup] = ulist_r[jju]; + ulist_i[jjup] = -ulist_i[jju]; } else { - uarray_r[j][j-ma][j-mb] = -uarray_r[j][ma][mb]; - uarray_i[j][j-ma][j-mb] = uarray_i[j][ma][mb]; + ulist_r[jjup] = -ulist_r[jju]; + ulist_i[jjup] = ulist_i[jju]; } + mapar = -mapar; + jju++; + jjup--; } + mbpar = -mbpar; } } } /* ---------------------------------------------------------------------- - compute derivatives of Wigner U-functions for one neighbor + Compute derivatives of Wigner U-functions for one neighbor see comments in compute_uarray() ------------------------------------------------------------------------- */ @@ -1109,93 +1049,105 @@ void SNA::compute_duarray(double x, double y, double z, db_i[0] += -r0inv; db_r[1] += r0inv; - uarray_r[0][0][0] = 1.0; - duarray_r[0][0][0][0] = 0.0; - duarray_r[0][0][0][1] = 0.0; - duarray_r[0][0][0][2] = 0.0; - uarray_i[0][0][0] = 0.0; - duarray_i[0][0][0][0] = 0.0; - duarray_i[0][0][0][1] = 0.0; - 
duarray_i[0][0][0][2] = 0.0; + ulist_r[0] = 1.0; + dulist_r[0][0] = 0.0; + dulist_r[0][1] = 0.0; + dulist_r[0][2] = 0.0; + ulist_i[0] = 0.0; + dulist_i[0][0] = 0.0; + dulist_i[0][1] = 0.0; + dulist_i[0][2] = 0.0; for (int j = 1; j <= twojmax; j++) { + int jju = idxu_block[j]; + int jjup = idxu_block[j-1]; for (int mb = 0; 2*mb <= j; mb++) { - uarray_r[j][0][mb] = 0.0; - duarray_r[j][0][mb][0] = 0.0; - duarray_r[j][0][mb][1] = 0.0; - duarray_r[j][0][mb][2] = 0.0; - uarray_i[j][0][mb] = 0.0; - duarray_i[j][0][mb][0] = 0.0; - duarray_i[j][0][mb][1] = 0.0; - duarray_i[j][0][mb][2] = 0.0; + ulist_r[jju] = 0.0; + dulist_r[jju][0] = 0.0; + dulist_r[jju][1] = 0.0; + dulist_r[jju][2] = 0.0; + ulist_i[jju] = 0.0; + dulist_i[jju][0] = 0.0; + dulist_i[jju][1] = 0.0; + dulist_i[jju][2] = 0.0; for (int ma = 0; ma < j; ma++) { rootpq = rootpqarray[j - ma][j - mb]; - uarray_r[j][ma][mb] += rootpq * - (a_r * uarray_r[j - 1][ma][mb] + - a_i * uarray_i[j - 1][ma][mb]); - uarray_i[j][ma][mb] += rootpq * - (a_r * uarray_i[j - 1][ma][mb] - - a_i * uarray_r[j - 1][ma][mb]); + ulist_r[jju] += rootpq * + (a_r * ulist_r[jjup] + + a_i * ulist_i[jjup]); + ulist_i[jju] += rootpq * + (a_r * ulist_i[jjup] - + a_i * ulist_r[jjup]); for (int k = 0; k < 3; k++) { - duarray_r[j][ma][mb][k] += - rootpq * (da_r[k] * uarray_r[j - 1][ma][mb] + - da_i[k] * uarray_i[j - 1][ma][mb] + - a_r * duarray_r[j - 1][ma][mb][k] + - a_i * duarray_i[j - 1][ma][mb][k]); - duarray_i[j][ma][mb][k] += - rootpq * (da_r[k] * uarray_i[j - 1][ma][mb] - - da_i[k] * uarray_r[j - 1][ma][mb] + - a_r * duarray_i[j - 1][ma][mb][k] - - a_i * duarray_r[j - 1][ma][mb][k]); + dulist_r[jju][k] += + rootpq * (da_r[k] * ulist_r[jjup] + + da_i[k] * ulist_i[jjup] + + a_r * dulist_r[jjup][k] + + a_i * dulist_i[jjup][k]); + dulist_i[jju][k] += + rootpq * (da_r[k] * ulist_i[jjup] - + da_i[k] * ulist_r[jjup] + + a_r * dulist_i[jjup][k] - + a_i * dulist_r[jjup][k]); } rootpq = rootpqarray[ma + 1][j - mb]; - uarray_r[j][ma + 1][mb] = - -rootpq * 
(b_r * uarray_r[j - 1][ma][mb] + - b_i * uarray_i[j - 1][ma][mb]); - uarray_i[j][ma + 1][mb] = - -rootpq * (b_r * uarray_i[j - 1][ma][mb] - - b_i * uarray_r[j - 1][ma][mb]); + ulist_r[jju+1] = + -rootpq * (b_r * ulist_r[jjup] + + b_i * ulist_i[jjup]); + ulist_i[jju+1] = + -rootpq * (b_r * ulist_i[jjup] - + b_i * ulist_r[jjup]); for (int k = 0; k < 3; k++) { - duarray_r[j][ma + 1][mb][k] = - -rootpq * (db_r[k] * uarray_r[j - 1][ma][mb] + - db_i[k] * uarray_i[j - 1][ma][mb] + - b_r * duarray_r[j - 1][ma][mb][k] + - b_i * duarray_i[j - 1][ma][mb][k]); - duarray_i[j][ma + 1][mb][k] = - -rootpq * (db_r[k] * uarray_i[j - 1][ma][mb] - - db_i[k] * uarray_r[j - 1][ma][mb] + - b_r * duarray_i[j - 1][ma][mb][k] - - b_i * duarray_r[j - 1][ma][mb][k]); + dulist_r[jju+1][k] = + -rootpq * (db_r[k] * ulist_r[jjup] + + db_i[k] * ulist_i[jjup] + + b_r * dulist_r[jjup][k] + + b_i * dulist_i[jjup][k]); + dulist_i[jju+1][k] = + -rootpq * (db_r[k] * ulist_i[jjup] - + db_i[k] * ulist_r[jjup] + + b_r * dulist_i[jjup][k] - + b_i * dulist_r[jjup][k]); } + jju++; + jjup++; } + jju++; } - int mbpar = -1; + // copy left side to right side with inversion symmetry VMK 4.4(2) + // u[ma-j][mb-j] = (-1)^(ma-mb)*Conj([u[ma][mb]) + + jju = idxu_block[j]; + jjup = jju+(j+1)*(j+1)-1; + int mbpar = 1; for (int mb = 0; 2*mb <= j; mb++) { - mbpar = -mbpar; - int mapar = -mbpar; + int mapar = mbpar; for (int ma = 0; ma <= j; ma++) { - mapar = -mapar; if (mapar == 1) { - uarray_r[j][j-ma][j-mb] = uarray_r[j][ma][mb]; - uarray_i[j][j-ma][j-mb] = -uarray_i[j][ma][mb]; + ulist_r[jjup] = ulist_r[jju]; + ulist_i[jjup] = -ulist_i[jju]; for (int k = 0; k < 3; k++) { - duarray_r[j][j-ma][j-mb][k] = duarray_r[j][ma][mb][k]; - duarray_i[j][j-ma][j-mb][k] = -duarray_i[j][ma][mb][k]; + dulist_r[jjup][k] = dulist_r[jju][k]; + dulist_i[jjup][k] = -dulist_i[jju][k]; } } else { - uarray_r[j][j-ma][j-mb] = -uarray_r[j][ma][mb]; - uarray_i[j][j-ma][j-mb] = uarray_i[j][ma][mb]; + ulist_r[jjup] = -ulist_r[jju]; + ulist_i[jjup] 
= ulist_i[jju]; for (int k = 0; k < 3; k++) { - duarray_r[j][j-ma][j-mb][k] = -duarray_r[j][ma][mb][k]; - duarray_i[j][j-ma][j-mb][k] = duarray_i[j][ma][mb][k]; + dulist_r[jjup][k] = -dulist_r[jju][k]; + dulist_i[jjup][k] = dulist_i[jju][k]; } } + mapar = -mapar; + jju++; + jjup--; } + mbpar = -mbpar; } } @@ -1204,23 +1156,25 @@ void SNA::compute_duarray(double x, double y, double z, sfac *= wj; dsfac *= wj; - - for (int j = 0; j <= twojmax; j++) - for (int ma = 0; ma <= j; ma++) - for (int mb = 0; mb <= j; mb++) { - duarray_r[j][ma][mb][0] = dsfac * uarray_r[j][ma][mb] * ux + - sfac * duarray_r[j][ma][mb][0]; - duarray_i[j][ma][mb][0] = dsfac * uarray_i[j][ma][mb] * ux + - sfac * duarray_i[j][ma][mb][0]; - duarray_r[j][ma][mb][1] = dsfac * uarray_r[j][ma][mb] * uy + - sfac * duarray_r[j][ma][mb][1]; - duarray_i[j][ma][mb][1] = dsfac * uarray_i[j][ma][mb] * uy + - sfac * duarray_i[j][ma][mb][1]; - duarray_r[j][ma][mb][2] = dsfac * uarray_r[j][ma][mb] * uz + - sfac * duarray_r[j][ma][mb][2]; - duarray_i[j][ma][mb][2] = dsfac * uarray_i[j][ma][mb] * uz + - sfac * duarray_i[j][ma][mb][2]; + for (int j = 0; j <= twojmax; j++) { + int jju = idxu_block[j]; + for (int mb = 0; 2*mb <= j; mb++) + for (int ma = 0; ma <= j; ma++) { + dulist_r[jju][0] = dsfac * ulist_r[jju] * ux + + sfac * dulist_r[jju][0]; + dulist_i[jju][0] = dsfac * ulist_i[jju] * ux + + sfac * dulist_i[jju][0]; + dulist_r[jju][1] = dsfac * ulist_r[jju] * uy + + sfac * dulist_r[jju][1]; + dulist_i[jju][1] = dsfac * ulist_i[jju] * uy + + sfac * dulist_i[jju][1]; + dulist_r[jju][2] = dsfac * ulist_r[jju] * uz + + sfac * dulist_r[jju][2]; + dulist_i[jju][2] = dsfac * ulist_i[jju] * uz + + sfac * dulist_i[jju][2]; + jju++; } + } } /* ---------------------------------------------------------------------- @@ -1229,89 +1183,89 @@ void SNA::compute_duarray(double x, double y, double z, double SNA::memory_usage() { + int jdimpq = twojmax + 2; int jdim = twojmax + 1; double bytes; - bytes = jdim * jdim * jdim * jdim 
* jdim * sizeof(double); - bytes += 2 * jdim * jdim * jdim * sizeof(complex); - bytes += 2 * jdim * jdim * jdim * sizeof(double); - bytes += jdim * jdim * jdim * 3 * sizeof(complex); - bytes += jdim * jdim * jdim * 3 * sizeof(double); - bytes += ncoeff * sizeof(double); - bytes += jdim * jdim * jdim * jdim * jdim * sizeof(complex); + bytes = ncoeff * sizeof(double); // coeff + + bytes += jdimpq*jdimpq * sizeof(double); // pqarray + bytes += idxcg_max * sizeof(double); // cglist + bytes += jdim * jdim * jdim * sizeof(int); // idxcg_block + + bytes += idxu_max * sizeof(double) * 2; // ulist + bytes += idxu_max * sizeof(double) * 2; // ulisttot + bytes += idxu_max * 3 * sizeof(double) * 2; // dulist + bytes += jdim * sizeof(int); // idxu_block + + bytes += idxz_max * 9 * sizeof(int); // idxz + bytes += idxz_max * sizeof(double) * 2; // zlist + bytes += jdim * jdim * jdim * sizeof(int); // idxz_block + + bytes += idxu_max * sizeof(double) * 2; // ylist + bytes += idxb_max * 3 * sizeof(int); // idxb + + bytes += jdim * jdim * jdim * sizeof(int); // idxb_block + return bytes; } - /* ---------------------------------------------------------------------- */ void SNA::create_twojmax_arrays() { - int jdim = twojmax + 1; - - memory->create(cgarray, jdim, jdim, jdim, jdim, jdim, - "sna:cgarray"); - memory->create(rootpqarray, jdim+1, jdim+1, + int jdimpq = twojmax + 2; + memory->create(rootpqarray, jdimpq, jdimpq, "sna:rootpqarray"); - memory->create(barray, jdim, jdim, jdim, - "sna:barray"); - memory->create(dbarray, jdim, jdim, jdim, 3, - "sna:dbarray"); - - memory->create(duarray_r, jdim, jdim, jdim, 3, - "sna:duarray"); - memory->create(duarray_i, jdim, jdim, jdim, 3, - "sna:duarray"); - - memory->create(uarray_r, jdim, jdim, jdim, - "sna:uarray"); - memory->create(uarray_i, jdim, jdim, jdim, - "sna:uarray"); + memory->create(cglist, idxcg_max, "sna:cglist"); + memory->create(ulist_r, idxu_max, "sna:ulist"); + memory->create(ulist_i, idxu_max, "sna:ulist"); + 
memory->create(ulisttot_r, idxu_max, "sna:ulisttot"); + memory->create(ulisttot_i, idxu_max, "sna:ulisttot"); + memory->create(dulist_r, idxu_max, 3, "sna:dulist"); + memory->create(dulist_i, idxu_max, 3, "sna:dulist"); + memory->create(zlist_r, idxz_max, "sna:zlist"); + memory->create(zlist_i, idxz_max, "sna:zlist"); + memory->create(blist, idxb_max, "sna:blist"); + memory->create(dblist, idxb_max, 3, "sna:dblist"); + memory->create(ylist_r, idxu_max, "sna:ylist"); + memory->create(ylist_i, idxu_max, "sna:ylist"); if (bzero_flag) - memory->create(bzero, jdim,"sna:bzero"); + memory->create(bzero, twojmax+1,"sna:bzero"); else bzero = NULL; - - memory->create(uarraytot_r, jdim, jdim, jdim, - "sna:uarraytot"); - memory->create(zarray_r, jdim, jdim, jdim, jdim, jdim, - "sna:zarray"); - memory->create(uarraytot_i, jdim, jdim, jdim, - "sna:uarraytot"); - memory->create(zarray_i, jdim, jdim, jdim, jdim, jdim, - "sna:zarray"); - memory->create(yarray_r, jdim, jdim, jdim, - "sna:yarray"); - memory->create(yarray_i, jdim, jdim, jdim, - "sna:yarray"); - } /* ---------------------------------------------------------------------- */ void SNA::destroy_twojmax_arrays() { - memory->destroy(cgarray); memory->destroy(rootpqarray); - memory->destroy(barray); + memory->destroy(cglist); + memory->destroy(idxcg_block); - memory->destroy(dbarray); + memory->destroy(ulist_r); + memory->destroy(ulist_i); + memory->destroy(ulisttot_r); + memory->destroy(ulisttot_i); + memory->destroy(dulist_r); + memory->destroy(dulist_i); + memory->destroy(idxu_block); - memory->destroy(duarray_r); - memory->destroy(duarray_i); + memory->destroy(zlist_r); + memory->destroy(zlist_i); + memory->destroy(blist); + memory->destroy(dblist); + memory->destroy(idxz_block); - memory->destroy(uarray_r); - memory->destroy(uarray_i); + memory->destroy(ylist_r); + memory->destroy(ylist_i); + + memory->destroy(idxb_block); if (bzero_flag) memory->destroy(bzero); - memory->destroy(uarraytot_r); - 
memory->destroy(zarray_r); - memory->destroy(uarraytot_i); - memory->destroy(zarray_i); - memory->destroy(yarray_r); - memory->destroy(yarray_i); } /* ---------------------------------------------------------------------- @@ -1527,28 +1481,33 @@ void SNA::init_clebsch_gordan() int m, aa2, bb2, cc2; int ifac; - for (int j1 = 0; j1 <= twojmax; j1++) - for (int j2 = 0; j2 <= twojmax; j2++) - for (int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) - for (int m1 = 0; m1 <= j1; m1 += 1) { + int idxcg_count = 0; + for(int j1 = 0; j1 <= twojmax; j1++) + for(int j2 = 0; j2 <= j1; j2++) + for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { + for (int m1 = 0; m1 <= j1; m1++) { aa2 = 2 * m1 - j1; - for (int m2 = 0; m2 <= j2; m2 += 1) { + for (int m2 = 0; m2 <= j2; m2++) { // -c <= cc <= c bb2 = 2 * m2 - j2; m = (aa2 + bb2 + j) / 2; - if(m < 0 || m > j) continue; + if(m < 0 || m > j) { + cglist[idxcg_count] = 0.0; + idxcg_count++; + continue; + } sum = 0.0; for (int z = MAX(0, MAX(-(j - j2 + aa2) - / 2, -(j - j1 - bb2) / 2)); - z <= MIN((j1 + j2 - j) / 2, - MIN((j1 - aa2) / 2, (j2 + bb2) / 2)); - z++) { + / 2, -(j - j1 - bb2) / 2)); + z <= MIN((j1 + j2 - j) / 2, + MIN((j1 - aa2) / 2, (j2 + bb2) / 2)); + z++) { ifac = z % 2 ? 
-1 : 1; sum += ifac / (factorial(z) * @@ -1558,20 +1517,22 @@ void SNA::init_clebsch_gordan() factorial((j - j2 + aa2) / 2 + z) * factorial((j - j1 - bb2) / 2 + z)); } - + cc2 = 2 * m - j; dcg = deltacg(j1, j2, j); sfaccg = sqrt(factorial((j1 + aa2) / 2) * - factorial((j1 - aa2) / 2) * - factorial((j2 + bb2) / 2) * - factorial((j2 - bb2) / 2) * - factorial((j + cc2) / 2) * - factorial((j - cc2) / 2) * - (j + 1)); - - cgarray[j1][j2][j][m1][m2] = sum * dcg * sfaccg; + factorial((j1 - aa2) / 2) * + factorial((j2 + bb2) / 2) * + factorial((j2 - bb2) / 2) * + factorial((j + cc2) / 2) * + factorial((j - cc2) / 2) * + (j + 1)); + + cglist[idxcg_count] = sum * dcg * sfaccg; + idxcg_count++; } } + } } /* ---------------------------------------------------------------------- @@ -1586,74 +1547,6 @@ void SNA::init_rootpqarray() rootpqarray[p][q] = sqrt(static_cast(p)/q); } -/* ---------------------------------------------------------------------- - a = j/2 -------------------------------------------------------------------------- */ - -void SNA::jtostr(char* str, int j) -{ - if(j % 2 == 0) - sprintf(str, "%d", j / 2); - else - sprintf(str, "%d/2", j); -} - -/* ---------------------------------------------------------------------- - aa = m - j/2 -------------------------------------------------------------------------- */ - -void SNA::mtostr(char* str, int j, int m) -{ - if(j % 2 == 0) - sprintf(str, "%d", m - j / 2); - else - sprintf(str, "%d/2", 2 * m - j); -} - -/* ---------------------------------------------------------------------- - list values of Clebsch-Gordan coefficients - using notation of VMK Table 8.11 -------------------------------------------------------------------------- */ - -void SNA::print_clebsch_gordan(FILE* file) -{ - char stra[20], strb[20], strc[20], straa[20], strbb[20], strcc[20]; - int m, aa2, bb2; - - fprintf(file, "a, aa, b, bb, c, cc, c(a,aa,b,bb,c,cc) \n"); - - for (int j1 = 0; j1 <= twojmax; j1++) { - jtostr(stra, j1); - - for (int j2 = 0; j2 
<= twojmax; j2++) { - jtostr(strb, j2); - - for (int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { - jtostr(strc, j); - - for (int m1 = 0; m1 <= j1; m1 += 1) { - mtostr(straa, j1, m1); - aa2 = 2 * m1 - j1; - - for (int m2 = 0; m2 <= j2; m2 += 1) { - bb2 = 2 * m2 - j2; - m = (aa2 + bb2 + j) / 2; - - if(m < 0 || m > j) continue; - - mtostr(strbb, j2, m2); - mtostr(strcc, j, m); - - fprintf(file, "%s\t%s\t%s\t%s\t%s\t%s\t%g\n", - stra, straa, strb, strbb, strc, strcc, - cgarray[j1][j2][j][m1][m2]); - } - } - } - } - } -} - /* ---------------------------------------------------------------------- */ int SNA::compute_ncoeff() @@ -1663,25 +1556,10 @@ int SNA::compute_ncoeff() ncount = 0; for (int j1 = 0; j1 <= twojmax; j1++) - if(diagonalstyle == 0) { - for (int j2 = 0; j2 <= j1; j2++) - for (int j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) - ncount++; - } else if(diagonalstyle == 1) { - int j2 = j1; - + for (int j2 = 0; j2 <= j1; j2++) for (int j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) - ncount++; - } else if(diagonalstyle == 2) { - ncount++; - } else if(diagonalstyle == 3) { - for (int j2 = 0; j2 <= j1; j2++) - for (int j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) - if (j >= j1) ncount++; - } + j <= MIN(twojmax, j1 + j2); j += 2) + if (j >= j1) ncount++; return ncount; } diff --git a/src/SNAP/sna.h b/src/SNAP/sna.h index b93b0ac7b0..b54ad3482a 100644 --- a/src/SNAP/sna.h +++ b/src/SNAP/sna.h @@ -26,7 +26,6 @@ namespace LAMMPS_NS { struct SNA_ZINDICES { int j1, j2, j, ma1min, ma2max, mb1min, mb2max, na, nb, jju; - double betaj; }; struct SNA_BINDICES { @@ -51,19 +50,20 @@ public: void compute_ui(int); void compute_zi(); void compute_yi(const double*); + void compute_yterm(int, int, int, const double*); void compute_bi(); - void copy_bi2bvec(); // functions for derivatives void compute_duidrj(double*, double, double); void compute_dbidrj(); void compute_deidrj(double*); - void copy_dbi2dbvec(); double compute_sfac(double, 
double); double compute_dsfac(double, double); double* bvec, ** dbvec; + double* blist; + double** dblist; double** rij; int* inside; double* wj; @@ -73,31 +73,17 @@ public: void grow_rij(int); int twojmax, diagonalstyle; - double*** uarraytot_r, *** uarraytot_i; - double***** zarray_r, ***** zarray_i; - double*** yarray_r, *** yarray_i; - double*** uarray_r, *** uarray_i; private: double rmin0, rfac0; - // use indexlist instead of loops, constructor generates these + // data for bispectrum coefficients SNA_ZINDICES* idxz; SNA_BINDICES* idxb; int idxcg_max, idxu_max, idxz_max, idxb_max; - // data for bispectrum coefficients - - double***** cgarray; double** rootpqarray; - double*** barray; - - // derivatives of data - - double**** duarray_r, **** duarray_i; - double**** dbarray; - double* cglist; int*** idxcg_block; @@ -121,9 +107,6 @@ private: void destroy_twojmax_arrays(); void init_clebsch_gordan(); void init_rootpqarray(); - void jtostr(char*, int); - void mtostr(char*, int, int); - void print_clebsch_gordan(FILE*); void zero_uarraytot(); void addself_uarraytot(double); void add_uarraytot(double, double, double); From a973700295d264b09503728f805d451c054470da Mon Sep 17 00:00:00 2001 From: Aidan Thompson Date: Wed, 12 Jun 2019 16:42:28 -0600 Subject: [PATCH 13/21] Completed back-porting of Zombie SNAP improvements, particularly noteworthy is reduction in memory footprint, elimination of most multidimensional arrays, elimination of diagonal_style, elimination of Z array in force calculation. 
--- doc/src/compute_sna_atom.txt | 39 ++++++--------- doc/src/pair_snap.txt | 8 +-- potentials/Ta06A.snapparam | 1 - potentials/W_2940_2017_2.snapparam | 1 - src/SNAP/compute_sna_atom.cpp | 19 ++----- src/SNAP/compute_sna_atom.h | 2 +- src/SNAP/compute_snad_atom.cpp | 21 +++----- src/SNAP/compute_snad_atom.h | 2 +- src/SNAP/compute_snav_atom.cpp | 21 ++------ src/SNAP/compute_snav_atom.h | 2 +- src/SNAP/pair_snap.cpp | 75 +++++++++++++++++----------- src/SNAP/pair_snap.h | 4 +- src/SNAP/sna.cpp | 80 +++++++++++++++--------------- src/SNAP/sna.h | 9 ++-- 14 files changed, 130 insertions(+), 154 deletions(-) diff --git a/doc/src/compute_sna_atom.txt b/doc/src/compute_sna_atom.txt index efbf2e9ea3..10e68f5698 100644 --- a/doc/src/compute_sna_atom.txt +++ b/doc/src/compute_sna_atom.txt @@ -24,12 +24,8 @@ twojmax = band limit for bispectrum components (non-negative integer) :l R_1, R_2,... = list of cutoff radii, one for each type (distance units) :l w_1, w_2,... = list of neighbor weights, one for each type :l zero or more keyword/value pairs may be appended :l -keyword = {diagonal} or {rmin0} or {switchflag} or {bzeroflag} or {quadraticflag} :l - {diagonal} value = {0} or {1} or {2} or {3} - {0} = all j1, j2, j <= twojmax, j2 <= j1 - {1} = subset satisfying j1 == j2 - {2} = subset satisfying j1 == j2 == j3 - {3} = subset satisfying j2 <= j1 <= j +keyword = {rmin0} or {switchflag} or {bzeroflag} or {quadraticflag} :l +// {3} = subset satisfying j2 <= j1 <= j {rmin0} value = parameter in distance to angle conversion (distance units) {switchflag} value = {0} or {1} {0} = do not use switching function @@ -44,7 +40,7 @@ keyword = {diagonal} or {rmin0} or {switchflag} or {bzeroflag} or {quadraticflag [Examples:] -compute b all sna/atom 1.4 0.99363 6 2.0 2.4 0.75 1.0 diagonal 3 rmin0 0.0 +compute b all sna/atom 1.4 0.99363 6 2.0 2.4 0.75 1.0 rmin0 0.0 compute db all sna/atom 1.4 0.95 6 2.0 1.0 compute vb all sna/atom 1.4 0.95 6 2.0 1.0 :pre @@ -151,7 +147,7 @@ The argument 
{rfac0} and the optional keyword {rmin0} define the linear mapping from radial distance to polar angle {theta0} on the 3-sphere. -The argument {twojmax} and the keyword {diagonal} define which +The argument {twojmax} defines which bispectrum components are generated. See section below on output for a detailed explanation of the number of bispectrum components and the ordered in which they are listed. @@ -192,25 +188,20 @@ command that includes all pairs in the neighbor list. Compute {sna/atom} calculates a per-atom array, each column corresponding to a particular bispectrum component. The total number of columns and the identity of the bispectrum component contained in -each column depend on the values of {twojmax} and {diagonal}, as +each column depend on the value of {twojmax}, as described by the following piece of python code: for j1 in range(0,twojmax+1): - if(diagonal==2): - print j1/2.,j1/2.,j1/2. - elif(diagonal==1): - for j in range(0,min(twojmax,2*j1)+1,2): - print j1/2.,j1/2.,j/2. - elif(diagonal==0): - for j2 in range(0,j1+1): - for j in range(j1-j2,min(twojmax,j1+j2)+1,2): - print j1/2.,j2/2.,j/2. - elif(diagonal==3): - for j2 in range(0,j1+1): - for j in range(j1-j2,min(twojmax,j1+j2)+1,2): - if (j>=j1): print j1/2.,j2/2.,j/2. :pre + for j2 in range(0,j1+1): + for j in range(j1-j2,min(twojmax,j1+j2)+1,2): + if (j>=j1): print j1/2.,j2/2.,j/2. :pre -Compute {snad/atom} evaluates a per-atom array. The columns are +NOTE: the {diagonal} keyword allowing other possible choices +for the number of bispectrum components was removed in 2019, +since all potentials use the value of 3, corresponding to the +above set of bispectrum components. + +Compute {snad/atom} evaluates a per-atom array. The columns are arranged into {ntypes} blocks, listed in order of atom type {I}. Each block contains three sub-blocks corresponding to the {x}, {y}, and {z} components of the atom position.
Each of these sub-blocks contains @@ -259,7 +250,7 @@ package"_Build_package.html doc page for more info. [Default:] -The optional keyword defaults are {diagonal} = 0, {rmin0} = 0, +The optional keyword defaults are {rmin0} = 0, {switchflag} = 1, {bzeroflag} = 1, {quadraticflag} = 0, :line diff --git a/doc/src/pair_snap.txt b/doc/src/pair_snap.txt index a796cfdeba..1fba74a188 100644 --- a/doc/src/pair_snap.txt +++ b/doc/src/pair_snap.txt @@ -38,7 +38,7 @@ where {B_k^i} is the {k}-th bispectrum component of atom {i}, and {beta_k^alpha_i} is the corresponding linear coefficient that depends on {alpha_i}, the SNAP element of atom {i}. The number of bispectrum components used and their definitions -depend on the values of {twojmax} and {diagonalstyle} +depend on the value of {twojmax} defined in the SNAP parameter file described below. The bispectrum calculation is described in more detail in "compute sna/atom"_compute_sna_atom.html. @@ -125,14 +125,13 @@ This line is followed by {ncoeff} coefficients, one per line. The SNAP parameter file can contain blank and comment lines (start with #) anywhere. Each non-blank non-comment line must contain one keyword/value pair. The required keywords are {rcutfac} and -{twojmax}. Optional keywords are {rfac0}, {rmin0}, {diagonalstyle}, +{twojmax}. Optional keywords are {rfac0}, {rmin0}, {switchflag}, and {bzeroflag}. The default values for these keywords are {rfac0} = 0.99363 {rmin0} = 0.0 -{diagonalstyle} = 3 {switchflag} = 0 {bzeroflag} = 1 {quadraticflag} = 1 :ul @@ -144,6 +143,9 @@ If {quadraticflag} is set to 1, then the SNAP energy expression includes the qua The SNAP element file should contain {K}({K}+1)/2 additional coefficients for each element, the upper-triangular elements of alpha. +NOTE: The previously used {diagonalstyle} keyword was removed in 2019, +since all known SNAP potentials use the default value of 3. 
+ :line [Mixing, shift, table, tail correction, restart, rRESPA info]: diff --git a/potentials/Ta06A.snapparam b/potentials/Ta06A.snapparam index 283629d658..629d96d708 100644 --- a/potentials/Ta06A.snapparam +++ b/potentials/Ta06A.snapparam @@ -10,6 +10,5 @@ twojmax 6 rfac0 0.99363 rmin0 0 -diagonalstyle 3 bzeroflag 0 quadraticflag 0 diff --git a/potentials/W_2940_2017_2.snapparam b/potentials/W_2940_2017_2.snapparam index 27ab61a266..49f3094d08 100644 --- a/potentials/W_2940_2017_2.snapparam +++ b/potentials/W_2940_2017_2.snapparam @@ -8,6 +8,5 @@ twojmax 8 rfac0 0.99363 rmin0 0 -diagonalstyle 3 bzeroflag 0 quadraticflag 0 diff --git a/src/SNAP/compute_sna_atom.cpp b/src/SNAP/compute_sna_atom.cpp index fea37faca0..cc7a84281e 100644 --- a/src/SNAP/compute_sna_atom.cpp +++ b/src/SNAP/compute_sna_atom.cpp @@ -44,7 +44,6 @@ ComputeSNAAtom::ComputeSNAAtom(LAMMPS *lmp, int narg, char **arg) : // default values - diagonalstyle = 0; rmin0 = 0.0; switchflag = 1; bzeroflag = 1; @@ -84,14 +83,7 @@ ComputeSNAAtom::ComputeSNAAtom(LAMMPS *lmp, int narg, char **arg) : int iarg = nargmin; while (iarg < narg) { - if (strcmp(arg[iarg],"diagonal") == 0) { - if (iarg+2 > narg) - error->all(FLERR,"Illegal compute sna/atom command"); - diagonalstyle = atoi(arg[iarg+1]); - if (diagonalstyle < 0 || diagonalstyle > 3) - error->all(FLERR,"Illegal compute sna/atom command"); - iarg += 2; - } else if (strcmp(arg[iarg],"rmin0") == 0) { + if (strcmp(arg[iarg],"rmin0") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal compute sna/atom command"); rmin0 = atof(arg[iarg+1]); @@ -114,7 +106,7 @@ ComputeSNAAtom::ComputeSNAAtom(LAMMPS *lmp, int narg, char **arg) : } else error->all(FLERR,"Illegal compute sna/atom command"); } - snaptr = new SNA(lmp,rfac0,twojmax,diagonalstyle, + snaptr = new SNA(lmp,rfac0,twojmax, rmin0,switchflag,bzeroflag); ncoeff = snaptr->ncoeff; @@ -123,7 +115,6 @@ ComputeSNAAtom::ComputeSNAAtom(LAMMPS *lmp, int narg, char **arg) : peratom_flag = 1; nmax = 0; - njmax = 0; 
sna = NULL; } @@ -277,9 +268,9 @@ void ComputeSNAAtom::compute_peratom() double ComputeSNAAtom::memory_usage() { - double bytes = nmax*size_peratom_cols * sizeof(double); - bytes += 3*njmax*sizeof(double); - bytes += njmax*sizeof(int); + double bytes = nmax*size_peratom_cols * sizeof(double); // sna + bytes += snaptr->memory_usage(); // SNA object + return bytes; } diff --git a/src/SNAP/compute_sna_atom.h b/src/SNAP/compute_sna_atom.h index 56ffccfa7e..105a62a37a 100644 --- a/src/SNAP/compute_sna_atom.h +++ b/src/SNAP/compute_sna_atom.h @@ -34,7 +34,7 @@ class ComputeSNAAtom : public Compute { double memory_usage(); private: - int nmax, njmax, diagonalstyle; + int nmax; int ncoeff; double **cutsq; class NeighList *list; diff --git a/src/SNAP/compute_snad_atom.cpp b/src/SNAP/compute_snad_atom.cpp index 156380eccc..37587a0aae 100644 --- a/src/SNAP/compute_snad_atom.cpp +++ b/src/SNAP/compute_snad_atom.cpp @@ -44,7 +44,6 @@ ComputeSNADAtom::ComputeSNADAtom(LAMMPS *lmp, int narg, char **arg) : // default values - diagonalstyle = 0; rmin0 = 0.0; switchflag = 1; bzeroflag = 1; @@ -82,14 +81,7 @@ ComputeSNADAtom::ComputeSNADAtom(LAMMPS *lmp, int narg, char **arg) : int iarg = nargmin; while (iarg < narg) { - if (strcmp(arg[iarg],"diagonal") == 0) { - if (iarg+2 > narg) - error->all(FLERR,"Illegal compute snad/atom command"); - diagonalstyle = atof(arg[iarg+1]); - if (diagonalstyle < 0 || diagonalstyle > 3) - error->all(FLERR,"Illegal compute snad/atom command"); - iarg += 2; - } else if (strcmp(arg[iarg],"rmin0") == 0) { + if (strcmp(arg[iarg],"rmin0") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal compute snad/atom command"); rmin0 = atof(arg[iarg+1]); @@ -112,7 +104,7 @@ ComputeSNADAtom::ComputeSNADAtom(LAMMPS *lmp, int narg, char **arg) : } else error->all(FLERR,"Illegal compute snad/atom command"); } - snaptr = new SNA(lmp,rfac0,twojmax,diagonalstyle, + snaptr = new SNA(lmp,rfac0,twojmax, rmin0,switchflag,bzeroflag); ncoeff = snaptr->ncoeff; @@ -125,7 +117,6 @@ 
ComputeSNADAtom::ComputeSNADAtom(LAMMPS *lmp, int narg, char **arg) : peratom_flag = 1; nmax = 0; - njmax = 0; snad = NULL; } @@ -378,9 +369,9 @@ void ComputeSNADAtom::unpack_reverse_comm(int n, int *list, double *buf) double ComputeSNADAtom::memory_usage() { - double bytes = nmax*size_peratom_cols * sizeof(double); - bytes += 3*njmax*sizeof(double); - bytes += njmax*sizeof(int); - bytes += 3*nperdim*atom->ntypes; + + double bytes = nmax*size_peratom_cols * sizeof(double); // snad + bytes += snaptr->memory_usage(); // SNA object + return bytes; } diff --git a/src/SNAP/compute_snad_atom.h b/src/SNAP/compute_snad_atom.h index 1fcf540d7c..ac353d8553 100644 --- a/src/SNAP/compute_snad_atom.h +++ b/src/SNAP/compute_snad_atom.h @@ -36,7 +36,7 @@ class ComputeSNADAtom : public Compute { double memory_usage(); private: - int nmax, njmax, diagonalstyle; + int nmax; int ncoeff, nperdim, yoffset, zoffset; double **cutsq; class NeighList *list; diff --git a/src/SNAP/compute_snav_atom.cpp b/src/SNAP/compute_snav_atom.cpp index 6caff0820c..1f702496ed 100644 --- a/src/SNAP/compute_snav_atom.cpp +++ b/src/SNAP/compute_snav_atom.cpp @@ -44,7 +44,6 @@ ComputeSNAVAtom::ComputeSNAVAtom(LAMMPS *lmp, int narg, char **arg) : // default values - diagonalstyle = 0; rmin0 = 0.0; switchflag = 1; bzeroflag = 1; @@ -78,14 +77,7 @@ ComputeSNAVAtom::ComputeSNAVAtom(LAMMPS *lmp, int narg, char **arg) : int iarg = nargmin; while (iarg < narg) { - if (strcmp(arg[iarg],"diagonal") == 0) { - if (iarg+2 > narg) - error->all(FLERR,"Illegal compute snav/atom command"); - diagonalstyle = atof(arg[iarg+1]); - if (diagonalstyle < 0 || diagonalstyle > 3) - error->all(FLERR,"Illegal compute snav/atom command"); - iarg += 2; - } else if (strcmp(arg[iarg],"rmin0") == 0) { + if (strcmp(arg[iarg],"rmin0") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal compute snav/atom command"); rmin0 = atof(arg[iarg+1]); @@ -108,7 +100,7 @@ ComputeSNAVAtom::ComputeSNAVAtom(LAMMPS *lmp, int narg, char **arg) : } else 
error->all(FLERR,"Illegal compute snav/atom command"); } - snaptr = new SNA(lmp,rfac0,twojmax,diagonalstyle, + snaptr = new SNA(lmp,rfac0,twojmax, rmin0,switchflag,bzeroflag); ncoeff = snaptr->ncoeff; @@ -119,7 +111,6 @@ ComputeSNAVAtom::ComputeSNAVAtom(LAMMPS *lmp, int narg, char **arg) : peratom_flag = 1; nmax = 0; - njmax = 0; snav = NULL; } @@ -389,10 +380,8 @@ void ComputeSNAVAtom::unpack_reverse_comm(int n, int *list, double *buf) double ComputeSNAVAtom::memory_usage() { - double bytes = nmax*size_peratom_cols * sizeof(double); - bytes += 3*njmax*sizeof(double); - bytes += njmax*sizeof(int); - bytes += 6*nperdim*atom->ntypes; - if (quadraticflag) bytes += 6*nperdim*atom->ntypes; + double bytes = nmax*size_peratom_cols * sizeof(double); // snav + bytes += snaptr->memory_usage(); // SNA object + return bytes; } diff --git a/src/SNAP/compute_snav_atom.h b/src/SNAP/compute_snav_atom.h index 6bcce346e0..9df17cc667 100644 --- a/src/SNAP/compute_snav_atom.h +++ b/src/SNAP/compute_snav_atom.h @@ -36,7 +36,7 @@ class ComputeSNAVAtom : public Compute { double memory_usage(); private: - int nmax, njmax, diagonalstyle; + int nmax; int ncoeff, nperdim; double **cutsq; class NeighList *list; diff --git a/src/SNAP/pair_snap.cpp b/src/SNAP/pair_snap.cpp index 6a65f872fd..6eb05f85a4 100644 --- a/src/SNAP/pair_snap.cpp +++ b/src/SNAP/pair_snap.cpp @@ -34,10 +34,6 @@ using namespace LAMMPS_NS; #define MAXLINE 1024 #define MAXWORD 3 -// Outstanding issues with quadratic term -// 1. 
there seems to a problem with compute_optimized energy calc -// it does not match compute_regular, even when quadratic coeffs = 0 - /* ---------------------------------------------------------------------- */ PairSNAP::PairSNAP(LAMMPS *lmp) : Pair(lmp) @@ -53,8 +49,6 @@ PairSNAP::PairSNAP(LAMMPS *lmp) : Pair(lmp) wjelem = NULL; coeffelem = NULL; - nmax = 0; - beta_max = 0; beta = NULL; bispectrum = NULL; @@ -74,6 +68,7 @@ PairSNAP::~PairSNAP() memory->destroy(wjelem); memory->destroy(coeffelem); } + memory->destroy(beta); memory->destroy(bispectrum); @@ -115,7 +110,8 @@ void PairSNAP::compute(int eflag, int vflag) // compute dE_i/dB_i = beta_i for all i in list - compute_bispectrum(); + if (quadraticflag || eflag) + compute_bispectrum(); compute_beta(); numneigh = list->numneigh; @@ -209,15 +205,25 @@ void PairSNAP::compute(int eflag, int vflag) evdwl = coeffi[0]; // E = beta.B + 0.5*B^t.alpha.B - // coeff[k] = beta[k-1] or - // coeff[k] = alpha_ii or - // coeff[k] = alpha_ij = alpha_ji, j != i // linear contributions - for (int k = 0; k < ncoeff; k++) - evdwl += beta[ii][k]*bispectrum[ii][k]; + for (int icoeff = 0; icoeff < ncoeff; icoeff++) + evdwl += coeffi[icoeff+1]*bispectrum[ii][icoeff]; + // quadratic contributions + + if (quadraticflag) { + int k = ncoeff+1; + for (int icoeff = 0; icoeff < ncoeff; icoeff++) { + double bveci = bispectrum[ii][icoeff]; + evdwl += 0.5*coeffi[k++]*bveci*bveci; + for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { + double bvecj = bispectrum[ii][jcoeff]; + evdwl += coeffi[k++]*bveci*bvecj; + } + } + } ev_tally_full(i,2.0*evdwl,0.0,0.0,0.0,0.0,0.0); } @@ -241,8 +247,23 @@ void PairSNAP::compute_beta() const int ielem = map[itype]; double* coeffi = coeffelem[ielem]; - for (int k = 1; k <= ncoeff; k++) - beta[ii][k-1] = coeffi[k]; + for (int icoeff = 0; icoeff < ncoeff; icoeff++) + beta[ii][icoeff] = coeffi[icoeff+1]; + + if (quadraticflag) { + int k = ncoeff+1; + for (int icoeff = 0; icoeff < ncoeff; icoeff++) { + double bveci 
= bispectrum[ii][icoeff]; + beta[ii][icoeff] += coeffi[k]*bveci; + k++; + for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { + double bvecj = bispectrum[ii][jcoeff]; + beta[ii][icoeff] += coeffi[k]*bvecj; + beta[ii][jcoeff] += coeffi[k]*bveci; + k++; + } + } + } } } @@ -308,8 +329,8 @@ void PairSNAP::compute_bispectrum() snaptr->compute_zi(); snaptr->compute_bi(); - for (int k = 0; k < ncoeff; k++) - bispectrum[ii][k] = snaptr->blist[k]; + for (int icoeff = 0; icoeff < ncoeff; icoeff++) + bispectrum[ii][icoeff] = snaptr->blist[icoeff]; } } @@ -354,8 +375,6 @@ void PairSNAP::coeff(int narg, char **arg) memory->destroy(wjelem); memory->destroy(coeffelem); } - memory->destroy(beta); - memory->destroy(bispectrum); char* type1 = arg[0]; char* type2 = arg[1]; @@ -425,9 +444,7 @@ void PairSNAP::coeff(int narg, char **arg) if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); snaptr = new SNA(lmp,rfac0,twojmax, - diagonalstyle, rmin0,switchflag,bzeroflag); - snaptr->grow_rij(nmax); if (ncoeff != snaptr->ncoeff) { if (comm->me == 0) @@ -617,7 +634,6 @@ void PairSNAP::read_files(char *coefffilename, char *paramfilename) rfac0 = 0.99363; rmin0 = 0.0; - diagonalstyle = 3; switchflag = 1; bzeroflag = 1; quadraticflag = 0; @@ -678,8 +694,6 @@ void PairSNAP::read_files(char *coefffilename, char *paramfilename) rfac0 = atof(keyval); else if (strcmp(keywd,"rmin0") == 0) rmin0 = atof(keyval); - else if (strcmp(keywd,"diagonalstyle") == 0) - diagonalstyle = atoi(keyval); else if (strcmp(keywd,"switchflag") == 0) switchflag = atoi(keyval); else if (strcmp(keywd,"bzeroflag") == 0) @@ -702,13 +716,16 @@ void PairSNAP::read_files(char *coefffilename, char *paramfilename) double PairSNAP::memory_usage() { double bytes = Pair::memory_usage(); + int n = atom->ntypes+1; - bytes += n*n*sizeof(int); - bytes += n*n*sizeof(double); - bytes += 3*nmax*sizeof(double); - bytes += nmax*sizeof(int); - bytes += (2*ncoeffall)*sizeof(double); - bytes += 
(ncoeff*3)*sizeof(double); + bytes += n*n*sizeof(int); // setflag + bytes += n*n*sizeof(double); // cutsq + bytes += n*sizeof(int); // map + bytes += beta_max*ncoeff*sizeof(double); // bispectrum + bytes += beta_max*ncoeff*sizeof(double); // beta + + bytes += snaptr->memory_usage(); // SNA object + return bytes; } diff --git a/src/SNAP/pair_snap.h b/src/SNAP/pair_snap.h index b5871c1527..c64eaa5d4e 100644 --- a/src/SNAP/pair_snap.h +++ b/src/SNAP/pair_snap.h @@ -40,9 +40,7 @@ public: protected: int ncoeffq, ncoeffall; - double **bvec, ***dbvec; class SNA* snaptr; - int nmax; virtual void allocate(); void read_files(char *, char *); inline int equal(double* x,double* y); @@ -60,7 +58,7 @@ protected: double** beta; // betas for all atoms in list double** bispectrum; // bispectrum components for all atoms in list int *map; // mapping from atom types to elements - int twojmax, diagonalstyle, switchflag, bzeroflag; + int twojmax, switchflag, bzeroflag; double rfac0, rmin0, wj1, wj2; int rcutfacflag, twojmaxflag; // flags for required parameters int beta_max; // length of beta diff --git a/src/SNAP/sna.cpp b/src/SNAP/sna.cpp index 131ac48fdb..75601b8e17 100644 --- a/src/SNAP/sna.cpp +++ b/src/SNAP/sna.cpp @@ -113,7 +113,7 @@ using namespace MathConst; ------------------------------------------------------------------------- */ SNA::SNA(LAMMPS* lmp, double rfac0_in, - int twojmax_in, int diagonalstyle_in, + int twojmax_in, double rmin0_in, int switch_flag_in, int bzero_flag_in) : Pointers(lmp) { wself = 1.0; @@ -124,21 +124,16 @@ SNA::SNA(LAMMPS* lmp, double rfac0_in, bzero_flag = bzero_flag_in; twojmax = twojmax_in; - diagonalstyle = diagonalstyle_in; ncoeff = compute_ncoeff(); - bvec = NULL; - dbvec = NULL; - memory->create(bvec, ncoeff, "pair:bvec"); - memory->create(dbvec, ncoeff, 3, "pair:dbvec"); rij = NULL; inside = NULL; wj = NULL; rcutij = NULL; nmax = 0; idxz = NULL; - idxb= NULL; + idxb = NULL; build_indexlist(); create_twojmax_arrays(); @@ -159,8 +154,6 @@ 
SNA::~SNA() memory->destroy(inside); memory->destroy(wj); memory->destroy(rcutij); - memory->destroy(bvec); - memory->destroy(dbvec); delete[] idxz; delete[] idxb; destroy_twojmax_arrays(); @@ -168,8 +161,6 @@ SNA::~SNA() void SNA::build_indexlist() { - if(diagonalstyle != 3) - error->all(FLERR, "diagonal_style must be 3\n"); // index list for cglist @@ -180,7 +171,7 @@ void SNA::build_indexlist() int idxcg_count = 0; for(int j1 = 0; j1 <= twojmax; j1++) for(int j2 = 0; j2 <= j1; j2++) - for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { + for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) { idxcg_block[j1][j2][j] = idxcg_count; for (int m1 = 0; m1 <= j1; m1++) for (int m2 = 0; m2 <= j2; m2++) @@ -209,7 +200,7 @@ void SNA::build_indexlist() int idxb_count = 0; for(int j1 = 0; j1 <= twojmax; j1++) for(int j2 = 0; j2 <= j1; j2++) - for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) + for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) if (j >= j1) idxb_count++; idxb_max = idxb_count; @@ -218,7 +209,7 @@ void SNA::build_indexlist() idxb_count = 0; for(int j1 = 0; j1 <= twojmax; j1++) for(int j2 = 0; j2 <= j1; j2++) - for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) + for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) if (j >= j1) { idxb[idxb_count].j1 = j1; idxb[idxb_count].j2 = j2; @@ -233,7 +224,7 @@ void SNA::build_indexlist() idxb_count = 0; for(int j1 = 0; j1 <= twojmax; j1++) for(int j2 = 0; j2 <= j1; j2++) - for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { + for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) { if (j >= j1) { idxb_block[j1][j2][j] = idxb_count; idxb_count++; @@ -246,7 +237,7 @@ void SNA::build_indexlist() for(int j1 = 0; j1 <= twojmax; j1++) for(int j2 = 0; j2 <= j1; j2++) - for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) + for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) for (int mb = 0; 2*mb <= j; mb++) for (int ma = 0; ma <= j; ma++) idxz_count++; 
@@ -260,7 +251,7 @@ void SNA::build_indexlist() idxz_count = 0; for(int j1 = 0; j1 <= twojmax; j1++) for(int j2 = 0; j2 <= j1; j2++) - for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { + for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) { idxz_block[j1][j2][j] = idxz_count; // find right beta[jjb] entry @@ -288,6 +279,7 @@ void SNA::build_indexlist() } } } + /* ---------------------------------------------------------------------- */ void SNA::init() @@ -312,6 +304,7 @@ void SNA::grow_rij(int newnmax) memory->create(wj, nmax, "pair:wj"); memory->create(rcutij, nmax, "pair:rcutij"); } + /* ---------------------------------------------------------------------- compute Ui by summing over neighbors j ------------------------------------------------------------------------- */ @@ -401,12 +394,6 @@ void SNA::compute_zi() icgb += j2; } // end loop over ib -// // apply symmetry factor - -// const double jfac = 1.0/(j+1); -// zlist_r[jjz] *= jfac; -// zlist_i[jjz] *= jfac; - } // end loop over jjz } @@ -631,6 +618,11 @@ void SNA::compute_bi() } // end if jeven blist[jjb] = 2.0*sumzu; + + // apply bzero shift + + if (bzero_flag) + blist[jjb] -= bzero[j]; } } @@ -1186,25 +1178,38 @@ double SNA::memory_usage() int jdimpq = twojmax + 2; int jdim = twojmax + 1; double bytes; - bytes = ncoeff * sizeof(double); // coeff + + bytes = 0; bytes += jdimpq*jdimpq * sizeof(double); // pqarray bytes += idxcg_max * sizeof(double); // cglist - bytes += jdim * jdim * jdim * sizeof(int); // idxcg_block bytes += idxu_max * sizeof(double) * 2; // ulist bytes += idxu_max * sizeof(double) * 2; // ulisttot bytes += idxu_max * 3 * sizeof(double) * 2; // dulist - bytes += jdim * sizeof(int); // idxu_block - bytes += idxz_max * 9 * sizeof(int); // idxz bytes += idxz_max * sizeof(double) * 2; // zlist - bytes += jdim * jdim * jdim * sizeof(int); // idxz_block - + bytes += idxb_max * sizeof(double); // blist + bytes += idxb_max * 3 * sizeof(double); // dblist bytes += idxu_max * 
sizeof(double) * 2; // ylist - bytes += idxb_max * 3 * sizeof(int); // idxb - bytes += jdim * jdim * jdim * sizeof(int); // idxb_block + bytes += jdim * jdim * jdim * sizeof(int); // idxcg_block + bytes += jdim * sizeof(int); // idxu_block + bytes += jdim * jdim * jdim * sizeof(int); // idxz_block + bytes += jdim * jdim * jdim * sizeof(int); // idxb_block + + bytes += idxz_max * sizeof(SNA_ZINDICES); // idxz + bytes += idxb_max * sizeof(SNA_BINDICES); // idxb + + bytes += jdim * sizeof(double); // bzero + + bytes += nmax * 3 * sizeof(double); // rij + bytes += nmax * sizeof(int); // inside + bytes += nmax * sizeof(double); // wj + bytes += nmax * sizeof(double); // rcutij + + printf("SNAP Z list Memory Usage %d\n",idxz_max * sizeof(double) * 2); + printf("SNAP CG list Memory Usage %d\n",idxcg_max * sizeof(double)); return bytes; } @@ -1242,25 +1247,22 @@ void SNA::destroy_twojmax_arrays() { memory->destroy(rootpqarray); memory->destroy(cglist); - memory->destroy(idxcg_block); - memory->destroy(ulist_r); memory->destroy(ulist_i); memory->destroy(ulisttot_r); memory->destroy(ulisttot_i); memory->destroy(dulist_r); memory->destroy(dulist_i); - memory->destroy(idxu_block); - memory->destroy(zlist_r); memory->destroy(zlist_i); memory->destroy(blist); memory->destroy(dblist); - memory->destroy(idxz_block); - memory->destroy(ylist_r); memory->destroy(ylist_i); + memory->destroy(idxcg_block); + memory->destroy(idxu_block); + memory->destroy(idxz_block); memory->destroy(idxb_block); if (bzero_flag) @@ -1484,7 +1486,7 @@ void SNA::init_clebsch_gordan() int idxcg_count = 0; for(int j1 = 0; j1 <= twojmax; j1++) for(int j2 = 0; j2 <= j1; j2++) - for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { + for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) { for (int m1 = 0; m1 <= j1; m1++) { aa2 = 2 * m1 - j1; @@ -1557,7 +1559,7 @@ int SNA::compute_ncoeff() for (int j1 = 0; j1 <= twojmax; j1++) for (int j2 = 0; j2 <= j1; j2++) - for (int j = abs(j1 - j2); + for (int 
j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) if (j >= j1) ncount++; diff --git a/src/SNAP/sna.h b/src/SNAP/sna.h index b54ad3482a..1e08ef123c 100644 --- a/src/SNAP/sna.h +++ b/src/SNAP/sna.h @@ -18,9 +18,7 @@ #ifndef LMP_SNA_H #define LMP_SNA_H -#include #include "pointers.h" -#include namespace LAMMPS_NS { @@ -35,7 +33,7 @@ struct SNA_BINDICES { class SNA : protected Pointers { public: - SNA(LAMMPS*, double, int, int, double, int, int); + SNA(LAMMPS*, double, int, double, int, int); SNA(LAMMPS* lmp) : Pointers(lmp) {}; ~SNA(); @@ -61,7 +59,6 @@ public: double compute_sfac(double, double); double compute_dsfac(double, double); - double* bvec, ** dbvec; double* blist; double** dblist; double** rij; @@ -72,7 +69,7 @@ public: void grow_rij(int); - int twojmax, diagonalstyle; + int twojmax; private: double rmin0, rfac0; @@ -126,7 +123,7 @@ private: double wself; int bzero_flag; // 1 if bzero subtracted from barray - double *bzero; // array of B values for isolated atoms + double* bzero; // array of B values for isolated atoms }; } From 3f523ea906a417232a7be1e8d64778fef41041d0 Mon Sep 17 00:00:00 2001 From: athomps Date: Wed, 12 Jun 2019 17:02:59 -0600 Subject: [PATCH 14/21] Not part of this pull request --- examples/snap/W.nnsnap | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 examples/snap/W.nnsnap diff --git a/examples/snap/W.nnsnap b/examples/snap/W.nnsnap deleted file mode 100644 index 6ca97a701a..0000000000 --- a/examples/snap/W.nnsnap +++ /dev/null @@ -1,16 +0,0 @@ -# DATE: 2017-02-20 CONTRIBUTOR: Mitchell Wood mitwood@sandia.gov CITATION: Wood, M. A. and Thompson, A. P. "Quantum-Accurate Molecular Dynamics Potential for Tungsten" arXiv:1702.07042 [physics.comp-ph] -# -# Definition of SNAP+ZBL potential. 
-variable zblcutinner equal 4 -variable zblcutouter equal 4.8 -variable zblz equal 74 - -# Specify hybrid with SNAP and ZBL - -pair_style hybrid/overlay & -zbl ${zblcutinner} ${zblcutouter} & -nn/snap -pair_coeff 1 1 zbl ${zblz} ${zblz} -pair_coeff * * nn/snap W_2940_2017_2.snapcoeff W_2940_2017_2.snapparam W - -#Nomenclature on the snap files are Element_DakotaID_Year_Month From f8e257d21967470b0e4c6e47bd3830a3192e871a Mon Sep 17 00:00:00 2001 From: athomps Date: Wed, 12 Jun 2019 17:04:05 -0600 Subject: [PATCH 15/21] Not part of this pull request --- src/SNAP/pair_nn_snap.cpp | 1824 ------------------------------------- 1 file changed, 1824 deletions(-) delete mode 100644 src/SNAP/pair_nn_snap.cpp diff --git a/src/SNAP/pair_nn_snap.cpp b/src/SNAP/pair_nn_snap.cpp deleted file mode 100644 index e90f6d6b1b..0000000000 --- a/src/SNAP/pair_nn_snap.cpp +++ /dev/null @@ -1,1824 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#include -#include -#include -#include "pair_nn_snap.h" -#include "atom.h" -#include "atom_vec.h" -#include "force.h" -#include "comm.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "neigh_request.h" -#include "sna.h" -#include "openmp_snap.h" -#include "domain.h" -#include "memory.h" -#include "error.h" - -#include - -using namespace LAMMPS_NS; - -#define MAXLINE 1024 -#define MAXWORD 3 - -// Outstanding issues with quadratic term -// 1. 
there seems to a problem with compute_optimized energy calc -// it does not match compute_regular, even when quadratic coeffs = 0 - -/* ---------------------------------------------------------------------- */ - -PairNNSNAP::PairNNSNAP(LAMMPS *lmp) : Pair(lmp) -{ - single_enable = 0; - restartinfo = 0; - one_coeff = 1; - manybody_flag = 1; - - nelements = 0; - elements = NULL; - radelem = NULL; - wjelem = NULL; - coeffelem = NULL; - - nmax = 0; - nthreads = 1; - - schedule_user = 0; - schedule_time_guided = -1; - schedule_time_dynamic = -1; - ncalls_neigh =-1; - - ilistmask_max = 0; - ilistmask = NULL; - ghostinum = 0; - ghostilist_max = 0; - ghostilist = NULL; - ghostnumneigh_max = 0; - ghostnumneigh = NULL; - ghostneighs = NULL; - ghostfirstneigh = NULL; - ghostneighs_total = 0; - ghostneighs_max = 0; - - i_max = 0; - i_neighmax = 0; - i_numpairs = 0; - i_rij = NULL; - i_inside = NULL; - i_wj = NULL; - i_rcutij = NULL; - i_ninside = NULL; - i_pairs = NULL; - i_uarraytot_r = NULL; - i_uarraytot_i = NULL; - i_zarray_r = NULL; - i_zarray_i = NULL; - - use_shared_arrays = 0; - -#ifdef TIMING_INFO - timers[0] = 0; - timers[1] = 0; - timers[2] = 0; - timers[3] = 0; -#endif - - // Need to set this because restart not handled by PairHybrid - - sna = NULL; - - beta_max = 0; -} - -/* ---------------------------------------------------------------------- */ - -PairNNSNAP::~PairNNSNAP() -{ - if (copymode) return; - - if (nelements) { - for (int i = 0; i < nelements; i++) - delete[] elements[i]; - delete[] elements; - memory->destroy(radelem); - memory->destroy(wjelem); - memory->destroy(coeffelem); - memory->destroy(beta); - } - - // Need to set this because restart not handled by PairHybrid - - if (sna) { - -#ifdef TIMING_INFO - double time[5]; - double timeave[5]; - double timeave_mpi[5]; - double timemax_mpi[5]; - - for (int i = 0; i < 5; i++) { - time[i] = 0; - timeave[i] = 0; - for (int tid = 0; tidtimers[i]>time[i]) - time[i] = sna[tid]->timers[i]; - timeave[i] += 
sna[tid]->timers[i]; - } - timeave[i] /= nthreads; - } - MPI_Reduce(timeave, timeave_mpi, 5, MPI_DOUBLE, MPI_SUM, 0, world); - MPI_Reduce(time, timemax_mpi, 5, MPI_DOUBLE, MPI_MAX, 0, world); -#endif - - for (int tid = 0; tiddestroy(setflag); - memory->destroy(cutsq); - memory->destroy(map); - } - -} - -void PairNNSNAP::compute(int eflag, int vflag) -{ -// if (use_optimized) -// compute_optimized(eflag, vflag); -// else - -// hard-code compute_regular() - - compute_regular(eflag, vflag); -} - -/* ---------------------------------------------------------------------- - This version is a straightforward implementation - ---------------------------------------------------------------------- */ - -void PairNNSNAP::compute_regular(int eflag, int vflag) -{ - int i,j,jnum,ninside; - double delx,dely,delz,evdwl,rsq; - double fij[3]; - int *jlist,*numneigh,**firstneigh; - evdwl = 0.0; - - ev_init(eflag,vflag); - - double **x = atom->x; - double **f = atom->f; - int *type = atom->type; - int nlocal = atom->nlocal; - int newton_pair = force->newton_pair; - class SNA* snaptr = sna[0]; - - if (beta_max < list->inum) { - memory->grow(beta,list->inum,ncoeff,"PairNNSNAP:beta"); - beta_max = list->inum; - } - - // compute dE_i/dB_i = beta_i for all i in list - - compute_beta(); - - numneigh = list->numneigh; - firstneigh = list->firstneigh; - - for (int ii = 0; ii < list->inum; ii++) { - i = list->ilist[ii]; - - const double xtmp = x[i][0]; - const double ytmp = x[i][1]; - const double ztmp = x[i][2]; - const int itype = type[i]; - const int ielem = map[itype]; - const double radi = radelem[ielem]; - - jlist = firstneigh[i]; - jnum = numneigh[i]; - - // insure rij, inside, wj, and rcutij are of size jnum - - snaptr->grow_rij(jnum); - - // rij[][3] = displacements between atom I and those neighbors - // inside = indices of neighbors of I within cutoff - // wj = weights for neighbors of I within cutoff - // rcutij = cutoffs for neighbors of I within cutoff - // note Rij sign 
convention => dU/dRij = dU/dRj = -dU/dRi - - ninside = 0; - for (int jj = 0; jj < jnum; jj++) { - j = jlist[jj]; - j &= NEIGHMASK; - delx = x[j][0] - xtmp; - dely = x[j][1] - ytmp; - delz = x[j][2] - ztmp; - rsq = delx*delx + dely*dely + delz*delz; - int jtype = type[j]; - int jelem = map[jtype]; - - if (rsq < cutsq[itype][jtype]&&rsq>1e-20) { - snaptr->rij[ninside][0] = delx; - snaptr->rij[ninside][1] = dely; - snaptr->rij[ninside][2] = delz; - snaptr->inside[ninside] = j; - snaptr->wj[ninside] = wjelem[jelem]; - snaptr->rcutij[ninside] = (radi + radelem[jelem])*rcutfac; - ninside++; - } - } - - // compute Ui, Zi, and Bi for atom I - - snaptr->compute_ui(ninside); - snaptr->compute_zi(); - if (quadraticflag) { - snaptr->compute_bi(); - snaptr->copy_bi2bvec(); - } - - // for neighbors of I within cutoff: - // compute Fij = dEi/dRj = -dEi/dRi - // add to Fi, subtract from Fj - - // compute beta_i*Z_i = Y_i - - snaptr->compute_yi(beta[ii]); - - for (int jj = 0; jj < ninside; jj++) { - int j = snaptr->inside[jj]; - snaptr->compute_duidrj(snaptr->rij[jj], - snaptr->wj[jj],snaptr->rcutij[jj]); - -// // quadratic contributions - -// if (quadraticflag) { -// int k = ncoeff+1; -// for (int icoeff = 0; icoeff < ncoeff; icoeff++) { -// double bveci = snaptr->bvec[icoeff]; -// double fack = coeffi[k]*bveci; -// double* dbveci = snaptr->dbvec[icoeff]; -// fij[0] += fack*dbveci[0]; -// fij[1] += fack*dbveci[1]; -// fij[2] += fack*dbveci[2]; -// k++; -// for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { -// double facki = coeffi[k]*bveci; -// double fackj = coeffi[k]*snaptr->bvec[jcoeff]; -// double* dbvecj = snaptr->dbvec[jcoeff]; - -// fij[0] += facki*dbvecj[0]+fackj*dbveci[0]; -// fij[1] += facki*dbvecj[1]+fackj*dbveci[1]; -// fij[2] += facki*dbvecj[2]+fackj*dbveci[2]; -// k++; -// } -// } -// } - - snaptr->compute_deidrj(fij); - - f[i][0] += fij[0]; - f[i][1] += fij[1]; - f[i][2] += fij[2]; - f[j][0] -= fij[0]; - f[j][1] -= fij[1]; - f[j][2] -= fij[2]; - - // tally 
per-atom virial contribution - - if (vflag) - ev_tally_xyz(i,j,nlocal,newton_pair,0.0,0.0, - fij[0],fij[1],fij[2], - -snaptr->rij[jj][0],-snaptr->rij[jj][1], - -snaptr->rij[jj][2]); - } - - // tally energy contribution - - if (eflag) { - - // evdwl = energy of atom I, sum over coeffs_k * Bi_k - - double* coeffi = coeffelem[ielem]; - evdwl = coeffi[0]; - if (!quadraticflag) { - snaptr->compute_bi(); - snaptr->copy_bi2bvec(); - } - - // E = beta.B + 0.5*B^t.alpha.B - // coeff[k] = beta[k-1] or - // coeff[k] = alpha_ii or - // coeff[k] = alpha_ij = alpha_ji, j != i - - // linear contributions - - for (int k = 1; k <= ncoeff; k++) - evdwl += coeffi[k]*snaptr->bvec[k-1]; - - // quadratic contributions - - if (quadraticflag) { - int k = ncoeff+1; - for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - double bveci = snaptr->bvec[icoeff]; - evdwl += 0.5*coeffi[k++]*bveci*bveci; - for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { - evdwl += coeffi[k++]*bveci*snaptr->bvec[jcoeff]; - } - } - } - ev_tally_full(i,2.0*evdwl,0.0,0.0,0.0,0.0,0.0); - } - - } - - if (vflag_fdotr) virial_fdotr_compute(); -} - - -/* ---------------------------------------------------------------------- - This version is optimized for threading, micro-load balancing - ---------------------------------------------------------------------- */ - -void PairNNSNAP::compute_optimized(int eflag, int vflag) -{ - // if reneighboring took place do load_balance if requested - if (do_load_balance > 0 && - (neighbor->ncalls != ncalls_neigh)) { - ghostinum = 0; - // reset local ghost neighbor lists - ncalls_neigh = neighbor->ncalls; - if (ilistmask_max < list->inum) { - memory->grow(ilistmask,list->inum,"PairSnap::ilistmask"); - ilistmask_max = list->inum; - } - for (int i = 0; i < list->inum; i++) - ilistmask[i] = 1; - - //multiple passes for loadbalancing - for (int i = 0; i < do_load_balance; i++) - load_balance(); - } - - int numpairs = 0; - for (int ii = 0; ii < list->inum; ii++) { - if ((do_load_balance 
<= 0) || ilistmask[ii]) { - int i = list->ilist[ii]; - int jnum = list->numneigh[i]; - numpairs += jnum; - } - } - - if (do_load_balance) - for (int ii = 0; ii < ghostinum; ii++) { - int i = ghostilist[ii]; - int jnum = ghostnumneigh[i]; - numpairs += jnum; - } - - // optimized schedule setting - - int time_dynamic = 0; - int time_guided = 0; - - if (schedule_user == 0) schedule_user = 4; - - switch (schedule_user) { - case 1: - omp_set_schedule(omp_sched_static,1); - break; - case 2: - omp_set_schedule(omp_sched_dynamic,1); - break; - case 3: - omp_set_schedule(omp_sched_guided,2); - break; - case 4: - omp_set_schedule(omp_sched_auto,0); - break; - case 5: - if (numpairs < 8*nthreads) omp_set_schedule(omp_sched_dynamic,1); - else if (schedule_time_guided < 0.0) { - omp_set_schedule(omp_sched_guided,2); - if (!eflag && !vflag) time_guided = 1; - } else if (schedule_time_dynamic<0.0) { - omp_set_schedule(omp_sched_dynamic,1); - if (!eflag && !vflag) time_dynamic = 1; - } else if (schedule_time_guidedcreate(pairs_tid_unique,numpairs,4,"numpairs"); - pairs = pairs_tid_unique; - } - - if (!use_shared_arrays) { - numpairs = 0; - for (int ii = 0; ii < list->inum; ii++) { - if ((do_load_balance <= 0) || ilistmask[ii]) { - int i = list->ilist[ii]; - int jnum = list->numneigh[i]; - for (int jj = 0; jjx; - double **f = atom->f; - int *type = atom->type; - int nlocal = atom->nlocal; - int newton_pair = force->newton_pair; - - numneigh = list->numneigh; - firstneigh = list->firstneigh; - -#ifdef TIMING_INFO - // only update micro timers after setup - static int count=0; - if (count<2) { - sna[tid]->timers[0] = 0; - sna[tid]->timers[1] = 0; - sna[tid]->timers[2] = 0; - sna[tid]->timers[3] = 0; - sna[tid]->timers[4] = 0; - } - count++; -#endif - - // did thread start working on interactions of new atom - int iold = -1; - - double starttime, endtime; - if (time_dynamic || time_guided) - starttime = MPI_Wtime(); - -#if defined(_OPENMP) -#pragma omp for schedule(runtime) -#endif - 
for (int iijj = 0; iijj < numpairs; iijj++) { - int i = 0; - if (use_shared_arrays) { - i = i_pairs[iijj][0]; - if (iold != i) { - set_sna_to_shared(tid,i_pairs[iijj][3]); - ielem = map[type[i]]; - } - iold = i; - } else { - i = pairs[iijj][0]; - if (iold != i) { - iold = i; - const double xtmp = x[i][0]; - const double ytmp = x[i][1]; - const double ztmp = x[i][2]; - const int itype = type[i]; - ielem = map[itype]; - const double radi = radelem[ielem]; - - if (i < nlocal) { - jlist = firstneigh[i]; - jnum = numneigh[i]; - } else { - jlist = ghostneighs+ghostfirstneigh[i]; - jnum = ghostnumneigh[i]; - } - - // insure rij, inside, wj, and rcutij are of size jnum - - sna[tid]->grow_rij(jnum); - - // rij[][3] = displacements between atom I and those neighbors - // inside = indices of neighbors of I within cutoff - // wj = weights of neighbors of I within cutoff - // rcutij = cutoffs of neighbors of I within cutoff - // note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi - - ninside = 0; - for (jj = 0; jj < jnum; jj++) { - int j = jlist[jj]; - j &= NEIGHMASK; - delx = x[j][0] - xtmp; //unitialised - dely = x[j][1] - ytmp; - delz = x[j][2] - ztmp; - rsq = delx*delx + dely*dely + delz*delz; - jtype = type[j]; - int jelem = map[jtype]; - - if (rsq < cutsq[itype][jtype]&&rsq>1e-20) { //unitialised - sna[tid]->rij[ninside][0] = delx; - sna[tid]->rij[ninside][1] = dely; - sna[tid]->rij[ninside][2] = delz; - sna[tid]->inside[ninside] = j; - sna[tid]->wj[ninside] = wjelem[jelem]; - sna[tid]->rcutij[ninside] = (radi + radelem[jelem])*rcutfac; - ninside++; - - // update index list with inside index - pairs[iijj + (jj - pairs[iijj][1])][2] = - ninside-1; //unitialised - } - } - - // compute Ui and Zi for atom I - - sna[tid]->compute_ui(ninside); //unitialised - sna[tid]->compute_zi(); - } - } - if (quadraticflag) { - sna[tid]->compute_bi(); - sna[tid]->copy_bi2bvec(); - } - - // for neighbors of I within cutoff: - // compute dUi/drj and dBi/drj - // Fij = dEi/dRj = -dEi/dRi => 
add to Fi, subtract from Fj - - // entry into loop if inside index is set - - double* coeffi = coeffelem[ielem]; - - if (pairs[iijj][2] >= 0) { - jj = pairs[iijj][2]; - int j = sna[tid]->inside[jj]; - sna[tid]->compute_duidrj(sna[tid]->rij[jj], - sna[tid]->wj[jj],sna[tid]->rcutij[jj]); - - sna[tid]->compute_dbidrj(); - sna[tid]->copy_dbi2dbvec(); - - fij[0] = 0.0; - fij[1] = 0.0; - fij[2] = 0.0; - - // linear contributions - - for (k = 1; k <= ncoeff; k++) { - double bgb = coeffi[k]; - fij[0] += bgb*sna[tid]->dbvec[k-1][0]; - fij[1] += bgb*sna[tid]->dbvec[k-1][1]; - fij[2] += bgb*sna[tid]->dbvec[k-1][2]; - } - - // quadratic contributions - - if (quadraticflag) { - int k = ncoeff+1; - for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - double bveci = sna[tid]->bvec[icoeff]; - double fack = coeffi[k]*bveci; - double* dbveci = sna[tid]->dbvec[icoeff]; - fij[0] += fack*sna[tid]->dbvec[icoeff][0]; - fij[1] += fack*sna[tid]->dbvec[icoeff][1]; - fij[2] += fack*sna[tid]->dbvec[icoeff][2]; - k++; - for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { - double facki = coeffi[k]*bveci; - double fackj = coeffi[k]*sna[tid]->bvec[jcoeff]; - double* dbvecj = sna[tid]->dbvec[jcoeff]; - fij[0] += facki*dbvecj[0]+fackj*dbveci[0]; - fij[1] += facki*dbvecj[1]+fackj*dbveci[1]; - fij[2] += facki*dbvecj[2]+fackj*dbveci[2]; - k++; - } - } - } - -#if defined(_OPENMP) -#pragma omp critical -#endif - { - f[i][0] += fij[0]; - f[i][1] += fij[1]; - f[i][2] += fij[2]; - f[j][0] -= fij[0]; - f[j][1] -= fij[1]; - f[j][2] -= fij[2]; - - // tally per-atom virial contribution - - if (vflag) - ev_tally_xyz(i,j,nlocal,newton_pair,0.0,0.0, - fij[0],fij[1],fij[2], - -sna[tid]->rij[jj][0],-sna[tid]->rij[jj][1], - -sna[tid]->rij[jj][2]); - } - } - - // evdwl = energy of atom I, sum over coeffs_k * Bi_k - // only call this for first pair of each atom i - // if atom has no pairs, eatom=0, which is wrong - - if (eflag&&pairs[iijj][1] == 0) { - evdwl = coeffi[0]; - - if (!quadraticflag) { - 
sna[tid]->compute_bi(); - sna[tid]->copy_bi2bvec(); - } - - // E = beta.B + 0.5*B^t.alpha.B - // coeff[k] = beta[k-1] or - // coeff[k] = alpha_ii or - // coeff[k] = alpha_ij = alpha_ji, j != i - - // linear contributions - - for (int k = 1; k <= ncoeff; k++) - evdwl += coeffi[k]*sna[tid]->bvec[k-1]; - - // quadratic contributions - - if (quadraticflag) { - int k = ncoeff+1; - for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - double bveci = sna[tid]->bvec[icoeff]; - evdwl += 0.5*coeffi[k++]*bveci*bveci; - for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { - evdwl += coeffi[k++]*bveci*sna[tid]->bvec[jcoeff]; - } - } - } - -#if defined(_OPENMP) -#pragma omp critical -#endif - ev_tally_full(i,2.0*evdwl,0.0,0.0,0.0,0.0,0.0); - } - - } - if (time_dynamic || time_guided) - endtime = MPI_Wtime(); - if (time_dynamic) schedule_time_dynamic = endtime - starttime; - if (time_guided) schedule_time_guided = endtime - starttime; - if (!use_shared_arrays) memory->destroy(pairs); - - }// end of pragma omp parallel - - if (vflag_fdotr) virial_fdotr_compute(); - -} - -inline int PairNNSNAP::equal(double* x,double* y) -{ - double dist2 = - (x[0]-y[0])*(x[0]-y[0]) + - (x[1]-y[1])*(x[1]-y[1]) + - (x[2]-y[2])*(x[2]-y[2]); - if (dist2 < 1e-20) return 1; - return 0; -} - -inline double PairNNSNAP::dist2(double* x,double* y) -{ - return - (x[0]-y[0])*(x[0]-y[0]) + - (x[1]-y[1])*(x[1]-y[1]) + - (x[2]-y[2])*(x[2]-y[2]); -} - -// return extra communication cutoff -// extra_cutoff = max(subdomain_length) - -double PairNNSNAP::extra_cutoff() -{ - double sublo[3],subhi[3]; - - if (domain->triclinic == 0) { - for (int dim = 0 ; dim < 3 ; dim++) { - sublo[dim] = domain->sublo[dim]; - subhi[dim] = domain->subhi[dim]; - } - } else { - domain->lamda2x(domain->sublo_lamda,sublo); - domain->lamda2x(domain->subhi_lamda,subhi); - } - - double sub_size[3]; - for (int dim = 0; dim < 3; dim++) - sub_size[dim] = subhi[dim] - sublo[dim]; - - double max_sub_size = 0; - for (int dim = 0; dim < 3; dim++) 
- max_sub_size = MAX(max_sub_size,sub_size[dim]); - - // note: for triclinic, probably need something different - // see Comm::setup() - - return max_sub_size; -} - -// micro load_balancer: each MPI process will -// check with each of its 26 neighbors, -// whether an imbalance exists in the number -// of atoms to calculate forces for. -// If it does it will set ilistmask of one of -// its local atoms to zero, and send its Tag -// to the neighbor process. The neighboring process -// will check its ghost list for the -// ghost atom with the same Tag which is closest -// to its domain center, and build a -// neighborlist for this ghost atom. For this to work, -// the communication cutoff has to be -// as large as the neighbor cutoff + -// maximum subdomain length. - -// Note that at most one atom is exchanged per processor pair. - -// Also note that the local atom assignment -// doesn't change. This load balancer will cause -// some ghost atoms to have full neighborlists -// which are unique to PairNNSNAP. -// They are not part of the generally accessible neighborlist. -// At the same time corresponding local atoms on -// other MPI processes will not be -// included in the force computation since -// their ilistmask is 0. This does not effect -// any other classes which might -// access the same general neighborlist. -// Reverse communication (newton on) of forces is required. - -// Currently the load balancer does two passes, -// since its exchanging atoms upstream and downstream. 
- -void PairNNSNAP::load_balance() -{ - double sublo[3],subhi[3]; - if (domain->triclinic == 0) { - double* sublotmp = domain->sublo; - double* subhitmp = domain->subhi; - for (int dim = 0 ; dim<3 ; dim++) { - sublo[dim]=sublotmp[dim]; - subhi[dim]=subhitmp[dim]; - } - } else { - double* sublotmp = domain->sublo_lamda; - double* subhitmp = domain->subhi_lamda; - domain->lamda2x(sublotmp,sublo); - domain->lamda2x(subhitmp,subhi); - } - - //if (list->inum==0) list->grow(atom->nmax); - - int nlocal = ghostinum; - for (int i=0; i < list->inum; i++) - if (ilistmask[i]) nlocal++; - int ***grid2proc = comm->grid2proc; - int* procgrid = comm->procgrid; - - int nlocal_up,nlocal_down; - MPI_Request request; - - double sub_mid[3]; - for (int dim=0; dim<3; dim++) - sub_mid[dim] = (subhi[dim] + sublo[dim])/2; - - if (comm->cutghostuser < - neighbor->cutneighmax+extra_cutoff()) - error->all(FLERR,"Communication cutoff too small for SNAP micro load balancing"); - - int nrecv = ghostinum; - int totalsend = 0; - int nsend = 0; - int depth = 1; - - for (int dx = -depth; dx < depth+1; dx++) - for (int dy = -depth; dy < depth+1; dy++) - for (int dz = -depth; dz < depth+1; dz++) { - - if (dx == dy && dy == dz && dz == 0) continue; - - int sendloc[3] = {comm->myloc[0], - comm->myloc[1], comm->myloc[2] - }; - sendloc[0] += dx; - sendloc[1] += dy; - sendloc[2] += dz; - for (int dim = 0; dim < 3; dim++) - if (sendloc[dim] >= procgrid[dim]) - sendloc[dim] = sendloc[dim] - procgrid[dim]; - for (int dim = 0; dim < 3; dim++) - if (sendloc[dim] < 0) - sendloc[dim] = procgrid[dim] + sendloc[dim]; - int recvloc[3] = {comm->myloc[0], - comm->myloc[1], comm->myloc[2] - }; - recvloc[0] -= dx; - recvloc[1] -= dy; - recvloc[2] -= dz; - for (int dim = 0; dim < 3; dim++) - if (recvloc[dim] < 0) - recvloc[dim] = procgrid[dim] + recvloc[dim]; - for (int dim = 0; dim < 3; dim++) - if (recvloc[dim] >= procgrid[dim]) - recvloc[dim] = recvloc[dim] - procgrid[dim]; - - int sendproc = 
grid2proc[sendloc[0]][sendloc[1]][sendloc[2]]; - int recvproc = grid2proc[recvloc[0]][recvloc[1]][recvloc[2]]; - - // two stage process, first upstream movement, then downstream - - MPI_Sendrecv(&nlocal,1,MPI_INT,sendproc,0, - &nlocal_up,1,MPI_INT,recvproc,0,world,MPI_STATUS_IGNORE); - MPI_Sendrecv(&nlocal,1,MPI_INT,recvproc,0, - &nlocal_down,1,MPI_INT,sendproc,0,world,MPI_STATUS_IGNORE); - nsend = 0; - - // send upstream - - if (nlocal > nlocal_up+1) { - - int i = totalsend++; - while(i < list->inum && ilistmask[i] == 0) - i = totalsend++; - - if (i < list->inum) - MPI_Isend(&atom->tag[i],1,MPI_INT,recvproc,0,world,&request); - else { - int j = -1; - MPI_Isend(&j,1,MPI_INT,recvproc,0,world,&request); - } - - if (i < list->inum) { - for (int j = 0; j < list->inum; j++) - if (list->ilist[j] == i) - ilistmask[j] = 0; - nsend = 1; - } - } - - // recv downstream - - if (nlocal < nlocal_down-1) { - nlocal++; - int get_tag = -1; - MPI_Recv(&get_tag,1,MPI_INT,sendproc,0,world,MPI_STATUS_IGNORE); - - // if get_tag -1 the other process didnt have local atoms to send - - if (get_tag >= 0) { - if (ghostinum >= ghostilist_max) { - memory->grow(ghostilist,ghostinum+10, - "PairSnap::ghostilist"); - ghostilist_max = ghostinum+10; - } - if (atom->nlocal + atom->nghost >= ghostnumneigh_max) { - ghostnumneigh_max = atom->nlocal+atom->nghost+100; - memory->grow(ghostnumneigh,ghostnumneigh_max, - "PairSnap::ghostnumneigh"); - memory->grow(ghostfirstneigh,ghostnumneigh_max, - "PairSnap::ghostfirstneigh"); - } - - // find closest ghost image of the transfered particle - - double mindist = 1e200; - int closestghost = -1; - for (int j = 0; j < atom->nlocal + atom->nghost; j++) - if (atom->tag[j] == get_tag) - if (dist2(sub_mid, atom->x[j]) < mindist) { - closestghost = j; - mindist = dist2(sub_mid, atom->x[j]); - } - - // build neighborlist for this particular - // ghost atom, and add it to list->ilist - - if (ghostneighs_max - ghostneighs_total < - neighbor->oneatom) { - 
memory->grow(ghostneighs, - ghostneighs_total + neighbor->oneatom, - "PairSnap::ghostneighs"); - ghostneighs_max = ghostneighs_total + neighbor->oneatom; - } - - int j = closestghost; - - ghostilist[ghostinum] = j; - ghostnumneigh[j] = 0; - ghostfirstneigh[j] = ghostneighs_total; - - ghostinum++; - int* jlist = ghostneighs + ghostfirstneigh[j]; - - // find all neighbors by looping - // over all local and ghost atoms - - for (int k = 0; k < atom->nlocal + atom->nghost; k++) - if (dist2(atom->x[j],atom->x[k]) < - neighbor->cutneighmax*neighbor->cutneighmax) { - jlist[ghostnumneigh[j]] = k; - ghostnumneigh[j]++; - ghostneighs_total++; - } - } - - if (get_tag >= 0) nrecv++; - } - - // decrease nlocal later, so that it is the - // initial number both for receiving and sending - - if (nsend) nlocal--; - - // second pass through the grid - - MPI_Sendrecv(&nlocal,1,MPI_INT,sendproc,0, - &nlocal_up,1,MPI_INT,recvproc,0,world,MPI_STATUS_IGNORE); - MPI_Sendrecv(&nlocal,1,MPI_INT,recvproc,0, - &nlocal_down,1,MPI_INT,sendproc,0,world,MPI_STATUS_IGNORE); - - // send downstream - - nsend=0; - if (nlocal > nlocal_down+1) { - int i = totalsend++; - while(i < list->inum && ilistmask[i]==0) i = totalsend++; - - if (i < list->inum) - MPI_Isend(&atom->tag[i],1,MPI_INT,sendproc,0,world,&request); - else { - int j =- 1; - MPI_Isend(&j,1,MPI_INT,sendproc,0,world,&request); - } - - if (i < list->inum) { - for (int j=0; jinum; j++) - if (list->ilist[j] == i) ilistmask[j] = 0; - nsend = 1; - } - } - - // receive upstream - - if (nlocal < nlocal_up-1) { - nlocal++; - int get_tag = -1; - - MPI_Recv(&get_tag,1,MPI_INT,recvproc,0,world,MPI_STATUS_IGNORE); - - if (get_tag >= 0) { - if (ghostinum >= ghostilist_max) { - memory->grow(ghostilist,ghostinum+10, - "PairSnap::ghostilist"); - ghostilist_max = ghostinum+10; - } - if (atom->nlocal + atom->nghost >= ghostnumneigh_max) { - ghostnumneigh_max = atom->nlocal + atom->nghost + 100; - memory->grow(ghostnumneigh,ghostnumneigh_max, - 
"PairSnap::ghostnumneigh"); - memory->grow(ghostfirstneigh,ghostnumneigh_max, - "PairSnap::ghostfirstneigh"); - } - - // find closest ghost image of the transfered particle - - double mindist = 1e200; - int closestghost = -1; - for (int j = 0; j < atom->nlocal + atom->nghost; j++) - if (atom->tag[j] == get_tag) - if (dist2(sub_mid,atom->x[j])x[j]); - } - - // build neighborlist for this particular ghost atom - - if (ghostneighs_max-ghostneighs_total < neighbor->oneatom) { - memory->grow(ghostneighs,ghostneighs_total + neighbor->oneatom, - "PairSnap::ghostneighs"); - ghostneighs_max = ghostneighs_total + neighbor->oneatom; - } - - int j = closestghost; - - ghostilist[ghostinum] = j; - ghostnumneigh[j] = 0; - ghostfirstneigh[j] = ghostneighs_total; - - ghostinum++; - int* jlist = ghostneighs + ghostfirstneigh[j]; - - for (int k = 0; k < atom->nlocal + atom->nghost; k++) - if (dist2(atom->x[j],atom->x[k]) < - neighbor->cutneighmax*neighbor->cutneighmax) { - jlist[ghostnumneigh[j]] = k; - ghostnumneigh[j]++; - ghostneighs_total++; - } - } - - if (get_tag >= 0) nrecv++; - } - if (nsend) nlocal--; - } -} - -void PairNNSNAP::set_sna_to_shared(int snaid,int i) -{ - sna[snaid]->rij = i_rij[i]; - sna[snaid]->inside = i_inside[i]; - sna[snaid]->wj = i_wj[i]; - sna[snaid]->rcutij = i_rcutij[i]; - sna[snaid]->zarray_r = i_zarray_r[i]; - sna[snaid]->zarray_i = i_zarray_i[i]; - sna[snaid]->uarraytot_r = i_uarraytot_r[i]; - sna[snaid]->uarraytot_i = i_uarraytot_i[i]; -} - -void PairNNSNAP::build_per_atom_arrays() -{ - -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME,&starttime); -#endif - - int count = 0; - int neighmax = 0; - for (int ii = 0; ii < list->inum; ii++) - if ((do_load_balance <= 0) || ilistmask[ii]) { - neighmax=MAX(neighmax,list->numneigh[list->ilist[ii]]); - ++count; - } - for (int ii = 0; ii < ghostinum; ii++) { - neighmax=MAX(neighmax,ghostnumneigh[ghostilist[ii]]); - ++count; - } - - if (i_max < count || i_neighmax < neighmax) { - int i_maxt = MAX(count,i_max); 
- i_neighmax = MAX(neighmax,i_neighmax); - memory->destroy(i_rij); - memory->destroy(i_inside); - memory->destroy(i_wj); - memory->destroy(i_rcutij); - memory->destroy(i_ninside); - memory->destroy(i_pairs); - memory->create(i_rij,i_maxt,i_neighmax,3,"PairNNSNAP::i_rij"); - memory->create(i_inside,i_maxt,i_neighmax,"PairNNSNAP::i_inside"); - memory->create(i_wj,i_maxt,i_neighmax,"PairNNSNAP::i_wj"); - memory->create(i_rcutij,i_maxt,i_neighmax,"PairNNSNAP::i_rcutij"); - memory->create(i_ninside,i_maxt,"PairNNSNAP::i_ninside"); - memory->create(i_pairs,i_maxt*i_neighmax,4,"PairNNSNAP::i_pairs"); - } - - if (i_max < count) { - int jdim = sna[0]->twojmax+1; - memory->destroy(i_uarraytot_r); - memory->destroy(i_uarraytot_i); - memory->create(i_uarraytot_r,count,jdim,jdim,jdim, - "PairNNSNAP::i_uarraytot_r"); - memory->create(i_uarraytot_i,count,jdim,jdim,jdim, - "PairNNSNAP::i_uarraytot_i"); - if (i_zarray_r != NULL) - for (int i = 0; i < i_max; i++) { - memory->destroy(i_zarray_r[i]); - memory->destroy(i_zarray_i[i]); - } - - delete [] i_zarray_r; - delete [] i_zarray_i; - i_zarray_r = new double*****[count]; - i_zarray_i = new double*****[count]; - for (int i = 0; i < count; i++) { - memory->create(i_zarray_r[i],jdim,jdim,jdim,jdim,jdim, - "PairNNSNAP::i_zarray_r"); - memory->create(i_zarray_i[i],jdim,jdim,jdim,jdim,jdim, - "PairNNSNAP::i_zarray_i"); - } - } - - if (i_max < count) - i_max = count; - - count = 0; - i_numpairs = 0; - for (int ii = 0; ii < list->inum; ii++) { - if ((do_load_balance <= 0) || ilistmask[ii]) { - int i = list->ilist[ii]; - int jnum = list->numneigh[i]; - int* jlist = list->firstneigh[i]; - const double xtmp = atom->x[i][0]; - const double ytmp = atom->x[i][1]; - const double ztmp = atom->x[i][2]; - const int itype = atom->type[i]; - const int ielem = map[itype]; - const double radi = radelem[ielem]; - int ninside = 0; - for (int jj = 0; jj < jnum; jj++) { - int j = jlist[jj]; - j &= NEIGHMASK; - const double delx = atom->x[j][0] - xtmp; - 
const double dely = atom->x[j][1] - ytmp; - const double delz = atom->x[j][2] - ztmp; - const double rsq = delx*delx + dely*dely + delz*delz; - int jtype = atom->type[j]; - int jelem = map[jtype]; - - i_pairs[i_numpairs][0] = i; - i_pairs[i_numpairs][1] = jj; - i_pairs[i_numpairs][2] = -1; - i_pairs[i_numpairs][3] = count; - if (rsq < cutsq[itype][jtype]&&rsq>1e-20) { - i_rij[count][ninside][0] = delx; - i_rij[count][ninside][1] = dely; - i_rij[count][ninside][2] = delz; - i_inside[count][ninside] = j; - i_wj[count][ninside] = wjelem[jelem]; - i_rcutij[count][ninside] = (radi + radelem[jelem])*rcutfac; - - // update index list with inside index - i_pairs[i_numpairs][2] = ninside++; - } - i_numpairs++; - } - i_ninside[count] = ninside; - count++; - } - } - - for (int ii = 0; ii < ghostinum; ii++) { - int i = ghostilist[ii]; - int jnum = ghostnumneigh[i]; - int* jlist = ghostneighs+ghostfirstneigh[i]; - const double xtmp = atom->x[i][0]; - const double ytmp = atom->x[i][1]; - const double ztmp = atom->x[i][2]; - const int itype = atom->type[i]; - const int ielem = map[itype]; - const double radi = radelem[ielem]; - int ninside = 0; - - for (int jj = 0; jj < jnum; jj++) { - int j = jlist[jj]; - j &= NEIGHMASK; - const double delx = atom->x[j][0] - xtmp; - const double dely = atom->x[j][1] - ytmp; - const double delz = atom->x[j][2] - ztmp; - const double rsq = delx*delx + dely*dely + delz*delz; - int jtype = atom->type[j]; - int jelem = map[jtype]; - - i_pairs[i_numpairs][0] = i; - i_pairs[i_numpairs][1] = jj; - i_pairs[i_numpairs][2] = -1; - i_pairs[i_numpairs][3] = count; - if (rsq < cutsq[itype][jtype]&&rsq>1e-20) { - i_rij[count][ninside][0] = delx; - i_rij[count][ninside][1] = dely; - i_rij[count][ninside][2] = delz; - i_inside[count][ninside] = j; - i_wj[count][ninside] = wjelem[jelem]; - i_rcutij[count][ninside] = (radi + radelem[jelem])*rcutfac; - // update index list with inside index - i_pairs[i_numpairs][2] = ninside++; - } - i_numpairs++; - } - 
i_ninside[count] = ninside; - count++; - } -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME,&endtime); - timers[0]+=(endtime.tv_sec-starttime.tv_sec+1.0* - (endtime.tv_nsec-starttime.tv_nsec)/1000000000); -#endif -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME,&starttime); -#endif - -#if defined(_OPENMP) -#pragma omp parallel for shared(count) default(none) -#endif - for (int ii=0; ii < count; ii++) { - int tid = omp_get_thread_num(); - set_sna_to_shared(tid,ii); - //sna[tid]->compute_ui(i_ninside[ii]); -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME,&starttime); -#endif - sna[tid]->compute_ui_omp(i_ninside[ii],MAX(int(nthreads/count),1)); -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME,&endtime); - sna[tid]->timers[0]+=(endtime.tv_sec-starttime.tv_sec+1.0* - (endtime.tv_nsec-starttime.tv_nsec)/1000000000); -#endif - } - -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME,&starttime); -#endif - for (int ii=0; ii < count; ii++) { - int tid = 0;//omp_get_thread_num(); - set_sna_to_shared(tid,ii); - sna[tid]->compute_zi_omp(MAX(int(nthreads/count),1)); - } -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME,&endtime); - sna[0]->timers[1]+=(endtime.tv_sec-starttime.tv_sec+1.0* - (endtime.tv_nsec-starttime.tv_nsec)/1000000000); -#endif - -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME,&endtime); - timers[1]+=(endtime.tv_sec-starttime.tv_sec+1.0* - (endtime.tv_nsec-starttime.tv_nsec)/1000000000); -#endif -} - -/* ---------------------------------------------------------------------- - compute beta -------------------------------------------------------------------------- */ - -void PairNNSNAP::compute_beta() -{ - int i; - int *type = atom->type; - - for (int ii = 0; ii < list->inum; ii++) { - i = list->ilist[ii]; - const int itype = type[i]; - const int ielem = map[itype]; - double* coeffi = coeffelem[ielem]; - - for (int k = 1; k <= ncoeff; k++) - beta[ii][k-1] = coeffi[k]; - } -} - -/* ---------------------------------------------------------------------- - 
allocate all arrays -------------------------------------------------------------------------- */ - -void PairNNSNAP::allocate() -{ - allocated = 1; - int n = atom->ntypes; - - memory->create(setflag,n+1,n+1,"pair:setflag"); - memory->create(cutsq,n+1,n+1,"pair:cutsq"); - memory->create(map,n+1,"pair:map"); -} - -/* ---------------------------------------------------------------------- - global settings -------------------------------------------------------------------------- */ - -void PairNNSNAP::settings(int narg, char **arg) -{ - - // set default values for optional arguments - - nthreads = -1; - use_shared_arrays=-1; - do_load_balance = 0; - use_optimized = 1; - - // optional arguments - - for (int i=0; i < narg; i++) { - if (i+2>narg) error->all(FLERR,"Illegal pair_style command"); - if (strcmp(arg[i],"nthreads")==0) { - nthreads=force->inumeric(FLERR,arg[++i]); -#if defined(LMP_USER_OMP) - error->all(FLERR,"Must set number of threads via package omp command"); -#else - omp_set_num_threads(nthreads); - comm->nthreads=nthreads; -#endif - continue; - } - if (strcmp(arg[i],"optimized")==0) { - use_optimized=force->inumeric(FLERR,arg[++i]); - continue; - } - if (strcmp(arg[i],"shared")==0) { - use_shared_arrays=force->inumeric(FLERR,arg[++i]); - continue; - } - if (strcmp(arg[i],"loadbalance")==0) { - do_load_balance = force->inumeric(FLERR,arg[++i]); - if (do_load_balance) { - double mincutoff = extra_cutoff() + - rcutmax + neighbor->skin; - if (comm->cutghostuser < mincutoff) { - char buffer[255]; - - //apparently mincutoff is 0 after sprintf command ????? 
- - double tmp = mincutoff + 0.1; - sprintf(buffer, "Communication cutoff is too small " - "for SNAP micro load balancing, increased to %lf", - mincutoff+0.1); - if (comm->me==0) - error->warning(FLERR,buffer); - - comm->cutghostuser = tmp; - - } - } - continue; - } - if (strcmp(arg[i],"schedule")==0) { - i++; - if (strcmp(arg[i],"static")==0) - schedule_user = 1; - if (strcmp(arg[i],"dynamic")==0) - schedule_user = 2; - if (strcmp(arg[i],"guided")==0) - schedule_user = 3; - if (strcmp(arg[i],"auto")==0) - schedule_user = 4; - if (strcmp(arg[i],"determine")==0) - schedule_user = 5; - if (schedule_user == 0) - error->all(FLERR,"Illegal pair_style command"); - continue; - } - error->all(FLERR,"Illegal pair_style command"); - } - - if (nthreads < 0) - nthreads = comm->nthreads; - - if (use_shared_arrays < 0) { - if (nthreads > 1 && atom->nlocal <= 2*nthreads) - use_shared_arrays = 1; - else use_shared_arrays = 0; - } - - // check if running non-optimized code with - // optimization flags set - - if (!use_optimized) - if (nthreads > 1 || - use_shared_arrays || - do_load_balance || - schedule_user) - error->all(FLERR,"Illegal pair_style command"); -} - -/* ---------------------------------------------------------------------- - set coeffs for one or more type pairs -------------------------------------------------------------------------- */ - -void PairNNSNAP::coeff(int narg, char **arg) -{ - if (narg < 5) error->all(FLERR,"Incorrect args for pair coefficients"); - if (!allocated) allocate(); - - if (nelements) { - for (int i = 0; i < nelements; i++) - delete[] elements[i]; - delete[] elements; - memory->destroy(radelem); - memory->destroy(wjelem); - memory->destroy(coeffelem); - memory->destroy(beta); - } - - char* type1 = arg[0]; - char* type2 = arg[1]; - char* coefffilename = arg[2]; - char* paramfilename = arg[3]; - char** elemtypes = &arg[4]; - - // insure I,J args are * * - - if (strcmp(type1,"*") != 0 || strcmp(type2,"*") != 0) - error->all(FLERR,"Incorrect args 
for pair coefficients"); - - // read snapcoeff and snapparam files - - read_files(coefffilename,paramfilename); - - if (!quadraticflag) - ncoeff = ncoeffall - 1; - else { - - // ncoeffall should be (ncoeff+2)*(ncoeff+1)/2 - // so, ncoeff = floor(sqrt(2*ncoeffall))-1 - - ncoeff = sqrt(2*ncoeffall)-1; - ncoeffq = (ncoeff*(ncoeff+1))/2; - int ntmp = 1+ncoeff+ncoeffq; - if (ntmp != ncoeffall) { - printf("ncoeffall = %d ntmp = %d ncoeff = %d \n",ncoeffall,ntmp,ncoeff); - error->all(FLERR,"Incorrect SNAP coeff file"); - } - } - - // read args that map atom types to SNAP elements - // map[i] = which element the Ith atom type is, -1 if not mapped - // map[0] is not used - - for (int i = 1; i <= atom->ntypes; i++) { - char* elemname = elemtypes[i-1]; - int jelem; - for (jelem = 0; jelem < nelements; jelem++) - if (strcmp(elemname,elements[jelem]) == 0) - break; - - if (jelem < nelements) - map[i] = jelem; - else if (strcmp(elemname,"NULL") == 0) map[i] = -1; - else error->all(FLERR,"Incorrect args for pair coefficients"); - } - - // clear setflag since coeff() called once with I,J = * * - - int n = atom->ntypes; - for (int i = 1; i <= n; i++) - for (int j = i; j <= n; j++) - setflag[i][j] = 0; - - // set setflag i,j for type pairs where both are mapped to elements - - int count = 0; - for (int i = 1; i <= n; i++) - for (int j = i; j <= n; j++) - if (map[i] >= 0 && map[j] >= 0) { - setflag[i][j] = 1; - count++; - } - - if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); - - sna = new SNA*[nthreads]; - - // allocate memory for per OpenMP thread data which - // is wrapped into the sna class - -#if defined(_OPENMP) -#pragma omp parallel default(none) -#endif - { - int tid = omp_get_thread_num(); - sna[tid] = new SNA(lmp,rfac0,twojmax, - diagonalstyle,use_shared_arrays, - rmin0,switchflag,bzeroflag); - if (!use_shared_arrays) - sna[tid]->grow_rij(nmax); - } - - if (ncoeff != sna[0]->ncoeff) { - if (comm->me == 0) - printf("ncoeff = %d snancoeff = %d 
\n",ncoeff,sna[0]->ncoeff); - error->all(FLERR,"Incorrect SNAP parameter file"); - } - - // Calculate maximum cutoff for all elements - - rcutmax = 0.0; - for (int ielem = 0; ielem < nelements; ielem++) - rcutmax = MAX(2.0*radelem[ielem]*rcutfac,rcutmax); - -} - -/* ---------------------------------------------------------------------- - init specific to this pair style -------------------------------------------------------------------------- */ - -void PairNNSNAP::init_style() -{ - if (force->newton_pair == 0) - error->all(FLERR,"Pair style SNAP requires newton pair on"); - - // need a full neighbor list - - int irequest = neighbor->request(this,instance_me); - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full = 1; - -#if defined(_OPENMP) -#pragma omp parallel default(none) -#endif - { - int tid = omp_get_thread_num(); - sna[tid]->init(); - } - -} - -/* ---------------------------------------------------------------------- - init for one type pair i,j and corresponding j,i -------------------------------------------------------------------------- */ - -double PairNNSNAP::init_one(int i, int j) -{ - if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set"); - return (radelem[map[i]] + - radelem[map[j]])*rcutfac; -} - -/* ---------------------------------------------------------------------- */ - -void PairNNSNAP::read_files(char *coefffilename, char *paramfilename) -{ - - // open SNAP coefficient file on proc 0 - - FILE *fpcoeff; - if (comm->me == 0) { - fpcoeff = force->open_potential(coefffilename); - if (fpcoeff == NULL) { - char str[128]; - snprintf(str,128,"Cannot open SNAP coefficient file %s",coefffilename); - error->one(FLERR,str); - } - } - - char line[MAXLINE],*ptr; - int eof = 0; - - int n; - int nwords = 0; - while (nwords == 0) { - if (comm->me == 0) { - ptr = fgets(line,MAXLINE,fpcoeff); - if (ptr == NULL) { - eof = 1; - fclose(fpcoeff); - } else n = strlen(line) + 1; - } - MPI_Bcast(&eof,1,MPI_INT,0,world); - 
if (eof) break; - MPI_Bcast(&n,1,MPI_INT,0,world); - MPI_Bcast(line,n,MPI_CHAR,0,world); - - // strip comment, skip line if blank - - if ((ptr = strchr(line,'#'))) *ptr = '\0'; - nwords = atom->count_words(line); - } - if (nwords != 2) - error->all(FLERR,"Incorrect format in SNAP coefficient file"); - - // words = ptrs to all words in line - // strip single and double quotes from words - - char* words[MAXWORD]; - int iword = 0; - words[iword] = strtok(line,"' \t\n\r\f"); - iword = 1; - words[iword] = strtok(NULL,"' \t\n\r\f"); - - nelements = atoi(words[0]); - ncoeffall = atoi(words[1]); - - // set up element lists - - elements = new char*[nelements]; - memory->create(radelem,nelements,"pair:radelem"); - memory->create(wjelem,nelements,"pair:wjelem"); - memory->create(coeffelem,nelements,ncoeffall,"pair:coeffelem"); - - // Loop over nelements blocks in the SNAP coefficient file - - for (int ielem = 0; ielem < nelements; ielem++) { - - if (comm->me == 0) { - ptr = fgets(line,MAXLINE,fpcoeff); - if (ptr == NULL) { - eof = 1; - fclose(fpcoeff); - } else n = strlen(line) + 1; - } - MPI_Bcast(&eof,1,MPI_INT,0,world); - if (eof) - error->all(FLERR,"Incorrect format in SNAP coefficient file"); - MPI_Bcast(&n,1,MPI_INT,0,world); - MPI_Bcast(line,n,MPI_CHAR,0,world); - - nwords = atom->count_words(line); - if (nwords != 3) - error->all(FLERR,"Incorrect format in SNAP coefficient file"); - - iword = 0; - words[iword] = strtok(line,"' \t\n\r\f"); - iword = 1; - words[iword] = strtok(NULL,"' \t\n\r\f"); - iword = 2; - words[iword] = strtok(NULL,"' \t\n\r\f"); - - char* elemtmp = words[0]; - int n = strlen(elemtmp) + 1; - elements[ielem] = new char[n]; - strcpy(elements[ielem],elemtmp); - - radelem[ielem] = atof(words[1]); - wjelem[ielem] = atof(words[2]); - - - if (comm->me == 0) { - if (screen) fprintf(screen,"SNAP Element = %s, Radius %g, Weight %g \n", - elements[ielem], radelem[ielem], wjelem[ielem]); - if (logfile) fprintf(logfile,"SNAP Element = %s, Radius %g, Weight %g 
\n", - elements[ielem], radelem[ielem], wjelem[ielem]); - } - - for (int icoeff = 0; icoeff < ncoeffall; icoeff++) { - if (comm->me == 0) { - ptr = fgets(line,MAXLINE,fpcoeff); - if (ptr == NULL) { - eof = 1; - fclose(fpcoeff); - } else n = strlen(line) + 1; - } - - MPI_Bcast(&eof,1,MPI_INT,0,world); - if (eof) - error->all(FLERR,"Incorrect format in SNAP coefficient file"); - MPI_Bcast(&n,1,MPI_INT,0,world); - MPI_Bcast(line,n,MPI_CHAR,0,world); - - nwords = atom->count_words(line); - if (nwords != 1) - error->all(FLERR,"Incorrect format in SNAP coefficient file"); - - iword = 0; - words[iword] = strtok(line,"' \t\n\r\f"); - - coeffelem[ielem][icoeff] = atof(words[0]); - - } - } - - // set flags for required keywords - - rcutfacflag = 0; - twojmaxflag = 0; - - // Set defaults for optional keywords - - rfac0 = 0.99363; - rmin0 = 0.0; - diagonalstyle = 3; - switchflag = 1; - bzeroflag = 1; - quadraticflag = 0; - - // open SNAP parameter file on proc 0 - - FILE *fpparam; - if (comm->me == 0) { - fpparam = force->open_potential(paramfilename); - if (fpparam == NULL) { - char str[128]; - snprintf(str,128,"Cannot open SNAP parameter file %s",paramfilename); - error->one(FLERR,str); - } - } - - eof = 0; - while (1) { - if (comm->me == 0) { - ptr = fgets(line,MAXLINE,fpparam); - if (ptr == NULL) { - eof = 1; - fclose(fpparam); - } else n = strlen(line) + 1; - } - MPI_Bcast(&eof,1,MPI_INT,0,world); - if (eof) break; - MPI_Bcast(&n,1,MPI_INT,0,world); - MPI_Bcast(line,n,MPI_CHAR,0,world); - - // strip comment, skip line if blank - - if ((ptr = strchr(line,'#'))) *ptr = '\0'; - nwords = atom->count_words(line); - if (nwords == 0) continue; - - if (nwords != 2) - error->all(FLERR,"Incorrect format in SNAP parameter file"); - - // words = ptrs to all words in line - // strip single and double quotes from words - - char* keywd = strtok(line,"' \t\n\r\f"); - char* keyval = strtok(NULL,"' \t\n\r\f"); - - if (comm->me == 0) { - if (screen) fprintf(screen,"SNAP keyword %s %s 
\n",keywd,keyval); - if (logfile) fprintf(logfile,"SNAP keyword %s %s \n",keywd,keyval); - } - - if (strcmp(keywd,"rcutfac") == 0) { - rcutfac = atof(keyval); - rcutfacflag = 1; - } else if (strcmp(keywd,"twojmax") == 0) { - twojmax = atoi(keyval); - twojmaxflag = 1; - } else if (strcmp(keywd,"rfac0") == 0) - rfac0 = atof(keyval); - else if (strcmp(keywd,"rmin0") == 0) - rmin0 = atof(keyval); - else if (strcmp(keywd,"diagonalstyle") == 0) - diagonalstyle = atoi(keyval); - else if (strcmp(keywd,"switchflag") == 0) - switchflag = atoi(keyval); - else if (strcmp(keywd,"bzeroflag") == 0) - bzeroflag = atoi(keyval); - else if (strcmp(keywd,"quadraticflag") == 0) - quadraticflag = atoi(keyval); - else - error->all(FLERR,"Incorrect SNAP parameter file"); - } - - if (rcutfacflag == 0 || twojmaxflag == 0) - error->all(FLERR,"Incorrect SNAP parameter file"); - -} - -/* ---------------------------------------------------------------------- - memory usage -------------------------------------------------------------------------- */ - -double PairNNSNAP::memory_usage() -{ - double bytes = Pair::memory_usage(); - int n = atom->ntypes+1; - bytes += n*n*sizeof(int); - bytes += n*n*sizeof(double); - bytes += 3*nmax*sizeof(double); - bytes += nmax*sizeof(int); - bytes += (2*ncoeffall)*sizeof(double); - bytes += (ncoeff*3)*sizeof(double); - bytes += sna[0]->memory_usage()*nthreads; - return bytes; -} - From be5d3d6a19345a7378e2df0eaea3b7ad6e1ac660 Mon Sep 17 00:00:00 2001 From: athomps Date: Wed, 12 Jun 2019 17:04:22 -0600 Subject: [PATCH 16/21] Not part of this pull request --- examples/snap/in.nnsnap | 45 ----------------------------------------- 1 file changed, 45 deletions(-) delete mode 100644 examples/snap/in.nnsnap diff --git a/examples/snap/in.nnsnap b/examples/snap/in.nnsnap deleted file mode 100644 index d575757d56..0000000000 --- a/examples/snap/in.nnsnap +++ /dev/null @@ -1,45 +0,0 @@ -# Demonstrate SNAP Ta potential - -# Initialize simulation - -variable nsteps index 100 
-variable nrep equal 4 -variable a equal 3.1803 -units metal - -# generate the box and atom positions using a BCC lattice - -variable nx equal ${nrep} -variable ny equal ${nrep} -variable nz equal ${nrep} - -boundary p p p - -lattice bcc $a -region box block 0 ${nx} 0 ${ny} 0 ${nz} -create_box 1 box -create_atoms 1 box - -mass 1 183.84 - -# choose potential - -include W.nnsnap - -# Setup output - -thermo 10 -thermo_modify norm yes - -# Set up NVE run - -timestep 0.5e-3 -neighbor 1.0 bin -neigh_modify once no every 1 delay 0 check yes - -# Run MD - -velocity all create 300.0 4928459 -fix 1 all nve -run ${nsteps} - From c5c03230cb2a1a8649f902b2dce0983c5a309565 Mon Sep 17 00:00:00 2001 From: athomps Date: Wed, 12 Jun 2019 17:05:47 -0600 Subject: [PATCH 17/21] Not part of this pull request --- src/SNAP/pair_nn_snap.h | 184 ---------------------------------------- 1 file changed, 184 deletions(-) delete mode 100644 src/SNAP/pair_nn_snap.h diff --git a/src/SNAP/pair_nn_snap.h b/src/SNAP/pair_nn_snap.h deleted file mode 100644 index f77ddee207..0000000000 --- a/src/SNAP/pair_nn_snap.h +++ /dev/null @@ -1,184 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. 
-------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(nn/snap,PairNNSNAP) - -#else - -#ifndef LMP_PAIR_NN_SNAP_H -#define LMP_PAIR_NN_SNAP_H - -#include "pair.h" - -namespace LAMMPS_NS { - -class PairNNSNAP : public Pair { -public: - PairNNSNAP(class LAMMPS *); - ~PairNNSNAP(); - virtual void compute(int, int); - void compute_regular(int, int); - void compute_optimized(int, int); - void settings(int, char **); - virtual void coeff(int, char **); - virtual void init_style(); - virtual double init_one(int, int); - virtual double memory_usage(); - - double rcutfac, quadraticflag; // declared public to workaround gcc 4.9 - int ncoeff; // compiler bug, manifest in KOKKOS package - -protected: - int ncoeffq, ncoeffall; - double **bvec, ***dbvec; - class SNA** sna; - int nmax; - int nthreads; - virtual void allocate(); - void read_files(char *, char *); - inline int equal(double* x,double* y); - inline double dist2(double* x,double* y); - double extra_cutoff(); - void load_balance(); - void set_sna_to_shared(int snaid,int i); - void build_per_atom_arrays(); - - void compute_beta(); - - int schedule_user; - double schedule_time_guided; - double schedule_time_dynamic; - - int ncalls_neigh; - int do_load_balance; - int ilistmask_max; - int* ilistmask; - int ghostinum; - int ghostilist_max; - int* ghostilist; - int ghostnumneigh_max; - int* ghostnumneigh; - int* ghostneighs; - int* ghostfirstneigh; - int ghostneighs_total; - int ghostneighs_max; - - int use_optimized; - int use_shared_arrays; - - int i_max; - int i_neighmax; - int i_numpairs; - int **i_pairs; - double ***i_rij; - int **i_inside; - double **i_wj; - double **i_rcutij; - int *i_ninside; - double ****i_uarraytot_r, ****i_uarraytot_i; - double ******i_zarray_r, ******i_zarray_i; - -#ifdef TIMING_INFO - // timespec starttime, endtime; - double timers[4]; -#endif - - double rcutmax; // max cutoff for all elements - int nelements; // # of unique elements - 
char **elements; // names of unique elements - double *radelem; // element radii - double *wjelem; // elements weights - double **coeffelem; // element bispectrum coefficients - double** beta; // betas for all atoms in list - int *map; // mapping from atom types to elements - int twojmax, diagonalstyle, switchflag, bzeroflag; - double rfac0, rmin0, wj1, wj2; - int rcutfacflag, twojmaxflag; // flags for required parameters - int beta_max; // length of beta -}; - -} - -#endif -#endif - -/* ERROR/WARNING messages: - -E: Communication cutoff too small for SNAP micro load balancing - -This can happen if you change the neighbor skin after your pair_style -command or if your box dimensions grow during a run. You can set the -cutoff explicitly via the comm_modify cutoff command. - -E: Illegal ... command - -Self-explanatory. Check the input script syntax and compare to the -documentation for the command. You can use -echo screen as a -command-line option when running LAMMPS to see the offending line. - -E: Must set number of threads via package omp command - -Because you are using the USER-OMP package, set the number of threads -via its settings, not by the pair_style snap nthreads setting. - -W: Communication cutoff is too small for SNAP micro load balancing, increased to %lf - -Self-explanatory. - -E: Incorrect args for pair coefficients - -Self-explanatory. Check the input script or data file. - -E: Incorrect SNAP coeff file - -UNDOCUMENTED - -E: Incorrect SNAP parameter file - -The file cannot be parsed correctly, check its internal syntax. - -E: Pair style SNAP requires newton pair on - -See the newton command. This is a restriction to use the SNAP -potential. - -E: All pair coeffs are not set - -All pair coefficients must be set in the data file or by the -pair_coeff command before running a simulation. - -E: Cannot open SNAP coefficient file %s - -The specified SNAP coefficient file cannot be opened. Check that the -path and name are correct. 
- -E: Incorrect format in SNAP coefficient file - -Incorrect number of words per line in the coefficient file. - -E: Cannot open SNAP parameter file %s - -The specified SNAP parameter file cannot be opened. Check that the -path and name are correct. - -E: Incorrect format in SNAP parameter file - -Incorrect number of words per line in the parameter file. - -E: Did not find all elements in SNAP coefficient file. - -One or more elements listed in the pair_coeff command were not found in the coefficient file. - -*/ From 65b87fa2781c5f7dd9a4599193c026d8b37825ae Mon Sep 17 00:00:00 2001 From: "Aidan P. Thompson" Date: Thu, 13 Jun 2019 09:54:56 -0600 Subject: [PATCH 18/21] Updated SNAP in KOKKOS package so it compiles and runs --- src/KOKKOS/pair_snap_kokkos_impl.h | 18 +----- src/KOKKOS/sna_kokkos.h | 8 +-- src/KOKKOS/sna_kokkos_impl.h | 97 ++++++++++++------------------ src/SNAP/sna.cpp | 3 - 4 files changed, 39 insertions(+), 87 deletions(-) diff --git a/src/KOKKOS/pair_snap_kokkos_impl.h b/src/KOKKOS/pair_snap_kokkos_impl.h index bb2a5e9171..0ec4ed0995 100644 --- a/src/KOKKOS/pair_snap_kokkos_impl.h +++ b/src/KOKKOS/pair_snap_kokkos_impl.h @@ -85,9 +85,6 @@ void PairSNAPKokkos::init_style() if (force->newton_pair == 0) error->all(FLERR,"Pair style SNAP requires newton pair on"); - if (diagonalstyle != 3) - error->all(FLERR,"Must use diagonal style = 3 with pair snap/kk"); - // irequest = neigh request made by parent class neighflag = lmp->kokkos->neighflag; @@ -343,23 +340,12 @@ void PairSNAPKokkos::coeff(int narg, char **arg) Kokkos::deep_copy(d_coeffelem,h_coeffelem); Kokkos::deep_copy(d_map,h_map); - // deallocate non-kokkos sna - - if (sna) { - for (int tid = 0; tid(rfac0,twojmax, - diagonalstyle,use_shared_arrays, rmin0,switchflag,bzeroflag); - //if (!use_shared_arrays) - snaKK.grow_rij(nmax); + snaKK.grow_rij(0); snaKK.init(); } @@ -667,8 +653,6 @@ double PairSNAPKokkos::memory_usage() int n = atom->ntypes+1; bytes += n*n*sizeof(int); bytes += 
n*n*sizeof(double); - bytes += 3*nmax*sizeof(double); - bytes += nmax*sizeof(int); bytes += (2*ncoeffall)*sizeof(double); bytes += (ncoeff*3)*sizeof(double); bytes += snaKK.memory_usage(); diff --git a/src/KOKKOS/sna_kokkos.h b/src/KOKKOS/sna_kokkos.h index 7a80b262b7..40e5fe0ad4 100644 --- a/src/KOKKOS/sna_kokkos.h +++ b/src/KOKKOS/sna_kokkos.h @@ -48,7 +48,7 @@ inline SNAKokkos(const SNAKokkos& sna, const typename Kokkos::TeamPolicy::member_type& team); inline - SNAKokkos(double, int, int, int, double, int, int); + SNAKokkos(double, int, double, int, int); KOKKOS_INLINE_FUNCTION ~SNAKokkos(); @@ -178,12 +178,6 @@ inline double, double, double, // compute_duidrj double, double, double, double, double); - // if number of atoms are small use per atom arrays - // for twojmax arrays, rij, inside, bvec - // this will increase the memory footprint considerably, - // but allows parallel filling and reuse of these arrays - int use_shared_arrays; - // Sets the style for the switching function // 0 = none // 1 = cosine diff --git a/src/KOKKOS/sna_kokkos_impl.h b/src/KOKKOS/sna_kokkos_impl.h index 0f2a450a3d..c43003af97 100644 --- a/src/KOKKOS/sna_kokkos_impl.h +++ b/src/KOKKOS/sna_kokkos_impl.h @@ -27,19 +27,17 @@ static const double MY_PI = 3.14159265358979323846; // pi template inline SNAKokkos::SNAKokkos(double rfac0_in, - int twojmax_in, int diagonalstyle_in, int use_shared_arrays_in, + int twojmax_in, double rmin0_in, int switch_flag_in, int bzero_flag_in) { wself = 1.0; - use_shared_arrays = use_shared_arrays_in; rfac0 = rfac0_in; rmin0 = rmin0_in; switch_flag = switch_flag_in; bzero_flag = bzero_flag_in; twojmax = twojmax_in; - diagonalstyle = diagonalstyle_in; ncoeff = compute_ncoeff(); @@ -70,14 +68,12 @@ KOKKOS_INLINE_FUNCTION SNAKokkos::SNAKokkos(const SNAKokkos& sna, const typename Kokkos::TeamPolicy::member_type& team) { wself = sna.wself; - use_shared_arrays = sna.use_shared_arrays; rfac0 = sna.rfac0; rmin0 = sna.rmin0; switch_flag = sna.switch_flag; 
bzero_flag = sna.bzero_flag; twojmax = sna.twojmax; - diagonalstyle = sna.diagonalstyle; ncoeff = sna.ncoeff; nmax = sna.nmax; @@ -104,48 +100,45 @@ template inline void SNAKokkos::build_indexlist() { - if(diagonalstyle == 3) { - int idxj_count = 0; - int idxj_full_count = 0; + int idxj_count = 0; + int idxj_full_count = 0; - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) - for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { - if (j >= j1) idxj_count++; - idxj_full_count++; - } + for(int j1 = 0; j1 <= twojmax; j1++) + for(int j2 = 0; j2 <= j1; j2++) + for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { + if (j >= j1) idxj_count++; + idxj_full_count++; + } - // indexList can be changed here + // indexList can be changed here - idxj = Kokkos::View("SNAKokkos::idxj",idxj_count); - idxj_full = Kokkos::View("SNAKokkos::idxj_full",idxj_full_count); - auto h_idxj = Kokkos::create_mirror_view(idxj); - auto h_idxj_full = Kokkos::create_mirror_view(idxj_full); + idxj = Kokkos::View("SNAKokkos::idxj",idxj_count); + idxj_full = Kokkos::View("SNAKokkos::idxj_full",idxj_full_count); + auto h_idxj = Kokkos::create_mirror_view(idxj); + auto h_idxj_full = Kokkos::create_mirror_view(idxj_full); - idxj_max = idxj_count; - idxj_full_max = idxj_full_count; + idxj_max = idxj_count; + idxj_full_max = idxj_full_count; - idxj_count = 0; - idxj_full_count = 0; + idxj_count = 0; + idxj_full_count = 0; - for(int j1 = 0; j1 <= twojmax; j1++) - for(int j2 = 0; j2 <= j1; j2++) - for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { - if (j >= j1) { - h_idxj[idxj_count].j1 = j1; - h_idxj[idxj_count].j2 = j2; - h_idxj[idxj_count].j = j; - idxj_count++; - } - h_idxj_full[idxj_full_count].j1 = j1; - h_idxj_full[idxj_full_count].j2 = j2; - h_idxj_full[idxj_full_count].j = j; - idxj_full_count++; - } - Kokkos::deep_copy(idxj,h_idxj); - Kokkos::deep_copy(idxj_full,h_idxj_full); - - } + for(int j1 = 0; j1 <= twojmax; j1++) + for(int j2 = 0; j2 <= 
j1; j2++) + for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { + if (j >= j1) { + h_idxj[idxj_count].j1 = j1; + h_idxj[idxj_count].j2 = j2; + h_idxj[idxj_count].j = j; + idxj_count++; + } + h_idxj_full[idxj_full_count].j1 = j1; + h_idxj_full[idxj_full_count].j2 = j2; + h_idxj_full[idxj_full_count].j = j; + idxj_full_count++; + } + Kokkos::deep_copy(idxj,h_idxj); + Kokkos::deep_copy(idxj_full,h_idxj_full); } /* ---------------------------------------------------------------------- */ @@ -1223,26 +1216,10 @@ int SNAKokkos::compute_ncoeff() ncount = 0; for (int j1 = 0; j1 <= twojmax; j1++) - if(diagonalstyle == 0) { - for (int j2 = 0; j2 <= j1; j2++) - for (int j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) - ncount++; - } else if(diagonalstyle == 1) { - int j2 = j1; - + for (int j2 = 0; j2 <= j1; j2++) for (int j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) - ncount++; - } else if(diagonalstyle == 2) { - ncount++; - } else if(diagonalstyle == 3) { - for (int j2 = 0; j2 <= j1; j2++) - for (int j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) - if (j >= j1) ncount++; - } - + j <= MIN(twojmax, j1 + j2); j += 2) + if (j >= j1) ncount++; return ncount; } diff --git a/src/SNAP/sna.cpp b/src/SNAP/sna.cpp index 75601b8e17..fd25d35677 100644 --- a/src/SNAP/sna.cpp +++ b/src/SNAP/sna.cpp @@ -1208,9 +1208,6 @@ double SNA::memory_usage() bytes += nmax * sizeof(double); // wj bytes += nmax * sizeof(double); // rcutij - printf("SNAP Z list Memory Usage %d\n",idxz_max * sizeof(double) * 2); - printf("SNAP CG list Memory Usage %d\n",idxcg_max * sizeof(double)); - return bytes; } /* ---------------------------------------------------------------------- */ From 67a1a63f5fec8a68f460e07d0c4768838cf83ce2 Mon Sep 17 00:00:00 2001 From: Aidan Thompson Date: Thu, 13 Jun 2019 10:10:37 -0600 Subject: [PATCH 19/21] Removed old text --- doc/src/compute_sna_atom.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/src/compute_sna_atom.txt 
b/doc/src/compute_sna_atom.txt index 10e68f5698..9dca6b1c6f 100644 --- a/doc/src/compute_sna_atom.txt +++ b/doc/src/compute_sna_atom.txt @@ -25,7 +25,6 @@ R_1, R_2,... = list of cutoff radii, one for each type (distance units) :l w_1, w_2,... = list of neighbor weights, one for each type :l zero or more keyword/value pairs may be appended :l keyword = {rmin0} or {switchflag} or {bzeroflag} or {quadraticflag} :l -// {3} = subset satisfying j2 <= j1 <= j {rmin0} value = parameter in distance to angle conversion (distance units) {switchflag} value = {0} or {1} {0} = do not use switching function From 5fb505ca8cbb7f0a8be96ae400eff1a0802eff98 Mon Sep 17 00:00:00 2001 From: Aidan Thompson Date: Thu, 13 Jun 2019 10:24:18 -0600 Subject: [PATCH 20/21] Fixed typo --- doc/src/compute_sna_atom.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/compute_sna_atom.txt b/doc/src/compute_sna_atom.txt index 9dca6b1c6f..518d28aec9 100644 --- a/doc/src/compute_sna_atom.txt +++ b/doc/src/compute_sna_atom.txt @@ -200,7 +200,7 @@ for the number of bispectrum components was removed in 2019, since all potentials use the value of 3, corresponding to the above set of bispectrum components. -ompute {snad/atom} evaluates a per-atom array. The columns are +Compute {snad/atom} evaluates a per-atom array. The columns are arranged into {ntypes} blocks, listed in order of atom type {I}. Each block contains three sub-blocks corresponding to the {x}, {y}, and {z} components of the atom position. 
Each of these sub-blocks contains From 4d7d3a5d530c7b847dd08ca62aa7d091a94e6139 Mon Sep 17 00:00:00 2001 From: Aidan Thompson Date: Thu, 13 Jun 2019 15:56:18 -0600 Subject: [PATCH 21/21] Switched algorithm for compute_yi to one based on zlist ordering --- src/SNAP/sna.cpp | 155 +++++++++++++++++++++++------------------------ 1 file changed, 76 insertions(+), 79 deletions(-) diff --git a/src/SNAP/sna.cpp b/src/SNAP/sna.cpp index fd25d35677..ec545c51b2 100644 --- a/src/SNAP/sna.cpp +++ b/src/SNAP/sna.cpp @@ -398,13 +398,18 @@ void SNA::compute_zi() } /* ---------------------------------------------------------------------- - compute Yi from Ui without storing Zi, looping over ylist + compute Yi from Ui without storing Zi, looping over zlist indices ------------------------------------------------------------------------- */ void SNA::compute_yi(const double* beta) { + int j; + int jjz; + int jju; + double betaj; + for(int j = 0; j <= twojmax; j++) { - int jju = idxu_block[j]; + jju = idxu_block[j]; for(int mb = 0; 2*mb <= j; mb++) for(int ma = 0; ma <= j; ma++) { ylist_r[jju] = 0.0; @@ -413,93 +418,85 @@ void SNA::compute_yi(const double* beta) } // end loop over ma, mb } // end loop over j - for(int jjb = 0; jjb < idxb_max; jjb++) { - const int j1b = idxb[jjb].j1; - const int j2b = idxb[jjb].j2; - const int j3b = idxb[jjb].j; + int ma2, mb2; + for(int jjz = 0; jjz < idxz_max; jjz++) { + const int j1 = idxz[jjz].j1; + const int j2 = idxz[jjz].j2; + const int j = idxz[jjz].j; + const int ma1min = idxz[jjz].ma1min; + const int ma2max = idxz[jjz].ma2max; + const int na = idxz[jjz].na; + const int mb1min = idxz[jjz].mb1min; + const int mb2max = idxz[jjz].mb2max; + const int nb = idxz[jjz].nb; - compute_yterm(j1b,j2b,j3b,beta); - compute_yterm(j3b,j2b,j1b,beta); - compute_yterm(j3b,j1b,j2b,beta); + const double* cgblock = cglist + idxcg_block[j1][j2][j]; + int mb = (2 * (mb1min+mb2max) - j1 - j2 + j) / 2; + int ma = (2 * (ma1min+ma2max) - j1 - j2 + j) / 2; - } // end loop 
over jjb + double ztmp_r = 0.0; + double ztmp_i = 0.0; -} + int jju1 = idxu_block[j1] + (j1+1)*mb1min; + int jju2 = idxu_block[j2] + (j2+1)*mb2max; + int icgb = mb1min*(j2+1) + mb2max; + for(int ib = 0; ib < nb; ib++) { -void SNA::compute_yterm(int j1, int j2, int j, const double* beta) { - double betaj; + double suma1_r = 0.0; + double suma1_i = 0.0; - int jju = idxu_block[j]; - int jjz = idxz_block[j1][j2][j]; + const double* u1_r = &ulisttot_r[jju1]; + const double* u1_i = &ulisttot_i[jju1]; + const double* u2_r = &ulisttot_r[jju2]; + const double* u2_i = &ulisttot_i[jju2]; + + int ma1 = ma1min; + int ma2 = ma2max; + int icga = ma1min*(j2+1) + ma2max; + + for(int ia = 0; ia < na; ia++) { + suma1_r += cgblock[icga] * (u1_r[ma1] * u2_r[ma2] - u1_i[ma1] * u2_i[ma2]); + suma1_i += cgblock[icga] * (u1_r[ma1] * u2_i[ma2] + u1_i[ma1] * u2_r[ma2]); + ma1++; + ma2--; + icga += j2; + } // end loop over ia + + ztmp_r += cgblock[icgb] * suma1_r; + ztmp_i += cgblock[icgb] * suma1_i; + jju1 += j1+1; + jju2 -= j2+1; + icgb += j2; + } // end loop over ib + + // apply to z(j1,j2,j,ma,mb) to unique element of y(j) + // find right y_list[jju] and beta[jjb] entries + // multiply and divide by j+1 factors + // account for multiplicity of 1, 2, or 3 + + const int jju = idxz[jjz].jju; // pick out right beta value - if (j >= j1) { - const int jjb = idxb_block[j1][j2][j]; - betaj = beta[jjb]; - } else if (j >= j2) { - const int jjb = idxb_block[j][j2][j1]; - betaj = beta[jjb]*(j1+1)/(j+1.0); - } else { - const int jjb = idxb_block[j2][j][j1]; - betaj = beta[jjb]*(j1+1)/(j+1.0); - } + if (j >= j1) { + const int jjb = idxb_block[j1][j2][j]; + if (j1 == j) { + if (j2 == j) betaj = 3*beta[jjb]; + else betaj = 2*beta[jjb]; + } else betaj = beta[jjb]; + } else if (j >= j2) { + const int jjb = idxb_block[j][j2][j1]; + if (j2 == j) betaj = 2*beta[jjb]*(j1+1)/(j+1.0); + else betaj = beta[jjb]*(j1+1)/(j+1.0); + } else { + const int jjb = idxb_block[j2][j][j1]; + betaj = beta[jjb]*(j1+1)/(j+1.0); 
+ } - // can replace this with a single loop over jjz + ylist_r[jju] += betaj*ztmp_r; + ylist_i[jju] += betaj*ztmp_i; - for (int mb = 0; 2*mb <= j; mb++) - for (int ma = 0; ma <= j; ma++) { - - const int ma1min = idxz[jjz].ma1min; - const int ma2max = idxz[jjz].ma2max; - const int na = idxz[jjz].na; - const int mb1min = idxz[jjz].mb1min; - const int mb2max = idxz[jjz].mb2max; - const int nb = idxz[jjz].nb; - - const double* cgblock = cglist + idxcg_block[j1][j2][j]; - - double ztmp_r = 0.0; - double ztmp_i = 0.0; - - int jju1 = idxu_block[j1] + (j1+1)*mb1min; - int jju2 = idxu_block[j2] + (j2+1)*mb2max; - int icgb = mb1min*(j2+1) + mb2max; - for(int ib = 0; ib < nb; ib++) { - - double suma1_r = 0.0; - double suma1_i = 0.0; - - const double* u1_r = &ulisttot_r[jju1]; - const double* u1_i = &ulisttot_i[jju1]; - const double* u2_r = &ulisttot_r[jju2]; - const double* u2_i = &ulisttot_i[jju2]; - - int ma1 = ma1min; - int ma2 = ma2max; - int icga = ma1min*(j2+1) + ma2max; - - for(int ia = 0; ia < na; ia++) { - suma1_r += cgblock[icga] * (u1_r[ma1] * u2_r[ma2] - u1_i[ma1] * u2_i[ma2]); - suma1_i += cgblock[icga] * (u1_r[ma1] * u2_i[ma2] + u1_i[ma1] * u2_r[ma2]); - ma1++; - ma2--; - icga += j2; - } // end loop over ia - - ztmp_r += cgblock[icgb] * suma1_r; - ztmp_i += cgblock[icgb] * suma1_i; - jju1 += j1+1; - jju2 -= j2+1; - icgb += j2; - } // end loop over ib - - // printf("jju betaj ztmp ylist %d %g %g %d %d %d %d %d\n",jju,betaj,ztmp_r,j1,j2,j,ma,mb); - ylist_r[jju] += betaj*ztmp_r; - ylist_i[jju] += betaj*ztmp_i; - jjz++; - jju++; - } // end loop over ma, mb + } // end loop over jjz } /* ----------------------------------------------------------------------