diff --git a/src/USER-OMP/Install.sh b/src/USER-OMP/Install.sh new file mode 100644 index 0000000000..a290fb47bd --- /dev/null +++ b/src/USER-OMP/Install.sh @@ -0,0 +1,51 @@ +# Install/unInstall package files in LAMMPS +# do not install child files if parent does not exist + +for file in *_omp.cpp *_omp.h pppm*proxy.h pppm*proxy.cpp; do + # let us see if the "rain man" can count the toothpicks... + ofile=`echo $file | sed -e s,_pppm_tip4p_omp,_long_tip4p_omp, \ + -e s,pppm.\\*_proxy,pppm_omp, -e s,_pppm_omp,_long_omp, \ + -e s,\\\\\\(.\\*\\\\\\)_omp\\\\.h,\\\\1.h, \ + -e s,\\\\\\(.\\*\\\\\\)_omp\\\\.cpp,\\\\1.cpp,` + if (test $1 = 1) then + if (test $file = "thr_omp.h") || (test $file = "thr_omp.cpp") then + : # always install those files. + elif (test ! -e ../$ofile) then + continue + fi + + cp $file .. + + elif (test $1 = 0) then + rm -f ../$file + fi +done + +if (test $1 = 1) then + + if (test -e ../Makefile.package) then + sed -i -e 's/[^ \t]*OMP[^ \t]* //' ../Makefile.package + sed -i -e 's|^PKG_INC =[ \t]*|&-DLMP_USER_OMP |' ../Makefile.package + fi + + # force rebuild of files with LMP_USER_OMP switch + + touch ../accelerator_omp.h + + cp thr_data.h .. + cp thr_data.cpp .. + +elif (test $1 = 0) then + + if (test -e ../Makefile.package) then + sed -i -e 's/[^ \t]*OMP[^ \t]* //' ../Makefile.package + fi + + # force rebuild of files with LMP_USER_OMP switch + + touch ../accelerator_omp.h + + rm -f ../thr_data.h + rm -f ../thr_data.cpp + +fi diff --git a/src/USER-OMP/Package.sh b/src/USER-OMP/Package.sh new file mode 100644 index 0000000000..6f577b2791 --- /dev/null +++ b/src/USER-OMP/Package.sh @@ -0,0 +1,46 @@ +# Update package files in LAMMPS +# copy package file to src if it doesn't exists or is different +# do not copy OpenMP style files, if a non-OpenMP version does +# not exist. Do remove OpenMP style files that have no matching +# non-OpenMP version installed, e.g. after a package has been +# removed +for file in *_omp.cpp *_omp.h pppm*proxy.h pppm*proxy.cpp thr_data.h thr_data.cpp; do + # let us see if the "rain man" can count the toothpicks... + ofile=`echo $file | sed -e s,_pppm_tip4p_omp,_long_tip4p_omp, \ + -e s,pppm.\\*_proxy,pppm_omp, -e s,_pppm_omp,_long_omp, \ + -e s,\\\\\\(.\\*\\\\\\)_omp\\\\.\\\\\\(h\\\\\\|cpp\\\\\\),\\\\1.\\\\2,` + if (test $file = "thr_omp.h") || (test $file = "thr_omp.cpp") \ + || (test $file = "thr_data.h") || (test $file = "thr_data.cpp") then + if (test ! -e ../$file) then + echo " creating src/$file" + cp $file .. + elif ! cmp -s $file ../$file ; then + echo " updating src/$file" + cp $file .. + fi + elif (test ! -e ../$ofile) then + if (test -e ../$file) then + echo " removing src/$file" + rm -f ../$file + fi + else + if (test ! -e ../$file) then + echo " creating src/$file" + cp $file .. + elif ! cmp -s $file ../$file ; then + echo " updating src/$file" + cp $file .. + fi + fi +done + +for file in thr_data.h thr_data.cpp; do + if (test ! -e ../$file) then + echo " creating src/$file" + cp $file .. + elif ! cmp -s $file ../$file ; then + echo " updating src/$file" + cp $file .. + fi +done + diff --git a/src/USER-OMP/README b/src/USER-OMP/README new file mode 100644 index 0000000000..46f63f646b --- /dev/null +++ b/src/USER-OMP/README @@ -0,0 +1,19 @@ +This package provides OpenMP multi-threading support and other +optimizations of various LAMMPS pair styles, dihedral styles, and fix +styles. + +See this section of the manual to get started: + +doc/Section_accelerate.html, sub-section 5.2 + +The person who created this package is Axel Kohlmeyer at Temple U +(akohlmey at gmail.com). Contact him directly if you have questions. + +-------------------------- + +This directory also contains a shell script: + +hack_openmp_for_pgi.sh + +which will convert OpenMP directives in src files +into a form compatible with the PGI compiler. diff --git a/src/USER-OMP/angle_charmm_omp.cpp b/src/USER-OMP/angle_charmm_omp.cpp new file mode 100644 index 0000000000..4cc86dd11c --- /dev/null +++ b/src/USER-OMP/angle_charmm_omp.cpp @@ -0,0 +1,196 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "angle_charmm_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include "math_const.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + + AngleCharmmOMP::AngleCharmmOMP(class LAMMPS *lmp) + : AngleCharmm(lmp), ThrOMP(lmp,THR_ANGLE) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void AngleCharmmOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nanglelist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void AngleCharmmOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,n,type; + double delx1,dely1,delz1,delx2,dely2,delz2; + double eangle,f1[3],f3[3]; + double dtheta,tk; + double rsq1,rsq2,r1,r2,c,s,a,a11,a12,a22; + double delxUB,delyUB,delzUB,rsqUB,rUB,dr,rk,forceUB; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const anglelist = neighbor->anglelist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = anglelist[n][0]; + i2 = anglelist[n][1]; + i3 = anglelist[n][2]; + type = anglelist[n][3]; + + // 1st bond + + delx1 = x[i1][0] - x[i2][0]; + dely1 = x[i1][1] - x[i2][1]; + delz1 = x[i1][2] - x[i2][2]; + + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + r1 = sqrt(rsq1); + + // 2nd bond + + delx2 = x[i3][0] - x[i2][0]; + dely2 = x[i3][1] - x[i2][1]; + delz2 = x[i3][2] - x[i2][2]; + + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + r2 = sqrt(rsq2); + + // Urey-Bradley bond + + delxUB = x[i3][0] - x[i1][0]; + delyUB = x[i3][1] - x[i1][1]; + delzUB = x[i3][2] - x[i1][2]; + + rsqUB = delxUB*delxUB + delyUB*delyUB + delzUB*delzUB; + rUB = sqrt(rsqUB); + + // Urey-Bradley force & energy + + dr = rUB - r_ub[type]; + rk = k_ub[type] * dr; + + if (rUB > 0.0) forceUB = -2.0*rk/rUB; + else forceUB = 0.0; + + if (EFLAG) eangle = rk*dr; + + // angle (cos and sin) + + c = delx1*delx2 + dely1*dely2 + delz1*delz2; + c /= r1*r2; + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + s = 1.0/s; + + // harmonic force & energy + + dtheta = acos(c) - theta0[type]; + tk = k[type] * dtheta; + + if (EFLAG) eangle += tk*dtheta; + + a = -2.0 * tk * s; + a11 = a*c / rsq1; + a12 = -a / (r1*r2); + a22 = a*c / rsq2; + + f1[0] = a11*delx1 + a12*delx2 - delxUB*forceUB; + f1[1] = a11*dely1 + a12*dely2 - delyUB*forceUB; + f1[2] = a11*delz1 + a12*delz2 - delzUB*forceUB; + + f3[0] = a22*delx2 + a12*delx1 + delxUB*forceUB; + f3[1] = a22*dely2 + a12*dely1 + delyUB*forceUB; + f3[2] = a22*delz2 + a12*delz1 + delzUB*forceUB; + + // apply force to each of 3 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= f1[0] + f3[0]; + f[i2][1] -= f1[1] + f3[1]; + f[i2][2] -= f1[2] + f3[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3, + delx1,dely1,delz1,delx2,dely2,delz2,thr); + } +} diff --git a/src/USER-OMP/angle_charmm_omp.h b/src/USER-OMP/angle_charmm_omp.h new file mode 100644 index 0000000000..8cbb992c3b --- /dev/null +++ b/src/USER-OMP/angle_charmm_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef ANGLE_CLASS + +AngleStyle(charmm/omp,AngleCharmmOMP) + +#else + +#ifndef LMP_ANGLE_CHARMM_OMP_H +#define LMP_ANGLE_CHARMM_OMP_H + +#include "angle_charmm.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class AngleCharmmOMP : public AngleCharmm, public ThrOMP { + + public: + AngleCharmmOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/angle_class2_omp.cpp b/src/USER-OMP/angle_class2_omp.cpp new file mode 100644 index 0000000000..ef887a93e2 --- /dev/null +++ b/src/USER-OMP/angle_class2_omp.cpp @@ -0,0 +1,240 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "angle_class2_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include "math_const.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +AngleClass2OMP::AngleClass2OMP(class LAMMPS *lmp) + : AngleClass2(lmp), ThrOMP(lmp,THR_ANGLE) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void AngleClass2OMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nanglelist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void AngleClass2OMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,n,type; + double delx1,dely1,delz1,delx2,dely2,delz2; + double eangle,f1[3],f3[3]; + double dtheta,dtheta2,dtheta3,dtheta4,de_angle; + double dr1,dr2,tk1,tk2,aa1,aa2,aa11,aa12,aa21,aa22; + double rsq1,rsq2,r1,r2,c,s,a,a11,a12,a22,b1,b2; + double vx11,vx12,vy11,vy12,vz11,vz12,vx21,vx22,vy21,vy22,vz21,vz22; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const anglelist = neighbor->anglelist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = anglelist[n][0]; + i2 = anglelist[n][1]; + i3 = anglelist[n][2]; + type = anglelist[n][3]; + + // 1st bond + + delx1 = x[i1][0] - x[i2][0]; + dely1 = x[i1][1] - x[i2][1]; + delz1 = x[i1][2] - x[i2][2]; + + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + r1 = sqrt(rsq1); + + // 2nd bond + + delx2 = x[i3][0] - x[i2][0]; + dely2 = x[i3][1] - x[i2][1]; + delz2 = x[i3][2] - x[i2][2]; + + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + r2 = sqrt(rsq2); + + // angle (cos and sin) + + c = delx1*delx2 + dely1*dely2 + delz1*delz2; + c /= r1*r2; + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + s = 1.0/s; + + // force & energy for angle term + + dtheta = acos(c) - theta0[type]; + dtheta2 = dtheta*dtheta; + dtheta3 = dtheta2*dtheta; + dtheta4 = dtheta3*dtheta; + + de_angle = 2.0*k2[type]*dtheta + 3.0*k3[type]*dtheta2 + + 4.0*k4[type]*dtheta3; + + a = -de_angle*s; + a11 = a*c / rsq1; + a12 = -a / (r1*r2); + a22 = a*c / rsq2; + + f1[0] = a11*delx1 + a12*delx2; + f1[1] = a11*dely1 + a12*dely2; + f1[2] = a11*delz1 + a12*delz2; + + f3[0] = a22*delx2 + a12*delx1; + f3[1] = a22*dely2 + a12*dely1; + f3[2] = a22*delz2 + a12*delz1; + + if (EFLAG) eangle = k2[type]*dtheta2 + k3[type]*dtheta3 + k4[type]*dtheta4; + + // force & energy for bond-bond term + + dr1 = r1 - bb_r1[type]; + dr2 = r2 - bb_r2[type]; + tk1 = bb_k[type] * dr1; + tk2 = bb_k[type] * dr2; + + f1[0] -= delx1*tk2/r1; + f1[1] -= dely1*tk2/r1; + f1[2] -= delz1*tk2/r1; + + f3[0] -= delx2*tk1/r2; + f3[1] -= dely2*tk1/r2; + f3[2] -= delz2*tk1/r2; + + if (EFLAG) eangle += bb_k[type]*dr1*dr2; + + // force & energy for bond-angle term + + aa1 = s * dr1 * ba_k1[type]; + aa2 = s * dr2 * ba_k2[type]; + + aa11 = aa1 * c / rsq1; + aa12 = -aa1 / (r1 * r2); + aa21 = aa2 * c / rsq1; + aa22 = -aa2 / (r1 * r2); + + vx11 = (aa11 * delx1) + (aa12 * delx2); + vx12 = (aa21 * delx1) + (aa22 * delx2); + vy11 = (aa11 * dely1) + (aa12 * dely2); + vy12 = (aa21 * dely1) + (aa22 * dely2); + vz11 = (aa11 * delz1) + (aa12 * delz2); + vz12 = (aa21 * delz1) + (aa22 * delz2); + + aa11 = aa1 * c / rsq2; + aa21 = aa2 * c / rsq2; + + vx21 = (aa11 * delx2) + (aa12 * delx1); + vx22 = (aa21 * delx2) + (aa22 * delx1); + vy21 = (aa11 * dely2) + (aa12 * dely1); + vy22 = (aa21 * dely2) + (aa22 * dely1); + vz21 = (aa11 * delz2) + (aa12 * delz1); + vz22 = (aa21 * delz2) + (aa22 * delz1); + + b1 = ba_k1[type] * dtheta / r1; + b2 = ba_k2[type] * dtheta / r2; + + f1[0] -= vx11 + b1*delx1 + vx12; + f1[1] -= vy11 + b1*dely1 + vy12; + f1[2] -= vz11 + b1*delz1 + vz12; + + f3[0] -= vx21 + b2*delx2 + vx22; + f3[1] -= vy21 + b2*dely2 + vy22; + f3[2] -= vz21 + b2*delz2 + vz22; + + if (EFLAG) eangle += ba_k1[type]*dr1*dtheta + ba_k2[type]*dr2*dtheta; + + // apply force to each of 3 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= f1[0] + f3[0]; + f[i2][1] -= f1[1] + f3[1]; + f[i2][2] -= f1[2] + f3[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3, + delx1,dely1,delz1,delx2,dely2,delz2,thr); + } +} diff --git a/src/USER-OMP/angle_class2_omp.h b/src/USER-OMP/angle_class2_omp.h new file mode 100644 index 0000000000..fcfdae51a7 --- /dev/null +++ b/src/USER-OMP/angle_class2_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef ANGLE_CLASS + +AngleStyle(class2/omp,AngleClass2OMP) + +#else + +#ifndef LMP_ANGLE_CLASS2_OMP_H +#define LMP_ANGLE_CLASS2_OMP_H + +#include "angle_class2.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class AngleClass2OMP : public AngleClass2, public ThrOMP { + + public: + AngleClass2OMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/angle_cosine_delta_omp.cpp b/src/USER-OMP/angle_cosine_delta_omp.cpp new file mode 100644 index 0000000000..caf259b15c --- /dev/null +++ b/src/USER-OMP/angle_cosine_delta_omp.cpp @@ -0,0 +1,189 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "angle_cosine_delta_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include "math_const.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +AngleCosineDeltaOMP::AngleCosineDeltaOMP(class LAMMPS *lmp) + : AngleCosineDelta(lmp), ThrOMP(lmp,THR_ANGLE) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void AngleCosineDeltaOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nanglelist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void AngleCosineDeltaOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,n,type; + double delx1,dely1,delz1,delx2,dely2,delz2,theta,dtheta,dcostheta,tk; + double eangle,f1[3],f3[3]; + double rsq1,rsq2,r1,r2,c,a,cot,a11,a12,a22,b11,b12,b22,c0,s0,s; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const anglelist = neighbor->anglelist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = anglelist[n][0]; + i2 = anglelist[n][1]; + i3 = anglelist[n][2]; + type = anglelist[n][3]; + + // 1st bond + + delx1 = x[i1][0] - x[i2][0]; + dely1 = x[i1][1] - x[i2][1]; + delz1 = x[i1][2] - x[i2][2]; + + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + r1 = sqrt(rsq1); + + // 2nd bond + + delx2 = x[i3][0] - x[i2][0]; + dely2 = x[i3][1] - x[i2][1]; + delz2 = x[i3][2] - x[i2][2]; + + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + r2 = sqrt(rsq2); + + // angle (cos and sin) + + c = delx1*delx2 + dely1*dely2 + delz1*delz2; + c /= r1*r2; + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + theta = acos(c); + + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + s = 1.0/s; + + cot = c/s; + + // force & energy + + dtheta = theta - theta0[type]; + dcostheta = cos(dtheta); + tk = k[type] * (1.0-dcostheta); + + if (EFLAG) eangle = tk; + + a = -k[type]; + + // expand dtheta for cos and sin contribution to force + + a11 = a*c / rsq1; + a12 = -a / (r1*r2); + a22 = a*c / rsq2; + + b11 = -a*c*cot / rsq1; + b12 = a*cot / (r1*r2); + b22 = -a*c*cot / rsq2; + + c0 = cos(theta0[type]); + s0 = sin(theta0[type]); + + f1[0] = (a11*delx1 + a12*delx2)*c0 + (b11*delx1 + b12*delx2)*s0; + f1[1] = (a11*dely1 + a12*dely2)*c0 + (b11*dely1 + b12*dely2)*s0; + f1[2] = (a11*delz1 + a12*delz2)*c0 + (b11*delz1 + b12*delz2)*s0; + f3[0] = (a22*delx2 + a12*delx1)*c0 + (b22*delx2 + b12*delx1)*s0; + f3[1] = (a22*dely2 + a12*dely1)*c0 + (b22*dely2 + b12*dely1)*s0; + f3[2] = (a22*delz2 + a12*delz1)*c0 + (b22*delz2 + b12*delz1)*s0; + + // apply force to each of 3 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= f1[0] + f3[0]; + f[i2][1] -= f1[1] + f3[1]; + f[i2][2] -= f1[2] + f3[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3, + delx1,dely1,delz1,delx2,dely2,delz2,thr); + } +} diff --git a/src/USER-OMP/angle_cosine_delta_omp.h b/src/USER-OMP/angle_cosine_delta_omp.h new file mode 100644 index 0000000000..57b1d9364d --- /dev/null +++ b/src/USER-OMP/angle_cosine_delta_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef ANGLE_CLASS + +AngleStyle(cosine/delta/omp,AngleCosineDeltaOMP) + +#else + +#ifndef LMP_ANGLE_COSINE_DELTA_OMP_H +#define LMP_ANGLE_COSINE_DELTA_OMP_H + +#include "angle_cosine_delta.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class AngleCosineDeltaOMP : public AngleCosineDelta, public ThrOMP { + + public: + AngleCosineDeltaOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/angle_cosine_omp.cpp b/src/USER-OMP/angle_cosine_omp.cpp new file mode 100644 index 0000000000..51d3494a4f --- /dev/null +++ b/src/USER-OMP/angle_cosine_omp.cpp @@ -0,0 +1,166 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "angle_cosine_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include "math_const.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +AngleCosineOMP::AngleCosineOMP(class LAMMPS *lmp) + : AngleCosine(lmp), ThrOMP(lmp,THR_ANGLE) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void AngleCosineOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nanglelist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void AngleCosineOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,n,type; + double delx1,dely1,delz1,delx2,dely2,delz2; + double eangle,f1[3],f3[3]; + double rsq1,rsq2,r1,r2,c,a,a11,a12,a22; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const anglelist = neighbor->anglelist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = anglelist[n][0]; + i2 = anglelist[n][1]; + i3 = anglelist[n][2]; + type = anglelist[n][3]; + + // 1st bond + + delx1 = x[i1][0] - x[i2][0]; + dely1 = x[i1][1] - x[i2][1]; + delz1 = x[i1][2] - x[i2][2]; + + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + r1 = sqrt(rsq1); + + // 2nd bond + + delx2 = x[i3][0] - x[i2][0]; + dely2 = x[i3][1] - x[i2][1]; + delz2 = x[i3][2] - x[i2][2]; + + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + r2 = sqrt(rsq2); + + // c = cosine of angle + + c = delx1*delx2 + dely1*dely2 + delz1*delz2; + c /= r1*r2; + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + // force & energy + + if (EFLAG) eangle = k[type]*(1.0+c); + + a = k[type]; + a11 = a*c / rsq1; + a12 = -a / (r1*r2); + a22 = a*c / rsq2; + + f1[0] = a11*delx1 + a12*delx2; + f1[1] = a11*dely1 + a12*dely2; + f1[2] = a11*delz1 + a12*delz2; + f3[0] = a22*delx2 + a12*delx1; + f3[1] = a22*dely2 + a12*dely1; + f3[2] = a22*delz2 + a12*delz1; + + // apply force to each of 3 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= f1[0] + f3[0]; + f[i2][1] -= f1[1] + f3[1]; + f[i2][2] -= f1[2] + f3[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3, + delx1,dely1,delz1,delx2,dely2,delz2,thr); + } +} diff --git a/src/USER-OMP/angle_cosine_omp.h b/src/USER-OMP/angle_cosine_omp.h new file mode 100644 index 0000000000..16eb3ff944 --- /dev/null +++ b/src/USER-OMP/angle_cosine_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef ANGLE_CLASS + +AngleStyle(cosine/omp,AngleCosineOMP) + +#else + +#ifndef LMP_ANGLE_COSINE_OMP_H +#define LMP_ANGLE_COSINE_OMP_H + +#include "angle_cosine.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class AngleCosineOMP : public AngleCosine, public ThrOMP { + + public: + AngleCosineOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/angle_cosine_periodic_omp.cpp b/src/USER-OMP/angle_cosine_periodic_omp.cpp new file mode 100644 index 0000000000..6c5932e70c --- /dev/null +++ b/src/USER-OMP/angle_cosine_periodic_omp.cpp @@ -0,0 +1,204 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "angle_cosine_periodic_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include "math_const.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +AngleCosinePeriodicOMP::AngleCosinePeriodicOMP(class LAMMPS *lmp) + : AngleCosinePeriodic(lmp), ThrOMP(lmp,THR_ANGLE) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void AngleCosinePeriodicOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nanglelist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void AngleCosinePeriodicOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i,i1,i2,i3,n,m,type,b_factor; + double delx1,dely1,delz1,delx2,dely2,delz2; + double eangle,f1[3],f3[3]; + double rsq1,rsq2,r1,r2,c,s,a,a11,a12,a22; + double tn,tn_1,tn_2,un,un_1,un_2; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const anglelist = neighbor->anglelist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = anglelist[n][0]; + i2 = anglelist[n][1]; + i3 = anglelist[n][2]; + type = anglelist[n][3]; + + // 1st bond + + delx1 = x[i1][0] - x[i2][0]; + dely1 = x[i1][1] - x[i2][1]; + delz1 = x[i1][2] - x[i2][2]; + + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + r1 = sqrt(rsq1); + + // 2nd bond + + delx2 = x[i3][0] - x[i2][0]; + dely2 = x[i3][1] - x[i2][1]; + delz2 = x[i3][2] - x[i2][2]; + + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + r2 = sqrt(rsq2); + + // c = cosine of angle + + c = delx1*delx2 + dely1*dely2 + delz1*delz2; + c /= r1*r2; + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + m = multiplicity[type]; + b_factor = b[type]; + + // cos(n*x) = Tn(cos(x)) + // Tn(x) = Chebyshev polynomials of the first kind: T_0 = 1, T_1 = x, ... + // recurrence relationship: + // Tn(x) = 2*x*T[n-1](x) - T[n-2](x) where T[-1](x) = 0 + // also, dTn(x)/dx = n*U[n-1](x) + // where Un(x) = 2*x*U[n-1](x) - U[n-2](x) and U[-1](x) = 0 + // finally need to handle special case for n = 1 + + tn = 1.0; + tn_1 = 1.0; + tn_2 = 0.0; + un = 1.0; + un_1 = 2.0; + un_2 = 0.0; + + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + s = 1.0/s; + + // force & energy + + tn_2 = c; + for (i = 1; i <= m; i++) { + tn = 2*c*tn_1 - tn_2; + tn_2 = tn_1; + tn_1 = tn; + } + + for (i = 2; i <= m; i++) { + un = 2*c*un_1 - un_2; + un_2 = un_1; + un_1 = un; + } + tn = b_factor*pow(-1.0,(double)m)*tn; + un = b_factor*pow(-1.0,(double)m)*m*un; + + if (EFLAG) eangle = 2*k[type]*(1.0 - tn); + + a = -k[type]*un; + a11 = a*c / rsq1; + a12 = -a / (r1*r2); + a22 = a*c / rsq2; + + f1[0] = a11*delx1 + a12*delx2; + f1[1] = a11*dely1 + a12*dely2; + f1[2] = a11*delz1 + a12*delz2; + f3[0] = a22*delx2 + a12*delx1; + f3[1] = a22*dely2 + a12*dely1; + f3[2] = a22*delz2 + a12*delz1; + + // apply force to each of 3 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= f1[0] + f3[0]; + f[i2][1] -= f1[1] + f3[1]; + f[i2][2] -= f1[2] + f3[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3, + delx1,dely1,delz1,delx2,dely2,delz2,thr); + } +} diff --git a/src/USER-OMP/angle_cosine_periodic_omp.h b/src/USER-OMP/angle_cosine_periodic_omp.h new file mode 100644 index 0000000000..3c7da10ceb --- /dev/null +++ b/src/USER-OMP/angle_cosine_periodic_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef ANGLE_CLASS + +AngleStyle(cosine/periodic/omp,AngleCosinePeriodicOMP) + +#else + +#ifndef LMP_ANGLE_COSINE_PERIODIC_OMP_H +#define LMP_ANGLE_COSINE_PERIODIC_OMP_H + +#include "angle_cosine_periodic.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class AngleCosinePeriodicOMP : public AngleCosinePeriodic, public ThrOMP { + + public: + AngleCosinePeriodicOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/angle_cosine_shift_exp_omp.cpp b/src/USER-OMP/angle_cosine_shift_exp_omp.cpp new file mode 100644 index 0000000000..18af91cf7d --- /dev/null +++ b/src/USER-OMP/angle_cosine_shift_exp_omp.cpp @@ -0,0 +1,187 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "angle_cosine_shift_exp_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include "math_const.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +AngleCosineShiftExpOMP::AngleCosineShiftExpOMP(class LAMMPS *lmp) + : AngleCosineShiftExp(lmp), ThrOMP(lmp,THR_ANGLE) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void AngleCosineShiftExpOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nanglelist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void AngleCosineShiftExpOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,n,type; + double delx1,dely1,delz1,delx2,dely2,delz2; + double eangle,f1[3],f3[3],ff; + double rsq1,rsq2,r1,r2,c,s,a11,a12,a22; + double exp2,aa,uumin,cccpsss,cssmscc; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const anglelist = neighbor->anglelist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = anglelist[n][0]; + i2 = anglelist[n][1]; + i3 = anglelist[n][2]; + type = anglelist[n][3]; + + // 1st bond + + delx1 = x[i1][0] - x[i2][0]; + dely1 = x[i1][1] - x[i2][1]; + delz1 = x[i1][2] - x[i2][2]; + + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + r1 = sqrt(rsq1); + + // 2nd bond + + delx2 = x[i3][0] - x[i2][0]; + dely2 = x[i3][1] - x[i2][1]; + delz2 = x[i3][2] - x[i2][2]; + + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + r2 = sqrt(rsq2); + + // c = cosine of angle + c = delx1*delx2 + dely1*dely2 + delz1*delz2; + c /= r1*r2; + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + // C= sine of angle + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + + // force & energy + + aa=a[type]; + uumin=umin[type]; + + cccpsss = c*cost[type]+s*sint[type]; + cssmscc = c*sint[type]-s*cost[type]; + + if (doExpansion[type]) + { // |a|<0.01 so use expansions relative precision <1e-5 +// std::cout << "Using expansion\n"; + if (EFLAG) eangle = -0.125*(1+cccpsss)*(4+aa*(cccpsss-1))*uumin; + ff=0.25*uumin*cssmscc*(2+aa*cccpsss)/s; + } + else + { +// std::cout << "Not using expansion\n"; + exp2=exp(0.5*aa*(1+cccpsss)); + if (EFLAG) eangle = opt1[type]*(1-exp2); + ff=0.5*a[type]*opt1[type]*exp2*cssmscc/s; + } + + a11 = ff*c/ rsq1; + a12 = -ff / (r1*r2); + a22 = ff*c/ rsq2; + + f1[0] = a11*delx1 + a12*delx2; + f1[1] = a11*dely1 + a12*dely2; + f1[2] = a11*delz1 + a12*delz2; + f3[0] = a22*delx2 + a12*delx1; + f3[1] = a22*dely2 + a12*dely1; + f3[2] = a22*delz2 + a12*delz1; + + // apply force to each of 3 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= f1[0] + f3[0]; + f[i2][1] -= f1[1] + f3[1]; + f[i2][2] -= f1[2] + f3[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3, + delx1,dely1,delz1,delx2,dely2,delz2,thr); + } +} diff --git a/src/USER-OMP/angle_cosine_shift_exp_omp.h b/src/USER-OMP/angle_cosine_shift_exp_omp.h new file mode 100644 index 0000000000..aba70f8a3c --- /dev/null +++ b/src/USER-OMP/angle_cosine_shift_exp_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef ANGLE_CLASS + +AngleStyle(cosine/shift/exp/omp,AngleCosineShiftExpOMP) + +#else + +#ifndef LMP_ANGLE_COSINE_SHIFT_EXP_OMP_H +#define LMP_ANGLE_COSINE_SHIFT_EXP_OMP_H + +#include "angle_cosine_shift_exp.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class AngleCosineShiftExpOMP : public AngleCosineShiftExp, public ThrOMP { + + public: + AngleCosineShiftExpOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/angle_cosine_shift_omp.cpp b/src/USER-OMP/angle_cosine_shift_omp.cpp new file mode 100644 index 0000000000..a7f40fee8a --- /dev/null +++ b/src/USER-OMP/angle_cosine_shift_omp.cpp @@ -0,0 +1,172 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "angle_cosine_shift_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include "math_const.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +AngleCosineShiftOMP::AngleCosineShiftOMP(class LAMMPS *lmp) + : AngleCosineShift(lmp), ThrOMP(lmp,THR_ANGLE) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void AngleCosineShiftOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nanglelist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void AngleCosineShiftOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,n,type; + double delx1,dely1,delz1,delx2,dely2,delz2; + double f1[3],f3[3]; + double rsq1,rsq2,r1,r2,c,s,cps,a11,a12,a22; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const anglelist = neighbor->anglelist; + const int nlocal = atom->nlocal; + double eangle = 0.0; + + for (n = nfrom; n < nto; n++) { + i1 = anglelist[n][0]; + i2 = anglelist[n][1]; + i3 = anglelist[n][2]; + type = anglelist[n][3]; + + // 1st bond + + delx1 = x[i1][0] - x[i2][0]; + dely1 = x[i1][1] - x[i2][1]; + delz1 = x[i1][2] - x[i2][2]; + + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + r1 = sqrt(rsq1); + + // 2nd bond + + delx2 = x[i3][0] - x[i2][0]; + dely2 = x[i3][1] - x[i2][1]; + delz2 = x[i3][2] - x[i2][2]; + + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + r2 = sqrt(rsq2); + + // c = cosine of angle + c = delx1*delx2 + dely1*dely2 + delz1*delz2; + c /= r1*r2; + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + // C= sine of angle + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + + // force & energy + const double kcos=kcost[type]; + const double ksin=ksint[type]; + if (EFLAG) eangle = -k[type]-kcos*c-ksin*s; + + cps = c/s; // NOTE absorbed one c + + a11 = (-kcos +ksin*cps )*c/ rsq1; + a12 = ( kcos -ksin*cps ) / (r1*r2); + a22 = (-kcos +ksin*cps )*c/ rsq2; + + f1[0] = a11*delx1 + a12*delx2; + f1[1] = a11*dely1 + a12*dely2; + f1[2] = a11*delz1 + a12*delz2; + f3[0] = a22*delx2 + a12*delx1; + f3[1] = a22*dely2 + a12*dely1; + f3[2] = a22*delz2 + a12*delz1; + + // apply force to each of 3 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= f1[0] + f3[0]; + f[i2][1] -= f1[1] + f3[1]; + f[i2][2] -= f1[2] + f3[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3, + delx1,dely1,delz1,delx2,dely2,delz2,thr); + } +} diff --git a/src/USER-OMP/angle_cosine_shift_omp.h b/src/USER-OMP/angle_cosine_shift_omp.h new file mode 100644 index 0000000000..09402fb341 --- /dev/null +++ b/src/USER-OMP/angle_cosine_shift_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef ANGLE_CLASS + +AngleStyle(cosine/shift/omp,AngleCosineShiftOMP) + +#else + +#ifndef LMP_ANGLE_COSINE_SHIFT_OMP_H +#define LMP_ANGLE_COSINE_SHIFT_OMP_H + +#include "angle_cosine_shift.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class AngleCosineShiftOMP : public AngleCosineShift, public ThrOMP { + + public: + AngleCosineShiftOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/angle_cosine_squared_omp.cpp b/src/USER-OMP/angle_cosine_squared_omp.cpp new file mode 100644 index 0000000000..e8eaff6b2a --- /dev/null +++ b/src/USER-OMP/angle_cosine_squared_omp.cpp @@ -0,0 +1,171 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "angle_cosine_squared_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include "math_const.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +AngleCosineSquaredOMP::AngleCosineSquaredOMP(class LAMMPS *lmp) + : AngleCosineSquared(lmp), ThrOMP(lmp,THR_ANGLE) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void AngleCosineSquaredOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nanglelist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void AngleCosineSquaredOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,n,type; + double delx1,dely1,delz1,delx2,dely2,delz2; + double eangle,f1[3],f3[3]; + double dcostheta,tk; + double rsq1,rsq2,r1,r2,c,a,a11,a12,a22; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const anglelist = neighbor->anglelist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = anglelist[n][0]; + i2 = anglelist[n][1]; + i3 = anglelist[n][2]; + type = anglelist[n][3]; + + // 1st bond + + delx1 = x[i1][0] - x[i2][0]; + dely1 = x[i1][1] - x[i2][1]; + delz1 = x[i1][2] - x[i2][2]; + + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + r1 = sqrt(rsq1); + + // 2nd bond + + delx2 = x[i3][0] - x[i2][0]; + dely2 = x[i3][1] - x[i2][1]; + delz2 = x[i3][2] - x[i2][2]; + + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + r2 = sqrt(rsq2); + + // angle (cos and sin) + + c = delx1*delx2 + dely1*dely2 + delz1*delz2; + c /= r1*r2; + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + // force & energy + + dcostheta = c - cos(theta0[type]); + tk = k[type] * dcostheta; + + if (EFLAG) eangle = tk*dcostheta; + + a = 2.0 * tk; + a11 = a*c / rsq1; + a12 = -a / (r1*r2); + a22 = a*c / rsq2; + + f1[0] = a11*delx1 + a12*delx2; + f1[1] = a11*dely1 + a12*dely2; + f1[2] = a11*delz1 + a12*delz2; + f3[0] = a22*delx2 + a12*delx1; + f3[1] = a22*dely2 + a12*dely1; + f3[2] = a22*delz2 + a12*delz1; + + // apply force to each of 3 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= f1[0] + f3[0]; + f[i2][1] -= f1[1] + f3[1]; + f[i2][2] -= f1[2] + f3[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3, + delx1,dely1,delz1,delx2,dely2,delz2,thr); + } +} diff --git a/src/USER-OMP/angle_cosine_squared_omp.h b/src/USER-OMP/angle_cosine_squared_omp.h new file mode 100644 index 0000000000..758863e612 --- /dev/null +++ b/src/USER-OMP/angle_cosine_squared_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef ANGLE_CLASS + +AngleStyle(cosine/squared/omp,AngleCosineSquaredOMP) + +#else + +#ifndef LMP_ANGLE_COSINE_SQUARED_OMP_H +#define LMP_ANGLE_COSINE_SQUARED_OMP_H + +#include "angle_cosine_squared.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class AngleCosineSquaredOMP : public AngleCosineSquared, public ThrOMP { + + public: + AngleCosineSquaredOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/angle_dipole_omp.cpp b/src/USER-OMP/angle_dipole_omp.cpp new file mode 100644 index 0000000000..025ffcd16c --- /dev/null +++ b/src/USER-OMP/angle_dipole_omp.cpp @@ -0,0 +1,125 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "angle_dipole_omp.h" +#include "atom.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include "math_const.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +AngleDipoleOMP::AngleDipoleOMP(class LAMMPS *lmp) + : AngleDipole(lmp), ThrOMP(lmp,THR_ANGLE) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void AngleDipoleOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + if (!force->newton_bond) + error->all(FLERR,"'newton' flag for bonded interactions must be 'on'"); + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nanglelist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (eflag) + eval<1>(ifrom, ito, thr); + else + eval<0>(ifrom, ito, thr); + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region + +} + +template +void AngleDipoleOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int iRef,iDip,iDummy,n,type; + double delx,dely,delz; + double eangle,tangle; + double r,cosGamma,deltaGamma,kdg,rmu; + + const double * const * const x = atom->x; // position vector + const double * const * const mu = atom->mu; // point-dipole components and moment magnitude + double * const * const torque = thr->get_torque(); + const int * const * const anglelist = neighbor->anglelist; + const int nlocal = atom->nlocal; + const double f1[3] = {0.0, 0.0, 0.0}; + const double f3[3] = {0.0, 0.0, 0.0}; + + for (n = nfrom; n < nto; n++) { + iDip = anglelist[n][0]; // dipole whose orientation is to be restrained + iRef = anglelist[n][1]; // reference atom toward which dipole will point + iDummy = anglelist[n][2]; // dummy atom - irrelevant to the interaction + type = anglelist[n][3]; + + delx = x[iRef][0] - x[iDip][0]; + dely = x[iRef][1] - x[iDip][1]; + delz = x[iRef][2] - x[iDip][2]; + + r = sqrt(delx*delx + dely*dely + delz*delz); + + rmu = r * mu[iDip][3]; + cosGamma = (mu[iDip][0]*delx+mu[iDip][1]*dely+mu[iDip][2]*delz) / rmu; + deltaGamma = cosGamma - cos(gamma0[type]); + kdg = k[type] * deltaGamma; + + if (EFLAG) eangle = kdg * deltaGamma; // energy + + tangle = 2.0 * kdg / rmu; + + torque[iDip][0] += tangle * (dely*mu[iDip][2] - delz*mu[iDip][1]); + torque[iDip][1] += tangle * (delz*mu[iDip][0] - delx*mu[iDip][2]); + torque[iDip][2] += tangle * (delx*mu[iDip][1] - dely*mu[iDip][0]); + + if (EFLAG) // tally energy (virial=0 because force=0) + ev_tally_thr(this,iRef,iDip,iDummy,nlocal,/* NEWTON_BOND */ 1, + eangle,f1,f3,0.0,0.0,0.0,0.0,0.0,0.0,thr); + } +} diff --git a/src/USER-OMP/angle_dipole_omp.h b/src/USER-OMP/angle_dipole_omp.h new file mode 100644 index 0000000000..ab7133da34 --- /dev/null +++ b/src/USER-OMP/angle_dipole_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef ANGLE_CLASS + +AngleStyle(dipole/omp,AngleDipoleOMP) + +#else + +#ifndef LMP_ANGLE_DIPOLE_OMP_H +#define LMP_ANGLE_DIPOLE_OMP_H + +#include "angle_dipole.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class AngleDipoleOMP : public AngleDipole, public ThrOMP { + + public: + AngleDipoleOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/angle_harmonic_omp.cpp b/src/USER-OMP/angle_harmonic_omp.cpp new file mode 100644 index 0000000000..c30f830b77 --- /dev/null +++ b/src/USER-OMP/angle_harmonic_omp.cpp @@ -0,0 +1,175 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "angle_harmonic_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include "math_const.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +AngleHarmonicOMP::AngleHarmonicOMP(class LAMMPS *lmp) + : AngleHarmonic(lmp), ThrOMP(lmp,THR_ANGLE) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void AngleHarmonicOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nanglelist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void AngleHarmonicOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,n,type; + double delx1,dely1,delz1,delx2,dely2,delz2; + double eangle,f1[3],f3[3]; + double dtheta,tk; + double rsq1,rsq2,r1,r2,c,s,a,a11,a12,a22; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const anglelist = neighbor->anglelist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = anglelist[n][0]; + i2 = anglelist[n][1]; + i3 = anglelist[n][2]; + type = anglelist[n][3]; + + // 1st bond + + delx1 = x[i1][0] - x[i2][0]; + dely1 = x[i1][1] - x[i2][1]; + delz1 = x[i1][2] - x[i2][2]; + + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + r1 = sqrt(rsq1); + + // 2nd bond + + delx2 = x[i3][0] - x[i2][0]; + dely2 = x[i3][1] - x[i2][1]; + delz2 = x[i3][2] - x[i2][2]; + + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + r2 = sqrt(rsq2); + + // angle (cos and sin) + + c = delx1*delx2 + dely1*dely2 + delz1*delz2; + c /= r1*r2; + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + s = 1.0/s; + + // force & energy + + dtheta = acos(c) - theta0[type]; + tk = k[type] * dtheta; + + if (EFLAG) eangle = tk*dtheta; + + a = -2.0 * tk * s; + a11 = a*c / rsq1; + a12 = -a / (r1*r2); + a22 = a*c / rsq2; + + f1[0] = a11*delx1 + a12*delx2; + f1[1] = a11*dely1 + a12*dely2; + f1[2] = a11*delz1 + a12*delz2; + f3[0] = a22*delx2 + a12*delx1; + f3[1] = a22*dely2 + a12*dely1; + f3[2] = a22*delz2 + a12*delz1; + + // apply force to each of 3 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= f1[0] + f3[0]; + f[i2][1] -= f1[1] + f3[1]; + f[i2][2] -= f1[2] + f3[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3, + delx1,dely1,delz1,delx2,dely2,delz2,thr); + } +} diff --git a/src/USER-OMP/angle_harmonic_omp.h b/src/USER-OMP/angle_harmonic_omp.h new file mode 100644 index 0000000000..80c8f491aa --- /dev/null +++ b/src/USER-OMP/angle_harmonic_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef ANGLE_CLASS + +AngleStyle(harmonic/omp,AngleHarmonicOMP) + +#else + +#ifndef LMP_ANGLE_HARMONIC_OMP_H +#define LMP_ANGLE_HARMONIC_OMP_H + +#include "angle_harmonic.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class AngleHarmonicOMP : public AngleHarmonic, public ThrOMP { + + public: + AngleHarmonicOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/angle_sdk_omp.cpp b/src/USER-OMP/angle_sdk_omp.cpp new file mode 100644 index 0000000000..bd780e0fe0 --- /dev/null +++ b/src/USER-OMP/angle_sdk_omp.cpp @@ -0,0 +1,230 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "angle_sdk_omp.h" +#include "atom.h" +#include "neighbor.h" +#include "domain.h" +#include "comm.h" +#include "force.h" +#include "math_const.h" + +#include + +#include "lj_sdk_common.h" + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; +using namespace LJSDKParms; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +AngleSDKOMP::AngleSDKOMP(class LAMMPS *lmp) + : AngleSDK(lmp), ThrOMP(lmp,THR_ANGLE) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void AngleSDKOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nanglelist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void AngleSDKOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,n,type; + double delx1,dely1,delz1,delx2,dely2,delz2,delx3,dely3,delz3; + double eangle,f1[3],f3[3],e13,f13; + double dtheta,tk; + double rsq1,rsq2,rsq3,r1,r2,c,s,a,a11,a12,a22; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const anglelist = neighbor->anglelist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = anglelist[n][0]; + i2 = anglelist[n][1]; + i3 = anglelist[n][2]; + type = anglelist[n][3]; + + // 1st bond + + delx1 = x[i1][0] - x[i2][0]; + dely1 = x[i1][1] - x[i2][1]; + delz1 = x[i1][2] - x[i2][2]; + + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + r1 = sqrt(rsq1); + + // 2nd bond + + delx2 = x[i3][0] - x[i2][0]; + dely2 = x[i3][1] - x[i2][1]; + delz2 = x[i3][2] - x[i2][2]; + + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + r2 = sqrt(rsq2); + + // angle (cos and sin) + + c = delx1*delx2 + dely1*dely2 + delz1*delz2; + c /= r1*r2; + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + s = 1.0/s; + + // 1-3 LJ interaction. + // we only want to use the repulsive part, + // and it can be scaled (or off). + // so this has to be done here and not in the + // general non-bonded code. + + f13 = e13 = delx3 = dely3 = delz3 = 0.0; + + if (repflag) { + + delx3 = x[i1][0] - x[i3][0]; + dely3 = x[i1][1] - x[i3][1]; + delz3 = x[i1][2] - x[i3][2]; + rsq3 = delx3*delx3 + dely3*dely3 + delz3*delz3; + + const int type1 = atom->type[i1]; + const int type3 = atom->type[i3]; + + if (rsq3 < rminsq[type1][type3]) { + const int ljt = lj_type[type1][type3]; + const double r2inv = 1.0/rsq3; + + if (ljt == LJ12_4) { + const double r4inv=r2inv*r2inv; + + f13 = r4inv*(lj1[type1][type3]*r4inv*r4inv - lj2[type1][type3]); + if (EFLAG) e13 = r4inv*(lj3[type1][type3]*r4inv*r4inv - lj4[type1][type3]); + + } else if (ljt == LJ9_6) { + const double r3inv = r2inv*sqrt(r2inv); + const double r6inv = r3inv*r3inv; + + f13 = r6inv*(lj1[type1][type3]*r3inv - lj2[type1][type3]); + if (EFLAG) e13 = r6inv*(lj3[type1][type3]*r3inv - lj4[type1][type3]); + + } else if (ljt == LJ12_6) { + const double r6inv = r2inv*r2inv*r2inv; + + f13 = r6inv*(lj1[type1][type3]*r6inv - lj2[type1][type3]); + if (EFLAG) e13 = r6inv*(lj3[type1][type3]*r6inv - lj4[type1][type3]); + } + + // make sure energy is 0.0 at the cutoff. + if (EFLAG) e13 -= emin[type1][type3]; + + f13 *= r2inv; + } + } + + // force & energy + + dtheta = acos(c) - theta0[type]; + tk = k[type] * dtheta; + + if (EFLAG) eangle = tk*dtheta; + + a = -2.0 * tk * s; + a11 = a*c / rsq1; + a12 = -a / (r1*r2); + a22 = a*c / rsq2; + + f1[0] = a11*delx1 + a12*delx2; + f1[1] = a11*dely1 + a12*dely2; + f1[2] = a11*delz1 + a12*delz2; + f3[0] = a22*delx2 + a12*delx1; + f3[1] = a22*dely2 + a12*dely1; + f3[2] = a22*delz2 + a12*delz1; + + // apply force to each of the 3 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0] + f13*delx3; + f[i1][1] += f1[1] + f13*dely3; + f[i1][2] += f1[2] + f13*delz3; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= f1[0] + f3[0]; + f[i2][1] -= f1[1] + f3[1]; + f[i2][2] -= f1[2] + f3[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0] - f13*delx3; + f[i3][1] += f3[1] - f13*dely3; + f[i3][2] += f3[2] - f13*delz3; + } + + if (EVFLAG) { + ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3, + delx1,dely1,delz1,delx2,dely2,delz2,thr); + if (repflag) ev_tally13_thr(this,i1,i3,nlocal,NEWTON_BOND, + e13,f13,delx3,dely3,delz3,thr); + } + } +} diff --git a/src/USER-OMP/angle_sdk_omp.h b/src/USER-OMP/angle_sdk_omp.h new file mode 100644 index 0000000000..9ab75904ce --- /dev/null +++ b/src/USER-OMP/angle_sdk_omp.h @@ -0,0 +1,47 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef ANGLE_CLASS + +AngleStyle(sdk/omp,AngleSDKOMP) +AngleStyle(cg/cmm/omp,AngleSDKOMP) + +#else + +#ifndef LMP_ANGLE_SDK_OMP_H +#define LMP_ANGLE_SDK_OMP_H + +#include "angle_sdk.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class AngleSDKOMP : public AngleSDK, public ThrOMP { + + public: + AngleSDKOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/angle_table_omp.cpp b/src/USER-OMP/angle_table_omp.cpp new file mode 100644 index 0000000000..15c32c8943 --- /dev/null +++ b/src/USER-OMP/angle_table_omp.cpp @@ -0,0 +1,175 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "angle_table_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include "math_const.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +AngleTableOMP::AngleTableOMP(class LAMMPS *lmp) + : AngleTable(lmp), ThrOMP(lmp,THR_ANGLE) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void AngleTableOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nanglelist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void AngleTableOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,n,type; + double eangle,f1[3],f3[3]; + double delx1,dely1,delz1,delx2,dely2,delz2; + double rsq1,rsq2,r1,r2,c,s,a,a11,a12,a22; + double theta,u,mdu; //mdu: minus du, -du/dx=f + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const anglelist = neighbor->anglelist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = anglelist[n][0]; + i2 = anglelist[n][1]; + i3 = anglelist[n][2]; + type = anglelist[n][3]; + + // 1st bond + + delx1 = x[i1][0] - x[i2][0]; + dely1 = x[i1][1] - x[i2][1]; + delz1 = x[i1][2] - x[i2][2]; + + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + r1 = sqrt(rsq1); + + // 2nd bond + + delx2 = x[i3][0] - x[i2][0]; + dely2 = x[i3][1] - x[i2][1]; + delz2 = x[i3][2] - x[i2][2]; + + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + r2 = sqrt(rsq2); + + // angle (cos and sin) + + c = delx1*delx2 + dely1*dely2 + delz1*delz2; + c /= r1*r2; + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + s = 1.0/s; + + // tabulated force & energy + + theta = acos(c); + uf_lookup(type,theta,u,mdu); + + if (EFLAG) eangle = u; + + a = mdu * s; + a11 = a*c / rsq1; + a12 = -a / (r1*r2); + a22 = a*c / rsq2; + + f1[0] = a11*delx1 + a12*delx2; + f1[1] = a11*dely1 + a12*dely2; + f1[2] = a11*delz1 + a12*delz2; + f3[0] = a22*delx2 + a12*delx1; + f3[1] = a22*dely2 + a12*dely1; + f3[2] = a22*delz2 + a12*delz1; + + // apply force to each of 3 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= f1[0] + f3[0]; + f[i2][1] -= f1[1] + f3[1]; + f[i2][2] -= f1[2] + f3[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3, + delx1,dely1,delz1,delx2,dely2,delz2,thr); + } +} diff --git a/src/USER-OMP/angle_table_omp.h b/src/USER-OMP/angle_table_omp.h new file mode 100644 index 0000000000..97c9bb65a7 --- /dev/null +++ b/src/USER-OMP/angle_table_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef ANGLE_CLASS + +AngleStyle(table/omp,AngleTableOMP) + +#else + +#ifndef LMP_ANGLE_TABLE_OMP_H +#define LMP_ANGLE_TABLE_OMP_H + +#include "angle_table.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class AngleTableOMP : public AngleTable, public ThrOMP { + + public: + AngleTableOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/bond_class2_omp.cpp b/src/USER-OMP/bond_class2_omp.cpp new file mode 100644 index 0000000000..580d9b9804 --- /dev/null +++ b/src/USER-OMP/bond_class2_omp.cpp @@ -0,0 +1,132 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "bond_class2_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; + + +/* ---------------------------------------------------------------------- */ + +BondClass2OMP::BondClass2OMP(class LAMMPS *lmp) + : BondClass2(lmp), ThrOMP(lmp,THR_BOND) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void BondClass2OMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nbondlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void BondClass2OMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,n,type; + double delx,dely,delz,ebond,fbond; + double rsq,r,dr,dr2,dr3,dr4,de_bond; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const bondlist = neighbor->bondlist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = bondlist[n][0]; + i2 = bondlist[n][1]; + type = bondlist[n][2]; + + delx = x[i1][0] - x[i2][0]; + dely = x[i1][1] - x[i2][1]; + delz = x[i1][2] - x[i2][2]; + + rsq = delx*delx + dely*dely + delz*delz; + r = sqrt(rsq); + dr = r - r0[type]; + dr2 = dr*dr; + dr3 = dr2*dr; + dr4 = dr3*dr; + + // force & energy + + de_bond = 2.0*k2[type]*dr + 3.0*k3[type]*dr2 + 4.0*k4[type]*dr3; + if (r > 0.0) fbond = -de_bond/r; + else fbond = 0.0; + + if (EFLAG) ebond = k2[type]*dr2 + k3[type]*dr3 + k4[type]*dr4; + + // apply force to each of 2 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += delx*fbond; + f[i1][1] += dely*fbond; + f[i1][2] += delz*fbond; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= delx*fbond; + f[i2][1] -= dely*fbond; + f[i2][2] -= delz*fbond; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND, + ebond,fbond,delx,dely,delz,thr); + } +} diff --git a/src/USER-OMP/bond_class2_omp.h b/src/USER-OMP/bond_class2_omp.h new file mode 100644 index 0000000000..845e28f592 --- /dev/null +++ b/src/USER-OMP/bond_class2_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef BOND_CLASS + +BondStyle(class2/omp,BondClass2OMP) + +#else + +#ifndef LMP_BOND_CLASS2_OMP_H +#define LMP_BOND_CLASS2_OMP_H + +#include "bond_class2.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class BondClass2OMP : public BondClass2, public ThrOMP { + + public: + BondClass2OMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/bond_fene_expand_omp.cpp b/src/USER-OMP/bond_fene_expand_omp.cpp new file mode 100644 index 0000000000..4706ef8232 --- /dev/null +++ b/src/USER-OMP/bond_fene_expand_omp.cpp @@ -0,0 +1,162 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "bond_fene_expand_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" +#include "error.h" +#include "update.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +BondFENEExpandOMP::BondFENEExpandOMP(class LAMMPS *lmp) + : BondFENEExpand(lmp), ThrOMP(lmp,THR_BOND) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void BondFENEExpandOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nbondlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void BondFENEExpandOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,n,type; + double delx,dely,delz,ebond,fbond; + double rsq,r0sq,rlogarg,sr2,sr6; + double r,rshift,rshiftsq; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const bondlist = neighbor->bondlist; + const int nlocal = atom->nlocal; + const int tid = thr->get_tid(); + + for (n = nfrom; n < nto; n++) { + i1 = bondlist[n][0]; + i2 = bondlist[n][1]; + type = bondlist[n][2]; + + delx = x[i1][0] - x[i2][0]; + dely = x[i1][1] - x[i2][1]; + delz = x[i1][2] - x[i2][2]; + + rsq = delx*delx + dely*dely + delz*delz; + r = sqrt(rsq); + rshift = r - shift[type]; + rshiftsq = rshift*rshift; + r0sq = r0[type] * r0[type]; + rlogarg = 1.0 - rshiftsq/r0sq; + + // if r -> r0, then rlogarg < 0.0 which is an error + // issue a warning and reset rlogarg = epsilon + // if r > 2*r0 something serious is wrong, abort + + if (rlogarg < 0.1) { + char str[128]; + + sprintf(str,"FENE bond too long: " BIGINT_FORMAT " %d %d %g", + update->ntimestep,atom->tag[i1],atom->tag[i2],sqrt(rsq)); + error->warning(FLERR,str,0); + + if (check_error_thr((rlogarg <= -3.0),tid,FLERR,"Bad FENE bond")) + return; + + rlogarg = 0.1; + } + + fbond = -k[type]*rshift/rlogarg/r; + + // force from LJ term + + if (rshiftsq < TWO_1_3*sigma[type]*sigma[type]) { + sr2 = sigma[type]*sigma[type]/rshiftsq; + sr6 = sr2*sr2*sr2; + fbond += 48.0*epsilon[type]*sr6*(sr6-0.5)/rshift/r; + } + + // energy + + if (EFLAG) { + ebond = -0.5 * k[type]*r0sq*log(rlogarg); + if (rshiftsq < TWO_1_3*sigma[type]*sigma[type]) + ebond += 4.0*epsilon[type]*sr6*(sr6-1.0) + epsilon[type]; + } + + // apply force to each of 2 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += delx*fbond; + f[i1][1] += dely*fbond; + f[i1][2] += delz*fbond; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= delx*fbond; + f[i2][1] -= dely*fbond; + f[i2][2] -= delz*fbond; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND, + ebond,fbond,delx,dely,delz,thr); + } +} diff --git a/src/USER-OMP/bond_fene_expand_omp.h b/src/USER-OMP/bond_fene_expand_omp.h new file mode 100644 index 0000000000..317b81e5de --- /dev/null +++ b/src/USER-OMP/bond_fene_expand_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef BOND_CLASS + +BondStyle(fene/expand/omp,BondFENEExpandOMP) + +#else + +#ifndef LMP_BOND_FENE_EXPAND_OMP_H +#define LMP_BOND_FENE_EXPAND_OMP_H + +#include "bond_fene_expand.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class BondFENEExpandOMP : public BondFENEExpand, public ThrOMP { + + public: + BondFENEExpandOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/bond_fene_omp.cpp b/src/USER-OMP/bond_fene_omp.cpp new file mode 100644 index 0000000000..34f90ea3a0 --- /dev/null +++ b/src/USER-OMP/bond_fene_omp.cpp @@ -0,0 +1,158 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "bond_fene_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" +#include "error.h" +#include "update.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +BondFENEOMP::BondFENEOMP(class LAMMPS *lmp) + : BondFENE(lmp), ThrOMP(lmp,THR_BOND) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void BondFENEOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nbondlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void BondFENEOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,n,type; + double delx,dely,delz,ebond,fbond; + double rsq,r0sq,rlogarg,sr2,sr6; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const bondlist = neighbor->bondlist; + const int nlocal = atom->nlocal; + const int tid = thr->get_tid(); + + for (n = nfrom; n < nto; n++) { + i1 = bondlist[n][0]; + i2 = bondlist[n][1]; + type = bondlist[n][2]; + + delx = x[i1][0] - x[i2][0]; + dely = x[i1][1] - x[i2][1]; + delz = x[i1][2] - x[i2][2]; + + rsq = delx*delx + dely*dely + delz*delz; + r0sq = r0[type] * r0[type]; + rlogarg = 1.0 - rsq/r0sq; + + // if r -> r0, then rlogarg < 0.0 which is an error + // issue a warning and reset rlogarg = epsilon + // if r > 2*r0 something serious is wrong, abort + + if (rlogarg < 0.1) { + char str[128]; + + sprintf(str,"FENE bond too long: " BIGINT_FORMAT " %d %d %g", + update->ntimestep,atom->tag[i1],atom->tag[i2],sqrt(rsq)); + error->warning(FLERR,str,0); + + if (check_error_thr((rlogarg <= -3.0),tid,FLERR,"Bad FENE bond")) + return; + + rlogarg = 0.1; + } + + fbond = -k[type]/rlogarg; + + // force from LJ term + + if (rsq < TWO_1_3*sigma[type]*sigma[type]) { + sr2 = sigma[type]*sigma[type]/rsq; + sr6 = sr2*sr2*sr2; + fbond += 48.0*epsilon[type]*sr6*(sr6-0.5)/rsq; + } + + // energy + + if (EFLAG) { + ebond = -0.5 * k[type]*r0sq*log(rlogarg); + if (rsq < TWO_1_3*sigma[type]*sigma[type]) + ebond += 4.0*epsilon[type]*sr6*(sr6-1.0) + epsilon[type]; + } + + // apply force to each of 2 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += delx*fbond; + f[i1][1] += dely*fbond; + f[i1][2] += delz*fbond; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= delx*fbond; + f[i2][1] -= dely*fbond; + f[i2][2] -= delz*fbond; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND, + ebond,fbond,delx,dely,delz,thr); + } +} diff --git a/src/USER-OMP/bond_fene_omp.h b/src/USER-OMP/bond_fene_omp.h new file mode 100644 index 0000000000..13e843fe13 --- /dev/null +++ b/src/USER-OMP/bond_fene_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef BOND_CLASS + +BondStyle(fene/omp,BondFENEOMP) + +#else + +#ifndef LMP_BOND_FENE_OMP_H +#define LMP_BOND_FENE_OMP_H + +#include "bond_fene.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class BondFENEOMP : public BondFENE, public ThrOMP { + + public: + BondFENEOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/bond_harmonic_omp.cpp b/src/USER-OMP/bond_harmonic_omp.cpp new file mode 100644 index 0000000000..dc8bab1c54 --- /dev/null +++ b/src/USER-OMP/bond_harmonic_omp.cpp @@ -0,0 +1,128 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "bond_harmonic_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +BondHarmonicOMP::BondHarmonicOMP(class LAMMPS *lmp) + : BondHarmonic(lmp), ThrOMP(lmp,THR_BOND) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void BondHarmonicOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nbondlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void BondHarmonicOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,n,type; + double delx,dely,delz,ebond,fbond; + double rsq,r,dr,rk; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const bondlist = neighbor->bondlist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = bondlist[n][0]; + i2 = bondlist[n][1]; + type = bondlist[n][2]; + + delx = x[i1][0] - x[i2][0]; + dely = x[i1][1] - x[i2][1]; + delz = x[i1][2] - x[i2][2]; + + rsq = delx*delx + dely*dely + delz*delz; + r = sqrt(rsq); + dr = r - r0[type]; + rk = k[type] * dr; + + // force & energy + + if (r > 0.0) fbond = -2.0*rk/r; + else fbond = 0.0; + + if (EFLAG) ebond = rk*dr; + + // apply force to each of 2 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += delx*fbond; + f[i1][1] += dely*fbond; + f[i1][2] += delz*fbond; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= delx*fbond; + f[i2][1] -= dely*fbond; + f[i2][2] -= delz*fbond; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND, + ebond,fbond,delx,dely,delz,thr); + } +} diff --git a/src/USER-OMP/bond_harmonic_omp.h b/src/USER-OMP/bond_harmonic_omp.h new file mode 100644 index 0000000000..0a81b52f41 --- /dev/null +++ b/src/USER-OMP/bond_harmonic_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef BOND_CLASS + +BondStyle(harmonic/omp,BondHarmonicOMP) + +#else + +#ifndef LMP_BOND_HARMONIC_OMP_H +#define LMP_BOND_HARMONIC_OMP_H + +#include "bond_harmonic.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class BondHarmonicOMP : public BondHarmonic, public ThrOMP { + + public: + BondHarmonicOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/bond_harmonic_shift_cut_omp.cpp b/src/USER-OMP/bond_harmonic_shift_cut_omp.cpp new file mode 100644 index 0000000000..97f7715bfb --- /dev/null +++ b/src/USER-OMP/bond_harmonic_shift_cut_omp.cpp @@ -0,0 +1,132 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "bond_harmonic_shift_cut_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +BondHarmonicShiftCutOMP::BondHarmonicShiftCutOMP(class LAMMPS *lmp) + : BondHarmonicShiftCut(lmp), ThrOMP(lmp,THR_BOND) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void BondHarmonicShiftCutOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nbondlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void BondHarmonicShiftCutOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,n,type; + double delx,dely,delz,ebond,fbond; + double rsq,r,dr,rk; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const bondlist = neighbor->bondlist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = bondlist[n][0]; + i2 = bondlist[n][1]; + type = bondlist[n][2]; + + delx = x[i1][0] - x[i2][0]; + dely = x[i1][1] - x[i2][1]; + delz = x[i1][2] - x[i2][2]; + + rsq = delx*delx + dely*dely + delz*delz; + r = sqrt(rsq); + + if (r>r1[type]) continue; + + dr = r - r0[type]; + rk = k[type] * dr; + + // force & energy + + if (r > 0.0) fbond = -2.0*rk/r; + else fbond = 0.0; + + if (EFLAG) + ebond = k[type]*(dr*dr -(r0[type]-r1[type])*(r0[type]-r1[type]) ); + + // apply force to each of 2 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += delx*fbond; + f[i1][1] += dely*fbond; + f[i1][2] += delz*fbond; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= delx*fbond; + f[i2][1] -= dely*fbond; + f[i2][2] -= delz*fbond; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND, + ebond,fbond,delx,dely,delz,thr); + } +} diff --git a/src/USER-OMP/bond_harmonic_shift_cut_omp.h b/src/USER-OMP/bond_harmonic_shift_cut_omp.h new file mode 100644 index 0000000000..f465ff5b3a --- /dev/null +++ b/src/USER-OMP/bond_harmonic_shift_cut_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef BOND_CLASS + +BondStyle(harmonic/shift/cut/omp,BondHarmonicShiftCutOMP) + +#else + +#ifndef LMP_BOND_HARMONIC_SHIFT_CUT_OMP_H +#define LMP_BOND_HARMONIC_SHIFT_CUT_OMP_H + +#include "bond_harmonic_shift_cut.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class BondHarmonicShiftCutOMP : public BondHarmonicShiftCut, public ThrOMP { + + public: + BondHarmonicShiftCutOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/bond_harmonic_shift_omp.cpp b/src/USER-OMP/bond_harmonic_shift_omp.cpp new file mode 100644 index 0000000000..49d985b871 --- /dev/null +++ b/src/USER-OMP/bond_harmonic_shift_omp.cpp @@ -0,0 +1,129 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "bond_harmonic_shift_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +BondHarmonicShiftOMP::BondHarmonicShiftOMP(class LAMMPS *lmp) + : BondHarmonicShift(lmp), ThrOMP(lmp,THR_BOND) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void BondHarmonicShiftOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nbondlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void BondHarmonicShiftOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,n,type; + double delx,dely,delz,ebond,fbond; + double rsq,r,dr,rk; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const bondlist = neighbor->bondlist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = bondlist[n][0]; + i2 = bondlist[n][1]; + type = bondlist[n][2]; + + delx = x[i1][0] - x[i2][0]; + dely = x[i1][1] - x[i2][1]; + delz = x[i1][2] - x[i2][2]; + + rsq = delx*delx + dely*dely + delz*delz; + r = sqrt(rsq); + dr = r - r0[type]; + rk = k[type] * dr; + + // force & energy + + if (r > 0.0) fbond = -2.0*rk/r; + else fbond = 0.0; + + if (EFLAG) + ebond = k[type]*(dr*dr -(r0[type]-r1[type])*(r0[type]-r1[type]) ); + + // apply force to each of 2 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += delx*fbond; + f[i1][1] += dely*fbond; + f[i1][2] += delz*fbond; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= delx*fbond; + f[i2][1] -= dely*fbond; + f[i2][2] -= delz*fbond; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND, + ebond,fbond,delx,dely,delz,thr); + } +} diff --git a/src/USER-OMP/bond_harmonic_shift_omp.h b/src/USER-OMP/bond_harmonic_shift_omp.h new file mode 100644 index 0000000000..58ee6ce23d --- /dev/null +++ b/src/USER-OMP/bond_harmonic_shift_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef BOND_CLASS + +BondStyle(harmonic/shift/omp,BondHarmonicShiftOMP) + +#else + +#ifndef LMP_BOND_HARMONIC_SHIFT_OMP_H +#define LMP_BOND_HARMONIC_SHIFT_OMP_H + +#include "bond_harmonic_shift.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class BondHarmonicShiftOMP : public BondHarmonicShift, public ThrOMP { + + public: + BondHarmonicShiftOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/bond_morse_omp.cpp b/src/USER-OMP/bond_morse_omp.cpp new file mode 100644 index 0000000000..21063c636a --- /dev/null +++ b/src/USER-OMP/bond_morse_omp.cpp @@ -0,0 +1,129 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "bond_morse_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +BondMorseOMP::BondMorseOMP(class LAMMPS *lmp) + : BondMorse(lmp), ThrOMP(lmp,THR_BOND) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void BondMorseOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nbondlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void BondMorseOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,n,type; + double delx,dely,delz,ebond,fbond; + double rsq,r,dr,ralpha; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const bondlist = neighbor->bondlist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = bondlist[n][0]; + i2 = bondlist[n][1]; + type = bondlist[n][2]; + + delx = x[i1][0] - x[i2][0]; + dely = x[i1][1] - x[i2][1]; + delz = x[i1][2] - x[i2][2]; + + rsq = delx*delx + dely*dely + delz*delz; + r = sqrt(rsq); + + dr = r - r0[type]; + ralpha = exp(-alpha[type]*dr); + + // force & energy + + if (r > 0.0) fbond = -2.0*d0[type]*alpha[type]*(1-ralpha)*ralpha/r; + else fbond = 0.0; + + if (EFLAG) ebond = d0[type]*(1-ralpha)*(1-ralpha); + + // apply force to each of 2 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += delx*fbond; + f[i1][1] += dely*fbond; + f[i1][2] += delz*fbond; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= delx*fbond; + f[i2][1] -= dely*fbond; + f[i2][2] -= delz*fbond; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND, + ebond,fbond,delx,dely,delz,thr); + } +} diff --git a/src/USER-OMP/bond_morse_omp.h b/src/USER-OMP/bond_morse_omp.h new file mode 100644 index 0000000000..09d3a4458d --- /dev/null +++ b/src/USER-OMP/bond_morse_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef BOND_CLASS + +BondStyle(morse/omp,BondMorseOMP) + +#else + +#ifndef LMP_BOND_MORSE_OMP_H +#define LMP_BOND_MORSE_OMP_H + +#include "bond_morse.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class BondMorseOMP : public BondMorse, public ThrOMP { + + public: + BondMorseOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/bond_nonlinear_omp.cpp b/src/USER-OMP/bond_nonlinear_omp.cpp new file mode 100644 index 0000000000..9e61ca3215 --- /dev/null +++ b/src/USER-OMP/bond_nonlinear_omp.cpp @@ -0,0 +1,129 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "bond_nonlinear_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +BondNonlinearOMP::BondNonlinearOMP(class LAMMPS *lmp) + : BondNonlinear(lmp), ThrOMP(lmp,THR_BOND) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void BondNonlinearOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nbondlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void BondNonlinearOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,n,type; + double delx,dely,delz,ebond,fbond; + double rsq,r,dr,drsq,lamdasq,denom,denomsq; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const bondlist = neighbor->bondlist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = bondlist[n][0]; + i2 = bondlist[n][1]; + type = bondlist[n][2]; + + delx = x[i1][0] - x[i2][0]; + dely = x[i1][1] - x[i2][1]; + delz = x[i1][2] - x[i2][2]; + + rsq = delx*delx + dely*dely + delz*delz; + r = sqrt(rsq); + dr = r - r0[type]; + drsq = dr*dr; + lamdasq = lamda[type]*lamda[type]; + denom = lamdasq - drsq; + denomsq = denom*denom; + + // force & energy + + fbond = -epsilon[type]/r * 2.0*dr*lamdasq/denomsq; + if (EFLAG) ebond = epsilon[type] * drsq / denom; + + // apply force to each of 2 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += delx*fbond; + f[i1][1] += dely*fbond; + f[i1][2] += delz*fbond; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= delx*fbond; + f[i2][1] -= dely*fbond; + f[i2][2] -= delz*fbond; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND, + ebond,fbond,delx,dely,delz,thr); + } +} diff --git a/src/USER-OMP/bond_nonlinear_omp.h b/src/USER-OMP/bond_nonlinear_omp.h new file mode 100644 index 0000000000..fcb0fa325d --- /dev/null +++ b/src/USER-OMP/bond_nonlinear_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef BOND_CLASS + +BondStyle(nonlinear/omp,BondNonlinearOMP) + +#else + +#ifndef LMP_BOND_NONLINEAR_OMP_H +#define LMP_BOND_NONLINEAR_OMP_H + +#include "bond_nonlinear.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class BondNonlinearOMP : public BondNonlinear, public ThrOMP { + + public: + BondNonlinearOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/bond_quartic_omp.cpp b/src/USER-OMP/bond_quartic_omp.cpp new file mode 100644 index 0000000000..79d7621a61 --- /dev/null +++ b/src/USER-OMP/bond_quartic_omp.cpp @@ -0,0 +1,197 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "bond_quartic_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" +#include "pair.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +BondQuarticOMP::BondQuarticOMP(class LAMMPS *lmp) + : BondQuartic(lmp), ThrOMP(lmp,THR_BOND) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void BondQuarticOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + // insure pair->ev_tally() will use 1-4 virial contribution + + if (vflag_global == 2) + force->pair->vflag_either = force->pair->vflag_global = 1; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nbondlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void BondQuarticOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,n,m,type,itype,jtype; + double delx,dely,delz,ebond,fbond,evdwl,fpair; + double r,rsq,dr,r2,ra,rb,sr2,sr6; + + ebond = evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + int * const * const bondlist = neighbor->bondlist; + const double * const * const cutsq = force->pair->cutsq; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + + // skip bond if already broken + + if (bondlist[n][2] <= 0) continue; + + i1 = bondlist[n][0]; + i2 = bondlist[n][1]; + type = bondlist[n][2]; + + delx = x[i1][0] - x[i2][0]; + dely = x[i1][1] - x[i2][1]; + delz = x[i1][2] - x[i2][2]; + + rsq = delx*delx + dely*dely + delz*delz; + + // if bond breaks, set type to 0 + // both in temporary bondlist and permanent bond_type + // if this proc owns both atoms, + // negate bond_type twice if other atom stores it + // if other proc owns 2nd atom, other proc will also break bond + + if (rsq > rc[type]*rc[type]) { + bondlist[n][2] = 0; + for (m = 0; m < atom->num_bond[i1]; m++) + if (atom->bond_atom[i1][m] == atom->tag[i2]) + atom->bond_type[i1][m] = 0; + if (i2 < atom->nlocal) + for (m = 0; m < atom->num_bond[i2]; m++) + if (atom->bond_atom[i2][m] == atom->tag[i1]) + atom->bond_type[i2][m] = 0; + continue; + } + + // quartic bond + // 1st portion is from quartic term + // 2nd portion is from LJ term cut at 2^(1/6) with eps = sigma = 1.0 + + r = sqrt(rsq); + dr = r - rc[type]; + r2 = dr*dr; + ra = dr - b1[type]; + rb = dr - b2[type]; + fbond = -k[type]/r * (r2*(ra+rb) + 2.0*dr*ra*rb); + + if (rsq < TWO_1_3) { + sr2 = 1.0/rsq; + sr6 = sr2*sr2*sr2; + fbond += 48.0*sr6*(sr6-0.5)/rsq; + } + + if (EFLAG) { + ebond = k[type]*r2*ra*rb + u0[type]; + if (rsq < TWO_1_3) ebond += 4.0*sr6*(sr6-1.0) + 1.0; + } + + // apply force to each of 2 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += delx*fbond; + f[i1][1] += dely*fbond; + f[i1][2] += delz*fbond; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= delx*fbond; + f[i2][1] -= dely*fbond; + f[i2][2] -= delz*fbond; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND,ebond,fbond,delx,dely,delz,thr); + + // subtract out pairwise contribution from 2 atoms via pair->single() + // required since special_bond = 1,1,1 + // tally energy/virial in pair, using newton_bond as newton flag + + itype = atom->type[i1]; + jtype = atom->type[i2]; + + if (rsq < cutsq[itype][jtype]) { + evdwl = -force->pair->single(i1,i2,itype,jtype,rsq,1.0,1.0,fpair); + fpair = -fpair; + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += delx*fpair; + f[i1][1] += dely*fpair; + f[i1][2] += delz*fpair; + } + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= delx*fpair; + f[i2][1] -= dely*fpair; + f[i2][2] -= delz*fpair; + } + + if (EVFLAG) ev_tally_thr(force->pair,i1,i2,nlocal,NEWTON_BOND, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } +} diff --git a/src/USER-OMP/bond_quartic_omp.h b/src/USER-OMP/bond_quartic_omp.h new file mode 100644 index 0000000000..3a249a6cac --- /dev/null +++ b/src/USER-OMP/bond_quartic_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef BOND_CLASS + +BondStyle(quartic/omp,BondQuarticOMP) + +#else + +#ifndef LMP_BOND_QUARTIC_OMP_H +#define LMP_BOND_QUARTIC_OMP_H + +#include "bond_quartic.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class BondQuarticOMP : public BondQuartic, public ThrOMP { + + public: + BondQuarticOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/bond_table_omp.cpp b/src/USER-OMP/bond_table_omp.cpp new file mode 100644 index 0000000000..3184991315 --- /dev/null +++ b/src/USER-OMP/bond_table_omp.cpp @@ -0,0 +1,126 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "bond_table_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +BondTableOMP::BondTableOMP(class LAMMPS *lmp) + : BondTable(lmp), ThrOMP(lmp,THR_BOND) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void BondTableOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nbondlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void BondTableOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,n,type; + double delx,dely,delz,ebond,fbond; + double rsq,r; + double u,mdu; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const bondlist = neighbor->bondlist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = bondlist[n][0]; + i2 = bondlist[n][1]; + type = bondlist[n][2]; + + delx = x[i1][0] - x[i2][0]; + dely = x[i1][1] - x[i2][1]; + delz = x[i1][2] - x[i2][2]; + + rsq = delx*delx + dely*dely + delz*delz; + r = sqrt(rsq); + + // force & energy + + uf_lookup(type,r,u,mdu); + fbond = mdu/r; + ebond = u; + + // apply force to each of 2 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += delx*fbond; + f[i1][1] += dely*fbond; + f[i1][2] += delz*fbond; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= delx*fbond; + f[i2][1] -= dely*fbond; + f[i2][2] -= delz*fbond; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND, + ebond,fbond,delx,dely,delz,thr); + } +} diff --git a/src/USER-OMP/bond_table_omp.h b/src/USER-OMP/bond_table_omp.h new file mode 100644 index 0000000000..37e057ee26 --- /dev/null +++ b/src/USER-OMP/bond_table_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef BOND_CLASS + +BondStyle(table/omp,BondTableOMP) + +#else + +#ifndef LMP_BOND_TABLE_OMP_H +#define LMP_BOND_TABLE_OMP_H + +#include "bond_table.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class BondTableOMP : public BondTable, public ThrOMP { + + public: + BondTableOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/dihedral_charmm_omp.cpp b/src/USER-OMP/dihedral_charmm_omp.cpp new file mode 100644 index 0000000000..b069af43c5 --- /dev/null +++ b/src/USER-OMP/dihedral_charmm_omp.cpp @@ -0,0 +1,322 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "mpi.h" +#include "math.h" +#include "dihedral_charmm_omp.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "domain.h" +#include "force.h" +#include "pair.h" +#include "update.h" +#include "error.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define TOLERANCE 0.05 +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +DihedralCharmmOMP::DihedralCharmmOMP(class LAMMPS *lmp) + : DihedralCharmm(lmp), ThrOMP(lmp,THR_DIHEDRAL|THR_CHARMM) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void DihedralCharmmOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + // insure pair->ev_tally() will use 1-4 virial contribution + + if (weightflag && vflag_global == 2) + force->pair->vflag_either = force->pair->vflag_global = 1; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->ndihedrallist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void DihedralCharmmOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + + int i1,i2,i3,i4,i,m,n,type; + double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm; + double edihedral,f1[3],f2[3],f3[3],f4[3]; + double ax,ay,az,bx,by,bz,rasq,rbsq,rgsq,rg,rginv,ra2inv,rb2inv,rabinv; + double df,df1,ddf1,fg,hg,fga,hgb,gaa,gbb; + double dtfx,dtfy,dtfz,dtgx,dtgy,dtgz,dthx,dthy,dthz; + double c,s,p,sx2,sy2,sz2; + int itype,jtype; + double delx,dely,delz,rsq,r2inv,r6inv; + double forcecoul,forcelj,fpair,ecoul,evdwl; + + edihedral = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const atomtype = atom->type; + const int * const * const dihedrallist = neighbor->dihedrallist; + const double qqrd2e = force->qqrd2e; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = dihedrallist[n][0]; + i2 = dihedrallist[n][1]; + i3 = dihedrallist[n][2]; + i4 = dihedrallist[n][3]; + type = dihedrallist[n][4]; + + // 1st bond + + vb1x = x[i1][0] - x[i2][0]; + vb1y = x[i1][1] - x[i2][1]; + vb1z = x[i1][2] - x[i2][2]; + + // 2nd bond + + vb2x = x[i3][0] - x[i2][0]; + vb2y = x[i3][1] - x[i2][1]; + vb2z = x[i3][2] - x[i2][2]; + + vb2xm = -vb2x; + vb2ym = -vb2y; + vb2zm = -vb2z; + + // 3rd bond + + vb3x = x[i4][0] - x[i3][0]; + vb3y = x[i4][1] - x[i3][1]; + vb3z = x[i4][2] - x[i3][2]; + + // c,s calculation + + ax = vb1y*vb2zm - vb1z*vb2ym; + ay = vb1z*vb2xm - vb1x*vb2zm; + az = vb1x*vb2ym - vb1y*vb2xm; + bx = vb3y*vb2zm - vb3z*vb2ym; + by = vb3z*vb2xm - vb3x*vb2zm; + bz = vb3x*vb2ym - vb3y*vb2xm; + + rasq = ax*ax + ay*ay + az*az; + rbsq = bx*bx + by*by + bz*bz; + rgsq = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm; + rg = sqrt(rgsq); + + rginv = ra2inv = rb2inv = 0.0; + if (rg > 0) rginv = 1.0/rg; + if (rasq > 0) ra2inv = 1.0/rasq; + if (rbsq > 0) rb2inv = 1.0/rbsq; + rabinv = sqrt(ra2inv*rb2inv); + + c = (ax*bx + ay*by + az*bz)*rabinv; + s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z); + + // error check + + if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) { + int me = comm->me; + + if (screen) { + char str[128]; + sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", + me,thr->get_tid(),update->ntimestep, + atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); + error->warning(FLERR,str,0); + fprintf(screen," 1st atom: %d %g %g %g\n", + me,x[i1][0],x[i1][1],x[i1][2]); + fprintf(screen," 2nd atom: %d %g %g %g\n", + me,x[i2][0],x[i2][1],x[i2][2]); + fprintf(screen," 3rd atom: %d %g %g %g\n", + me,x[i3][0],x[i3][1],x[i3][2]); + fprintf(screen," 4th atom: %d %g %g %g\n", + me,x[i4][0],x[i4][1],x[i4][2]); + } + } + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + m = multiplicity[type]; + p = 1.0; + ddf1 = df1 = 0.0; + + for (i = 0; i < m; i++) { + ddf1 = p*c - df1*s; + df1 = p*s + df1*c; + p = ddf1; + } + + p = p*cos_shift[type] + df1*sin_shift[type]; + df1 = df1*cos_shift[type] - ddf1*sin_shift[type]; + df1 *= -m; + p += 1.0; + + if (m == 0) { + p = 1.0 + cos_shift[type]; + df1 = 0.0; + } + + if (EFLAG) edihedral = k[type] * p; + + fg = vb1x*vb2xm + vb1y*vb2ym + vb1z*vb2zm; + hg = vb3x*vb2xm + vb3y*vb2ym + vb3z*vb2zm; + fga = fg*ra2inv*rginv; + hgb = hg*rb2inv*rginv; + gaa = -ra2inv*rg; + gbb = rb2inv*rg; + + dtfx = gaa*ax; + dtfy = gaa*ay; + dtfz = gaa*az; + dtgx = fga*ax - hgb*bx; + dtgy = fga*ay - hgb*by; + dtgz = fga*az - hgb*bz; + dthx = gbb*bx; + dthy = gbb*by; + dthz = gbb*bz; + + df = -k[type] * df1; + + sx2 = df*dtgx; + sy2 = df*dtgy; + sz2 = df*dtgz; + + f1[0] = df*dtfx; + f1[1] = df*dtfy; + f1[2] = df*dtfz; + + f2[0] = sx2 - f1[0]; + f2[1] = sy2 - f1[1]; + f2[2] = sz2 - f1[2]; + + f4[0] = df*dthx; + f4[1] = df*dthy; + f4[2] = df*dthz; + + f3[0] = -sx2 - f4[0]; + f3[1] = -sy2 - f4[1]; + f3[2] = -sz2 - f4[2]; + + // apply force to each of 4 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] += f2[0]; + f[i2][1] += f2[1]; + f[i2][2] += f2[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (NEWTON_BOND || i4 < nlocal) { + f[i4][0] += f4[0]; + f[i4][1] += f4[1]; + f[i4][2] += f4[2]; + } + + if (EVFLAG) + ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4, + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); + // 1-4 LJ and Coulomb interactions + // tally energy/virial in pair, using newton_bond as newton flag + + if (weight[type] > 0.0) { + itype = atomtype[i1]; + jtype = atomtype[i4]; + + delx = x[i1][0] - x[i4][0]; + dely = x[i1][1] - x[i4][1]; + delz = x[i1][2] - x[i4][2]; + rsq = delx*delx + dely*dely + delz*delz; + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + + if (implicit) forcecoul = qqrd2e * q[i1]*q[i4]*r2inv; + else forcecoul = qqrd2e * q[i1]*q[i4]*sqrt(r2inv); + forcelj = r6inv * (lj14_1[itype][jtype]*r6inv - lj14_2[itype][jtype]); + fpair = weight[type] * (forcelj+forcecoul)*r2inv; + + if (EFLAG) { + ecoul = weight[type] * forcecoul; + evdwl = r6inv * (lj14_3[itype][jtype]*r6inv - lj14_4[itype][jtype]); + evdwl *= weight[type]; + } + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += delx*fpair; + f[i1][1] += dely*fpair; + f[i1][2] += delz*fpair; + } + if (NEWTON_BOND || i4 < nlocal) { + f[i4][0] -= delx*fpair; + f[i4][1] -= dely*fpair; + f[i4][2] -= delz*fpair; + } + + if (EVFLAG) ev_tally_thr(force->pair,i1,i4,nlocal,NEWTON_BOND, + evdwl,ecoul,fpair,delx,dely,delz,thr); + } + } +} diff --git a/src/USER-OMP/dihedral_charmm_omp.h b/src/USER-OMP/dihedral_charmm_omp.h new file mode 100644 index 0000000000..cd0ae5f8ea --- /dev/null +++ b/src/USER-OMP/dihedral_charmm_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef DIHEDRAL_CLASS + +DihedralStyle(charmm/omp,DihedralCharmmOMP) + +#else + +#ifndef LMP_DIHEDRAL_CHARMM_OMP_H +#define LMP_DIHEDRAL_CHARMM_OMP_H + +#include "dihedral_charmm.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class DihedralCharmmOMP : public DihedralCharmm, public ThrOMP { + + public: + DihedralCharmmOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/dihedral_class2_omp.cpp b/src/USER-OMP/dihedral_class2_omp.cpp new file mode 100644 index 0000000000..d21c2869e9 --- /dev/null +++ b/src/USER-OMP/dihedral_class2_omp.cpp @@ -0,0 +1,531 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "dihedral_class2_omp.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "domain.h" +#include "force.h" +#include "update.h" +#include "error.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define TOLERANCE 0.05 +#define SMALL 0.0000001 + +/* ---------------------------------------------------------------------- */ + +DihedralClass2OMP::DihedralClass2OMP(class LAMMPS *lmp) + : DihedralClass2(lmp), ThrOMP(lmp,THR_DIHEDRAL) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void DihedralClass2OMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->ndihedrallist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void DihedralClass2OMP::eval(int nfrom, int nto, ThrData * const thr) +{ + + int i1,i2,i3,i4,i,j,k,n,type; + double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm; + double edihedral; + double r1mag2,r1,r2mag2,r2,r3mag2,r3; + double sb1,rb1,sb2,rb2,sb3,rb3,c0,r12c1; + double r12c2,costh12,costh13,costh23,sc1,sc2,s1,s2,c; + double cosphi,phi,sinphi,a11,a22,a33,a12,a13,a23,sx1,sx2; + double sx12,sy1,sy2,sy12,sz1,sz2,sz12,dphi1,dphi2,dphi3; + double de_dihedral,t1,t2,t3,t4,cos2phi,cos3phi,bt1,bt2; + double bt3,sumbte,db,sumbtf,at1,at2,at3,da,da1,da2,r1_0; + double r3_0,dr1,dr2,tk1,tk2,s12,sin2; + double dcosphidr[4][3],dphidr[4][3],dbonddr[3][4][3],dthetadr[2][4][3]; + double fabcd[4][3]; + + edihedral = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const dihedrallist = neighbor->dihedrallist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = dihedrallist[n][0]; + i2 = dihedrallist[n][1]; + i3 = dihedrallist[n][2]; + i4 = dihedrallist[n][3]; + type = dihedrallist[n][4]; + + // 1st bond + + vb1x = x[i1][0] - x[i2][0]; + vb1y = x[i1][1] - x[i2][1]; + vb1z = x[i1][2] - x[i2][2]; + + // 2nd bond + + vb2x = x[i3][0] - x[i2][0]; + vb2y = x[i3][1] - x[i2][1]; + vb2z = x[i3][2] - x[i2][2]; + + vb2xm = -vb2x; + vb2ym = -vb2y; + vb2zm = -vb2z; + + // 3rd bond + + vb3x = x[i4][0] - x[i3][0]; + vb3y = x[i4][1] - x[i3][1]; + vb3z = x[i4][2] - x[i3][2]; + + // distances + + r1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z; + r1 = sqrt(r1mag2); + r2mag2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z; + r2 = sqrt(r2mag2); + r3mag2 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z; + r3 = sqrt(r3mag2); + + sb1 = 1.0/r1mag2; + rb1 = 1.0/r1; + sb2 = 1.0/r2mag2; + rb2 = 1.0/r2; + sb3 = 1.0/r3mag2; + rb3 = 1.0/r3; + + c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3; + + // angles + + r12c1 = rb1*rb2; + r12c2 = rb2*rb3; + costh12 = (vb1x*vb2x + vb1y*vb2y + vb1z*vb2z) * r12c1; + costh13 = c0; + costh23 = (vb2xm*vb3x + vb2ym*vb3y + vb2zm*vb3z) * r12c2; + + // cos and sin of 2 angles and final c + + sin2 = MAX(1.0 - costh12*costh12,0.0); + sc1 = sqrt(sin2); + if (sc1 < SMALL) sc1 = SMALL; + sc1 = 1.0/sc1; + + sin2 = MAX(1.0 - costh23*costh23,0.0); + sc2 = sqrt(sin2); + if (sc2 < SMALL) sc2 = SMALL; + sc2 = 1.0/sc2; + + s1 = sc1 * sc1; + s2 = sc2 * sc2; + s12 = sc1 * sc2; + c = (c0 + costh12*costh23) * s12; + + // error check + + if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) { + int me = comm->me; + + if (screen) { + char str[128]; + sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", + me,thr->get_tid(),update->ntimestep, + atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); + error->warning(FLERR,str,0); + fprintf(screen," 1st atom: %d %g %g %g\n", + me,x[i1][0],x[i1][1],x[i1][2]); + fprintf(screen," 2nd atom: %d %g %g %g\n", + me,x[i2][0],x[i2][1],x[i2][2]); + fprintf(screen," 3rd atom: %d %g %g %g\n", + me,x[i3][0],x[i3][1],x[i3][2]); + fprintf(screen," 4th atom: %d %g %g %g\n", + me,x[i4][0],x[i4][1],x[i4][2]); + } + } + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + cosphi = c; + phi = acos(c); + + sinphi = sqrt(1.0 - c*c); + sinphi = MAX(sinphi,SMALL); + + a11 = -c*sb1*s1; + a22 = sb2 * (2.0*costh13*s12 - c*(s1+s2)); + a33 = -c*sb3*s2; + a12 = r12c1 * (costh12*c*s1 + costh23*s12); + a13 = rb1*rb3*s12; + a23 = r12c2 * (-costh23*c*s2 - costh12*s12); + + sx1 = a11*vb1x + a12*vb2x + a13*vb3x; + sx2 = a12*vb1x + a22*vb2x + a23*vb3x; + sx12 = a13*vb1x + a23*vb2x + a33*vb3x; + sy1 = a11*vb1y + a12*vb2y + a13*vb3y; + sy2 = a12*vb1y + a22*vb2y + a23*vb3y; + sy12 = a13*vb1y + a23*vb2y + a33*vb3y; + sz1 = a11*vb1z + a12*vb2z + a13*vb3z; + sz2 = a12*vb1z + a22*vb2z + a23*vb3z; + sz12 = a13*vb1z + a23*vb2z + a33*vb3z; + + // set up d(cos(phi))/d(r) and dphi/dr arrays + + dcosphidr[0][0] = -sx1; + dcosphidr[0][1] = -sy1; + dcosphidr[0][2] = -sz1; + dcosphidr[1][0] = sx2 + sx1; + dcosphidr[1][1] = sy2 + sy1; + dcosphidr[1][2] = sz2 + sz1; + dcosphidr[2][0] = sx12 - sx2; + dcosphidr[2][1] = sy12 - sy2; + dcosphidr[2][2] = sz12 - sz2; + dcosphidr[3][0] = -sx12; + dcosphidr[3][1] = -sy12; + dcosphidr[3][2] = -sz12; + + for (i = 0; i < 4; i++) + for (j = 0; j < 3; j++) + dphidr[i][j] = -dcosphidr[i][j] / sinphi; + + // energy + + dphi1 = phi - phi1[type]; + dphi2 = 2.0*phi - phi2[type]; + dphi3 = 3.0*phi - phi3[type]; + + if (EFLAG) edihedral = k1[type]*(1.0 - cos(dphi1)) + + k2[type]*(1.0 - cos(dphi2)) + + k3[type]*(1.0 - cos(dphi3)); + + de_dihedral = k1[type]*sin(dphi1) + 2.0*k2[type]*sin(dphi2) + + 3.0*k3[type]*sin(dphi3); + + // torsion forces on all 4 atoms + + for (i = 0; i < 4; i++) + for (j = 0; j < 3; j++) + fabcd[i][j] = de_dihedral*dphidr[i][j]; + + // set up d(bond)/d(r) array + // dbonddr(i,j,k) = bond i, atom j, coordinate k + + for (i = 0; i < 3; i++) + for (j = 0; j < 4; j++) + for (k = 0; k < 3; k++) + dbonddr[i][j][k] = 0.0; + + // bond1 + + dbonddr[0][0][0] = vb1x / r1; + dbonddr[0][0][1] = vb1y / r1; + dbonddr[0][0][2] = vb1z / r1; + dbonddr[0][1][0] = -vb1x / r1; + dbonddr[0][1][1] = -vb1y / r1; + dbonddr[0][1][2] = -vb1z / r1; + + // bond2 + + dbonddr[1][1][0] = vb2x / r2; + dbonddr[1][1][1] = vb2y / r2; + dbonddr[1][1][2] = vb2z / r2; + dbonddr[1][2][0] = -vb2x / r2; + dbonddr[1][2][1] = -vb2y / r2; + dbonddr[1][2][2] = -vb2z / r2; + + // bond3 + + dbonddr[2][2][0] = vb3x / r3; + dbonddr[2][2][1] = vb3y / r3; + dbonddr[2][2][2] = vb3z / r3; + dbonddr[2][3][0] = -vb3x / r3; + dbonddr[2][3][1] = -vb3y / r3; + dbonddr[2][3][2] = -vb3z / r3; + + // set up d(theta)/d(r) array + // dthetadr(i,j,k) = angle i, atom j, coordinate k + + for (i = 0; i < 2; i++) + for (j = 0; j < 4; j++) + for (k = 0; k < 3; k++) + dthetadr[i][j][k] = 0.0; + + t1 = costh12 / r1mag2; + t2 = costh23 / r2mag2; + t3 = costh12 / r2mag2; + t4 = costh23 / r3mag2; + + // angle12 + + dthetadr[0][0][0] = sc1 * ((t1 * vb1x) - (vb2x * r12c1)); + dthetadr[0][0][1] = sc1 * ((t1 * vb1y) - (vb2y * r12c1)); + dthetadr[0][0][2] = sc1 * ((t1 * vb1z) - (vb2z * r12c1)); + + dthetadr[0][1][0] = sc1 * ((-t1 * vb1x) + (vb2x * r12c1) + + (-t3 * vb2x) + (vb1x * r12c1)); + dthetadr[0][1][1] = sc1 * ((-t1 * vb1y) + (vb2y * r12c1) + + (-t3 * vb2y) + (vb1y * r12c1)); + dthetadr[0][1][2] = sc1 * ((-t1 * vb1z) + (vb2z * r12c1) + + (-t3 * vb2z) + (vb1z * r12c1)); + + dthetadr[0][2][0] = sc1 * ((t3 * vb2x) - (vb1x * r12c1)); + dthetadr[0][2][1] = sc1 * ((t3 * vb2y) - (vb1y * r12c1)); + dthetadr[0][2][2] = sc1 * ((t3 * vb2z) - (vb1z * r12c1)); + + // angle23 + + dthetadr[1][1][0] = sc2 * ((t2 * vb2x) + (vb3x * r12c2)); + dthetadr[1][1][1] = sc2 * ((t2 * vb2y) + (vb3y * r12c2)); + dthetadr[1][1][2] = sc2 * ((t2 * vb2z) + (vb3z * r12c2)); + + dthetadr[1][2][0] = sc2 * ((-t2 * vb2x) - (vb3x * r12c2) + + (t4 * vb3x) + (vb2x * r12c2)); + dthetadr[1][2][1] = sc2 * ((-t2 * vb2y) - (vb3y * r12c2) + + (t4 * vb3y) + (vb2y * r12c2)); + dthetadr[1][2][2] = sc2 * ((-t2 * vb2z) - (vb3z * r12c2) + + (t4 * vb3z) + (vb2z * r12c2)); + + dthetadr[1][3][0] = -sc2 * ((t4 * vb3x) + (vb2x * r12c2)); + dthetadr[1][3][1] = -sc2 * ((t4 * vb3y) + (vb2y * r12c2)); + dthetadr[1][3][2] = -sc2 * ((t4 * vb3z) + (vb2z * r12c2)); + + // mid-bond/torsion coupling + // energy on bond2 (middle bond) + + cos2phi = cos(2.0*phi); + cos3phi = cos(3.0*phi); + + bt1 = mbt_f1[type] * cosphi; + bt2 = mbt_f2[type] * cos2phi; + bt3 = mbt_f3[type] * cos3phi; + sumbte = bt1 + bt2 + bt3; + db = r2 - mbt_r0[type]; + if (EFLAG) edihedral += db * sumbte; + + // force on bond2 + + bt1 = -mbt_f1[type] * sinphi; + bt2 = -2.0 * mbt_f2[type] * sin(2.0*phi); + bt3 = -3.0 * mbt_f3[type] * sin(3.0*phi); + sumbtf = bt1 + bt2 + bt3; + + for (i = 0; i < 4; i++) + for (j = 0; j < 3; j++) + fabcd[i][j] += db*sumbtf*dphidr[i][j] + sumbte*dbonddr[1][i][j]; + + // end-bond/torsion coupling + // energy on bond1 (first bond) + + bt1 = ebt_f1_1[type] * cosphi; + bt2 = ebt_f2_1[type] * cos2phi; + bt3 = ebt_f3_1[type] * cos3phi; + sumbte = bt1 + bt2 + bt3; + + db = r1 - ebt_r0_1[type]; + if (EFLAG) edihedral += db * (bt1+bt2+bt3); + + // force on bond1 + + bt1 = ebt_f1_1[type] * sinphi; + bt2 = 2.0 * ebt_f2_1[type] * sin(2.0*phi); + bt3 = 3.0 * ebt_f3_1[type] * sin(3.0*phi); + sumbtf = bt1 + bt2 + bt3; + + for (i = 0; i < 4; i++) + for (j = 0; j < 3; j++) + fabcd[i][j] -= db*sumbtf*dphidr[i][j] + sumbte*dbonddr[0][i][j]; + + // end-bond/torsion coupling + // energy on bond3 (last bond) + + bt1 = ebt_f1_2[type] * cosphi; + bt2 = ebt_f2_2[type] * cos2phi; + bt3 = ebt_f3_2[type] * cos3phi; + sumbte = bt1 + bt2 + bt3; + + db = r3 - ebt_r0_2[type]; + if (EFLAG) edihedral += db * (bt1+bt2+bt3); + + // force on bond3 + + bt1 = -ebt_f1_2[type] * sinphi; + bt2 = -2.0 * ebt_f2_2[type] * sin(2.0*phi); + bt3 = -3.0 * ebt_f3_2[type] * sin(3.0*phi); + sumbtf = bt1 + bt2 + bt3; + + for (i = 0; i < 4; i++) + for (j = 0; j < 3; j++) + fabcd[i][j] += db*sumbtf*dphidr[i][j] + sumbte*dbonddr[2][i][j]; + + // angle/torsion coupling + // energy on angle1 + + at1 = at_f1_1[type] * cosphi; + at2 = at_f2_1[type] * cos2phi; + at3 = at_f3_1[type] * cos3phi; + sumbte = at1 + at2 + at3; + + da = acos(costh12) - at_theta0_1[type]; + if (EFLAG) edihedral += da * (at1+at2+at3); + + // force on angle1 + + bt1 = at_f1_1[type] * sinphi; + bt2 = 2.0 * at_f2_1[type] * sin(2.0*phi); + bt3 = 3.0 * at_f3_1[type] * sin(3.0*phi); + sumbtf = bt1 + bt2 + bt3; + + for (i = 0; i < 4; i++) + for (j = 0; j < 3; j++) + fabcd[i][j] -= da*sumbtf*dphidr[i][j] + sumbte*dthetadr[0][i][j]; + + // energy on angle2 + + at1 = at_f1_2[type] * cosphi; + at2 = at_f2_2[type] * cos2phi; + at3 = at_f3_2[type] * cos3phi; + sumbte = at1 + at2 + at3; + + da = acos(costh23) - at_theta0_2[type]; + if (EFLAG) edihedral += da * (at1+at2+at3); + + // force on angle2 + + bt1 = -at_f1_2[type] * sinphi; + bt2 = -2.0 * at_f2_2[type] * sin(2.0*phi); + bt3 = -3.0 * at_f3_2[type] * sin(3.0*phi); + sumbtf = bt1 + bt2 + bt3; + + for (i = 0; i < 4; i++) + for (j = 0; j < 3; j++) + fabcd[i][j] += da*sumbtf*dphidr[i][j] + sumbte*dthetadr[1][i][j]; + + // angle/angle/torsion coupling + + da1 = acos(costh12) - aat_theta0_1[type]; + da2 = acos(costh23) - aat_theta0_2[type]; + + if (EFLAG) edihedral += aat_k[type]*da1*da2*cosphi; + + for (i = 0; i < 4; i++) + for (j = 0; j < 3; j++) + fabcd[i][j] -= aat_k[type] * + (cosphi * (da2*dthetadr[0][i][j] - da1*dthetadr[1][i][j]) + + sinphi * da1*da2*dphidr[i][j]); + + // bond1/bond3 coupling + + if (fabs(bb13t_k[type]) > SMALL) { + + r1_0 = bb13t_r10[type]; + r3_0 = bb13t_r30[type]; + dr1 = r1 - r1_0; + dr2 = r3 - r3_0; + tk1 = -bb13t_k[type] * dr1 / r3; + tk2 = -bb13t_k[type] * dr2 / r1; + + if (EFLAG) edihedral += bb13t_k[type]*dr1*dr2; + + fabcd[0][0] += tk2 * vb1x; + fabcd[0][1] += tk2 * vb1y; + fabcd[0][2] += tk2 * vb1z; + + fabcd[1][0] -= tk2 * vb1x; + fabcd[1][1] -= tk2 * vb1y; + fabcd[1][2] -= tk2 * vb1z; + + fabcd[2][0] -= tk1 * vb3x; + fabcd[2][1] -= tk1 * vb3y; + fabcd[2][2] -= tk1 * vb3z; + + fabcd[3][0] += tk1 * vb3x; + fabcd[3][1] += tk1 * vb3y; + fabcd[3][2] += tk1 * vb3z; + } + + // apply force to each of 4 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += fabcd[0][0]; + f[i1][1] += fabcd[0][1]; + f[i1][2] += fabcd[0][2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] += fabcd[1][0]; + f[i2][1] += fabcd[1][1]; + f[i2][2] += fabcd[1][2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += fabcd[2][0]; + f[i3][1] += fabcd[2][1]; + f[i3][2] += fabcd[2][2]; + } + + if (NEWTON_BOND || i4 < nlocal) { + f[i4][0] += fabcd[3][0]; + f[i4][1] += fabcd[3][1]; + f[i4][2] += fabcd[3][2]; + } + + if (EVFLAG) + ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral, + fabcd[0],fabcd[2],fabcd[3], + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); + } +} diff --git a/src/USER-OMP/dihedral_class2_omp.h b/src/USER-OMP/dihedral_class2_omp.h new file mode 100644 index 0000000000..889bbaf920 --- /dev/null +++ b/src/USER-OMP/dihedral_class2_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef DIHEDRAL_CLASS + +DihedralStyle(class2/omp,DihedralClass2OMP) + +#else + +#ifndef LMP_DIHEDRAL_CLASS2_OMP_H +#define LMP_DIHEDRAL_CLASS2_OMP_H + +#include "dihedral_class2.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class DihedralClass2OMP : public DihedralClass2, public ThrOMP { + + public: + DihedralClass2OMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp b/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp new file mode 100644 index 0000000000..ecb1a85fa4 --- /dev/null +++ b/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp @@ -0,0 +1,262 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "dihedral_cosine_shift_exp_omp.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "domain.h" +#include "force.h" +#include "update.h" +#include "error.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define TOLERANCE 0.05 +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +DihedralCosineShiftExpOMP::DihedralCosineShiftExpOMP(class LAMMPS *lmp) + : DihedralCosineShiftExp(lmp), ThrOMP(lmp,THR_DIHEDRAL) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void DihedralCosineShiftExpOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->ndihedrallist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void DihedralCosineShiftExpOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + + int i1,i2,i3,i4,n,type; + double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm; + double edihedral,f1[3],f2[3],f3[3],f4[3]; + double ax,ay,az,bx,by,bz,rasq,rbsq,rgsq,rg,rginv,ra2inv,rb2inv,rabinv; + double df,fg,hg,fga,hgb,gaa,gbb; + double dtfx,dtfy,dtfz,dtgx,dtgy,dtgz,dthx,dthy,dthz; + double c,s,sx2,sy2,sz2; + double cccpsss,cssmscc,exp2; + + edihedral = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const dihedrallist = neighbor->dihedrallist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = dihedrallist[n][0]; + i2 = dihedrallist[n][1]; + i3 = dihedrallist[n][2]; + i4 = dihedrallist[n][3]; + type = dihedrallist[n][4]; + + // 1st bond + + vb1x = x[i1][0] - x[i2][0]; + vb1y = x[i1][1] - x[i2][1]; + vb1z = x[i1][2] - x[i2][2]; + + // 2nd bond + + vb2x = x[i3][0] - x[i2][0]; + vb2y = x[i3][1] - x[i2][1]; + vb2z = x[i3][2] - x[i2][2]; + + vb2xm = -vb2x; + vb2ym = -vb2y; + vb2zm = -vb2z; + + // 3rd bond + + vb3x = x[i4][0] - x[i3][0]; + vb3y = x[i4][1] - x[i3][1]; + vb3z = x[i4][2] - x[i3][2]; + + // c,s calculation + + ax = vb1y*vb2zm - vb1z*vb2ym; + ay = vb1z*vb2xm - vb1x*vb2zm; + az = vb1x*vb2ym - vb1y*vb2xm; + bx = vb3y*vb2zm - vb3z*vb2ym; + by = vb3z*vb2xm - vb3x*vb2zm; + bz = vb3x*vb2ym - vb3y*vb2xm; + + rasq = ax*ax + ay*ay + az*az; + rbsq = bx*bx + by*by + bz*bz; + rgsq = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm; + rg = sqrt(rgsq); + + rginv = ra2inv = rb2inv = 0.0; + if (rg > 0) rginv = 1.0/rg; + if (rasq > 0) ra2inv = 1.0/rasq; + if (rbsq > 0) rb2inv = 1.0/rbsq; + rabinv = sqrt(ra2inv*rb2inv); + + c = (ax*bx + ay*by + az*bz)*rabinv; + s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z); + + // error check + + if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) { + int me = comm->me; + + if (screen) { + char str[128]; + sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", + me,thr->get_tid(),update->ntimestep, + atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); + error->warning(FLERR,str,0); + fprintf(screen," 1st atom: %d %g %g %g\n", + me,x[i1][0],x[i1][1],x[i1][2]); + fprintf(screen," 2nd atom: %d %g %g %g\n", + me,x[i2][0],x[i2][1],x[i2][2]); + fprintf(screen," 3rd atom: %d %g %g %g\n", + me,x[i3][0],x[i3][1],x[i3][2]); + fprintf(screen," 4th atom: %d %g %g %g\n", + me,x[i4][0],x[i4][1],x[i4][2]); + } + } + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + double aa=a[type]; + double uumin=umin[type]; + + cccpsss = c*cost[type]+s*sint[type]; + cssmscc = c*sint[type]-s*cost[type]; + + if (doExpansion[type]) { + // |a|<0.001 so use expansions relative precision <1e-5 + if (EFLAG) edihedral = -0.125*(1+cccpsss)*(4+aa*(cccpsss-1))*uumin; + df=0.5*uumin*( cssmscc + 0.5*aa*cccpsss); + } else { + exp2=exp(0.5*aa*(1+cccpsss)); + if (EFLAG) edihedral = opt1[type]*(1-exp2); + df= 0.5*opt1[type]*aa* ( exp2*cssmscc ); + } + + fg = vb1x*vb2xm + vb1y*vb2ym + vb1z*vb2zm; + hg = vb3x*vb2xm + vb3y*vb2ym + vb3z*vb2zm; + fga = fg*ra2inv*rginv; + hgb = hg*rb2inv*rginv; + gaa = -ra2inv*rg; + gbb = rb2inv*rg; + + dtfx = gaa*ax; + dtfy = gaa*ay; + dtfz = gaa*az; + dtgx = fga*ax - hgb*bx; + dtgy = fga*ay - hgb*by; + dtgz = fga*az - hgb*bz; + dthx = gbb*bx; + dthy = gbb*by; + dthz = gbb*bz; + + sx2 = df*dtgx; + sy2 = df*dtgy; + sz2 = df*dtgz; + + f1[0] = df*dtfx; + f1[1] = df*dtfy; + f1[2] = df*dtfz; + + f2[0] = sx2 - f1[0]; + f2[1] = sy2 - f1[1]; + f2[2] = sz2 - f1[2]; + + f4[0] = df*dthx; + f4[1] = df*dthy; + f4[2] = df*dthz; + + f3[0] = -sx2 - f4[0]; + f3[1] = -sy2 - f4[1]; + f3[2] = -sz2 - f4[2]; + + // apply force to each of 4 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] += f2[0]; + f[i2][1] += f2[1]; + f[i2][2] += f2[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (NEWTON_BOND || i4 < nlocal) { + f[i4][0] += f4[0]; + f[i4][1] += f4[1]; + f[i4][2] += f4[2]; + } + + if (EVFLAG) + ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4, + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); + } +} diff --git a/src/USER-OMP/dihedral_cosine_shift_exp_omp.h b/src/USER-OMP/dihedral_cosine_shift_exp_omp.h new file mode 100644 index 0000000000..7e1accebf1 --- /dev/null +++ b/src/USER-OMP/dihedral_cosine_shift_exp_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef DIHEDRAL_CLASS + +DihedralStyle(cosine/shift/exp/omp,DihedralCosineShiftExpOMP) + +#else + +#ifndef LMP_DIHEDRAL_COSINE_SHIFT_EXP_OMP_H +#define LMP_DIHEDRAL_COSINE_SHIFT_EXP_OMP_H + +#include "dihedral_cosine_shift_exp.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class DihedralCosineShiftExpOMP : public DihedralCosineShiftExp, public ThrOMP { + + public: + DihedralCosineShiftExpOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/dihedral_harmonic_omp.cpp b/src/USER-OMP/dihedral_harmonic_omp.cpp new file mode 100644 index 0000000000..e3b94c8f9a --- /dev/null +++ b/src/USER-OMP/dihedral_harmonic_omp.cpp @@ -0,0 +1,269 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "dihedral_harmonic_omp.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "domain.h" +#include "force.h" +#include "update.h" +#include "error.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define TOLERANCE 0.05 +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +DihedralHarmonicOMP::DihedralHarmonicOMP(class LAMMPS *lmp) + : DihedralHarmonic(lmp), ThrOMP(lmp,THR_DIHEDRAL) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void DihedralHarmonicOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->ndihedrallist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void DihedralHarmonicOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + + int i1,i2,i3,i4,i,m,n,type; + double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm; + double edihedral,f1[3],f2[3],f3[3],f4[3]; + double ax,ay,az,bx,by,bz,rasq,rbsq,rgsq,rg,rginv,ra2inv,rb2inv,rabinv; + double df,df1,ddf1,fg,hg,fga,hgb,gaa,gbb; + double dtfx,dtfy,dtfz,dtgx,dtgy,dtgz,dthx,dthy,dthz; + double c,s,p,sx2,sy2,sz2; + + edihedral = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const dihedrallist = neighbor->dihedrallist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = dihedrallist[n][0]; + i2 = dihedrallist[n][1]; + i3 = dihedrallist[n][2]; + i4 = dihedrallist[n][3]; + type = dihedrallist[n][4]; + + // 1st bond + + vb1x = x[i1][0] - x[i2][0]; + vb1y = x[i1][1] - x[i2][1]; + vb1z = x[i1][2] - x[i2][2]; + + // 2nd bond + + vb2x = x[i3][0] - x[i2][0]; + vb2y = x[i3][1] - x[i2][1]; + vb2z = x[i3][2] - x[i2][2]; + + vb2xm = -vb2x; + vb2ym = -vb2y; + vb2zm = -vb2z; + + // 3rd bond + + vb3x = x[i4][0] - x[i3][0]; + vb3y = x[i4][1] - x[i3][1]; + vb3z = x[i4][2] - x[i3][2]; + + // c,s calculation + + ax = vb1y*vb2zm - vb1z*vb2ym; + ay = vb1z*vb2xm - vb1x*vb2zm; + az = vb1x*vb2ym - vb1y*vb2xm; + bx = vb3y*vb2zm - vb3z*vb2ym; + by = vb3z*vb2xm - vb3x*vb2zm; + bz = vb3x*vb2ym - vb3y*vb2xm; + + rasq = ax*ax + ay*ay + az*az; + rbsq = bx*bx + by*by + bz*bz; + rgsq = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm; + rg = sqrt(rgsq); + + rginv = ra2inv = rb2inv = 0.0; + if (rg > 0) rginv = 1.0/rg; + if (rasq > 0) ra2inv = 1.0/rasq; + if (rbsq > 0) rb2inv = 1.0/rbsq; + rabinv = sqrt(ra2inv*rb2inv); + + c = (ax*bx + ay*by + az*bz)*rabinv; + s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z); + + // error check + + if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) { + int me = comm->me; + + if (screen) { + char str[128]; + sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", + me,thr->get_tid(),update->ntimestep, + atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); + error->warning(FLERR,str,0); + fprintf(screen," 1st atom: %d %g %g %g\n", + me,x[i1][0],x[i1][1],x[i1][2]); + fprintf(screen," 2nd atom: %d %g %g %g\n", + me,x[i2][0],x[i2][1],x[i2][2]); + fprintf(screen," 3rd atom: %d %g %g %g\n", + me,x[i3][0],x[i3][1],x[i3][2]); + fprintf(screen," 4th atom: %d %g %g %g\n", + me,x[i4][0],x[i4][1],x[i4][2]); + } + } + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + m = multiplicity[type]; + p = 1.0; + ddf1 = df1 = 0.0; + + for (i = 0; i < m; i++) { + ddf1 = p*c - df1*s; + df1 = p*s + df1*c; + p = ddf1; + } + + p = p*cos_shift[type] + df1*sin_shift[type]; + df1 = df1*cos_shift[type] - ddf1*sin_shift[type]; + df1 *= -m; + p += 1.0; + + if (m == 0) { + p = 1.0 + cos_shift[type]; + df1 = 0.0; + } + + if (EFLAG) edihedral = k[type] * p; + + fg = vb1x*vb2xm + vb1y*vb2ym + vb1z*vb2zm; + hg = vb3x*vb2xm + vb3y*vb2ym + vb3z*vb2zm; + fga = fg*ra2inv*rginv; + hgb = hg*rb2inv*rginv; + gaa = -ra2inv*rg; + gbb = rb2inv*rg; + + dtfx = gaa*ax; + dtfy = gaa*ay; + dtfz = gaa*az; + dtgx = fga*ax - hgb*bx; + dtgy = fga*ay - hgb*by; + dtgz = fga*az - hgb*bz; + dthx = gbb*bx; + dthy = gbb*by; + dthz = gbb*bz; + + df = -k[type] * df1; + + sx2 = df*dtgx; + sy2 = df*dtgy; + sz2 = df*dtgz; + + f1[0] = df*dtfx; + f1[1] = df*dtfy; + f1[2] = df*dtfz; + + f2[0] = sx2 - f1[0]; + f2[1] = sy2 - f1[1]; + f2[2] = sz2 - f1[2]; + + f4[0] = df*dthx; + f4[1] = df*dthy; + f4[2] = df*dthz; + + f3[0] = -sx2 - f4[0]; + f3[1] = -sy2 - f4[1]; + f3[2] = -sz2 - f4[2]; + + // apply force to each of 4 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] += f2[0]; + f[i2][1] += f2[1]; + f[i2][2] += f2[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (NEWTON_BOND || i4 < nlocal) { + f[i4][0] += f4[0]; + f[i4][1] += f4[1]; + f[i4][2] += f4[2]; + } + + if (EVFLAG) + ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4, + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); + } +} diff --git a/src/USER-OMP/dihedral_harmonic_omp.h b/src/USER-OMP/dihedral_harmonic_omp.h new file mode 100644 index 0000000000..b55bf015f2 --- /dev/null +++ b/src/USER-OMP/dihedral_harmonic_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef DIHEDRAL_CLASS + +DihedralStyle(harmonic/omp,DihedralHarmonicOMP) + +#else + +#ifndef LMP_DIHEDRAL_HARMONIC_OMP_H +#define LMP_DIHEDRAL_HARMONIC_OMP_H + +#include "dihedral_harmonic.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class DihedralHarmonicOMP : public DihedralHarmonic, public ThrOMP { + + public: + DihedralHarmonicOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/dihedral_helix_omp.cpp b/src/USER-OMP/dihedral_helix_omp.cpp new file mode 100644 index 0000000000..fd7025cdb4 --- /dev/null +++ b/src/USER-OMP/dihedral_helix_omp.cpp @@ -0,0 +1,282 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "dihedral_helix_omp.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "domain.h" +#include "force.h" +#include "update.h" +#include "math_const.h" +#include "error.h" + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define TOLERANCE 0.05 +#define SMALL 0.001 +#define SMALLER 0.00001 + +/* ---------------------------------------------------------------------- */ + +DihedralHelixOMP::DihedralHelixOMP(class LAMMPS *lmp) + : DihedralHelix(lmp), ThrOMP(lmp,THR_DIHEDRAL) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void DihedralHelixOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->ndihedrallist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void DihedralHelixOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + + int i1,i2,i3,i4,n,type; + double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm; + double edihedral,f1[3],f2[3],f3[3],f4[3]; + double sb1,sb2,sb3,rb1,rb3,c0,b1mag2,b1mag,b2mag2; + double b2mag,b3mag2,b3mag,ctmp,r12c1,c1mag,r12c2; + double c2mag,sc1,sc2,s1,s12,c,pd,a,a11,a22; + double a33,a12,a13,a23,sx2,sy2,sz2; + double s2,cx,cy,cz,cmag,dx,phi,si,siinv,sin2; + + edihedral = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const dihedrallist = neighbor->dihedrallist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = dihedrallist[n][0]; + i2 = dihedrallist[n][1]; + i3 = dihedrallist[n][2]; + i4 = dihedrallist[n][3]; + type = dihedrallist[n][4]; + + // 1st bond + + vb1x = x[i1][0] - x[i2][0]; + vb1y = x[i1][1] - x[i2][1]; + vb1z = x[i1][2] - x[i2][2]; + + // 2nd bond + + vb2x = x[i3][0] - x[i2][0]; + vb2y = x[i3][1] - x[i2][1]; + vb2z = x[i3][2] - x[i2][2]; + + vb2xm = -vb2x; + vb2ym = -vb2y; + vb2zm = -vb2z; + + // 3rd bond + + vb3x = x[i4][0] - x[i3][0]; + vb3y = x[i4][1] - x[i3][1]; + vb3z = x[i4][2] - x[i3][2]; + + // c0 calculation + + sb1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z); + sb2 = 1.0 / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z); + sb3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z); + + rb1 = sqrt(sb1); + rb3 = sqrt(sb3); + + c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3; + + // 1st and 2nd angle + + b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z; + b1mag = sqrt(b1mag2); + b2mag2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z; + b2mag = sqrt(b2mag2); + b3mag2 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z; + b3mag = sqrt(b3mag2); + + ctmp = vb1x*vb2x + vb1y*vb2y + vb1z*vb2z; + r12c1 = 1.0 / (b1mag*b2mag); + c1mag = ctmp * r12c1; + + ctmp = vb2xm*vb3x + vb2ym*vb3y + vb2zm*vb3z; + r12c2 = 1.0 / (b2mag*b3mag); + c2mag = ctmp * r12c2; + + // cos and sin of 2 angles and final c + + sin2 = MAX(1.0 - c1mag*c1mag,0.0); + sc1 = sqrt(sin2); + if (sc1 < SMALL) sc1 = SMALL; + sc1 = 1.0/sc1; + + sin2 = MAX(1.0 - c2mag*c2mag,0.0); + sc2 = sqrt(sin2); + if (sc2 < SMALL) sc2 = SMALL; + sc2 = 1.0/sc2; + + s1 = sc1 * sc1; + s2 = sc2 * sc2; + s12 = sc1 * sc2; + c = (c0 + c1mag*c2mag) * s12; + + cx = vb1y*vb2z - vb1z*vb2y; + cy = vb1z*vb2x - vb1x*vb2z; + cz = vb1x*vb2y - vb1y*vb2x; + cmag = sqrt(cx*cx + cy*cy + cz*cz); + dx = (cx*vb3x + cy*vb3y + cz*vb3z)/cmag/b3mag; + + // error check + + if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) { + int me = comm->me; + + if (screen) { + char str[128]; + sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", + me,thr->get_tid(),update->ntimestep, + atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); + error->warning(FLERR,str,0); + fprintf(screen," 1st atom: %d %g %g %g\n", + me,x[i1][0],x[i1][1],x[i1][2]); + fprintf(screen," 2nd atom: %d %g %g %g\n", + me,x[i2][0],x[i2][1],x[i2][2]); + fprintf(screen," 3rd atom: %d %g %g %g\n", + me,x[i3][0],x[i3][1],x[i3][2]); + fprintf(screen," 4th atom: %d %g %g %g\n", + me,x[i4][0],x[i4][1],x[i4][2]); + } + } + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + phi = acos(c); + if (dx < 0.0) phi *= -1.0; + si = sin(phi); + if (fabs(si) < SMALLER) si = SMALLER; + siinv = 1.0/si; + + pd = -aphi[type] + 3.0*bphi[type]*sin(3.0*phi)*siinv + + cphi[type]*sin(phi + MY_PI4)*siinv; + + if (EFLAG) edihedral = aphi[type]*(1.0 - c) + bphi[type]*(1.0 + cos(3.0*phi)) + + cphi[type]*(1.0 + cos(phi + MY_PI4)); + + a = pd; + c = c * a; + s12 = s12 * a; + a11 = c*sb1*s1; + a22 = -sb2 * (2.0*c0*s12 - c*(s1+s2)); + a33 = c*sb3*s2; + a12 = -r12c1 * (c1mag*c*s1 + c2mag*s12); + a13 = -rb1*rb3*s12; + a23 = r12c2 * (c2mag*c*s2 + c1mag*s12); + + sx2 = a12*vb1x + a22*vb2x + a23*vb3x; + sy2 = a12*vb1y + a22*vb2y + a23*vb3y; + sz2 = a12*vb1z + a22*vb2z + a23*vb3z; + + f1[0] = a11*vb1x + a12*vb2x + a13*vb3x; + f1[1] = a11*vb1y + a12*vb2y + a13*vb3y; + f1[2] = a11*vb1z + a12*vb2z + a13*vb3z; + + f2[0] = -sx2 - f1[0]; + f2[1] = -sy2 - f1[1]; + f2[2] = -sz2 - f1[2]; + + f4[0] = a13*vb1x + a23*vb2x + a33*vb3x; + f4[1] = a13*vb1y + a23*vb2y + a33*vb3y; + f4[2] = a13*vb1z + a23*vb2z + a33*vb3z; + + f3[0] = sx2 - f4[0]; + f3[1] = sy2 - f4[1]; + f3[2] = sz2 - f4[2]; + + // apply force to each of 4 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] += f2[0]; + f[i2][1] += f2[1]; + f[i2][2] += f2[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (NEWTON_BOND || i4 < nlocal) { + f[i4][0] += f4[0]; + f[i4][1] += f4[1]; + f[i4][2] += f4[2]; + } + + if (EVFLAG) + ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4, + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); + } +} diff --git a/src/USER-OMP/dihedral_helix_omp.h b/src/USER-OMP/dihedral_helix_omp.h new file mode 100644 index 0000000000..24f16c4d7e --- /dev/null +++ b/src/USER-OMP/dihedral_helix_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef DIHEDRAL_CLASS + +DihedralStyle(helix/omp,DihedralHelixOMP) + +#else + +#ifndef LMP_DIHEDRAL_HELIX_OMP_H +#define LMP_DIHEDRAL_HELIX_OMP_H + +#include "dihedral_helix.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class DihedralHelixOMP : public DihedralHelix, public ThrOMP { + + public: + DihedralHelixOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/dihedral_multi_harmonic_omp.cpp b/src/USER-OMP/dihedral_multi_harmonic_omp.cpp new file mode 100644 index 0000000000..a68bebfdf6 --- /dev/null +++ b/src/USER-OMP/dihedral_multi_harmonic_omp.cpp @@ -0,0 +1,269 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "dihedral_multi_harmonic_omp.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "domain.h" +#include "force.h" +#include "update.h" +#include "error.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define TOLERANCE 0.05 +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +DihedralMultiHarmonicOMP::DihedralMultiHarmonicOMP(class LAMMPS *lmp) + : DihedralMultiHarmonic(lmp), ThrOMP(lmp,THR_DIHEDRAL) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void DihedralMultiHarmonicOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->ndihedrallist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void DihedralMultiHarmonicOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + + int i1,i2,i3,i4,n,type; + double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm; + double edihedral,f1[3],f2[3],f3[3],f4[3]; + double sb1,sb2,sb3,rb1,rb3,c0,b1mag2,b1mag,b2mag2; + double b2mag,b3mag2,b3mag,ctmp,r12c1,c1mag,r12c2; + double c2mag,sc1,sc2,s1,s12,c,pd,a,a11,a22; + double a33,a12,a13,a23,sx2,sy2,sz2; + double s2,sin2; + + edihedral = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const dihedrallist = neighbor->dihedrallist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = dihedrallist[n][0]; + i2 = dihedrallist[n][1]; + i3 = dihedrallist[n][2]; + i4 = dihedrallist[n][3]; + type = dihedrallist[n][4]; + + // 1st bond + + vb1x = x[i1][0] - x[i2][0]; + vb1y = x[i1][1] - x[i2][1]; + vb1z = x[i1][2] - x[i2][2]; + + // 2nd bond + + vb2x = x[i3][0] - x[i2][0]; + vb2y = x[i3][1] - x[i2][1]; + vb2z = x[i3][2] - x[i2][2]; + + vb2xm = -vb2x; + vb2ym = -vb2y; + vb2zm = -vb2z; + + // 3rd bond + + vb3x = x[i4][0] - x[i3][0]; + vb3y = x[i4][1] - x[i3][1]; + vb3z = x[i4][2] - x[i3][2]; + + // c0 calculation + + sb1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z); + sb2 = 1.0 / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z); + sb3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z); + + rb1 = sqrt(sb1); + rb3 = sqrt(sb3); + + c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3; + + // 1st and 2nd angle + + b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z; + b1mag = sqrt(b1mag2); + b2mag2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z; + b2mag = sqrt(b2mag2); + b3mag2 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z; + b3mag = sqrt(b3mag2); + + ctmp = vb1x*vb2x + vb1y*vb2y + vb1z*vb2z; + r12c1 = 1.0 / (b1mag*b2mag); + c1mag = ctmp * r12c1; + + ctmp = vb2xm*vb3x + vb2ym*vb3y + vb2zm*vb3z; + r12c2 = 1.0 / (b2mag*b3mag); + c2mag = ctmp * r12c2; + + // cos and sin of 2 angles and final c + + sin2 = MAX(1.0 - c1mag*c1mag,0.0); + sc1 = sqrt(sin2); + if (sc1 < SMALL) sc1 = SMALL; + sc1 = 1.0/sc1; + + sin2 = MAX(1.0 - c2mag*c2mag,0.0); + sc2 = sqrt(sin2); + if (sc2 < SMALL) sc2 = SMALL; + sc2 = 1.0/sc2; + + s1 = sc1 * sc1; + s2 = sc2 * sc2; + s12 = sc1 * sc2; + c = (c0 + c1mag*c2mag) * s12; + + // error check + + if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) { + int me = comm->me; + if (screen) { + char str[128]; + sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", + me,thr->get_tid(),update->ntimestep, + atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); + error->warning(FLERR,str,0); + fprintf(screen," 1st atom: %d %g %g %g\n", + me,x[i1][0],x[i1][1],x[i1][2]); + fprintf(screen," 2nd atom: %d %g %g %g\n", + me,x[i2][0],x[i2][1],x[i2][2]); + fprintf(screen," 3rd atom: %d %g %g %g\n", + me,x[i3][0],x[i3][1],x[i3][2]); + fprintf(screen," 4th atom: %d %g %g %g\n", + me,x[i4][0],x[i4][1],x[i4][2]); + } + } + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + // force & energy + // p = sum (i=1,5) a_i * c**(i-1) + // pd = dp/dc + + pd = a2[type] + c*(2.0*a3[type] + c*(3.0*a4[type] + c*4.0*a5[type])); + + if (EFLAG) + edihedral = a1[type] + c*(a2[type] + c*(a3[type] + c*(a4[type] + c*a5[type]))); + + a = pd; + c = c * a; + s12 = s12 * a; + a11 = c*sb1*s1; + a22 = -sb2 * (2.0*c0*s12 - c*(s1+s2)); + a33 = c*sb3*s2; + a12 = -r12c1*(c1mag*c*s1 + c2mag*s12); + a13 = -rb1*rb3*s12; + a23 = r12c2*(c2mag*c*s2 + c1mag*s12); + + sx2 = a12*vb1x + a22*vb2x + a23*vb3x; + sy2 = a12*vb1y + a22*vb2y + a23*vb3y; + sz2 = a12*vb1z + a22*vb2z + a23*vb3z; + + f1[0] = a11*vb1x + a12*vb2x + a13*vb3x; + f1[1] = a11*vb1y + a12*vb2y + a13*vb3y; + f1[2] = a11*vb1z + a12*vb2z + a13*vb3z; + + f2[0] = -sx2 - f1[0]; + f2[1] = -sy2 - f1[1]; + f2[2] = -sz2 - f1[2]; + + f4[0] = a13*vb1x + a23*vb2x + a33*vb3x; + f4[1] = a13*vb1y + a23*vb2y + a33*vb3y; + f4[2] = a13*vb1z + a23*vb2z + a33*vb3z; + + f3[0] = sx2 - f4[0]; + f3[1] = sy2 - f4[1]; + f3[2] = sz2 - f4[2]; + + // apply force to each of 4 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] += f2[0]; + f[i2][1] += f2[1]; + f[i2][2] += f2[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (NEWTON_BOND || i4 < nlocal) { + f[i4][0] += f4[0]; + f[i4][1] += f4[1]; + f[i4][2] += f4[2]; + } + + if (EVFLAG) + ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4, + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); + } +} diff --git a/src/USER-OMP/dihedral_multi_harmonic_omp.h b/src/USER-OMP/dihedral_multi_harmonic_omp.h new file mode 100644 index 0000000000..c6b80237a5 --- /dev/null +++ b/src/USER-OMP/dihedral_multi_harmonic_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef DIHEDRAL_CLASS + +DihedralStyle(multi/harmonic/omp,DihedralMultiHarmonicOMP) + +#else + +#ifndef LMP_DIHEDRAL_MULTI_HARMONIC_OMP_H +#define LMP_DIHEDRAL_MULTI_HARMONIC_OMP_H + +#include "dihedral_multi_harmonic.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class DihedralMultiHarmonicOMP : public DihedralMultiHarmonic, public ThrOMP { + + public: + DihedralMultiHarmonicOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/dihedral_opls_omp.cpp b/src/USER-OMP/dihedral_opls_omp.cpp new file mode 100644 index 0000000000..3810dc1985 --- /dev/null +++ b/src/USER-OMP/dihedral_opls_omp.cpp @@ -0,0 +1,285 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "dihedral_opls_omp.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "domain.h" +#include "force.h" +#include "update.h" +#include "error.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define TOLERANCE 0.05 +#define SMALL 0.001 +#define SMALLER 0.00001 + +/* ---------------------------------------------------------------------- */ + +DihedralOPLSOMP::DihedralOPLSOMP(class LAMMPS *lmp) + : DihedralOPLS(lmp), ThrOMP(lmp,THR_DIHEDRAL) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void DihedralOPLSOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->ndihedrallist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void DihedralOPLSOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + + int i1,i2,i3,i4,n,type; + double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm; + double edihedral,f1[3],f2[3],f3[3],f4[3]; + double sb1,sb2,sb3,rb1,rb3,c0,b1mag2,b1mag,b2mag2; + double b2mag,b3mag2,b3mag,ctmp,r12c1,c1mag,r12c2; + double c2mag,sc1,sc2,s1,s12,c,pd,a,a11,a22; + double a33,a12,a13,a23,sx2,sy2,sz2; + double s2,cx,cy,cz,cmag,dx,phi,si,siinv,sin2; + + edihedral = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const dihedrallist = neighbor->dihedrallist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = dihedrallist[n][0]; + i2 = dihedrallist[n][1]; + i3 = dihedrallist[n][2]; + i4 = dihedrallist[n][3]; + type = dihedrallist[n][4]; + + // 1st bond + + vb1x = x[i1][0] - x[i2][0]; + vb1y = x[i1][1] - x[i2][1]; + vb1z = x[i1][2] - x[i2][2]; + + // 2nd bond + + vb2x = x[i3][0] - x[i2][0]; + vb2y = x[i3][1] - x[i2][1]; + vb2z = x[i3][2] - x[i2][2]; + + vb2xm = -vb2x; + vb2ym = -vb2y; + vb2zm = -vb2z; + + // 3rd bond + + vb3x = x[i4][0] - x[i3][0]; + vb3y = x[i4][1] - x[i3][1]; + vb3z = x[i4][2] - x[i3][2]; + + // c0 calculation + + sb1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z); + sb2 = 1.0 / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z); + sb3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z); + + rb1 = sqrt(sb1); + rb3 = sqrt(sb3); + + c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3; + + // 1st and 2nd angle + + b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z; + b1mag = sqrt(b1mag2); + b2mag2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z; + b2mag = sqrt(b2mag2); + b3mag2 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z; + b3mag = sqrt(b3mag2); + + ctmp = vb1x*vb2x + vb1y*vb2y + vb1z*vb2z; + r12c1 = 1.0 / (b1mag*b2mag); + c1mag = ctmp * r12c1; + + ctmp = vb2xm*vb3x + vb2ym*vb3y + vb2zm*vb3z; + r12c2 = 1.0 / (b2mag*b3mag); + c2mag = ctmp * r12c2; + + // cos and sin of 2 angles and final c + + sin2 = MAX(1.0 - c1mag*c1mag,0.0); + sc1 = sqrt(sin2); + if (sc1 < SMALL) sc1 = SMALL; + sc1 = 1.0/sc1; + + sin2 = MAX(1.0 - c2mag*c2mag,0.0); + sc2 = sqrt(sin2); + if (sc2 < SMALL) sc2 = SMALL; + sc2 = 1.0/sc2; + + s1 = sc1 * sc1; + s2 = sc2 * sc2; + s12 = sc1 * sc2; + c = (c0 + c1mag*c2mag) * s12; + + cx = vb1y*vb2z - vb1z*vb2y; + cy = vb1z*vb2x - vb1x*vb2z; + cz = vb1x*vb2y - vb1y*vb2x; + cmag = sqrt(cx*cx + cy*cy + cz*cz); + dx = (cx*vb3x + cy*vb3y + cz*vb3z)/cmag/b3mag; + + // error check + + if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) { + int me = comm->me; + + if (screen) { + char str[128]; + sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", + me,thr->get_tid(),update->ntimestep, + atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); + error->warning(FLERR,str,0); + fprintf(screen," 1st atom: %d %g %g %g\n", + me,x[i1][0],x[i1][1],x[i1][2]); + fprintf(screen," 2nd atom: %d %g %g %g\n", + me,x[i2][0],x[i2][1],x[i2][2]); + fprintf(screen," 3rd atom: %d %g %g %g\n", + me,x[i3][0],x[i3][1],x[i3][2]); + fprintf(screen," 4th atom: %d %g %g %g\n", + me,x[i4][0],x[i4][1],x[i4][2]); + } + } + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + // force & energy + // p = sum (i=1,4) k_i * (1 + (-1)**(i+1)*cos(i*phi) ) + // pd = dp/dc + + phi = acos(c); + if (dx < 0.0) phi *= -1.0; + si = sin(phi); + if (fabs(si) < SMALLER) si = SMALLER; + siinv = 1.0/si; + + pd = k1[type] - 2.0*k2[type]*sin(2.0*phi)*siinv + + 3.0*k3[type]*sin(3.0*phi)*siinv - 4.0*k4[type]*sin(4.0*phi)*siinv; + + if (EFLAG) edihedral = k1[type]*(1.0 + c) + k2[type]*(1.0 - cos(2.0*phi)) + + k3[type]*(1.0 + cos(3.0*phi)) + k4[type]*(1.0 - cos(4.0*phi)); + + + a = pd; + c = c * a; + s12 = s12 * a; + a11 = c*sb1*s1; + a22 = -sb2 * (2.0*c0*s12 - c*(s1+s2)); + a33 = c*sb3*s2; + a12 = -r12c1 * (c1mag*c*s1 + c2mag*s12); + a13 = -rb1*rb3*s12; + a23 = r12c2 * (c2mag*c*s2 + c1mag*s12); + + sx2 = a12*vb1x + a22*vb2x + a23*vb3x; + sy2 = a12*vb1y + a22*vb2y + a23*vb3y; + sz2 = a12*vb1z + a22*vb2z + a23*vb3z; + + f1[0] = a11*vb1x + a12*vb2x + a13*vb3x; + f1[1] = a11*vb1y + a12*vb2y + a13*vb3y; + f1[2] = a11*vb1z + a12*vb2z + a13*vb3z; + + f2[0] = -sx2 - f1[0]; + f2[1] = -sy2 - f1[1]; + f2[2] = -sz2 - f1[2]; + + f4[0] = a13*vb1x + a23*vb2x + a33*vb3x; + f4[1] = a13*vb1y + a23*vb2y + a33*vb3y; + f4[2] = a13*vb1z + a23*vb2z + a33*vb3z; + + f3[0] = sx2 - f4[0]; + f3[1] = sy2 - f4[1]; + f3[2] = sz2 - f4[2]; + + // apply force to each of 4 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] += f2[0]; + f[i2][1] += f2[1]; + f[i2][2] += f2[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (NEWTON_BOND || i4 < nlocal) { + f[i4][0] += f4[0]; + f[i4][1] += f4[1]; + f[i4][2] += f4[2]; + } + + if (EVFLAG) + ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4, + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); + } +} diff --git a/src/USER-OMP/dihedral_opls_omp.h b/src/USER-OMP/dihedral_opls_omp.h new file mode 100644 index 0000000000..fbb5455f9c --- /dev/null +++ b/src/USER-OMP/dihedral_opls_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef DIHEDRAL_CLASS + +DihedralStyle(opls/omp,DihedralOPLSOMP) + +#else + +#ifndef LMP_DIHEDRAL_OPLS_OMP_H +#define LMP_DIHEDRAL_OPLS_OMP_H + +#include "dihedral_opls.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class DihedralOPLSOMP : public DihedralOPLS, public ThrOMP { + + public: + DihedralOPLSOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/dihedral_table_omp.cpp b/src/USER-OMP/dihedral_table_omp.cpp new file mode 100644 index 0000000000..55f5ffd97b --- /dev/null +++ b/src/USER-OMP/dihedral_table_omp.cpp @@ -0,0 +1,345 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "dihedral_table_omp.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "domain.h" +#include "force.h" +#include "update.h" +#include "error.h" + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace DIHEDRAL_TABLE_NS; +#define TOLERANCE 0.05 +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +DihedralTableOMP::DihedralTableOMP(class LAMMPS *lmp) + : DihedralTable(lmp), ThrOMP(lmp,THR_DIHEDRAL) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void DihedralTableOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->ndihedrallist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void DihedralTableOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,i4,n,type; + double edihedral,f1[3],f2[3],f3[3],f4[3]; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const dihedrallist = neighbor->dihedrallist; + const int nlocal = atom->nlocal; + + // The dihedral angle "phi" is the angle between n123 and n234 + // the planes defined by atoms i1,i2,i3, and i2,i3,i4. + // + // Definitions of vectors: vb12, vb23, vb34, perp12on23 + // proj12on23, perp43on32, proj43on32 + // + // Note: The positions of the 4 atoms are labeled x[i1], x[i2], x[i3], x[i4] + // (which are also vectors) + // + // proj12on23 proj34on23 + // ---------> -----------> + // . + // . + // . + // x[i2] . x[i3] + // . __@----------vb23-------->@ . . . . . + // /|\ /| \ | + // | / \ | + // | / \ | + // perp12vs23 / \ | + // | / \ perp34vs23 + // | vb12 \ | + // | / vb34 | + // | / \ | + // | / \ | + // | / \ | + // @ \ | + // _\| \|/ + // x[i1] @ + // + // x[i4] + // + + double vb12[g_dim]; // displacement vector from atom i1 towards atom i2 + // vb12[d] = x[i2][d] - x[i1][d] (for d=0,1,2) + double vb23[g_dim]; // displacement vector from atom i2 towards atom i3 + // vb23[d] = x[i3][d] - x[i2][d] (for d=0,1,2) + double vb34[g_dim]; // displacement vector from atom i3 towards atom i4 + // vb34[d] = x[i4][d] - x[i3][d] (for d=0,1,2) + + // n123 & n234: These two unit vectors are normal to the planes + // defined by atoms 1,2,3 and 2,3,4. + double n123[g_dim]; //n123=vb23 x vb12 / |vb23 x vb12| ("x" is cross product) + double n234[g_dim]; //n234=vb23 x vb34 / |vb23 x vb34| ("x" is cross product) + + double proj12on23[g_dim]; + // proj12on23[d] = (vb23[d]/|vb23|) * DotProduct(vb12,vb23)/|vb12|*|vb23| + double proj34on23[g_dim]; + // proj34on23[d] = (vb34[d]/|vb23|) * DotProduct(vb34,vb23)/|vb34|*|vb23| + double perp12on23[g_dim]; + // perp12on23[d] = v12[d] - proj12on23[d] + double perp34on23[g_dim]; + // perp34on23[d] = v34[d] - proj34on23[d] + + edihedral = 0.0; + + for (n = nfrom; n < nto; n++) { + + i1 = dihedrallist[n][0]; + i2 = dihedrallist[n][1]; + i3 = dihedrallist[n][2]; + i4 = dihedrallist[n][3]; + type = dihedrallist[n][4]; + + // ------ Step 1: Compute the dihedral angle "phi" ------ + // + + // Phi() calculates the dihedral angle. + // This function also calculates the vectors: + // vb12, vb23, vb34, n123, and n234, which we will need later. + + + double phi = Phi(x[i1], x[i2], x[i3], x[i4], domain, + vb12, vb23, vb34, n123, n234); + + + // ------ Step 2: Compute the gradient of phi with atomic position: ------ + // + // Gradient variables: + // + // dphi_dx1, dphi_dx2, dphi_dx3, dphi_dx4 are the gradients of phi with + // respect to the atomic positions of atoms i1, i2, i3, i4, respectively. + // As an example, consider dphi_dx1. The d'th element is: + double dphi_dx1[g_dim]; // d phi + double dphi_dx2[g_dim]; // dphi_dx1[d] = ---------- (partial derivatives) + double dphi_dx3[g_dim]; // d x[i1][d] + double dphi_dx4[g_dim]; //where d=0,1,2 corresponds to x,y,z (if g_dim==3) + + double dot123 = DotProduct(vb12, vb23); + double dot234 = DotProduct(vb23, vb34); + double L23sqr = DotProduct(vb23, vb23); + double L23 = sqrt(L23sqr); // (central bond length) + double inv_L23sqr = 0.0; + double inv_L23 = 0.0; + if (L23sqr != 0.0) { + inv_L23sqr = 1.0 / L23sqr; + inv_L23 = 1.0 / L23; + } + double neg_inv_L23 = -inv_L23; + double dot123_over_L23sqr = dot123 * inv_L23sqr; + double dot234_over_L23sqr = dot234 * inv_L23sqr; + + for (int d=0; d < g_dim; ++d) { + // See figure above for a visual definitions of these vectors: + proj12on23[d] = vb23[d] * dot123_over_L23sqr; + proj34on23[d] = vb23[d] * dot234_over_L23sqr; + perp12on23[d] = vb12[d] - proj12on23[d]; + perp34on23[d] = vb34[d] - proj34on23[d]; + } + + + // --- Compute the gradient vectors dphi/dx1 and dphi/dx4: --- + + // These two gradients point in the direction of n123 and n234, + // and are scaled by the distances of atoms 1 and 4 from the central axis. + // Distance of atom 1 to central axis: + double perp12on23_len = sqrt(DotProduct(perp12on23, perp12on23)); + // Distance of atom 4 to central axis: + double perp34on23_len = sqrt(DotProduct(perp34on23, perp34on23)); + + double inv_perp12on23 = 0.0; + if (perp12on23_len != 0.0) inv_perp12on23 = 1.0 / perp12on23_len; + double inv_perp34on23 = 0.0; + if (perp34on23_len != 0.0) inv_perp34on23 = 1.0 / perp34on23_len; + + for (int d=0; d < g_dim; ++d) { + dphi_dx1[d] = n123[d] * inv_perp12on23; + dphi_dx4[d] = n234[d] * inv_perp34on23; + } + + // --- Compute the gradient vectors dphi/dx2 and dphi/dx3: --- + // + // This is more tricky because atoms 2 and 3 are shared by both planes + // 123 and 234 (the angle between which defines "phi"). Moving either + // one of these atoms effects both the 123 and 234 planes + // Both the 123 and 234 planes intersect with the plane perpendicular to the + // central bond axis (vb23). The two lines where these intersections occur + // will shift when you move either atom 2 or atom 3. The angle between + // these lines is the dihedral angle, phi. We can define four quantities: + // dphi123_dx2 is the change in "phi" due to the movement of the 123 plane + // ...as a result of moving atom 2. + // dphi234_dx2 is the change in "phi" due to the movement of the 234 plane + // ...as a result of moving atom 2. + // dphi123_dx3 is the change in "phi" due to the movement of the 123 plane + // ...as a result of moving atom 3. + // dphi234_dx3 is the change in "phi" due to the movement of the 234 plane + // ...as a result of moving atom 3. + + double proj12on23_len = dot123 * inv_L23; + double proj34on23_len = dot234 * inv_L23; + // Interpretation: + //The magnitude of "proj12on23_len" is the length of the proj12on23 vector. + //The sign is positive if it points in the same direction as the central + //bond (vb23). Otherwise it is negative. The same goes for "proj34on23". + //(In the example figure in the comment above, both variables are positive.) + + // The forumula used in the 8 lines below explained here: + // "supporting_information/doc/gradient_formula_explanation/" + double dphi123_dx2_coef = neg_inv_L23 * (L23 + proj12on23_len); + double dphi234_dx2_coef = inv_L23 * proj34on23_len; + + double dphi234_dx3_coef = neg_inv_L23 * (L23 + proj34on23_len); + double dphi123_dx3_coef = inv_L23 * proj12on23_len; + + for (int d=0; d < g_dim; ++d) { + // Recall that the n123 and n234 plane normal vectors are proportional to + // the dphi/dx1 and dphi/dx2 gradients vectors + // It turns out we can save slightly more CPU cycles by expressing + // dphi/dx2 and dphi/dx3 as linear combinations of dphi/dx1 and dphi/dx2 + // which we computed already (instead of n123 & n234). + dphi_dx2[d] = dphi123_dx2_coef*dphi_dx1[d] + dphi234_dx2_coef*dphi_dx4[d]; + dphi_dx3[d] = dphi123_dx3_coef*dphi_dx1[d] + dphi234_dx3_coef*dphi_dx4[d]; + } + + + + + #ifdef DIH_DEBUG_NUM + // ----- Numerical test? ----- + + cerr << " -- testing gradient for dihedral (n="< + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/ewald_omp.cpp b/src/USER-OMP/ewald_omp.cpp new file mode 100644 index 0000000000..93b61af186 --- /dev/null +++ b/src/USER-OMP/ewald_omp.cpp @@ -0,0 +1,415 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) +------------------------------------------------------------------------- */ + +#include "mpi.h" +#include "ewald_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "memory.h" + +#include + +#include "math_const.h" + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.00001 + +/* ---------------------------------------------------------------------- */ + +EwaldOMP::EwaldOMP(LAMMPS *lmp, int narg, char **arg) + : Ewald(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ +void EwaldOMP::allocate() +{ + Ewald::allocate(); + + // always re-allocate for simplicity. + delete[] sfacrl; + delete[] sfacim; + + sfacrl = new double[kmax3d*comm->nthreads]; + sfacim = new double[kmax3d*comm->nthreads]; +} + +/* ---------------------------------------------------------------------- + compute the Ewald long-range force, energy, virial +------------------------------------------------------------------------- */ + +void EwaldOMP::compute(int eflag, int vflag) +{ + // set energy/virial flags + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = evflag_atom = eflag_global = vflag_global = + eflag_atom = vflag_atom = 0; + + // extend size of per-atom arrays if necessary + + if (atom->nlocal > nmax) { + memory->destroy(ek); + memory->destroy3d_offset(cs,-kmax_created); + memory->destroy3d_offset(sn,-kmax_created); + nmax = atom->nmax; + memory->create(ek,nmax,3,"ewald:ek"); + memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs"); + memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn"); + kmax_created = kmax; + } + + // partial structure factors on each processor + // total structure factor by summing over procs + + eik_dot_r(); + MPI_Allreduce(sfacrl,sfacrl_all,kcount,MPI_DOUBLE,MPI_SUM,world); + MPI_Allreduce(sfacim,sfacim_all,kcount,MPI_DOUBLE,MPI_SUM,world); + + // K-space portion of electric field + // double loop over K-vectors and local atoms + + double * const * const f = atom->f; + const double * const q = atom->q; + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + const double qscale = force->qqrd2e * scale; + + double eng_tmp = 0.0; + double v0,v1,v2,v3,v4,v5; + v0=v1=v2=v3=v4=v5=0.0; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) reduction(+:eng_tmp,v0,v1,v2,v3,v4,v5) +#endif + { + + int i,j,k,ifrom,ito,tid; + int kx,ky,kz; + double cypz,sypz,exprl,expim,partial; + + loop_setup_thr(ifrom, ito, tid, nlocal, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, 0, NULL, NULL, thr); + + for (i = ifrom; i < ito; i++) { + ek[i][0] = 0.0; + ek[i][1] = 0.0; + ek[i][2] = 0.0; + } + + for (k = 0; k < kcount; k++) { + kx = kxvecs[k]; + ky = kyvecs[k]; + kz = kzvecs[k]; + + for (i = ifrom; i < ito; i++) { + cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i]; + sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i]; + exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz; + expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz; + partial = expim*sfacrl_all[k] - exprl*sfacim_all[k]; + ek[i][0] += partial*eg[k][0]; + ek[i][1] += partial*eg[k][1]; + ek[i][2] += partial*eg[k][2]; + + if (evflag_atom) { + const double partial_peratom = exprl*sfacrl_all[k] + expim*sfacim_all[k]; + if (eflag_atom) eatom[i] += q[i]*ug[k]*partial_peratom; + if (vflag_atom) + for (j = 0; j < 6; j++) + vatom[i][j] += ug[k]*vg[k][j]*partial_peratom; + } + } + } + + // convert E-field to force + + for (i = ifrom; i < ito; i++) { + const double fac = qscale*q[i]; + f[i][0] += fac*ek[i][0]; + f[i][1] += fac*ek[i][1]; + f[i][2] += fac*ek[i][2]; + } + + // global energy + + if (eflag_global) { +#if defined(_OPENMP) +#pragma omp for private(k) +#endif + for (k = 0; k < kcount; k++) + eng_tmp += ug[k] * (sfacrl_all[k]*sfacrl_all[k] + + sfacim_all[k]*sfacim_all[k]); + } + + // global virial + + if (vflag_global) { +#if defined(_OPENMP) +#pragma omp for private(k) +#endif + for (k = 0; k < kcount; k++) { + double uk = ug[k] * (sfacrl_all[k]*sfacrl_all[k] + sfacim_all[k]*sfacim_all[k]); + v0 += uk*vg[k][0]; + v1 += uk*vg[k][1]; + v2 += uk*vg[k][2]; + v3 += uk*vg[k][3]; + v4 += uk*vg[k][4]; + v5 += uk*vg[k][5]; + } + } + + // per-atom energy/virial + // energy includes self-energy correction + + if (evflag_atom) { + if (eflag_atom) { + for (i = ifrom; i < ito; i++) { + eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / + (g_ewald*g_ewald*volume); + eatom[i] *= qscale; + } + } + + if (vflag_atom) + for (i = ifrom; i < ito; i++) + for (j = 0; j < 6; j++) vatom[i][j] *= q[i]*qscale; + } + + reduce_thr(this, eflag,vflag,thr); + } // end of omp parallel region + + if (eflag_global) { + eng_tmp -= g_ewald*qsqsum/MY_PIS + + MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); + energy = eng_tmp * qscale; + } + + if (vflag_global) { + virial[0] = v0 * qscale; + virial[1] = v1 * qscale; + virial[2] = v2 * qscale; + virial[3] = v3 * qscale; + virial[4] = v4 * qscale; + virial[5] = v5 * qscale; + } + + if (slabflag) slabcorr(); +} + +/* ---------------------------------------------------------------------- */ + +void EwaldOMP::eik_dot_r() +{ + const double * const * const x = atom->x; + const double * const q = atom->q; + const int nlocal = atom->nlocal; + const int nthreads = comm->nthreads; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + int i,ifrom,ito,k,l,m,n,ic,tid; + double cstr1,sstr1,cstr2,sstr2,cstr3,sstr3,cstr4,sstr4; + double sqk,clpm,slpm; + + loop_setup_thr(ifrom, ito, tid, nlocal, nthreads); + + double * const sfacrl_thr = sfacrl + tid*kmax3d; + double * const sfacim_thr = sfacim + tid*kmax3d; + + n = 0; + + // (k,0,0), (0,l,0), (0,0,m) + + for (ic = 0; ic < 3; ic++) { + sqk = unitk[ic]*unitk[ic]; + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + for (i = ifrom; i < ito; i++) { + cs[0][ic][i] = 1.0; + sn[0][ic][i] = 0.0; + cs[1][ic][i] = cos(unitk[ic]*x[i][ic]); + sn[1][ic][i] = sin(unitk[ic]*x[i][ic]); + cs[-1][ic][i] = cs[1][ic][i]; + sn[-1][ic][i] = -sn[1][ic][i]; + cstr1 += q[i]*cs[1][ic][i]; + sstr1 += q[i]*sn[1][ic][i]; + } + sfacrl_thr[n] = cstr1; + sfacim_thr[n++] = sstr1; + } + } + + for (m = 2; m <= kmax; m++) { + for (ic = 0; ic < 3; ic++) { + sqk = m*unitk[ic] * m*unitk[ic]; + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + for (i = ifrom; i < ito; i++) { + cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] - + sn[m-1][ic][i]*sn[1][ic][i]; + sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] + + cs[m-1][ic][i]*sn[1][ic][i]; + cs[-m][ic][i] = cs[m][ic][i]; + sn[-m][ic][i] = -sn[m][ic][i]; + cstr1 += q[i]*cs[m][ic][i]; + sstr1 += q[i]*sn[m][ic][i]; + } + sfacrl_thr[n] = cstr1; + sfacim_thr[n++] = sstr1; + } + } + } + + // 1 = (k,l,0), 2 = (k,-l,0) + + for (k = 1; k <= kxmax; k++) { + for (l = 1; l <= kymax; l++) { + sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + for (i = ifrom; i < ito; i++) { + cstr1 += q[i]*(cs[k][0][i]*cs[l][1][i] - sn[k][0][i]*sn[l][1][i]); + sstr1 += q[i]*(sn[k][0][i]*cs[l][1][i] + cs[k][0][i]*sn[l][1][i]); + cstr2 += q[i]*(cs[k][0][i]*cs[l][1][i] + sn[k][0][i]*sn[l][1][i]); + sstr2 += q[i]*(sn[k][0][i]*cs[l][1][i] - cs[k][0][i]*sn[l][1][i]); + } + sfacrl_thr[n] = cstr1; + sfacim_thr[n++] = sstr1; + sfacrl_thr[n] = cstr2; + sfacim_thr[n++] = sstr2; + } + } + } + + // 1 = (0,l,m), 2 = (0,l,-m) + + for (l = 1; l <= kymax; l++) { + for (m = 1; m <= kzmax; m++) { + sqk = (l*unitk[1] * l*unitk[1]) + (m*unitk[2] * m*unitk[2]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + for (i = ifrom; i < ito; i++) { + cstr1 += q[i]*(cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]); + sstr1 += q[i]*(sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]); + cstr2 += q[i]*(cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]); + sstr2 += q[i]*(sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]); + } + sfacrl_thr[n] = cstr1; + sfacim_thr[n++] = sstr1; + sfacrl_thr[n] = cstr2; + sfacim_thr[n++] = sstr2; + } + } + } + + // 1 = (k,0,m), 2 = (k,0,-m) + + for (k = 1; k <= kxmax; k++) { + for (m = 1; m <= kzmax; m++) { + sqk = (k*unitk[0] * k*unitk[0]) + (m*unitk[2] * m*unitk[2]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + for (i = ifrom; i < ito; i++) { + cstr1 += q[i]*(cs[k][0][i]*cs[m][2][i] - sn[k][0][i]*sn[m][2][i]); + sstr1 += q[i]*(sn[k][0][i]*cs[m][2][i] + cs[k][0][i]*sn[m][2][i]); + cstr2 += q[i]*(cs[k][0][i]*cs[m][2][i] + sn[k][0][i]*sn[m][2][i]); + sstr2 += q[i]*(sn[k][0][i]*cs[m][2][i] - cs[k][0][i]*sn[m][2][i]); + } + sfacrl_thr[n] = cstr1; + sfacim_thr[n++] = sstr1; + sfacrl_thr[n] = cstr2; + sfacim_thr[n++] = sstr2; + } + } + } + + // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m) + + for (k = 1; k <= kxmax; k++) { + for (l = 1; l <= kymax; l++) { + for (m = 1; m <= kzmax; m++) { + sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]) + + (m*unitk[2] * m*unitk[2]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + cstr3 = 0.0; + sstr3 = 0.0; + cstr4 = 0.0; + sstr4 = 0.0; + for (i = ifrom; i < ito; i++) { + clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; + slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; + cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + + clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]; + slpm = -sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; + cstr2 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr2 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + + clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]; + slpm = sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]; + cstr3 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr3 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + + clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; + slpm = -sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]; + cstr4 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr4 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + } + sfacrl_thr[n] = cstr1; + sfacim_thr[n++] = sstr1; + sfacrl_thr[n] = cstr2; + sfacim_thr[n++] = sstr2; + sfacrl_thr[n] = cstr3; + sfacim_thr[n++] = sstr3; + sfacrl_thr[n] = cstr4; + sfacim_thr[n++] = sstr4; + } + } + } + } + + sync_threads(); + data_reduce_thr(sfacrl,kmax3d,nthreads,1,tid); + data_reduce_thr(sfacim,kmax3d,nthreads,1,tid); + + } // end of parallel region +} diff --git a/src/USER-OMP/ewald_omp.h b/src/USER-OMP/ewald_omp.h new file mode 100644 index 0000000000..222806f15a --- /dev/null +++ b/src/USER-OMP/ewald_omp.h @@ -0,0 +1,42 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef KSPACE_CLASS + +KSpaceStyle(ewald/omp,EwaldOMP) + +#else + +#ifndef LMP_EWALD_OMP_H +#define LMP_EWALD_OMP_H + +#include "ewald.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + + class EwaldOMP : public Ewald, public ThrOMP { + public: + EwaldOMP(class LAMMPS *, int, char **); + virtual ~EwaldOMP() { }; + virtual void allocate(); + virtual void compute(int, int); + + protected: + virtual void eik_dot_r(); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/fix_gravity_omp.cpp b/src/USER-OMP/fix_gravity_omp.cpp new file mode 100644 index 0000000000..fd32a00e58 --- /dev/null +++ b/src/USER-OMP/fix_gravity_omp.cpp @@ -0,0 +1,112 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "fix_gravity_omp.h" +#include "atom.h" +#include "update.h" +#include "domain.h" +#include "input.h" +#include "modify.h" +#include "respa.h" +#include "variable.h" +#include "error.h" + +using namespace LAMMPS_NS; +using namespace FixConst; + +enum{CHUTE,SPHERICAL,GRADIENT,VECTOR}; +enum{CONSTANT,EQUAL}; + +/* ---------------------------------------------------------------------- */ + +FixGravityOMP::FixGravityOMP(LAMMPS *lmp, int narg, char **arg) : + FixGravity(lmp, narg, arg) { } + +/* ---------------------------------------------------------------------- */ + +void FixGravityOMP::post_force(int vflag) +{ + // update gravity due to variables + + if (varflag != CONSTANT) { + modify->clearstep_compute(); + if (mstyle == EQUAL) magnitude = input->variable->compute_equal(mvar); + if (vstyle == EQUAL) magnitude = input->variable->compute_equal(vvar); + if (pstyle == EQUAL) magnitude = input->variable->compute_equal(pvar); + if (tstyle == EQUAL) magnitude = input->variable->compute_equal(tvar); + if (xstyle == EQUAL) magnitude = input->variable->compute_equal(xvar); + if (ystyle == EQUAL) magnitude = input->variable->compute_equal(yvar); + if (zstyle == EQUAL) magnitude = input->variable->compute_equal(zvar); + modify->addstep_compute(update->ntimestep + 1); + + set_acceleration(); + } + + const double * const * const x = atom->x; + double * const * const f = atom->f; + double * const rmass = atom->rmass; + double * const mass = atom->mass; + int * const mask = atom->mask; + int * const type = atom->type; + const int nlocal = atom->nlocal; + const double xacc_thr = xacc; + const double yacc_thr = yacc; + const double zacc_thr = zacc; + double massone; + + int i; + eflag = 0; + double grav = 0.0; + + if (rmass) { +#if defined(_OPENMP) +#pragma omp parallel for private(i,massone) default(none) reduction(-:grav) +#endif + for (i = 0; i < nlocal; i++) + if (mask[i] & groupbit) { + massone = rmass[i]; + f[i][0] += massone*xacc_thr; + f[i][1] += massone*yacc_thr; + f[i][2] += massone*zacc_thr; + grav -= massone * (xacc_thr*x[i][0] + yacc_thr*x[i][1] + zacc_thr*x[i][2]); + } + } else { +#if defined(_OPENMP) +#pragma omp parallel for private(i,massone) default(none) reduction(-:grav) +#endif + for (i = 0; i < nlocal; i++) + if (mask[i] & groupbit) { + massone = mass[type[i]]; + f[i][0] += massone*xacc_thr; + f[i][1] += massone*yacc_thr; + f[i][2] += massone*zacc_thr; + grav -= massone * (xacc_thr*x[i][0] + yacc_thr*x[i][1] + zacc_thr*x[i][2]); + } + } + egrav = grav; +} + +/* ---------------------------------------------------------------------- */ + +void FixGravityOMP::post_force_respa(int vflag, int ilevel, int iloop) +{ + if (ilevel == nlevels_respa-1) post_force(vflag); +} diff --git a/src/USER-OMP/fix_gravity_omp.h b/src/USER-OMP/fix_gravity_omp.h new file mode 100644 index 0000000000..ca104a1a9a --- /dev/null +++ b/src/USER-OMP/fix_gravity_omp.h @@ -0,0 +1,38 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(gravity/omp,FixGravityOMP) + +#else + +#ifndef LMP_FIX_GRAVITY_OMP_H +#define LMP_FIX_GRAVITY_OMP_H + +#include "fix_gravity.h" + +namespace LAMMPS_NS { + +class FixGravityOMP : public FixGravity { + + public: + FixGravityOMP(class LAMMPS *, int, char **); + virtual void post_force(int); + virtual void post_force_respa(int, int, int); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/fix_nve_sphere_omp.cpp b/src/USER-OMP/fix_nve_sphere_omp.cpp new file mode 100644 index 0000000000..bbe009ecc9 --- /dev/null +++ b/src/USER-OMP/fix_nve_sphere_omp.cpp @@ -0,0 +1,137 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "math.h" +#include "stdio.h" +#include "string.h" +#include "fix_nve_sphere_omp.h" +#include "atom.h" +#include "atom_vec.h" +#include "update.h" +#include "respa.h" +#include "force.h" +#include "error.h" + +using namespace LAMMPS_NS; +using namespace FixConst; + +#define INERTIA 0.4 // moment of inertia prefactor for sphere + +enum{NONE,DIPOLE}; + +/* ---------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- */ + +void FixNVESphereOMP::initial_integrate(int vflag) +{ + double * const * const x = atom->x; + double * const * const v = atom->v; + const double * const * const f = atom->f; + double * const * const omega = atom->omega; + const double * const * const torque = atom->torque; + const double * const radius = atom->radius; + const double * const rmass = atom->rmass; + const int * const mask = atom->mask; + const int nlocal = (igroup == atom->firstgroup) ? atom->nfirst : atom->nlocal; + int i; + + // set timestep here since dt may have changed or come via rRESPA + const double dtfrotate = dtf / INERTIA; + + // update v,x,omega for all particles + // d_omega/dt = torque / inertia +#if defined(_OPENMP) +#pragma omp parallel for private(i) default(none) +#endif + for (i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + const double dtfm = dtf / rmass[i]; + v[i][0] += dtfm * f[i][0]; + v[i][1] += dtfm * f[i][1]; + v[i][2] += dtfm * f[i][2]; + x[i][0] += dtv * v[i][0]; + x[i][1] += dtv * v[i][1]; + x[i][2] += dtv * v[i][2]; + + const double dtirotate = dtfrotate / (radius[i]*radius[i]*rmass[i]); + omega[i][0] += dtirotate * torque[i][0]; + omega[i][1] += dtirotate * torque[i][1]; + omega[i][2] += dtirotate * torque[i][2]; + } + } + + // update mu for dipoles + // d_mu/dt = omega cross mu + // renormalize mu to dipole length + + if (extra == DIPOLE) { + double * const * const mu = atom->mu; +#if defined(_OPENMP) +#pragma omp parallel for private(i) default(none) +#endif + for (i = 0; i < nlocal; i++) { + double g0,g1,g2,msq,scale; + if (mask[i] & groupbit) { + if (mu[i][3] > 0.0) { + g0 = mu[i][0] + dtv * (omega[i][1]*mu[i][2]-omega[i][2]*mu[i][1]); + g1 = mu[i][1] + dtv * (omega[i][2]*mu[i][0]-omega[i][0]*mu[i][2]); + g2 = mu[i][2] + dtv * (omega[i][0]*mu[i][1]-omega[i][1]*mu[i][0]); + msq = g0*g0 + g1*g1 + g2*g2; + scale = mu[i][3]/sqrt(msq); + mu[i][0] = g0*scale; + mu[i][1] = g1*scale; + mu[i][2] = g2*scale; + } + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +void FixNVESphereOMP::final_integrate() +{ + double * const * const v = atom->v; + const double * const * const f = atom->f; + double * const * const omega = atom->omega; + const double * const * const torque = atom->torque; + const double * const rmass = atom->rmass; + const double * const radius = atom->radius; + const int * const mask = atom->mask; + const int nlocal = (igroup == atom->firstgroup) ? atom->nfirst : atom->nlocal; + int i; + + // set timestep here since dt may have changed or come via rRESPA + + const double dtfrotate = dtf / INERTIA; + + // update v,omega for all particles + // d_omega/dt = torque / inertia + +#if defined(_OPENMP) +#pragma omp parallel for private(i) default(none) +#endif + for (i = 0; i < nlocal; i++) + if (mask[i] & groupbit) { + const double dtfm = dtf / rmass[i]; + v[i][0] += dtfm * f[i][0]; + v[i][1] += dtfm * f[i][1]; + v[i][2] += dtfm * f[i][2]; + + const double dtirotate = dtfrotate / (radius[i]*radius[i]*rmass[i]); + omega[i][0] += dtirotate * torque[i][0]; + omega[i][1] += dtirotate * torque[i][1]; + omega[i][2] += dtirotate * torque[i][2]; + } +} diff --git a/src/USER-OMP/fix_nve_sphere_omp.h b/src/USER-OMP/fix_nve_sphere_omp.h new file mode 100644 index 0000000000..ea965f1bb1 --- /dev/null +++ b/src/USER-OMP/fix_nve_sphere_omp.h @@ -0,0 +1,39 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(nve/sphere/omp,FixNVESphereOMP) + +#else + +#ifndef LMP_FIX_NVE_SPHERE_OMP_H +#define LMP_FIX_NVE_SPHERE_OMP_H + +#include "fix_nve_sphere.h" + +namespace LAMMPS_NS { + +class FixNVESphereOMP : public FixNVESphere { + public: + FixNVESphereOMP(class LAMMPS *lmp, int narg, char **arg) : + FixNVESphere(lmp, narg, arg) {}; + + virtual void initial_integrate(int); + virtual void final_integrate(); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/fix_omp.cpp b/src/USER-OMP/fix_omp.cpp new file mode 100644 index 0000000000..7e770ce907 --- /dev/null +++ b/src/USER-OMP/fix_omp.cpp @@ -0,0 +1,330 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + OpenMP based threading support for LAMMPS + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "atom.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_request.h" +#include "update.h" +#include "integrate.h" +#include "min.h" + +#include "fix_omp.h" +#include "thr_data.h" +#include "thr_omp.h" + +#include "pair_hybrid.h" +#include "bond_hybrid.h" +#include "angle_hybrid.h" +#include "dihedral_hybrid.h" +#include "improper_hybrid.h" +#include "kspace.h" + +#include +#include +#include + +#include "suffix.h" + +#if defined(LMP_USER_CUDA) +#include "cuda_modify_flags.h" +#endif + +using namespace LAMMPS_NS; +using namespace FixConst; +#if defined(LMP_USER_CUDA) +using namespace FixConstCuda; +#endif + +static int get_tid() +{ + int tid = 0; +#if defined(_OPENMP) + tid = omp_get_thread_num(); +#endif + return tid; +} + +/* ---------------------------------------------------------------------- */ + +FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg), + thr(NULL), last_omp_style(NULL), last_pair_hybrid(NULL), + _nthr(-1), _neighbor(true), _newton(false) +{ + if ((narg < 4) || (narg > 6)) error->all(FLERR,"Illegal fix OMP command"); + if (strcmp(arg[1],"all") != 0) error->all(FLERR,"Illegal fix OMP command"); + + int nthreads = 1; + if (narg > 3) { +#if defined(_OPENMP) + if (strcmp(arg[3],"*") == 0) +#pragma omp parallel default(none) shared(nthreads) + nthreads = omp_get_num_threads(); + else + nthreads = atoi(arg[3]); +#endif + } + + if (nthreads < 1) + error->all(FLERR,"Illegal number of threads requested."); + + if (nthreads != comm->nthreads) { +#if defined(_OPENMP) + omp_set_num_threads(nthreads); +#endif + comm->nthreads = nthreads; + if (comm->me == 0) { + if (screen) + fprintf(screen," reset %d OpenMP thread(s) per MPI task\n", nthreads); + if (logfile) + fprintf(logfile," reset %d OpenMP thread(s) per MPI task\n", nthreads); + } + } + + if (narg > 4) { + if (strcmp(arg[4],"force/neigh") == 0) + _neighbor = true; + else if (strcmp(arg[4],"force") == 0) + _neighbor = false; + else + error->all(FLERR,"Illegal fix omp mode requested."); + + if (comm->me == 0) { + const char * const mode = _neighbor ? "OpenMP capable" : "serial"; + + if (screen) + fprintf(screen," using %s neighbor list subroutines\n", mode); + if (logfile) + fprintf(logfile," using %s neighbor list subroutines\n", mode); + } + } + +#if 0 /* to be enabled when we can switch between half and full neighbor lists */ + if (narg > 5) { + if (strcmp(arg[5],"neigh/half") == 0) + _newton = true; + else if (strcmp(arg[5],"neigh/full") == 0) + _newton = false; + else + error->all(FLERR,"Illegal fix OMP command"); + + if (comm->me == 0) { + const char * const mode = _newton ? "half" : "full"; + + if (screen) + fprintf(screen," using /omp styles with %s neighbor list builds\n", mode); + if (logfile) + fprintf(logfile," using /omp styles with %s neighbor list builds\n", mode); + } + } +#endif + + // allocate list for per thread accumulator manager class instances + // and then have each thread create an instance of this class to + // encourage the OS to use storage that is "close" to each thread's CPU. + thr = new ThrData *[nthreads]; + _nthr = nthreads; +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + const int tid = get_tid(); + thr[tid] = new ThrData(tid); + } +} + +/* ---------------------------------------------------------------------- */ + +FixOMP::~FixOMP() +{ +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + const int tid = get_tid(); + delete thr[tid]; + } + delete[] thr; +} + +/* ---------------------------------------------------------------------- */ + +int FixOMP::setmask() +{ + // compatibility with USER-CUDA + // our fix doesn't need any data transfer. +#if defined(LMP_USER_CUDA) + if (lmp->cuda) { + int mask = 0; + mask |= PRE_FORCE_CUDA; + mask |= PRE_FORCE_RESPA; + mask |= MIN_PRE_FORCE; + return mask; + } +#endif + + int mask = 0; + mask |= PRE_FORCE; + mask |= PRE_FORCE_RESPA; + mask |= MIN_PRE_FORCE; + return mask; +} + +/* ---------------------------------------------------------------------- */ + +void FixOMP::init() +{ + if (strstr(update->integrate_style,"respa") != NULL) + error->all(FLERR,"Cannot use r-RESPA with /omp styles"); + + int check_hybrid; + last_pair_hybrid = NULL; + last_omp_style = NULL; + const char *last_omp_name = NULL; + const char *last_hybrid_name = NULL; + const char *last_force_name = NULL; + +// determine which is the last force style with OpenMP +// support as this is the one that has to reduce the forces + +#define CheckStyleForOMP(name) \ + check_hybrid = 0; \ + if (force->name) { \ + if ( (strcmp(force->name ## _style,"hybrid") == 0) || \ + (strcmp(force->name ## _style,"hybrid/overlay") == 0) ) \ + check_hybrid=1; \ + if (force->name->suffix_flag & Suffix::OMP) { \ + last_force_name = (const char *) #name; \ + last_omp_name = force->name ## _style; \ + last_omp_style = (void *) force->name; \ + } \ + } + +#define CheckHybridForOMP(name,Class) \ + if (check_hybrid) { \ + Class ## Hybrid *style = (Class ## Hybrid *) force->name; \ + for (int i=0; i < style->nstyles; i++) { \ + if (style->styles[i]->suffix_flag & Suffix::OMP) { \ + last_force_name = (const char *) #name; \ + last_omp_name = style->keywords[i]; \ + last_omp_style = style->styles[i]; \ + } \ + } \ + } + + CheckStyleForOMP(pair); + CheckHybridForOMP(pair,Pair); + if (check_hybrid) { + last_pair_hybrid = last_omp_style; + last_hybrid_name = last_omp_name; + } + + CheckStyleForOMP(bond); + CheckHybridForOMP(bond,Bond); + + CheckStyleForOMP(angle); + CheckHybridForOMP(angle,Angle); + + CheckStyleForOMP(dihedral); + CheckHybridForOMP(dihedral,Dihedral); + + CheckStyleForOMP(improper); + CheckHybridForOMP(improper,Improper); + + CheckStyleForOMP(kspace); + +#undef CheckStyleForOMP +#undef CheckHybridForOMP + set_neighbor_omp(); + + // diagnostic output + if (comm->me == 0) { + if (last_omp_style) { + if (last_pair_hybrid) { + if (screen) + fprintf(screen,"Hybrid pair style last /omp style %s\n", last_hybrid_name); + if (logfile) + fprintf(logfile,"Hybrid pair style last /omp style %s\n", last_hybrid_name); + } + if (screen) + fprintf(screen,"Last active /omp style is %s_style %s\n", + last_force_name, last_omp_name); + if (logfile) + fprintf(logfile,"Last active /omp style is %s_style %s\n", + last_force_name, last_omp_name); + } else { + if (screen) + fprintf(screen,"No /omp style for force computation currently active\n"); + if (logfile) + fprintf(screen,"No /omp style for force computation currently active\n"); + } + } +} + +/* ---------------------------------------------------------------------- */ + +void FixOMP::set_neighbor_omp() +{ + // select or deselect multi-threaded neighbor + // list build depending on setting in package omp. + // NOTE: since we are at the top of the list of + // fixes, we cannot adjust neighbor lists from + // other fixes. those have to be re-implemented + // as /omp fix styles. :-( + + const int neigh_omp = _neighbor ? 1 : 0; + const int nrequest = neighbor->nrequest; + + for (int i = 0; i < nrequest; ++i) + neighbor->requests[i]->omp = neigh_omp; +} + +/* ---------------------------------------------------------------------- */ + +// adjust size and clear out per thread accumulator arrays +void FixOMP::pre_force(int) +{ + const int nall = atom->nlocal + atom->nghost; + + double **f = atom->f; + double **torque = atom->torque; + double *erforce = atom->erforce; + double *de = atom->de; + double *drho = atom->drho; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(f,torque,erforce,de,drho) +#endif + { + const int tid = get_tid(); + thr[tid]->check_tid(tid); + thr[tid]->init_force(nall,f,torque,erforce,de,drho); + } +} + +/* ---------------------------------------------------------------------- */ + +double FixOMP::memory_usage() +{ + double bytes = comm->nthreads * (sizeof(ThrData *) + sizeof(ThrData)); + bytes += comm->nthreads * thr[0]->memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/fix_omp.h b/src/USER-OMP/fix_omp.h new file mode 100644 index 0000000000..d797b08418 --- /dev/null +++ b/src/USER-OMP/fix_omp.h @@ -0,0 +1,73 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(OMP,FixOMP) + +#else + +#ifndef LMP_FIX_OMP_H +#define LMP_FIX_OMP_H + +#include "fix.h" + + +namespace LAMMPS_NS { + +class ThrData; + +class FixOMP : public Fix { + friend class ThrOMP; + + public: + FixOMP(class LAMMPS *, int, char **); + virtual ~FixOMP(); + virtual int setmask(); + virtual void init(); + virtual void pre_force(int); + + virtual void setup_pre_force(int vflag) { pre_force(vflag); }; + virtual void min_setup_pre_force(int vflag) { pre_force(vflag); }; + virtual void min_pre_force(int vflag) { pre_force(vflag); }; + virtual void setup_pre_force_respa(int vflag,int) { pre_force(vflag); }; + virtual void pre_force_respa(int vflag,int,int) { pre_force(vflag); }; + + virtual double memory_usage(); + + ThrData *get_thr(int tid) { return thr[tid]; }; + int get_nthr() const { return _nthr; } + + protected: + ThrData **thr; + void *last_omp_style; // pointer to the style that needs + // to do the general force reduction + void *last_pair_hybrid; // pointer to the pair style that needs + // to call virial_fdot_compute() + + public: + bool get_neighbor() const {return _neighbor;}; + bool get_newton() const {return _newton;}; + + private: + int _nthr; // number of currently active ThrData object + bool _neighbor; // en/disable threads for neighbor list construction + bool _newton; // en/disable newton's 3rd law for local atoms. + + void set_neighbor_omp(); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/fix_peri_neigh_omp.cpp b/src/USER-OMP/fix_peri_neigh_omp.cpp new file mode 100644 index 0000000000..000278c8b7 --- /dev/null +++ b/src/USER-OMP/fix_peri_neigh_omp.cpp @@ -0,0 +1,50 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mike Parks (SNL) +------------------------------------------------------------------------- */ + +#include "fix_peri_neigh_omp.h" +#include "fix_omp.h" +#include "modify.h" +#include "neighbor.h" +#include "neigh_request.h" + +using namespace LAMMPS_NS; +using namespace FixConst; + +/* ---------------------------------------------------------------------- */ + +void FixPeriNeighOMP::init() +{ + if (!first) return; + + // determine status of neighbor flag of the omp package command + int ifix = modify->find_fix("package_omp"); + int use_omp = 0; + if (ifix >=0) { + FixOMP * fix = static_cast(lmp->modify->fix[ifix]); + if (fix->get_neighbor()) use_omp = 1; + } + + // need a full neighbor list once + + int irequest = neighbor->request((void *) this); + neighbor->requests[irequest]->pair = 0; + neighbor->requests[irequest]->fix = 1; + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + neighbor->requests[irequest]->omp = use_omp; + neighbor->requests[irequest]->occasional = 1; +} diff --git a/src/USER-OMP/fix_peri_neigh_omp.h b/src/USER-OMP/fix_peri_neigh_omp.h new file mode 100644 index 0000000000..6d91f39be1 --- /dev/null +++ b/src/USER-OMP/fix_peri_neigh_omp.h @@ -0,0 +1,38 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(PERI_NEIGH_OMP,FixPeriNeighOMP) + +#else + +#ifndef LMP_FIX_PERI_NEIGH_OMP_H +#define LMP_FIX_PERI_NEIGH_OMP_H + +#include "fix_peri_neigh.h" + +namespace LAMMPS_NS { + +class FixPeriNeighOMP : public FixPeriNeigh { + + public: + FixPeriNeighOMP(class LAMMPS *lmp, int narg, char **argv) : + FixPeriNeigh(lmp,narg,argv) {}; + virtual void init(); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/fix_qeq_comb_omp.cpp b/src/USER-OMP/fix_qeq_comb_omp.cpp new file mode 100644 index 0000000000..18b9b5305c --- /dev/null +++ b/src/USER-OMP/fix_qeq_comb_omp.cpp @@ -0,0 +1,192 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "mpi.h" +#include "math.h" +#include "fix_qeq_comb_omp.h" +#include "fix_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "group.h" +#include "memory.h" +#include "modify.h" +#include "error.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "respa.h" +#include "update.h" +#include "pair_comb_omp.h" + +#include + +using namespace LAMMPS_NS; +using namespace FixConst; + +/* ---------------------------------------------------------------------- */ + +FixQEQCombOMP::FixQEQCombOMP(LAMMPS *lmp, int narg, char **arg) + : FixQEQComb(lmp, narg, arg) +{ + if (narg < 5) error->all(FLERR,"Illegal fix qeq/comb/omp command"); +} + +/* ---------------------------------------------------------------------- */ + +void FixQEQCombOMP::init() +{ + if (!atom->q_flag) + error->all(FLERR,"Fix qeq/comb/omp requires atom attribute q"); + + comb = (PairComb *) force->pair_match("comb/omp",1); + if (comb == NULL) + comb = (PairComb *) force->pair_match("comb",1); + if (comb == NULL) error->all(FLERR,"Must use pair_style comb or comb/omp with fix qeq/comb"); + + if (strstr(update->integrate_style,"respa")) + nlevels_respa = ((Respa *) update->integrate)->nlevels; + + ngroup = group->count(igroup); + if (ngroup == 0) error->all(FLERR,"Fix qeq/comb group has no atoms"); + + // determine status of neighbor flag of the omp package command + int ifix = modify->find_fix("package_omp"); + int use_omp = 0; + if (ifix >=0) { + FixOMP * fix = static_cast(lmp->modify->fix[ifix]); + if (fix->get_neighbor()) use_omp = 1; + } + + int irequest = neighbor->request(this); + neighbor->requests[irequest]->pair = 0; + neighbor->requests[irequest]->fix = 1; + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + neighbor->requests[irequest]->omp = use_omp; +} + +/* ---------------------------------------------------------------------- */ + +void FixQEQCombOMP::post_force(int vflag) +{ + int i,ii,iloop,loopmax,inum,*ilist; + double heatpq,qmass,dtq,dtq2; + double enegchkall,enegmaxall; + + if (update->ntimestep % nevery) return; + + // reallocate work arrays if necessary + // qf = charge force + // q1 = charge displacement + // q2 = tmp storage of charge force for next iteration + + if (atom->nmax > nmax) { + memory->destroy(qf); + memory->destroy(q1); + memory->destroy(q2); + nmax = atom->nmax; + memory->create(qf,nmax,"qeq:qf"); + memory->create(q1,nmax,"qeq:q1"); + memory->create(q2,nmax,"qeq:q2"); + vector_atom = qf; + } + + // more loops for first-time charge equilibrium + + iloop = 0; + if (firstflag) loopmax = 500; + else loopmax = 200; + + // charge-equilibration loop + + if (me == 0 && fp) + fprintf(fp,"Charge equilibration on step " BIGINT_FORMAT "\n", + update->ntimestep); + + heatpq = 0.05; + qmass = 0.016; + dtq = 0.01; + dtq2 = 0.5*dtq*dtq/qmass; + + double enegchk = 0.0; + double enegtot = 0.0; + double enegmax = 0.0; + + double *q = atom->q; + int *mask = atom->mask; + + inum = comb->list->inum; + ilist = comb->list->ilist; + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + q1[i] = q2[i] = qf[i] = 0.0; + } + + for (iloop = 0; iloop < loopmax; iloop ++ ) { + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + if (mask[i] & groupbit) { + q1[i] += qf[i]*dtq2 - heatpq*q1[i]; + q[i] += q1[i]; + } + } + comm->forward_comm_fix(this); + + if(comb) enegtot = comb->yasu_char(qf,igroup); + enegtot /= ngroup; + enegchk = enegmax = 0.0; + + for (ii = 0; ii < inum ; ii++) { + i = ilist[ii]; + if (mask[i] & groupbit) { + q2[i] = enegtot-qf[i]; + enegmax = MAX(enegmax,fabs(q2[i])); + enegchk += fabs(q2[i]); + qf[i] = q2[i]; + } + } + + MPI_Allreduce(&enegchk,&enegchkall,1,MPI_DOUBLE,MPI_SUM,world); + enegchk = enegchkall/ngroup; + MPI_Allreduce(&enegmax,&enegmaxall,1,MPI_DOUBLE,MPI_MAX,world); + enegmax = enegmaxall; + + if (enegchk <= precision && enegmax <= 100.0*precision) break; + + if (me == 0 && fp) + fprintf(fp," iteration: %d, enegtot %.6g, " + "enegmax %.6g, fq deviation: %.6g\n", + iloop,enegtot,enegmax,enegchk); + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + if (mask[i] & groupbit) + q1[i] += qf[i]*dtq2 - heatpq*q1[i]; + } + } + + if (me == 0 && fp) { + if (iloop == loopmax) + fprintf(fp,"Charges did not converge in %d iterations\n",iloop); + else + fprintf(fp,"Charges converged in %d iterations to %.10f tolerance\n", + iloop,enegchk); + } +} diff --git a/src/USER-OMP/fix_qeq_comb_omp.h b/src/USER-OMP/fix_qeq_comb_omp.h new file mode 100644 index 0000000000..0febe6b0aa --- /dev/null +++ b/src/USER-OMP/fix_qeq_comb_omp.h @@ -0,0 +1,32 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(qeq/comb/omp,FixQEQCombOMP) + +#else + +#ifndef LMP_FIX_QEQ_COMB_OMP_H +#define LMP_FIX_QEQ_COMB_OMP_H + +#include "fix_qeq_comb.h" + +namespace LAMMPS_NS { + +class FixQEQCombOMP : public FixQEQComb { + public: + FixQEQCombOMP(class LAMMPS *, int, char **); + virtual void init(); + virtual void post_force(int); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/fix_shear_history_omp.cpp b/src/USER-OMP/fix_shear_history_omp.cpp new file mode 100644 index 0000000000..d410f6363c --- /dev/null +++ b/src/USER-OMP/fix_shear_history_omp.cpp @@ -0,0 +1,152 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "string.h" +#include "stdio.h" +#include "fix_shear_history_omp.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "force.h" +#include "pair.h" +#include "update.h" +#include "modify.h" +#include "error.h" + +#if defined(_OPENMP) +#include +#endif + +using namespace LAMMPS_NS; +using namespace FixConst; + +#define MAXTOUCH 15 + +/* ---------------------------------------------------------------------- + copy shear partner info from neighbor lists to atom arrays + so can be exchanged with atoms +------------------------------------------------------------------------- */ + +void FixShearHistoryOMP::pre_exchange() +{ + + const int nlocal = atom->nlocal; + const int nghost = atom->nghost; + const int nall = nlocal + nghost; + const int nthreads = comm->nthreads; + + int flag = 0; +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(flag) +#endif + { + +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + + // each thread works on a fixed chunk of local and ghost atoms. + const int ldelta = 1 + nlocal/nthreads; + const int lfrom = tid*ldelta; + const int lmax = lfrom +ldelta; + const int lto = (lmax > nlocal) ? nlocal : lmax; + + const int gdelta = 1 + nghost/nthreads; + const int gfrom = nlocal + tid*gdelta; + const int gmax = gfrom + gdelta; + const int gto = (gmax > nall) ? nall : gmax; + + + int i,j,ii,jj,m,inum,jnum; + int *ilist,*jlist,*numneigh,**firstneigh; + int *touch,**firsttouch; + double *shear,*allshear,**firstshear; + + // zero npartners for all current atoms + + for (i = lfrom; i < lto; i++) npartner[i] = 0; + + // copy shear info from neighbor list atoms to atom arrays + + int *tag = atom->tag; + + NeighList *list = pair->list; + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + firsttouch = list->listgranhistory->firstneigh; + firstshear = list->listgranhistory->firstdouble; + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + jlist = firstneigh[i]; + allshear = firstshear[i]; + jnum = numneigh[i]; + touch = firsttouch[i]; + + for (jj = 0; jj < jnum; jj++) { + if (touch[jj]) { + j = jlist[jj]; + j &= NEIGHMASK; + shear = &allshear[3*jj]; + + if ((i >= lfrom) && (i < lto)) { + if (npartner[i] < MAXTOUCH) { + m = npartner[i]; + partner[i][m] = tag[j]; + shearpartner[i][m][0] = shear[0]; + shearpartner[i][m][1] = shear[1]; + shearpartner[i][m][2] = shear[2]; + } + npartner[i]++; + } + + if ((j >= lfrom) && (j < lto)) { + if (npartner[j] < MAXTOUCH) { + m = npartner[j]; + partner[j][m] = tag[i]; + shearpartner[j][m][0] = -shear[0]; + shearpartner[j][m][1] = -shear[1]; + shearpartner[j][m][2] = -shear[2]; + } + npartner[j]++; + } + + if ((j >= gfrom) && (j < gto)) { + npartner[j]++; + } + } + } + } + + // test for too many touching neighbors + int myflag = 0; + for (i = lfrom; i < lto; i++) + if (npartner[i] >= MAXTOUCH) myflag = 1; + + if (myflag) +#if defined(_OPENMP) +#pragma omp atomic +#endif + ++flag; + } + + int flag_all; + MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); + if (flag_all) + error->all(FLERR,"Too many touching neighbors - boost MAXTOUCH"); +} diff --git a/src/USER-OMP/fix_shear_history_omp.h b/src/USER-OMP/fix_shear_history_omp.h new file mode 100644 index 0000000000..aa581ff6a7 --- /dev/null +++ b/src/USER-OMP/fix_shear_history_omp.h @@ -0,0 +1,38 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(SHEAR_HISTORY/omp,FixShearHistoryOMP) + +#else + +#ifndef LMP_FIX_SHEAR_HISTORY_OMP_H +#define LMP_FIX_SHEAR_HISTORY_OMP_H + +#include "fix_shear_history.h" + +namespace LAMMPS_NS { + +class FixShearHistoryOMP : public FixShearHistory { + + public: + FixShearHistoryOMP(class LAMMPS *lmp, int narg, char **argv) + : FixShearHistory(lmp,narg,argv) {}; + virtual void pre_exchange(); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/fix_wall_gran_omp.cpp b/src/USER-OMP/fix_wall_gran_omp.cpp new file mode 100644 index 0000000000..e131acc0fa --- /dev/null +++ b/src/USER-OMP/fix_wall_gran_omp.cpp @@ -0,0 +1,148 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "fix_wall_gran_omp.h" +#include "atom.h" +#include "update.h" + +using namespace LAMMPS_NS; +using namespace FixConst; + +enum{XPLANE=0,YPLANE=1,ZPLANE=2,ZCYLINDER}; // XYZ PLANE need to be 0,1,2 +enum{HOOKE,HOOKE_HISTORY,HERTZ_HISTORY}; + +#define BIG 1.0e20 + +/* ---------------------------------------------------------------------- */ + +FixWallGranOMP::FixWallGranOMP(LAMMPS *lmp, int narg, char **arg) : + FixWallGran(lmp, narg, arg) { } + +/* ---------------------------------------------------------------------- */ + +void FixWallGranOMP::post_force(int vflag) +{ + double vwall[3]; + + // set position of wall to initial settings and velocity to 0.0 + // if wiggle or shear, set wall position and velocity accordingly + + double wlo = lo; + double whi = hi; + vwall[0] = vwall[1] = vwall[2] = 0.0; + if (wiggle) { + double arg = omega * (update->ntimestep - time_origin) * dt; + if (wallstyle == axis) { + wlo = lo + amplitude - amplitude*cos(arg); + whi = hi + amplitude - amplitude*cos(arg); + } + vwall[axis] = amplitude*omega*sin(arg); + } else if (wshear) vwall[axis] = vshear; + + // loop over all my atoms + // rsq = distance from wall + // dx,dy,dz = signed distance from wall + // for rotating cylinder, reset vwall based on particle position + // skip atom if not close enough to wall + // if wall was set to NULL, it's skipped since lo/hi are infinity + // compute force and torque on atom if close enough to wall + // via wall potential matched to pair potential + // set shear if pair potential stores history + + double * const * const x = atom->x; + double * const * const v = atom->v; + double * const * const f = atom->f; + double * const * const omega = atom->omega; + double * const * const torque = atom->torque; + double * const radius = atom->radius; + double * const rmass = atom->rmass; + const int * const mask = atom->mask; + const int nlocal = atom->nlocal; + + shearupdate = (update->setupflag) ? 0 : 1; + + int i; +#if defined(_OPENMP) +#pragma omp parallel for private(i) default(none) firstprivate(vwall,wlo,whi) +#endif + for (i = 0; i < nlocal; i++) { + + if (mask[i] & groupbit) { + double dx,dy,dz,del1,del2,delxy,delr,rsq; + + dx = dy = dz = 0.0; + + if (wallstyle == XPLANE) { + del1 = x[i][0] - wlo; + del2 = whi - x[i][0]; + if (del1 < del2) dx = del1; + else dx = -del2; + } else if (wallstyle == YPLANE) { + del1 = x[i][1] - wlo; + del2 = whi - x[i][1]; + if (del1 < del2) dy = del1; + else dy = -del2; + } else if (wallstyle == ZPLANE) { + del1 = x[i][2] - wlo; + del2 = whi - x[i][2]; + if (del1 < del2) dz = del1; + else dz = -del2; + } else if (wallstyle == ZCYLINDER) { + delxy = sqrt(x[i][0]*x[i][0] + x[i][1]*x[i][1]); + delr = cylradius - delxy; + if (delr > radius[i]) dz = cylradius; + else { + dx = -delr/delxy * x[i][0]; + dy = -delr/delxy * x[i][1]; + if (wshear && axis != 2) { + vwall[0] = vshear * x[i][1]/delxy; + vwall[1] = -vshear * x[i][0]/delxy; + vwall[2] = 0.0; + } + } + } + + rsq = dx*dx + dy*dy + dz*dz; + + if (rsq > radius[i]*radius[i]) { + if (pairstyle != HOOKE) { + shear[i][0] = 0.0; + shear[i][1] = 0.0; + shear[i][2] = 0.0; + } + } else { + if (pairstyle == HOOKE) + hooke(rsq,dx,dy,dz,vwall,v[i],f[i],omega[i],torque[i], + radius[i],rmass[i]); + else if (pairstyle == HOOKE_HISTORY) + hooke_history(rsq,dx,dy,dz,vwall,v[i],f[i],omega[i],torque[i], + radius[i],rmass[i],shear[i]); + else if (pairstyle == HERTZ_HISTORY) + hertz_history(rsq,dx,dy,dz,vwall,v[i],f[i],omega[i],torque[i], + radius[i],rmass[i],shear[i]); + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +void FixWallGranOMP::post_force_respa(int vflag, int ilevel, int iloop) +{ + if (ilevel == nlevels_respa-1) post_force(vflag); +} diff --git a/src/USER-OMP/fix_wall_gran_omp.h b/src/USER-OMP/fix_wall_gran_omp.h new file mode 100644 index 0000000000..cd26a7b40a --- /dev/null +++ b/src/USER-OMP/fix_wall_gran_omp.h @@ -0,0 +1,38 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(wall/gran/omp,FixWallGranOMP) + +#else + +#ifndef LMP_FIX_WALL_GRAN_OMP_H +#define LMP_FIX_WALL_GRAN_OMP_H + +#include "fix_wall_gran.h" + +namespace LAMMPS_NS { + +class FixWallGranOMP : public FixWallGran { + + public: + FixWallGranOMP(class LAMMPS *, int, char **); + virtual void post_force(int); + virtual void post_force_respa(int, int, int); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/hack_openmp_for_pgi.sh b/src/USER-OMP/hack_openmp_for_pgi.sh new file mode 100644 index 0000000000..a076fd1fcb --- /dev/null +++ b/src/USER-OMP/hack_openmp_for_pgi.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +for f in *.h *.cpp +do \ + sed -e '/#pragma omp/s/^\(.*default\)(none)\(.*\)$/\1(shared)\2/' \ + -e '/#pragma omp/s/shared([a-z0-9,_]\+)//' \ + -i.bak $f +done diff --git a/src/USER-OMP/improper_class2_omp.cpp b/src/USER-OMP/improper_class2_omp.cpp new file mode 100644 index 0000000000..08f4a21e00 --- /dev/null +++ b/src/USER-OMP/improper_class2_omp.cpp @@ -0,0 +1,698 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "improper_class2_omp.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "domain.h" +#include "force.h" +#include "update.h" +#include "error.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define TOLERANCE 0.05 +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +ImproperClass2OMP::ImproperClass2OMP(class LAMMPS *lmp) + : ImproperClass2(lmp), ThrOMP(lmp,THR_IMPROPER) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void ImproperClass2OMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nimproperlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void ImproperClass2OMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,i4,i,j,k,n,type; + double eimproper; + double delr[3][3],rmag[3],rinvmag[3],rmag2[3]; + double theta[3],costheta[3],sintheta[3]; + double cossqtheta[3],sinsqtheta[3],invstheta[3]; + double rABxrCB[3],rDBxrAB[3],rCBxrDB[3]; + double ddelr[3][4],dr[3][4][3],dinvr[3][4][3]; + double dthetadr[3][4][3],dinvsth[3][4][3]; + double dinv3r[4][3],dinvs3r[3][4][3]; + double drCBxrDB[3],rCBxdrDB[3],drDBxrAB[3],rDBxdrAB[3]; + double drABxrCB[3],rABxdrCB[3]; + double dot1,dot2,dd[3]; + double fdot[3][4][3],ftmp,invs3r[3],inv3r; + double t,tt1,tt3,sc1; + double dotCBDBAB,dotDBABCB,dotABCBDB; + double chi,deltachi,d2chi,cossin2; + double drAB[3][4][3],drCB[3][4][3],drDB[3][4][3]; + double dchi[3][4][3],dtotalchi[4][3]; + double schiABCD,chiABCD,schiCBDA,chiCBDA,schiDBAC,chiDBAC; + double fabcd[4][3]; + + eimproper = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const improperlist = neighbor->improperlist; + const int nlocal = atom->nlocal; + + for (i = 0; i < 3; i++) + for (j = 0; j < 4; j++) + for (k = 0; k < 3; k++) { + dthetadr[i][j][k] = 0.0; + drAB[i][j][k] = 0.0; + drCB[i][j][k] = 0.0; + drDB[i][j][k] = 0.0; + } + + for (n = nfrom; n < nto; n++) { + i1 = improperlist[n][0]; + i2 = improperlist[n][1]; + i3 = improperlist[n][2]; + i4 = improperlist[n][3]; + type = improperlist[n][4]; + + if (k0[type] == 0.0) continue; + + // difference vectors + + delr[0][0] = x[i1][0] - x[i2][0]; + delr[0][1] = x[i1][1] - x[i2][1]; + delr[0][2] = x[i1][2] - x[i2][2]; + + delr[1][0] = x[i3][0] - x[i2][0]; + delr[1][1] = x[i3][1] - x[i2][1]; + delr[1][2] = x[i3][2] - x[i2][2]; + + delr[2][0] = x[i4][0] - x[i2][0]; + delr[2][1] = x[i4][1] - x[i2][1]; + delr[2][2] = x[i4][2] - x[i2][2]; + + // bond lengths and associated values + + for (i = 0; i < 3; i++) { + rmag2[i] = delr[i][0]*delr[i][0] + delr[i][1]*delr[i][1] + + delr[i][2]*delr[i][2]; + rmag[i] = sqrt(rmag2[i]); + rinvmag[i] = 1.0/rmag[i]; + } + + // angle ABC, CBD, ABD + + costheta[0] = (delr[0][0]*delr[1][0] + delr[0][1]*delr[1][1] + + delr[0][2]*delr[1][2]) / (rmag[0]*rmag[1]); + costheta[1] = (delr[1][0]*delr[2][0] + delr[1][1]*delr[2][1] + + delr[1][2]*delr[2][2]) / (rmag[1]*rmag[2]); + costheta[2] = (delr[0][0]*delr[2][0] + delr[0][1]*delr[2][1] + + delr[0][2]*delr[2][2]) / (rmag[0]*rmag[2]); + + // angle error check + + for (i = 0; i < 3; i++) { + if (costheta[i] == -1.0) { + int me = comm->me; + if (screen) { + char str[128]; + sprintf(str, + "Improper problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", + me, thr->get_tid(),update->ntimestep, + atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); + error->warning(FLERR,str,0); + fprintf(screen," 1st atom: %d %g %g %g\n", + me,x[i1][0],x[i1][1],x[i1][2]); + fprintf(screen," 2nd atom: %d %g %g %g\n", + me,x[i2][0],x[i2][1],x[i2][2]); + fprintf(screen," 3rd atom: %d %g %g %g\n", + me,x[i3][0],x[i3][1],x[i3][2]); + fprintf(screen," 4th atom: %d %g %g %g\n", + me,x[i4][0],x[i4][1],x[i4][2]); + } + } + } + + for (i = 0; i < 3; i++) { + if (costheta[i] > 1.0) costheta[i] = 1.0; + if (costheta[i] < -1.0) costheta[i] = -1.0; + theta[i] = acos(costheta[i]); + cossqtheta[i] = costheta[i]*costheta[i]; + sintheta[i] = sin(theta[i]); + invstheta[i] = 1.0/sintheta[i]; + sinsqtheta[i] = sintheta[i]*sintheta[i]; + } + + // cross & dot products + + cross(delr[0],delr[1],rABxrCB); + cross(delr[2],delr[0],rDBxrAB); + cross(delr[1],delr[2],rCBxrDB); + + dotCBDBAB = dot(rCBxrDB,delr[0]); + dotDBABCB = dot(rDBxrAB,delr[1]); + dotABCBDB = dot(rABxrCB,delr[2]); + + t = rmag[0] * rmag[1] * rmag[2]; + inv3r = 1.0/t; + invs3r[0] = invstheta[1] * inv3r; + invs3r[1] = invstheta[2] * inv3r; + invs3r[2] = invstheta[0] * inv3r; + + // chi ABCD, CBDA, DBAC + // final chi is average of three + + schiABCD = dotCBDBAB * invs3r[0]; + chiABCD = asin(schiABCD); + schiCBDA = dotDBABCB * invs3r[1]; + chiCBDA = asin(schiCBDA); + schiDBAC = dotABCBDB * invs3r[2]; + chiDBAC = asin(schiDBAC); + + chi = (chiABCD + chiCBDA + chiDBAC) / 3.0; + deltachi = chi - chi0[type]; + d2chi = deltachi * deltachi; + + // energy + + if (EFLAG) eimproper = k0[type]*d2chi; + + // forces + // define d(delr) + // i = bond AB/CB/DB, j = atom A/B/C/D + + ddelr[0][0] = 1.0; + ddelr[0][1] = -1.0; + ddelr[0][2] = 0.0; + ddelr[0][3] = 0.0; + ddelr[1][0] = 0.0; + ddelr[1][1] = -1.0; + ddelr[1][2] = 1.0; + ddelr[1][3] = 0.0; + ddelr[2][0] = 0.0; + ddelr[2][1] = -1.0; + ddelr[2][2] = 0.0; + ddelr[2][3] = 1.0; + + // compute d(|r|)/dr and d(1/|r|)/dr for each direction, bond and atom + // define d(r) for each r + // i = bond AB/CB/DB, j = atom A/B/C/D, k = X/Y/Z + + for (i = 0; i < 3; i++) + for (j = 0; j < 4; j++) + for (k = 0; k < 3; k++) { + dr[i][j][k] = delr[i][k] * ddelr[i][j] / rmag[i]; + dinvr[i][j][k] = -dr[i][j][k] / rmag2[i]; + } + + // compute d(1 / (|r_AB| * |r_CB| * |r_DB|) / dr + // i = atom A/B/C/D, j = X/Y/Z + + for (i = 0; i < 4; i++) + for (j = 0; j < 3; j++) + dinv3r[i][j] = rinvmag[1] * (rinvmag[2] * dinvr[0][i][j] + + rinvmag[0] * dinvr[2][i][j]) + + rinvmag[2] * rinvmag[0] * dinvr[1][i][j]; + + // compute d(theta)/d(r) for 3 angles + // angleABC + + tt1 = costheta[0] / rmag2[0]; + tt3 = costheta[0] / rmag2[1]; + sc1 = 1.0 / sqrt(1.0 - cossqtheta[0]); + + dthetadr[0][0][0] = sc1 * ((tt1 * delr[0][0]) - + (delr[1][0] * rinvmag[0] * rinvmag[1])); + dthetadr[0][0][1] = sc1 * ((tt1 * delr[0][1]) - + (delr[1][1] * rinvmag[0] * rinvmag[1])); + dthetadr[0][0][2] = sc1 * ((tt1 * delr[0][2]) - + (delr[1][2] * rinvmag[0] * rinvmag[1])); + dthetadr[0][1][0] = -sc1 * ((tt1 * delr[0][0]) - + (delr[1][0] * rinvmag[0] * rinvmag[1]) + + (tt3 * delr[1][0]) - + (delr[0][0] * rinvmag[0] * rinvmag[1])); + dthetadr[0][1][1] = -sc1 * ((tt1 * delr[0][1]) - + (delr[1][1] * rinvmag[0] * rinvmag[1]) + + (tt3 * delr[1][1]) - + (delr[0][1] * rinvmag[0] * rinvmag[1])); + dthetadr[0][1][2] = -sc1 * ((tt1 * delr[0][2]) - + (delr[1][2] * rinvmag[0] * rinvmag[1]) + + (tt3 * delr[1][2]) - + (delr[0][2] * rinvmag[0] * rinvmag[1])); + dthetadr[0][2][0] = sc1 * ((tt3 * delr[1][0]) - + (delr[0][0] * rinvmag[0] * rinvmag[1])); + dthetadr[0][2][1] = sc1 * ((tt3 * delr[1][1]) - + (delr[0][1] * rinvmag[0] * rinvmag[1])); + dthetadr[0][2][2] = sc1 * ((tt3 * delr[1][2]) - + (delr[0][2] * rinvmag[0] * rinvmag[1])); + + // angleCBD + + tt1 = costheta[1] / rmag2[1]; + tt3 = costheta[1] / rmag2[2]; + sc1 = 1.0 / sqrt(1.0 - cossqtheta[1]); + + dthetadr[1][2][0] = sc1 * ((tt1 * delr[1][0]) - + (delr[2][0] * rinvmag[1] * rinvmag[2])); + dthetadr[1][2][1] = sc1 * ((tt1 * delr[1][1]) - + (delr[2][1] * rinvmag[1] * rinvmag[2])); + dthetadr[1][2][2] = sc1 * ((tt1 * delr[1][2]) - + (delr[2][2] * rinvmag[1] * rinvmag[2])); + dthetadr[1][1][0] = -sc1 * ((tt1 * delr[1][0]) - + (delr[2][0] * rinvmag[1] * rinvmag[2]) + + (tt3 * delr[2][0]) - + (delr[1][0] * rinvmag[2] * rinvmag[1])); + dthetadr[1][1][1] = -sc1 * ((tt1 * delr[1][1]) - + (delr[2][1] * rinvmag[1] * rinvmag[2]) + + (tt3 * delr[2][1]) - + (delr[1][1] * rinvmag[2] * rinvmag[1])); + dthetadr[1][1][2] = -sc1 * ((tt1 * delr[1][2]) - + (delr[2][2] * rinvmag[1] * rinvmag[2]) + + (tt3 * delr[2][2]) - + (delr[1][2] * rinvmag[2] * rinvmag[1])); + dthetadr[1][3][0] = sc1 * ((tt3 * delr[2][0]) - + (delr[1][0] * rinvmag[2] * rinvmag[1])); + dthetadr[1][3][1] = sc1 * ((tt3 * delr[2][1]) - + (delr[1][1] * rinvmag[2] * rinvmag[1])); + dthetadr[1][3][2] = sc1 * ((tt3 * delr[2][2]) - + (delr[1][2] * rinvmag[2] * rinvmag[1])); + + // angleABD + + tt1 = costheta[2] / rmag2[0]; + tt3 = costheta[2] / rmag2[2]; + sc1 = 1.0 / sqrt(1.0 - cossqtheta[2]); + + dthetadr[2][0][0] = sc1 * ((tt1 * delr[0][0]) - + (delr[2][0] * rinvmag[0] * rinvmag[2])); + dthetadr[2][0][1] = sc1 * ((tt1 * delr[0][1]) - + (delr[2][1] * rinvmag[0] * rinvmag[2])); + dthetadr[2][0][2] = sc1 * ((tt1 * delr[0][2]) - + (delr[2][2] * rinvmag[0] * rinvmag[2])); + dthetadr[2][1][0] = -sc1 * ((tt1 * delr[0][0]) - + (delr[2][0] * rinvmag[0] * rinvmag[2]) + + (tt3 * delr[2][0]) - + (delr[0][0] * rinvmag[2] * rinvmag[0])); + dthetadr[2][1][1] = -sc1 * ((tt1 * delr[0][1]) - + (delr[2][1] * rinvmag[0] * rinvmag[2]) + + (tt3 * delr[2][1]) - + (delr[0][1] * rinvmag[2] * rinvmag[0])); + dthetadr[2][1][2] = -sc1 * ((tt1 * delr[0][2]) - + (delr[2][2] * rinvmag[0] * rinvmag[2]) + + (tt3 * delr[2][2]) - + (delr[0][2] * rinvmag[2] * rinvmag[0])); + dthetadr[2][3][0] = sc1 * ((tt3 * delr[2][0]) - + (delr[0][0] * rinvmag[2] * rinvmag[0])); + dthetadr[2][3][1] = sc1 * ((tt3 * delr[2][1]) - + (delr[0][1] * rinvmag[2] * rinvmag[0])); + dthetadr[2][3][2] = sc1 * ((tt3 * delr[2][2]) - + (delr[0][2] * rinvmag[2] * rinvmag[0])); + + // compute d( 1 / sin(theta))/dr + // i = angle, j = atom, k = direction + + for (i = 0; i < 3; i++) { + cossin2 = -costheta[i] / sinsqtheta[i]; + for (j = 0; j < 4; j++) + for (k = 0; k < 3; k++) + dinvsth[i][j][k] = cossin2 * dthetadr[i][j][k]; + } + + // compute d(1 / sin(theta) * |r_AB| * |r_CB| * |r_DB|)/dr + // i = angle, j = atom + + for (i = 0; i < 4; i++) + for (j = 0; j < 3; j++) { + dinvs3r[0][i][j] = (invstheta[1] * dinv3r[i][j]) + + (inv3r * dinvsth[1][i][j]); + dinvs3r[1][i][j] = (invstheta[2] * dinv3r[i][j]) + + (inv3r * dinvsth[2][i][j]); + dinvs3r[2][i][j] = (invstheta[0] * dinv3r[i][j]) + + (inv3r * dinvsth[0][i][j]); + } + + // drCB(i,j,k), etc + // i = vector X'/Y'/Z', j = atom A/B/C/D, k = direction X/Y/Z + + for (i = 0; i < 3; i++) { + drCB[i][1][i] = -1.0; + drAB[i][1][i] = -1.0; + drDB[i][1][i] = -1.0; + drDB[i][3][i] = 1.0; + drCB[i][2][i] = 1.0; + drAB[i][0][i] = 1.0; + } + + // d((r_CB x r_DB) dot r_AB) + // r_CB x d(r_DB) + // d(r_CB) x r_DB + // (r_CB x d(r_DB)) + (d(r_CB) x r_DB) + // (r_CB x d(r_DB)) + (d(r_CB) x r_DB) dot r_AB + // d(r_AB) dot (r_CB x r_DB) + + for (i = 0; i < 3; i++) + for (j = 0; j < 4; j++) { + cross(delr[1],drDB[i][j],rCBxdrDB); + cross(drCB[i][j],delr[2],drCBxrDB); + for (k = 0; k < 3; k++) dd[k] = rCBxdrDB[k] + drCBxrDB[k]; + dot1 = dot(dd,delr[0]); + dot2 = dot(rCBxrDB,drAB[i][j]); + fdot[0][j][i] = dot1 + dot2; + } + + // d((r_DB x r_AB) dot r_CB) + // r_DB x d(r_AB) + // d(r_DB) x r_AB + // (r_DB x d(r_AB)) + (d(r_DB) x r_AB) + // (r_DB x d(r_AB)) + (d(r_DB) x r_AB) dot r_CB + // d(r_CB) dot (r_DB x r_AB) + + for (i = 0; i < 3; i++) + for (j = 0; j < 4; j++) { + cross(delr[2],drAB[i][j],rDBxdrAB); + cross(drDB[i][j],delr[0],drDBxrAB); + for (k = 0; k < 3; k++) dd[k] = rDBxdrAB[k] + drDBxrAB[k]; + dot1 = dot(dd,delr[1]); + dot2 = dot(rDBxrAB,drCB[i][j]); + fdot[1][j][i] = dot1 + dot2; + } + + // d((r_AB x r_CB) dot r_DB) + // r_AB x d(r_CB) + // d(r_AB) x r_CB + // (r_AB x d(r_CB)) + (d(r_AB) x r_CB) + // (r_AB x d(r_CB)) + (d(r_AB) x r_CB) dot r_DB + // d(r_DB) dot (r_AB x r_CB) + + for (i = 0; i < 3; i++) + for (j = 0; j < 4; j++) { + cross(delr[0],drCB[i][j],rABxdrCB); + cross(drAB[i][j],delr[1],drABxrCB); + for (k = 0; k < 3; k++) dd[k] = rABxdrCB[k] + drABxrCB[k]; + dot1 = dot(dd,delr[2]); + dot2 = dot(rABxrCB,drDB[i][j]); + fdot[2][j][i] = dot1 + dot2; + } + + // force on each atom + + for (i = 0; i < 4; i++) + for (j = 0; j < 3; j++) { + ftmp = (fdot[0][i][j] * invs3r[0]) + + (dinvs3r[0][i][j] * dotCBDBAB); + dchi[0][i][j] = ftmp / cos(chiABCD); + ftmp = (fdot[1][i][j] * invs3r[1]) + + (dinvs3r[1][i][j] * dotDBABCB); + dchi[1][i][j] = ftmp / cos(chiCBDA); + ftmp = (fdot[2][i][j] * invs3r[2]) + + (dinvs3r[2][i][j] * dotABCBDB); + dchi[2][i][j] = ftmp / cos(chiDBAC); + dtotalchi[i][j] = (dchi[0][i][j]+dchi[1][i][j]+dchi[2][i][j]) / 3.0; + } + + for (i = 0; i < 4; i++) + for (j = 0; j < 3; j++) + fabcd[i][j] = -2.0*k0[type] * deltachi*dtotalchi[i][j]; + + // apply force to each of 4 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += fabcd[0][0]; + f[i1][1] += fabcd[0][1]; + f[i1][2] += fabcd[0][2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] += fabcd[1][0]; + f[i2][1] += fabcd[1][1]; + f[i2][2] += fabcd[1][2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += fabcd[2][0]; + f[i3][1] += fabcd[2][1]; + f[i3][2] += fabcd[2][2]; + } + + if (NEWTON_BOND || i4 < nlocal) { + f[i4][0] += fabcd[3][0]; + f[i4][1] += fabcd[3][1]; + f[i4][2] += fabcd[3][2]; + } + + if (EVFLAG) + ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper, + fabcd[0],fabcd[2],fabcd[3], + delr[0][0],delr[0][1],delr[0][2], + delr[1][0],delr[1][1],delr[1][2], + delr[2][0]-delr[1][0],delr[2][1]-delr[1][1], + delr[2][2]-delr[1][2],thr); + } + + // compute angle-angle interactions + angleangle_thr(nfrom,nto,thr); +} + +/* ---------------------------------------------------------------------- + angle-angle interactions within improper +------------------------------------------------------------------------- */ +template +void ImproperClass2OMP::angleangle_thr(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,i4,i,j,k,n,type; + double eimproper; + double delxAB,delyAB,delzAB,rABmag2,rAB; + double delxBC,delyBC,delzBC,rBCmag2,rBC; + double delxBD,delyBD,delzBD,rBDmag2,rBD; + double costhABC,thetaABC,costhABD; + double thetaABD,costhCBD,thetaCBD,dthABC,dthCBD,dthABD; + double sc1,t1,t3,r12; + double dthetadr[3][4][3],fabcd[4][3]; + + eimproper = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const improperlist = neighbor->improperlist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = improperlist[n][0]; + i2 = improperlist[n][1]; + i3 = improperlist[n][2]; + i4 = improperlist[n][3]; + type = improperlist[n][4]; + + // difference vectors + + delxAB = x[i1][0] - x[i2][0]; + delyAB = x[i1][1] - x[i2][1]; + delzAB = x[i1][2] - x[i2][2]; + + delxBC = x[i3][0] - x[i2][0]; + delyBC = x[i3][1] - x[i2][1]; + delzBC = x[i3][2] - x[i2][2]; + + delxBD = x[i4][0] - x[i2][0]; + delyBD = x[i4][1] - x[i2][1]; + delzBD = x[i4][2] - x[i2][2]; + + // bond lengths + + rABmag2 = delxAB*delxAB + delyAB*delyAB + delzAB*delzAB; + rAB = sqrt(rABmag2); + rBCmag2 = delxBC*delxBC + delyBC*delyBC + delzBC*delzBC; + rBC = sqrt(rBCmag2); + rBDmag2 = delxBD*delxBD + delyBD*delyBD + delzBD*delzBD; + rBD = sqrt(rBDmag2); + + // angle ABC, ABD, CBD + + costhABC = (delxAB*delxBC + delyAB*delyBC + delzAB*delzBC) / (rAB * rBC); + if (costhABC > 1.0) costhABC = 1.0; + if (costhABC < -1.0) costhABC = -1.0; + thetaABC = acos(costhABC); + + costhABD = (delxAB*delxBD + delyAB*delyBD + delzAB*delzBD) / (rAB * rBD); + if (costhABD > 1.0) costhABD = 1.0; + if (costhABD < -1.0) costhABD = -1.0; + thetaABD = acos(costhABD); + + costhCBD = (delxBC*delxBD + delyBC*delyBD + delzBC*delzBD) /(rBC * rBD); + if (costhCBD > 1.0) costhCBD = 1.0; + if (costhCBD < -1.0) costhCBD = -1.0; + thetaCBD = acos(costhCBD); + + dthABC = thetaABC - aa_theta0_1[type]; + dthABD = thetaABD - aa_theta0_2[type]; + dthCBD = thetaCBD - aa_theta0_3[type]; + + // energy + + if (EFLAG) eimproper = aa_k2[type] * dthABC * dthABD + + aa_k1[type] * dthABC * dthCBD + + aa_k3[type] * dthABD * dthCBD; + + // d(theta)/d(r) array + // angle i, atom j, coordinate k + + for (i = 0; i < 3; i++) + for (j = 0; j < 4; j++) + for (k = 0; k < 3; k++) + dthetadr[i][j][k] = 0.0; + + // angle ABC + + sc1 = sqrt(1.0/(1.0 - costhABC*costhABC)); + t1 = costhABC / rABmag2; + t3 = costhABC / rBCmag2; + r12 = 1.0 / (rAB * rBC); + + dthetadr[0][0][0] = sc1 * ((t1 * delxAB) - (delxBC * r12)); + dthetadr[0][0][1] = sc1 * ((t1 * delyAB) - (delyBC * r12)); + dthetadr[0][0][2] = sc1 * ((t1 * delzAB) - (delzBC * r12)); + dthetadr[0][1][0] = sc1 * ((-t1 * delxAB) + (delxBC * r12) + + (-t3 * delxBC) + (delxAB * r12)); + dthetadr[0][1][1] = sc1 * ((-t1 * delyAB) + (delyBC * r12) + + (-t3 * delyBC) + (delyAB * r12)); + dthetadr[0][1][2] = sc1 * ((-t1 * delzAB) + (delzBC * r12) + + (-t3 * delzBC) + (delzAB * r12)); + dthetadr[0][2][0] = sc1 * ((t3 * delxBC) - (delxAB * r12)); + dthetadr[0][2][1] = sc1 * ((t3 * delyBC) - (delyAB * r12)); + dthetadr[0][2][2] = sc1 * ((t3 * delzBC) - (delzAB * r12)); + + // angle CBD + + sc1 = sqrt(1.0/(1.0 - costhCBD*costhCBD)); + t1 = costhCBD / rBCmag2; + t3 = costhCBD / rBDmag2; + r12 = 1.0 / (rBC * rBD); + + dthetadr[1][2][0] = sc1 * ((t1 * delxBC) - (delxBD * r12)); + dthetadr[1][2][1] = sc1 * ((t1 * delyBC) - (delyBD * r12)); + dthetadr[1][2][2] = sc1 * ((t1 * delzBC) - (delzBD * r12)); + dthetadr[1][1][0] = sc1 * ((-t1 * delxBC) + (delxBD * r12) + + (-t3 * delxBD) + (delxBC * r12)); + dthetadr[1][1][1] = sc1 * ((-t1 * delyBC) + (delyBD * r12) + + (-t3 * delyBD) + (delyBC * r12)); + dthetadr[1][1][2] = sc1 * ((-t1 * delzBC) + (delzBD * r12) + + (-t3 * delzBD) + (delzBC * r12)); + dthetadr[1][3][0] = sc1 * ((t3 * delxBD) - (delxBC * r12)); + dthetadr[1][3][1] = sc1 * ((t3 * delyBD) - (delyBC * r12)); + dthetadr[1][3][2] = sc1 * ((t3 * delzBD) - (delzBC * r12)); + + // angle ABD + + sc1 = sqrt(1.0/(1.0 - costhABD*costhABD)); + t1 = costhABD / rABmag2; + t3 = costhABD / rBDmag2; + r12 = 1.0 / (rAB * rBD); + + dthetadr[2][0][0] = sc1 * ((t1 * delxAB) - (delxBD * r12)); + dthetadr[2][0][1] = sc1 * ((t1 * delyAB) - (delyBD * r12)); + dthetadr[2][0][2] = sc1 * ((t1 * delzAB) - (delzBD * r12)); + dthetadr[2][1][0] = sc1 * ((-t1 * delxAB) + (delxBD * r12) + + (-t3 * delxBD) + (delxAB * r12)); + dthetadr[2][1][1] = sc1 * ((-t1 * delyAB) + (delyBD * r12) + + (-t3 * delyBD) + (delyAB * r12)); + dthetadr[2][1][2] = sc1 * ((-t1 * delzAB) + (delzBD * r12) + + (-t3 * delzBD) + (delzAB * r12)); + dthetadr[2][3][0] = sc1 * ((t3 * delxBD) - (delxAB * r12)); + dthetadr[2][3][1] = sc1 * ((t3 * delyBD) - (delyAB * r12)); + dthetadr[2][3][2] = sc1 * ((t3 * delzBD) - (delzAB * r12)); + + // angleangle forces + + for (i = 0; i < 4; i++) + for (j = 0; j < 3; j++) + fabcd[i][j] = - + ((aa_k1[type] * + (dthABC*dthetadr[1][i][j] + dthCBD*dthetadr[0][i][j])) + + (aa_k2[type] * + (dthABC*dthetadr[2][i][j] + dthABD*dthetadr[0][i][j])) + + (aa_k3[type] * + (dthABD*dthetadr[1][i][j] + dthCBD*dthetadr[2][i][j]))); + + // apply force to each of 4 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += fabcd[0][0]; + f[i1][1] += fabcd[0][1]; + f[i1][2] += fabcd[0][2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] += fabcd[1][0]; + f[i2][1] += fabcd[1][1]; + f[i2][2] += fabcd[1][2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += fabcd[2][0]; + f[i3][1] += fabcd[2][1]; + f[i3][2] += fabcd[2][2]; + } + + if (NEWTON_BOND || i4 < nlocal) { + f[i4][0] += fabcd[3][0]; + f[i4][1] += fabcd[3][1]; + f[i4][2] += fabcd[3][2]; + } + + if (EVFLAG) + ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper, + fabcd[0],fabcd[2],fabcd[3],delxAB,delyAB,delzAB, + delxBC,delyBC,delzBC,delxBD,delyBD,delzBD,thr); + } +} diff --git a/src/USER-OMP/improper_class2_omp.h b/src/USER-OMP/improper_class2_omp.h new file mode 100644 index 0000000000..6f668a0078 --- /dev/null +++ b/src/USER-OMP/improper_class2_omp.h @@ -0,0 +1,50 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef IMPROPER_CLASS + +ImproperStyle(class2/omp,ImproperClass2OMP) + +#else + +#ifndef LMP_IMPROPER_CLASS2_OMP_H +#define LMP_IMPROPER_CLASS2_OMP_H + +#include "improper_class2.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class ImproperClass2OMP : public ImproperClass2, public ThrOMP { + + public: + ImproperClass2OMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); + + template + void angleangle_thr(int, int, ThrData * const thr); + +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/improper_cossq_omp.cpp b/src/USER-OMP/improper_cossq_omp.cpp new file mode 100644 index 0000000000..c49cd0b3ce --- /dev/null +++ b/src/USER-OMP/improper_cossq_omp.cpp @@ -0,0 +1,267 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "improper_cossq_omp.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "domain.h" +#include "force.h" +#include "update.h" +#include "error.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define TOLERANCE 0.05 +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +ImproperCossqOMP::ImproperCossqOMP(class LAMMPS *lmp) + : ImproperCossq(lmp), ThrOMP(lmp,THR_IMPROPER) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void ImproperCossqOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nimproperlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void ImproperCossqOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,i4,n,type; + double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z; + double eimproper,f1[3],f2[3],f3[3],f4[3]; + double rjisq, rji, rlksq, rlk, cosphi, angfac; + double cjiji, clkji, clklk, cfact1, cfact2, cfact3; + + eimproper = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const improperlist = neighbor->improperlist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = improperlist[n][0]; + i2 = improperlist[n][1]; + i3 = improperlist[n][2]; + i4 = improperlist[n][3]; + type = improperlist[n][4]; + + /* separation vector between i1 and i2, (i2-i1) */ + vb1x = x[i2][0] - x[i1][0]; + vb1y = x[i2][1] - x[i1][1]; + vb1z = x[i2][2] - x[i1][2]; + rjisq = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z ; + rji = sqrt(rjisq); + + /* separation vector between i2 and i3 (i3-i2) */ + vb2x = x[i3][0] - x[i2][0]; + vb2y = x[i3][1] - x[i2][1]; + vb2z = x[i3][2] - x[i2][2]; + + /* separation vector between i3 and i4, (i4-i3) */ + vb3x = x[i4][0] - x[i3][0]; + vb3y = x[i4][1] - x[i3][1]; + vb3z = x[i4][2] - x[i3][2]; + rlksq = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z ; + rlk = sqrt(rlksq); + + cosphi = (vb3x*vb1x + vb3y*vb1y + vb3z*vb1z)/(rji * rlk); + + /* Check that cos(phi) is in the correct limits. */ + if (cosphi > 1.0 + TOLERANCE || cosphi < (-1.0 - TOLERANCE)) { + int me = comm->me; + + if (screen) { + char str[128]; + sprintf(str,"Improper problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", + me,thr->get_tid(),update->ntimestep, + atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); + error->warning(FLERR,str,0); + fprintf(screen," 1st atom: %d %g %g %g\n", + me,x[i1][0],x[i1][1],x[i1][2]); + fprintf(screen," 2nd atom: %d %g %g %g\n", + me,x[i2][0],x[i2][1],x[i2][2]); + fprintf(screen," 3rd atom: %d %g %g %g\n", + me,x[i3][0],x[i3][1],x[i3][2]); + fprintf(screen," 4th atom: %d %g %g %g\n", + me,x[i4][0],x[i4][1],x[i4][2]); + } + + + /* Apply corrections to round-off errors. */ + if (cosphi > 1.0) cosphi -= SMALL; + if (cosphi < -1.0) cosphi += SMALL; + + /* Calculate the angle: */ + double torangle = acos(cosphi); + cosphi = cos(torangle - chi[type]); + + if (EFLAG) eimproper = 0.5 * k[type] * cosphi * cosphi; + + /* + printf("The tags: %d-%d-%d-%d, of type %d .\n",atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4],type); + printf("The ji vector: %f, %f, %f.\nThe lk vector: %f, %f, %f.\n", vb1x,vb1y,vb1z,vb3x,vb3y,vb3z); + printf("The cosine of the angle: %-1.16e.\n", cosphi); + printf("The energy of the improper: %-1.16e with prefactor %-1.16e.\n", eimproper, 0.5*k[type]); + */ + + /* Work out forces. */ + angfac = - k[type] * cosphi; + + cjiji = rjisq; + clklk = rlksq; + /*CLKJI = RXLK * RXJI + RYLK * RYJI + RZLK * RZJI */ + clkji = vb3x*vb1x + vb3y*vb1y + vb3z*vb1z; + + /*CFACT1 = CLKLK * CJIJI + CFACT1 = SQRT(CFACT1) + CFACT1 = ANGFAC / CFACT1*/ + cfact1 = angfac / sqrt(clklk * cjiji); + /*CFACT2 = CLKJI / CLKLK*/ + cfact2 = clkji / clklk; + /*CFACT3 = CLKJI / CJIJI*/ + cfact3 = clkji / cjiji; + + /*FIX = -RXLK + CFACT3 * RXJI + FIY = -RYLK + CFACT3 * RYJI + FIZ = -RZLK + CFACT3 * RZJI*/ + f1[0] = - vb3x + cfact3 * vb1x; + f1[1] = - vb3y + cfact3 * vb1y; + f1[2] = - vb3z + cfact3 * vb1z; + + /*FJX = -FIX + FJY = -FIY + FJZ = -FIZ*/ + f2[0] = - f1[0]; + f2[1] = - f1[1]; + f2[2] = - f1[2]; + + /*FKX = CFACT2 * RXLK - RXJI + FKY = CFACT2 * RYLK - RYJI + FKZ = CFACT2 * RZLK - RZJI*/ + f3[0] = cfact2 * vb3x - vb1x; + f3[1] = cfact2 * vb3y - vb1y; + f3[2] = cfact2 * vb3z - vb1z; + + /*FLX = -FKX + FLY = -FKY + FLZ = -FKZ*/ + f4[0] = - f3[0]; + f4[1] = - f3[1]; + f4[2] = - f3[2]; + + /*FIX = FIX * CFACT1 + FIY = FIY * CFACT1 + FIZ = FIZ * CFACT1*/ + f1[0] *= cfact1; + f1[1] *= cfact1; + f1[2] *= cfact1; + + /*FJX = FJX * CFACT1 + FJY = FJY * CFACT1 + FJZ = FJZ * CFACT1*/ + f2[0] *= cfact1; + f2[1] *= cfact1; + f2[2] *= cfact1; + + /*FKX = FKX * CFACT1 + FKY = FKY * CFACT1 + FKZ = FKZ * CFACT1*/ + f3[0] *= cfact1; + f3[1] *= cfact1; + f3[2] *= cfact1; + + /*FLX = FLX * CFACT1 + FLY = FLY * CFACT1 + FLZ = FLZ * CFACT1*/ + f4[0] *= cfact1; + f4[1] *= cfact1; + f4[2] *= cfact1; + + /* Apply force to each of 4 atoms */ + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] += f2[0]; + f[i2][1] += f2[1]; + f[i2][2] += f2[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (NEWTON_BOND || i4 < nlocal) { + f[i4][0] += f4[0]; + f[i4][1] += f4[1]; + f[i4][2] += f4[2]; + } + + if (EVFLAG) + ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,f1,f3,f4, + -vb1x,-vb1y,-vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); + } + } +} diff --git a/src/USER-OMP/improper_cossq_omp.h b/src/USER-OMP/improper_cossq_omp.h new file mode 100644 index 0000000000..8fd1bc09a0 --- /dev/null +++ b/src/USER-OMP/improper_cossq_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef IMPROPER_CLASS + +ImproperStyle(cossq/omp,ImproperCossqOMP) + +#else + +#ifndef LMP_IMPROPER_COSSQ_OMP_H +#define LMP_IMPROPER_COSSQ_OMP_H + +#include "improper_cossq.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class ImproperCossqOMP : public ImproperCossq, public ThrOMP { + + public: + ImproperCossqOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/improper_cvff_omp.cpp b/src/USER-OMP/improper_cvff_omp.cpp new file mode 100644 index 0000000000..819a22b66e --- /dev/null +++ b/src/USER-OMP/improper_cvff_omp.cpp @@ -0,0 +1,298 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "improper_cvff_omp.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "domain.h" +#include "force.h" +#include "update.h" +#include "error.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define TOLERANCE 0.05 +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +ImproperCvffOMP::ImproperCvffOMP(class LAMMPS *lmp) + : ImproperCvff(lmp), ThrOMP(lmp,THR_IMPROPER) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void ImproperCvffOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nimproperlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void ImproperCvffOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,i4,m,n,type; + double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm; + double eimproper,f1[3],f2[3],f3[3],f4[3]; + double sb1,sb2,sb3,rb1,rb3,c0,b1mag2,b1mag,b2mag2; + double b2mag,b3mag2,b3mag,ctmp,r12c1,c1mag,r12c2; + double c2mag,sc1,sc2,s1,s2,s12,c,p,pd,rc2,a,a11,a22; + double a33,a12,a13,a23,sx2,sy2,sz2; + + eimproper = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const improperlist = neighbor->improperlist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = improperlist[n][0]; + i2 = improperlist[n][1]; + i3 = improperlist[n][2]; + i4 = improperlist[n][3]; + type = improperlist[n][4]; + + // 1st bond + + vb1x = x[i1][0] - x[i2][0]; + vb1y = x[i1][1] - x[i2][1]; + vb1z = x[i1][2] - x[i2][2]; + + // 2nd bond + + vb2x = x[i3][0] - x[i2][0]; + vb2y = x[i3][1] - x[i2][1]; + vb2z = x[i3][2] - x[i2][2]; + + vb2xm = -vb2x; + vb2ym = -vb2y; + vb2zm = -vb2z; + + // 3rd bond + + vb3x = x[i4][0] - x[i3][0]; + vb3y = x[i4][1] - x[i3][1]; + vb3z = x[i4][2] - x[i3][2]; + + // c0 calculation + + sb1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z); + sb2 = 1.0 / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z); + sb3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z); + + rb1 = sqrt(sb1); + rb3 = sqrt(sb3); + + c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3; + + // 1st and 2nd angle + + b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z; + b1mag = sqrt(b1mag2); + b2mag2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z; + b2mag = sqrt(b2mag2); + b3mag2 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z; + b3mag = sqrt(b3mag2); + + ctmp = vb1x*vb2x + vb1y*vb2y + vb1z*vb2z; + r12c1 = 1.0 / (b1mag*b2mag); + c1mag = ctmp * r12c1; + + ctmp = vb2xm*vb3x + vb2ym*vb3y + vb2zm*vb3z; + r12c2 = 1.0 / (b2mag*b3mag); + c2mag = ctmp * r12c2; + + // cos and sin of 2 angles and final c + + sc1 = sqrt(1.0 - c1mag*c1mag); + if (sc1 < SMALL) sc1 = SMALL; + sc1 = 1.0/sc1; + + sc2 = sqrt(1.0 - c2mag*c2mag); + if (sc2 < SMALL) sc2 = SMALL; + sc2 = 1.0/sc2; + + s1 = sc1 * sc1; + s2 = sc2 * sc2; + s12 = sc1 * sc2; + c = (c0 + c1mag*c2mag) * s12; + + // error check + + if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) { + int me = comm->me; + + if (screen) { + char str[128]; + sprintf(str,"Improper problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", + me,thr->get_tid(),update->ntimestep, + atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); + error->warning(FLERR,str,0); + fprintf(screen," 1st atom: %d %g %g %g\n", + me,x[i1][0],x[i1][1],x[i1][2]); + fprintf(screen," 2nd atom: %d %g %g %g\n", + me,x[i2][0],x[i2][1],x[i2][2]); + fprintf(screen," 3rd atom: %d %g %g %g\n", + me,x[i3][0],x[i3][1],x[i3][2]); + fprintf(screen," 4th atom: %d %g %g %g\n", + me,x[i4][0],x[i4][1],x[i4][2]); + } + } + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + // force & energy + // p = 1 + cos(n*phi) for d = 1 + // p = 1 - cos(n*phi) for d = -1 + // pd = dp/dc / 2 + + m = multiplicity[type]; + + if (m == 2) { + p = 2.0*c*c; + pd = 2.0*c; + } else if (m == 3) { + rc2 = c*c; + p = (4.0*rc2-3.0)*c + 1.0; + pd = 6.0*rc2 - 1.5; + } else if (m == 4) { + rc2 = c*c; + p = 8.0*(rc2-1)*rc2 + 2.0; + pd = (16.0*rc2-8.0)*c; + } else if (m == 6) { + rc2 = c*c; + p = ((32.0*rc2-48.0)*rc2 + 18.0)*rc2; + pd = (96.0*(rc2-1.0)*rc2 + 18.0)*c; + } else if (m == 1) { + p = c + 1.0; + pd = 0.5; + } else if (m == 5) { + rc2 = c*c; + p = ((16.0*rc2-20.0)*rc2 + 5.0)*c + 1.0; + pd = (40.0*rc2-30.0)*rc2 + 2.5; + } else if (m == 0) { + p = 2.0; + pd = 0.0; + } + + if (sign[type] == -1) { + p = 2.0 - p; + pd = -pd; + } + + if (EFLAG) eimproper = k[type]*p; + + a = 2.0 * k[type] * pd; + c = c * a; + s12 = s12 * a; + a11 = c*sb1*s1; + a22 = -sb2*(2.0*c0*s12 - c*(s1+s2)); + a33 = c*sb3*s2; + a12 = -r12c1*(c1mag*c*s1 + c2mag*s12); + a13 = -rb1*rb3*s12; + a23 = r12c2*(c2mag*c*s2 + c1mag*s12); + + sx2 = a12*vb1x + a22*vb2x + a23*vb3x; + sy2 = a12*vb1y + a22*vb2y + a23*vb3y; + sz2 = a12*vb1z + a22*vb2z + a23*vb3z; + + f1[0] = a11*vb1x + a12*vb2x + a13*vb3x; + f1[1] = a11*vb1y + a12*vb2y + a13*vb3y; + f1[2] = a11*vb1z + a12*vb2z + a13*vb3z; + + f2[0] = -sx2 - f1[0]; + f2[1] = -sy2 - f1[1]; + f2[2] = -sz2 - f1[2]; + + f4[0] = a13*vb1x + a23*vb2x + a33*vb3x; + f4[1] = a13*vb1y + a23*vb2y + a33*vb3y; + f4[2] = a13*vb1z + a23*vb2z + a33*vb3z; + + f3[0] = sx2 - f4[0]; + f3[1] = sy2 - f4[1]; + f3[2] = sz2 - f4[2]; + + // apply force to each of 4 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] += f2[0]; + f[i2][1] += f2[1]; + f[i2][2] += f2[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (NEWTON_BOND || i4 < nlocal) { + f[i4][0] += f4[0]; + f[i4][1] += f4[1]; + f[i4][2] += f4[2]; + } + + if (EVFLAG) + ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,f1,f3,f4, + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); + } +} diff --git a/src/USER-OMP/improper_cvff_omp.h b/src/USER-OMP/improper_cvff_omp.h new file mode 100644 index 0000000000..8ab8f1615c --- /dev/null +++ b/src/USER-OMP/improper_cvff_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef IMPROPER_CLASS + +ImproperStyle(cvff/omp,ImproperCvffOMP) + +#else + +#ifndef LMP_IMPROPER_CVFF_OMP_H +#define LMP_IMPROPER_CVFF_OMP_H + +#include "improper_cvff.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class ImproperCvffOMP : public ImproperCvff, public ThrOMP { + + public: + ImproperCvffOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/improper_harmonic_omp.cpp b/src/USER-OMP/improper_harmonic_omp.cpp new file mode 100644 index 0000000000..4ac8351a80 --- /dev/null +++ b/src/USER-OMP/improper_harmonic_omp.cpp @@ -0,0 +1,240 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "improper_harmonic_omp.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "domain.h" +#include "force.h" +#include "update.h" +#include "error.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define TOLERANCE 0.05 +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +ImproperHarmonicOMP::ImproperHarmonicOMP(class LAMMPS *lmp) + : ImproperHarmonic(lmp), ThrOMP(lmp,THR_IMPROPER) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void ImproperHarmonicOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nimproperlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void ImproperHarmonicOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,i4,n,type; + double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z; + double eimproper,f1[3],f2[3],f3[3],f4[3]; + double ss1,ss2,ss3,r1,r2,r3,c0,c1,c2,s1,s2; + double s12,c,s,domega,a,a11,a22,a33,a12,a13,a23; + double sx2,sy2,sz2; + + eimproper = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const improperlist = neighbor->improperlist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = improperlist[n][0]; + i2 = improperlist[n][1]; + i3 = improperlist[n][2]; + i4 = improperlist[n][3]; + type = improperlist[n][4]; + + // geometry of 4-body + + vb1x = x[i1][0] - x[i2][0]; + vb1y = x[i1][1] - x[i2][1]; + vb1z = x[i1][2] - x[i2][2]; + + vb2x = x[i3][0] - x[i2][0]; + vb2y = x[i3][1] - x[i2][1]; + vb2z = x[i3][2] - x[i2][2]; + + vb3x = x[i4][0] - x[i3][0]; + vb3y = x[i4][1] - x[i3][1]; + vb3z = x[i4][2] - x[i3][2]; + + ss1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z); + ss2 = 1.0 / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z); + ss3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z); + + r1 = sqrt(ss1); + r2 = sqrt(ss2); + r3 = sqrt(ss3); + + // sin and cos of angle + + c0 = (vb1x * vb3x + vb1y * vb3y + vb1z * vb3z) * r1 * r3; + c1 = (vb1x * vb2x + vb1y * vb2y + vb1z * vb2z) * r1 * r2; + c2 = -(vb3x * vb2x + vb3y * vb2y + vb3z * vb2z) * r3 * r2; + + s1 = 1.0 - c1*c1; + if (s1 < SMALL) s1 = SMALL; + s1 = 1.0 / s1; + + s2 = 1.0 - c2*c2; + if (s2 < SMALL) s2 = SMALL; + s2 = 1.0 / s2; + + s12 = sqrt(s1*s2); + c = (c1*c2 + c0) * s12; + + // error check + + if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) { + int me = comm->me; + + if (screen) { + char str[128]; + sprintf(str,"Improper problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", + me,thr->get_tid(),update->ntimestep, + atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); + error->warning(FLERR,str,0); + fprintf(screen," 1st atom: %d %g %g %g\n", + me,x[i1][0],x[i1][1],x[i1][2]); + fprintf(screen," 2nd atom: %d %g %g %g\n", + me,x[i2][0],x[i2][1],x[i2][2]); + fprintf(screen," 3rd atom: %d %g %g %g\n", + me,x[i3][0],x[i3][1],x[i3][2]); + fprintf(screen," 4th atom: %d %g %g %g\n", + me,x[i4][0],x[i4][1],x[i4][2]); + } + } + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + + // force & energy + + domega = acos(c) - chi[type]; + a = k[type] * domega; + + if (EFLAG) eimproper = a*domega; + + a = -a * 2.0/s; + c = c * a; + s12 = s12 * a; + a11 = c*ss1*s1; + a22 = -ss2 * (2.0*c0*s12 - c*(s1+s2)); + a33 = c*ss3*s2; + a12 = -r1*r2*(c1*c*s1 + c2*s12); + a13 = -r1*r3*s12; + a23 = r2*r3*(c2*c*s2 + c1*s12); + + sx2 = a22*vb2x + a23*vb3x + a12*vb1x; + sy2 = a22*vb2y + a23*vb3y + a12*vb1y; + sz2 = a22*vb2z + a23*vb3z + a12*vb1z; + + f1[0] = a12*vb2x + a13*vb3x + a11*vb1x; + f1[1] = a12*vb2y + a13*vb3y + a11*vb1y; + f1[2] = a12*vb2z + a13*vb3z + a11*vb1z; + + f2[0] = -sx2 - f1[0]; + f2[1] = -sy2 - f1[1]; + f2[2] = -sz2 - f1[2]; + + f4[0] = a23*vb2x + a33*vb3x + a13*vb1x; + f4[1] = a23*vb2y + a33*vb3y + a13*vb1y; + f4[2] = a23*vb2z + a33*vb3z + a13*vb1z; + + f3[0] = sx2 - f4[0]; + f3[1] = sy2 - f4[1]; + f3[2] = sz2 - f4[2]; + + // apply force to each of 4 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] += f2[0]; + f[i2][1] += f2[1]; + f[i2][2] += f2[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (NEWTON_BOND || i4 < nlocal) { + f[i4][0] += f4[0]; + f[i4][1] += f4[1]; + f[i4][2] += f4[2]; + } + + if (EVFLAG) + ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,f1,f3,f4, + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); + } +} diff --git a/src/USER-OMP/improper_harmonic_omp.h b/src/USER-OMP/improper_harmonic_omp.h new file mode 100644 index 0000000000..d6325016a7 --- /dev/null +++ b/src/USER-OMP/improper_harmonic_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef IMPROPER_CLASS + +ImproperStyle(harmonic/omp,ImproperHarmonicOMP) + +#else + +#ifndef LMP_IMPROPER_HARMONIC_OMP_H +#define LMP_IMPROPER_HARMONIC_OMP_H + +#include "improper_harmonic.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class ImproperHarmonicOMP : public ImproperHarmonic, public ThrOMP { + + public: + ImproperHarmonicOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/improper_ring_omp.cpp b/src/USER-OMP/improper_ring_omp.cpp new file mode 100644 index 0000000000..78b15a0453 --- /dev/null +++ b/src/USER-OMP/improper_ring_omp.cpp @@ -0,0 +1,261 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "improper_ring_omp.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "domain.h" +#include "force.h" +#include "update.h" +#include "error.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define TOLERANCE 0.05 +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +ImproperRingOMP::ImproperRingOMP(class LAMMPS *lmp) + : ImproperRing(lmp), ThrOMP(lmp,THR_IMPROPER) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void ImproperRingOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nimproperlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void ImproperRingOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + /* Be careful!: "chi" is the equilibrium angle in radians. */ + int i1,i2,i3,i4,n,type; + + double eimproper; + + /* Compatibility variables. */ + double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z; + double f1[3], f3[3], f4[3]; + + /* Actual computation variables. */ + int at1[3], at2[3], at3[3], icomb; + double bvec1x[3], bvec1y[3], bvec1z[3], + bvec2x[3], bvec2y[3], bvec2z[3], + bvec1n[3], bvec2n[3], bend_angle[3]; + double angle_summer, angfac, cfact1, cfact2, cfact3; + double cjiji, ckjji, ckjkj, fix, fiy, fiz, fjx, fjy, fjz, fkx, fky, fkz; + + eimproper = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const improperlist = neighbor->improperlist; + const int nlocal = atom->nlocal; + + /* A description of the potential can be found in + Macromolecules 35, pp. 1463-1472 (2002). */ + for (n = nfrom; n < nto; n++) { + /* Take the ids of the atoms contributing to the improper potential. */ + i1 = improperlist[n][0]; /* Atom "1" of Figure 1 from the above reference.*/ + i2 = improperlist[n][1]; /* Atom "2" ... */ + i3 = improperlist[n][2]; /* Atom "3" ... */ + i4 = improperlist[n][3]; /* Atom "9" ... */ + type = improperlist[n][4]; + + /* Calculate the necessary variables for LAMMPS implementation. + if (evflag) ev_tally(i1,i2,i3,i4,nlocal,newton_bond,eimproper,f1,f3,f4, + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z); + Although, they are irrelevant to the calculation of the potential, we keep + them for maximal compatibility. */ + vb1x = x[i1][0] - x[i2][0]; vb1y = x[i1][1] - x[i2][1]; vb1z = x[i1][2] - x[i2][2]; + + vb2x = x[i3][0] - x[i2][0]; vb2y = x[i3][1] - x[i2][1]; vb2z = x[i3][2] - x[i2][2]; + + vb3x = x[i4][0] - x[i3][0]; vb3y = x[i4][1] - x[i3][1]; vb3z = x[i4][2] - x[i3][2]; + + + /* Pass the atom tags to form the necessary combinations. */ + at1[0] = i1; at2[0] = i2; at3[0] = i4; /* ids: 1-2-9 */ + at1[1] = i1; at2[1] = i2; at3[1] = i3; /* ids: 1-2-3 */ + at1[2] = i4; at2[2] = i2; at3[2] = i3; /* ids: 9-2-3 */ + + + /* Initialize the sum of the angles differences. */ + angle_summer = 0.0; + /* Take a loop over the three angles, defined by each triad: */ + for (icomb = 0; icomb < 3; icomb ++) { + + /* Bond vector connecting the first and the second atom. */ + bvec1x[icomb] = x[at2[icomb]][0] - x[at1[icomb]][0]; + bvec1y[icomb] = x[at2[icomb]][1] - x[at1[icomb]][1]; + bvec1z[icomb] = x[at2[icomb]][2] - x[at1[icomb]][2]; + /* also calculate the norm of the vector: */ + bvec1n[icomb] = sqrt( bvec1x[icomb]*bvec1x[icomb] + + bvec1y[icomb]*bvec1y[icomb] + + bvec1z[icomb]*bvec1z[icomb]); + /* Bond vector connecting the second and the third atom. */ + bvec2x[icomb] = x[at3[icomb]][0] - x[at2[icomb]][0]; + bvec2y[icomb] = x[at3[icomb]][1] - x[at2[icomb]][1]; + bvec2z[icomb] = x[at3[icomb]][2] - x[at2[icomb]][2]; + /* also calculate the norm of the vector: */ + bvec2n[icomb] = sqrt( bvec2x[icomb]*bvec2x[icomb] + + bvec2y[icomb]*bvec2y[icomb] + + bvec2z[icomb]*bvec2z[icomb]); + + /* Calculate the bending angle of the atom triad: */ + bend_angle[icomb] = ( bvec2x[icomb]*bvec1x[icomb] + + bvec2y[icomb]*bvec1y[icomb] + + bvec2z[icomb]*bvec1z[icomb]); + bend_angle[icomb] /= (bvec1n[icomb] * bvec2n[icomb]); + if (bend_angle[icomb] > 1.0) bend_angle[icomb] -= SMALL; + if (bend_angle[icomb] < -1.0) bend_angle[icomb] += SMALL; + + /* Append the current angle to the sum of angle differences. */ + angle_summer += (bend_angle[icomb] - chi[type]); + } + if (EFLAG) eimproper = (1.0/6.0) *k[type] * pow(angle_summer,6.0); + /* + printf("The tags: %d-%d-%d-%d, of type %d .\n",atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4],type); + // printf("The coordinates of the first: %f, %f, %f.\n", x[i1][0], x[i1][1], x[i1][2]); + // printf("The coordinates of the second: %f, %f, %f.\n", x[i2][0], x[i2][1], x[i2][2]); + // printf("The coordinates of the third: %f, %f, %f.\n", x[i3][0], x[i3][1], x[i3][2]); + // printf("The coordinates of the fourth: %f, %f, %f.\n", x[i4][0], x[i4][1], x[i4][2]); + printf("The angles are: %f / %f / %f equilibrium: %f.\n", bend_angle[0], bend_angle[1], bend_angle[2],chi[type]); + printf("The energy of the improper: %f with prefactor %f.\n", eimproper,(1.0/6.0)*k[type]); + printf("The sum of the angles: %f.\n", angle_summer); + */ + + /* Force calculation acting on all atoms. + Calculate the derivatives of the potential. */ + angfac = k[type] * pow(angle_summer,5.0); + + f1[0] = 0.0; f1[1] = 0.0; f1[2] = 0.0; + f3[0] = 0.0; f3[1] = 0.0; f3[2] = 0.0; + f4[0] = 0.0; f4[1] = 0.0; f4[2] = 0.0; + + /* Take a loop over the three angles, defined by each triad: */ + for (icomb = 0; icomb < 3; icomb ++) + { + /* Calculate the squares of the distances. */ + cjiji = bvec1n[icomb] * bvec1n[icomb]; ckjkj = bvec2n[icomb] * bvec2n[icomb]; + + ckjji = bvec2x[icomb] * bvec1x[icomb] + + bvec2y[icomb] * bvec1y[icomb] + + bvec2z[icomb] * bvec1z[icomb] ; + + cfact1 = angfac / (sqrt(ckjkj * cjiji)); + cfact2 = ckjji / ckjkj; + cfact3 = ckjji / cjiji; + + /* Calculate the force acted on the thrid atom of the angle. */ + fkx = cfact2 * bvec2x[icomb] - bvec1x[icomb]; + fky = cfact2 * bvec2y[icomb] - bvec1y[icomb]; + fkz = cfact2 * bvec2z[icomb] - bvec1z[icomb]; + + /* Calculate the force acted on the first atom of the angle. */ + fix = bvec2x[icomb] - cfact3 * bvec1x[icomb]; + fiy = bvec2y[icomb] - cfact3 * bvec1y[icomb]; + fiz = bvec2z[icomb] - cfact3 * bvec1z[icomb]; + + /* Finally, calculate the force acted on the middle atom of the angle.*/ + fjx = - fix - fkx; fjy = - fiy - fky; fjz = - fiz - fkz; + + /* Consider the appropriate scaling of the forces: */ + fix *= cfact1; fiy *= cfact1; fiz *= cfact1; + fjx *= cfact1; fjy *= cfact1; fjz *= cfact1; + fkx *= cfact1; fky *= cfact1; fkz *= cfact1; + + if (at1[icomb] == i1) {f1[0] += fix; f1[1] += fiy; f1[2] += fiz;} + else if (at2[icomb] == i1) {f1[0] += fjx; f1[1] += fjy; f1[2] += fjz;} + else if (at3[icomb] == i1) {f1[0] += fkx; f1[1] += fky; f1[2] += fkz;} + + if (at1[icomb] == i3) {f3[0] += fix; f3[1] += fiy; f3[2] += fiz;} + else if (at2[icomb] == i3) {f3[0] += fjx; f3[1] += fjy; f3[2] += fjz;} + else if (at3[icomb] == i3) {f3[0] += fkx; f3[1] += fky; f3[2] += fkz;} + + if (at1[icomb] == i4) {f4[0] += fix; f4[1] += fiy; f4[2] += fiz;} + else if (at2[icomb] == i4) {f4[0] += fjx; f4[1] += fjy; f4[2] += fjz;} + else if (at3[icomb] == i4) {f4[0] += fkx; f4[1] += fky; f4[2] += fkz;} + + + /* Store the contribution to the global arrays: */ + /* Take the id of the atom from the at1[icomb] element, i1 = at1[icomb]. */ + if (NEWTON_BOND || at1[icomb] < nlocal) { + f[at1[icomb]][0] += fix; + f[at1[icomb]][1] += fiy; + f[at1[icomb]][2] += fiz; + } + /* Take the id of the atom from the at2[icomb] element, i2 = at2[icomb]. */ + if (NEWTON_BOND || at2[icomb] < nlocal) { + f[at2[icomb]][0] += fjx; + f[at2[icomb]][1] += fjy; + f[at2[icomb]][2] += fjz; + } + /* Take the id of the atom from the at3[icomb] element, i3 = at3[icomb]. */ + if (NEWTON_BOND || at3[icomb] < nlocal) { + f[at3[icomb]][0] += fkx; + f[at3[icomb]][1] += fky; + f[at3[icomb]][2] += fkz; + } + + } + + if (EVFLAG) + ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,f1,f3,f4, + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); + } +} diff --git a/src/USER-OMP/improper_ring_omp.h b/src/USER-OMP/improper_ring_omp.h new file mode 100644 index 0000000000..2b2616ab03 --- /dev/null +++ b/src/USER-OMP/improper_ring_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef IMPROPER_CLASS + +ImproperStyle(ring/omp,ImproperRingOMP) + +#else + +#ifndef LMP_IMPROPER_RING_OMP_H +#define LMP_IMPROPER_RING_OMP_H + +#include "improper_ring.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class ImproperRingOMP : public ImproperRing, public ThrOMP { + + public: + ImproperRingOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/improper_umbrella_omp.cpp b/src/USER-OMP/improper_umbrella_omp.cpp new file mode 100644 index 0000000000..9a64da1fb1 --- /dev/null +++ b/src/USER-OMP/improper_umbrella_omp.cpp @@ -0,0 +1,256 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "improper_umbrella_omp.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "domain.h" +#include "force.h" +#include "update.h" +#include "error.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define TOLERANCE 0.05 +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +ImproperUmbrellaOMP::ImproperUmbrellaOMP(class LAMMPS *lmp) + : ImproperUmbrella(lmp), ThrOMP(lmp,THR_IMPROPER) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +void ImproperUmbrellaOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nimproperlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void ImproperUmbrellaOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,i4,n,type; + double eimproper,f1[3],f2[3],f3[3],f4[3]; + double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z; + double domega,c,a,s,projhfg,dhax,dhay,dhaz,dahx,dahy,dahz,cotphi; + double ax,ay,az,ra2,rh2,ra,rh,rar,rhr,arx,ary,arz,hrx,hry,hrz; + + eimproper = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const improperlist = neighbor->improperlist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = improperlist[n][0]; + i2 = improperlist[n][1]; + i3 = improperlist[n][2]; + i4 = improperlist[n][3]; + type = improperlist[n][4]; + + // 1st bond + + vb1x = x[i2][0] - x[i1][0]; + vb1y = x[i2][1] - x[i1][1]; + vb1z = x[i2][2] - x[i1][2]; + + // 2nd bond + + vb2x = x[i3][0] - x[i1][0]; + vb2y = x[i3][1] - x[i1][1]; + vb2z = x[i3][2] - x[i1][2]; + + // 3rd bond + + vb3x = x[i4][0] - x[i1][0]; + vb3y = x[i4][1] - x[i1][1]; + vb3z = x[i4][2] - x[i1][2]; + + // c0 calculation + // A = vb1 X vb2 is perpendicular to IJK plane + + ax = vb1y*vb2z-vb1z*vb2y; + ay = vb1z*vb2x-vb1x*vb2z; + az = vb1x*vb2y-vb1y*vb2x; + ra2 = ax*ax+ay*ay+az*az; + rh2 = vb3x*vb3x+vb3y*vb3y+vb3z*vb3z; + ra = sqrt(ra2); + rh = sqrt(rh2); + if (ra < SMALL) ra = SMALL; + if (rh < SMALL) rh = SMALL; + + rar = 1/ra; + rhr = 1/rh; + arx = ax*rar; + ary = ay*rar; + arz = az*rar; + hrx = vb3x*rhr; + hry = vb3y*rhr; + hrz = vb3z*rhr; + + c = arx*hrx+ary*hry+arz*hrz; + + // error check + + if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) { + int me = comm->me; + + if (screen) { + char str[128]; + sprintf(str,"Improper problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", + me,thr->get_tid(),update->ntimestep, + atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); + error->warning(FLERR,str,0); + fprintf(screen," 1st atom: %d %g %g %g\n", + me,x[i1][0],x[i1][1],x[i1][2]); + fprintf(screen," 2nd atom: %d %g %g %g\n", + me,x[i2][0],x[i2][1],x[i2][2]); + fprintf(screen," 3rd atom: %d %g %g %g\n", + me,x[i3][0],x[i3][1],x[i3][2]); + fprintf(screen," 4th atom: %d %g %g %g\n", + me,x[i4][0],x[i4][1],x[i4][2]); + } + } + + if (c > 1.0) s = 1.0; + if (c < -1.0) s = -1.0; + + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + cotphi = c/s; + + projhfg = (vb3x*vb1x+vb3y*vb1y+vb3z*vb1z) / + sqrt(vb1x*vb1x+vb1y*vb1y+vb1z*vb1z); + projhfg += (vb3x*vb2x+vb3y*vb2y+vb3z*vb2z) / + sqrt(vb2x*vb2x+vb2y*vb2y+vb2z*vb2z); + if (projhfg > 0.0) { + s *= -1.0; + cotphi *= -1.0; + } + + // force and energy + // if w0 = 0: E = k * (1 - cos w) + // if w0 != 0: E = 0.5 * C (cos w - cos w0)^2, C = k/(sin(w0)^2 + + if (w0[type] == 0.0) { + if (EFLAG) eimproper = kw[type] * (1.0-s); + a = -kw[type]; + } else { + domega = s - cos(w0[type]); + a = 0.5 * C[type] * domega; + if (EFLAG) eimproper = a * domega; + a *= 2.0; + } + + // dhax = diffrence between H and A in X direction, etc + + a = a*cotphi; + dhax = hrx-c*arx; + dhay = hry-c*ary; + dhaz = hrz-c*arz; + + dahx = arx-c*hrx; + dahy = ary-c*hry; + dahz = arz-c*hrz; + + f2[0] = (dhay*vb1z - dhaz*vb1y)*rar; + f2[1] = (dhaz*vb1x - dhax*vb1z)*rar; + f2[2] = (dhax*vb1y - dhay*vb1x)*rar; + + f3[0] = (-dhay*vb2z + dhaz*vb2y)*rar; + f3[1] = (-dhaz*vb2x + dhax*vb2z)*rar; + f3[2] = (-dhax*vb2y + dhay*vb2x)*rar; + + f4[0] = dahx*rhr; + f4[1] = dahy*rhr; + f4[2] = dahz*rhr; + + f1[0] = -(f2[0] + f3[0] + f4[0]); + f1[1] = -(f2[1] + f3[1] + f4[1]); + f1[2] = -(f2[2] + f3[2] + f4[2]); + + // apply force to each of 4 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]*a; + f[i1][1] += f1[1]*a; + f[i1][2] += f1[2]*a; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] += f3[0]*a; + f[i2][1] += f3[1]*a; + f[i2][2] += f3[2]*a; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f2[0]*a; + f[i3][1] += f2[1]*a; + f[i3][2] += f2[2]*a; + } + + if (NEWTON_BOND || i4 < nlocal) { + f[i4][0] += f4[0]*a; + f[i4][1] += f4[1]*a; + f[i4][2] += f4[2]*a; + } + + if (EVFLAG) + ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,f1,f3,f4, + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); + } +} diff --git a/src/USER-OMP/improper_umbrella_omp.h b/src/USER-OMP/improper_umbrella_omp.h new file mode 100644 index 0000000000..41bdfed4d5 --- /dev/null +++ b/src/USER-OMP/improper_umbrella_omp.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef IMPROPER_CLASS + +ImproperStyle(umbrella/omp,ImproperUmbrellaOMP) + +#else + +#ifndef LMP_IMPROPER_UMBRELLA_OMP_H +#define LMP_IMPROPER_UMBRELLA_OMP_H + +#include "improper_umbrella.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class ImproperUmbrellaOMP : public ImproperUmbrella, public ThrOMP { + + public: + ImproperUmbrellaOMP(class LAMMPS *lmp); + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/neigh_derive_omp.cpp b/src/USER-OMP/neigh_derive_omp.cpp new file mode 100644 index 0000000000..4356f17b62 --- /dev/null +++ b/src/USER-OMP/neigh_derive_omp.cpp @@ -0,0 +1,183 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "neighbor.h" +#include "neighbor_omp.h" +#include "neigh_list.h" +#include "atom.h" +#include "comm.h" +#include "error.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- + build half list from full list + pair stored once if i,j are both owned and i < j + pair stored by me if j is ghost (also stored by proc owning j) + works if full list is a skip list +------------------------------------------------------------------------- */ + +void Neighbor::half_from_full_no_newton_omp(NeighList *list) +{ + const int inum_full = list->listfull->inum; + + NEIGH_OMP_INIT; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list) +#endif + NEIGH_OMP_SETUP(inum_full); + + int i,j,ii,jj,n,jnum,joriginal; + int *neighptr,*jlist; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + int *ilist_full = list->listfull->ilist; + int *numneigh_full = list->listfull->numneigh; + int **firstneigh_full = list->listfull->firstneigh; + + // each thread works on its own page + int npage = tid; + int npnt = 0; + + // loop over atoms in full list + + for (ii = ifrom; ii < ito; ii++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + // only one thread at a time may check whether we + // need new neighbor list pages and then add to them. + if (npage >= list->maxpage) list->add_pages(nthreads); + } + + neighptr = &(list->pages[npage][npnt]); + n = 0; + + // loop over parent full list + + i = ilist_full[ii]; + jlist = firstneigh_full[i]; + jnum = numneigh_full[i]; + + for (jj = 0; jj < jnum; jj++) { + joriginal = jlist[jj]; + j = joriginal & NEIGHMASK; + if (j > i) neighptr[n++] = joriginal; + } + + ilist[ii] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + NEIGH_OMP_CLOSE; + list->inum = inum_full; +} + +/* ---------------------------------------------------------------------- + build half list from full list + pair stored once if i,j are both owned and i < j + if j is ghost, only store if j coords are "above and to the right" of i + works if full list is a skip list +------------------------------------------------------------------------- */ + +void Neighbor::half_from_full_newton_omp(NeighList *list) +{ + const int inum_full = list->listfull->inum; + + NEIGH_OMP_INIT; +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list) +#endif + NEIGH_OMP_SETUP(inum_full); + + int i,j,ii,jj,n,jnum,joriginal; + int *neighptr,*jlist; + double xtmp,ytmp,ztmp; + + double **x = atom->x; + int nlocal = atom->nlocal; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + int *ilist_full = list->listfull->ilist; + int *numneigh_full = list->listfull->numneigh; + int **firstneigh_full = list->listfull->firstneigh; + + // each thread works on its own page + int npage = tid; + int npnt = 0; + + // loop over parent full list + + for (ii = ifrom; ii < ito; ii++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + // only one thread at a time may check whether we + // need new neighbor list pages and then add to them. + if (npage >= list->maxpage) list->add_pages(nthreads); + } + + neighptr = &(list->pages[npage][npnt]); + n = 0; + + i = ilist_full[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // loop over full neighbor list + + jlist = firstneigh_full[i]; + jnum = numneigh_full[i]; + + for (jj = 0; jj < jnum; jj++) { + joriginal = jlist[jj]; + j = joriginal & NEIGHMASK; + if (j < nlocal) { + if (i > j) continue; + } else { + if (x[j][2] < ztmp) continue; + if (x[j][2] == ztmp) { + if (x[j][1] < ytmp) continue; + if (x[j][1] == ytmp && x[j][0] < xtmp) continue; + } + } + neighptr[n++] = joriginal; + } + + ilist[ii] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + NEIGH_OMP_CLOSE; + list->inum = inum_full; +} diff --git a/src/USER-OMP/neigh_full_omp.cpp b/src/USER-OMP/neigh_full_omp.cpp new file mode 100644 index 0000000000..8ba2ffaf2b --- /dev/null +++ b/src/USER-OMP/neigh_full_omp.cpp @@ -0,0 +1,579 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "neighbor.h" +#include "neighbor_omp.h" +#include "neigh_list.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "group.h" +#include "error.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- + N^2 search for all neighbors + every neighbor pair appears in list of both atoms i and j +------------------------------------------------------------------------- */ + +void Neighbor::full_nsq_omp(NeighList *list) +{ + const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; + const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0; + + NEIGH_OMP_INIT; +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list) +#endif + NEIGH_OMP_SETUP(nlocal); + + int i,j,n,itype,jtype,which; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + int *neighptr; + + int **special = atom->special; + int **nspecial = atom->nspecial; + int *tag = atom->tag; + + double **x = atom->x; + int *type = atom->type; + int *mask = atom->mask; + int *molecule = atom->molecule; + int nall = atom->nlocal + atom->nghost; + int molecular = atom->molecular; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + + int npage = tid; + int npnt = 0; + + // loop over owned atoms, storing neighbors + + for (i = ifrom; i < ito; i++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage >= list->maxpage) list->add_pages(nthreads); + } + + neighptr = &(list->pages[npage][npnt]); + n = 0; + + itype = type[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // loop over all atoms, owned and ghost + // skip i = j + + for (j = 0; j < nall; j++) { + if (includegroup && !(mask[j] & bitmask)) continue; + if (i == j) continue; + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + which = find_special(special[i],nspecial[i],tag[j]); + if (which == 0) neighptr[n++] = j; + else if (domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + } + } + + ilist[i] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + NEIGH_OMP_CLOSE; + list->inum = nlocal; + list->gnum = 0; +} + +/* ---------------------------------------------------------------------- + N^2 search for all neighbors + include neighbors of ghost atoms, but no "special neighbors" for ghosts + every neighbor pair appears in list of both atoms i and j +------------------------------------------------------------------------- */ + +void Neighbor::full_nsq_ghost_omp(NeighList *list) +{ + const int nlocal = atom->nlocal; + const int nall = nlocal + atom->nghost; + + NEIGH_OMP_INIT; +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list) +#endif + NEIGH_OMP_SETUP(nall); + + int i,j,n,itype,jtype,which; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + int *neighptr; + + int **special = atom->special; + int **nspecial = atom->nspecial; + int *tag = atom->tag; + + double **x = atom->x; + int *type = atom->type; + int *mask = atom->mask; + int *molecule = atom->molecule; + int molecular = atom->molecular; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + + int npage = tid; + int npnt = 0; + + // loop over owned & ghost atoms, storing neighbors + + for (i = ifrom; i < ito; i++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage >= list->maxpage) list->add_pages(nthreads); + } + + neighptr = &(list->pages[npage][npnt]); + n = 0; + + itype = type[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // loop over all atoms, owned and ghost + // skip i = j + // no molecular test when i = ghost atom + + if (i < nlocal) { + for (j = 0; j < nall; j++) { + if (i == j) continue; + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + which = find_special(special[i],nspecial[i],tag[j]); + if (which == 0) neighptr[n++] = j; + else if (domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + } + } + } else { + for (j = 0; j < nall; j++) { + if (i == j) continue; + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighghostsq[itype][jtype]) neighptr[n++] = j; + } + } + + ilist[i] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + NEIGH_OMP_CLOSE; + list->inum = nlocal; + list->gnum = nall - nlocal; +} + +/* ---------------------------------------------------------------------- + binned neighbor list construction for all neighbors + every neighbor pair appears in list of both atoms i and j +------------------------------------------------------------------------- */ + +void Neighbor::full_bin_omp(NeighList *list) +{ + // bin owned & ghost atoms + + bin_atoms(); + + const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; + + NEIGH_OMP_INIT; +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list) +#endif + NEIGH_OMP_SETUP(nlocal); + + int i,j,k,n,itype,jtype,ibin,which; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + int *neighptr; + + int **special = atom->special; + int **nspecial = atom->nspecial; + int *tag = atom->tag; + + double **x = atom->x; + int *type = atom->type; + int *mask = atom->mask; + int *molecule = atom->molecule; + int molecular = atom->molecular; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + int nstencil = list->nstencil; + int *stencil = list->stencil; + + int npage = tid; + int npnt = 0; + + // loop over owned atoms, storing neighbors + + for (i = ifrom; i < ito; i++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage >= list->maxpage) list->add_pages(nthreads); + } + + neighptr = &(list->pages[npage][npnt]); + n = 0; + + itype = type[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // loop over all atoms in surrounding bins in stencil including self + // skip i = j + + ibin = coord2bin(x[i]); + + for (k = 0; k < nstencil; k++) { + for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { + if (i == j) continue; + + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + which = find_special(special[i],nspecial[i],tag[j]); + if (which == 0) neighptr[n++] = j; + else if (domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + } + } + } + + ilist[i] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + NEIGH_OMP_CLOSE; + list->inum = nlocal; + list->gnum = 0; +} + +/* ---------------------------------------------------------------------- + binned neighbor list construction for all neighbors + include neighbors of ghost atoms, but no "special neighbors" for ghosts + every neighbor pair appears in list of both atoms i and j +------------------------------------------------------------------------- */ + +void Neighbor::full_bin_ghost_omp(NeighList *list) +{ + // bin owned & ghost atoms + + bin_atoms(); + + const int nlocal = atom->nlocal; + const int nall = nlocal + atom->nghost; + + NEIGH_OMP_INIT; +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list) +#endif + NEIGH_OMP_SETUP(nall); + + int i,j,k,n,itype,jtype,ibin,which; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + int xbin,ybin,zbin,xbin2,ybin2,zbin2; + int *neighptr; + + int **special = atom->special; + int **nspecial = atom->nspecial; + int *tag = atom->tag; + + double **x = atom->x; + int *type = atom->type; + int *mask = atom->mask; + int *molecule = atom->molecule; + int molecular = atom->molecular; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + int nstencil = list->nstencil; + int *stencil = list->stencil; + int **stencilxyz = list->stencilxyz; + + int npage = tid; + int npnt = 0; + + // loop over owned & ghost atoms, storing neighbors + + for (i = ifrom; i < ito; i++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage >= list->maxpage) list->add_pages(nthreads); + } + + neighptr = &(list->pages[npage][npnt]); + n = 0; + + itype = type[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // loop over all atoms in surrounding bins in stencil including self + // when i is a ghost atom, must check if stencil bin is out of bounds + // skip i = j + // no molecular test when i = ghost atom + + if (i < nlocal) { + ibin = coord2bin(x[i]); + for (k = 0; k < nstencil; k++) { + for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { + if (i == j) continue; + + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + which = find_special(special[i],nspecial[i],tag[j]); + if (which == 0) neighptr[n++] = j; + else if (domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + } + } + } + + } else { + ibin = coord2bin(x[i],xbin,ybin,zbin); + for (k = 0; k < nstencil; k++) { + xbin2 = xbin + stencilxyz[k][0]; + ybin2 = ybin + stencilxyz[k][1]; + zbin2 = zbin + stencilxyz[k][2]; + if (xbin2 < 0 || xbin2 >= mbinx || + ybin2 < 0 || ybin2 >= mbiny || + zbin2 < 0 || zbin2 >= mbinz) continue; + for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { + if (i == j) continue; + + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighghostsq[itype][jtype]) neighptr[n++] = j; + } + } + } + + ilist[i] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + NEIGH_OMP_CLOSE; + list->inum = nlocal; + list->gnum = nall - nlocal; +} + +/* ---------------------------------------------------------------------- + binned neighbor list construction for all neighbors + multi-type stencil is itype dependent and is distance checked + every neighbor pair appears in list of both atoms i and j +------------------------------------------------------------------------- */ + +void Neighbor::full_multi_omp(NeighList *list) +{ + // bin local & ghost atoms + + bin_atoms(); + + const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; + + NEIGH_OMP_INIT; +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list) +#endif + NEIGH_OMP_SETUP(nlocal); + + int i,j,k,n,itype,jtype,ibin,which,ns; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + int *neighptr,*s; + double *cutsq,*distsq; + + // loop over each atom, storing neighbors + + int **special = atom->special; + int **nspecial = atom->nspecial; + int *tag = atom->tag; + + double **x = atom->x; + int *type = atom->type; + int *mask = atom->mask; + int *molecule = atom->molecule; + int molecular = atom->molecular; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + int *nstencil_multi = list->nstencil_multi; + int **stencil_multi = list->stencil_multi; + double **distsq_multi = list->distsq_multi; + + int npage = tid; + int npnt = 0; + + for (i = ifrom; i < ito; i++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage >= list->maxpage) list->add_pages(nthreads); + } + + neighptr = &(list->pages[npage][npnt]); + n = 0; + + itype = type[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // loop over all atoms in other bins in stencil, including self + // skip if i,j neighbor cutoff is less than bin distance + // skip i = j + + ibin = coord2bin(x[i]); + s = stencil_multi[itype]; + distsq = distsq_multi[itype]; + cutsq = cutneighsq[itype]; + ns = nstencil_multi[itype]; + for (k = 0; k < ns; k++) { + for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) { + jtype = type[j]; + if (cutsq[jtype] < distsq[k]) continue; + if (i == j) continue; + + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + which = find_special(special[i],nspecial[i],tag[j]); + if (which == 0) neighptr[n++] = j; + else if (domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + } + } + } + + ilist[i] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + NEIGH_OMP_CLOSE; + list->inum = nlocal; + list->gnum = 0; +} diff --git a/src/USER-OMP/neigh_gran_omp.cpp b/src/USER-OMP/neigh_gran_omp.cpp new file mode 100644 index 0000000000..71fee4ec4f --- /dev/null +++ b/src/USER-OMP/neigh_gran_omp.cpp @@ -0,0 +1,657 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "neighbor.h" +#include "neighbor_omp.h" +#include "neigh_list.h" +#include "atom.h" +#include "comm.h" +#include "group.h" +#include "fix_shear_history.h" +#include "error.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- + granular particles + N^2 / 2 search for neighbor pairs with partial Newton's 3rd law + shear history must be accounted for when a neighbor pair is added + pair added to list if atoms i and j are both owned and i < j + pair added if j is ghost (also stored by proc owning j) +------------------------------------------------------------------------- */ + +void Neighbor::granular_nsq_no_newton_omp(NeighList *list) +{ + const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; + const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0; + + FixShearHistory * const fix_history = list->fix_history; + NeighList * listgranhistory = list->listgranhistory; + + NEIGH_OMP_INIT; + + if (fix_history) + if (nthreads > listgranhistory->maxpage) + listgranhistory->add_pages(nthreads - listgranhistory->maxpage); + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list,listgranhistory) +#endif + NEIGH_OMP_SETUP(nlocal); + + int i,j,m,n,nn; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + double radi,radsum,cutsq; + int *neighptr,*touchptr; + double *shearptr; + + int *npartner,**partner; + double ***shearpartner; + int **firsttouch; + double **firstshear; + + double **x = atom->x; + double *radius = atom->radius; + int *tag = atom->tag; + int *type = atom->type; + int *mask = atom->mask; + int *molecule = atom->molecule; + int nall = atom->nlocal + atom->nghost; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + + if (fix_history) { + npartner = fix_history->npartner; + partner = fix_history->partner; + shearpartner = fix_history->shearpartner; + firsttouch = listgranhistory->firstneigh; + firstshear = listgranhistory->firstdouble; + } + + int npage = tid; + int npnt = 0; + + for (i = ifrom; i < ito; i++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + { + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage >= list->maxpage) { + list->add_pages(nthreads); + if (fix_history) + listgranhistory->add_pages(nthreads); + } + } + + n = nn = 0; + neighptr = &(list->pages[npage][npnt]); + if (fix_history) { + touchptr = &(listgranhistory->pages[npage][npnt]); + shearptr = &(listgranhistory->dpages[npage][3*npnt]); + } + } + + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + radi = radius[i]; + + // loop over remaining atoms, owned and ghost + + for (j = i+1; j < nall; j++) { + if (includegroup && !(mask[j] & bitmask)) continue; + if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + radsum = radi + radius[j]; + cutsq = (radsum+skin) * (radsum+skin); + + if (rsq <= cutsq) { + neighptr[n] = j; + + if (fix_history) { + if (rsq < radsum*radsum) { + for (m = 0; m < npartner[i]; m++) + if (partner[i][m] == tag[j]) break; + if (m < npartner[i]) { + touchptr[n] = 1; + shearptr[nn++] = shearpartner[i][m][0]; + shearptr[nn++] = shearpartner[i][m][1]; + shearptr[nn++] = shearpartner[i][m][2]; + } else { + touchptr[n] = 0; + shearptr[nn++] = 0.0; + shearptr[nn++] = 0.0; + shearptr[nn++] = 0.0; + } + } else { + touchptr[n] = 0; + shearptr[nn++] = 0.0; + shearptr[nn++] = 0.0; + shearptr[nn++] = 0.0; + } + } + + n++; + } + } + + ilist[i] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + if (fix_history) { + firsttouch[i] = touchptr; + firstshear[i] = shearptr; + } + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + NEIGH_OMP_CLOSE; + list->inum = nlocal; +} + +/* ---------------------------------------------------------------------- + granular particles + N^2 / 2 search for neighbor pairs with full Newton's 3rd law + no shear history is allowed for this option + pair added to list if atoms i and j are both owned and i < j + if j is ghost only me or other proc adds pair + decision based on itag,jtag tests +------------------------------------------------------------------------- */ + +void Neighbor::granular_nsq_newton_omp(NeighList *list) +{ + const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; + const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0; + + NEIGH_OMP_INIT; +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list) +#endif + NEIGH_OMP_SETUP(nlocal); + + int i,j,n,itag,jtag; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + double radi,radsum,cutsq; + int *neighptr; + + double **x = atom->x; + double *radius = atom->radius; + int *tag = atom->tag; + int *type = atom->type; + int *mask = atom->mask; + int *molecule = atom->molecule; + int nall = atom->nlocal + atom->nghost; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + + int npage = tid; + int npnt = 0; + + for (i = ifrom; i < ito; i++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage >= list->maxpage) list->add_pages(nthreads); + } + + neighptr = &(list->pages[npage][npnt]); + n = 0; + + itag = tag[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + radi = radius[i]; + + // loop over remaining atoms, owned and ghost + + for (j = i+1; j < nall; j++) { + if (includegroup && !(mask[j] & bitmask)) continue; + + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (x[j][2] < ztmp) continue; + if (x[j][2] == ztmp) { + if (x[j][1] < ytmp) continue; + if (x[j][1] == ytmp && x[j][0] < xtmp) continue; + } + } + } + + if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + radsum = radi + radius[j]; + cutsq = (radsum+skin) * (radsum+skin); + + if (rsq <= cutsq) neighptr[n++] = j; + } + + ilist[i] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + NEIGH_OMP_CLOSE; + list->inum = nlocal; +} + +/* ---------------------------------------------------------------------- + granular particles + binned neighbor list construction with partial Newton's 3rd law + shear history must be accounted for when a neighbor pair is added + each owned atom i checks own bin and surrounding bins in non-Newton stencil + pair stored once if i,j are both owned and i < j + pair stored by me if j is ghost (also stored by proc owning j) +------------------------------------------------------------------------- */ + +void Neighbor::granular_bin_no_newton_omp(NeighList *list) +{ + // bin local & ghost atoms + + bin_atoms(); + + const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; + + FixShearHistory * const fix_history = list->fix_history; + NeighList * listgranhistory = list->listgranhistory; + + NEIGH_OMP_INIT; + + if (fix_history) + if (nthreads > listgranhistory->maxpage) + listgranhistory->add_pages(nthreads - listgranhistory->maxpage); + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list,listgranhistory) +#endif + NEIGH_OMP_SETUP(nlocal); + + int i,j,k,m,n,nn,ibin; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + double radi,radsum,cutsq; + int *neighptr,*touchptr; + double *shearptr; + + int *npartner,**partner; + double ***shearpartner; + int **firsttouch; + double **firstshear; + + // loop over each atom, storing neighbors + + double **x = atom->x; + double *radius = atom->radius; + int *tag = atom->tag; + int *type = atom->type; + int *mask = atom->mask; + int *molecule = atom->molecule; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + int nstencil = list->nstencil; + int *stencil = list->stencil; + + if (fix_history) { + npartner = fix_history->npartner; + partner = fix_history->partner; + shearpartner = fix_history->shearpartner; + firsttouch = listgranhistory->firstneigh; + firstshear = listgranhistory->firstdouble; + } + + int npage = tid; + int npnt = 0; + + for (i = ifrom; i < ito; i++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + { + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage >= list->maxpage) { + list->add_pages(nthreads); + if (fix_history) + listgranhistory->add_pages(nthreads); + } + } + + n = nn = 0; + neighptr = &(list->pages[npage][npnt]); + if (fix_history) { + touchptr = &(listgranhistory->pages[npage][npnt]); + shearptr = &(listgranhistory->dpages[npage][3*npnt]); + } + } + + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + radi = radius[i]; + ibin = coord2bin(x[i]); + + // loop over all atoms in surrounding bins in stencil including self + // only store pair if i < j + // stores own/own pairs only once + // stores own/ghost pairs on both procs + + for (k = 0; k < nstencil; k++) { + for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { + if (j <= i) continue; + if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + radsum = radi + radius[j]; + cutsq = (radsum+skin) * (radsum+skin); + + if (rsq <= cutsq) { + neighptr[n] = j; + + if (fix_history) { + if (rsq < radsum*radsum) { + for (m = 0; m < npartner[i]; m++) + if (partner[i][m] == tag[j]) break; + if (m < npartner[i]) { + touchptr[n] = 1; + shearptr[nn++] = shearpartner[i][m][0]; + shearptr[nn++] = shearpartner[i][m][1]; + shearptr[nn++] = shearpartner[i][m][2]; + } else { + touchptr[n] = 0; + shearptr[nn++] = 0.0; + shearptr[nn++] = 0.0; + shearptr[nn++] = 0.0; + } + } else { + touchptr[n] = 0; + shearptr[nn++] = 0.0; + shearptr[nn++] = 0.0; + shearptr[nn++] = 0.0; + } + } + + n++; + } + } + } + + ilist[i] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + if (fix_history) { + firsttouch[i] = touchptr; + firstshear[i] = shearptr; + } + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + NEIGH_OMP_CLOSE; + list->inum = nlocal; +} + +/* ---------------------------------------------------------------------- + granular particles + binned neighbor list construction with full Newton's 3rd law + no shear history is allowed for this option + each owned atom i checks its own bin and other bins in Newton stencil + every pair stored exactly once by some processor +------------------------------------------------------------------------- */ + +void Neighbor::granular_bin_newton_omp(NeighList *list) +{ + // bin local & ghost atoms + + bin_atoms(); + + const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; + + NEIGH_OMP_INIT; +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list) +#endif + NEIGH_OMP_SETUP(nlocal); + + int i,j,k,n,ibin; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + double radi,radsum,cutsq; + int *neighptr; + + // loop over each atom, storing neighbors + + double **x = atom->x; + double *radius = atom->radius; + int *type = atom->type; + int *mask = atom->mask; + int *molecule = atom->molecule; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + int nstencil = list->nstencil; + int *stencil = list->stencil; + + int npage = tid; + int npnt = 0; + + for (i = ifrom; i < ito; i++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage >= list->maxpage) list->add_pages(nthreads); + } + + n = 0; + neighptr = &(list->pages[npage][npnt]); + + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + radi = radius[i]; + + // loop over rest of atoms in i's bin, ghosts are at end of linked list + // if j is owned atom, store it, since j is beyond i in linked list + // if j is ghost, only store if j coords are "above and to the right" of i + + for (j = bins[i]; j >= 0; j = bins[j]) { + if (j >= nlocal) { + if (x[j][2] < ztmp) continue; + if (x[j][2] == ztmp) { + if (x[j][1] < ytmp) continue; + if (x[j][1] == ytmp && x[j][0] < xtmp) continue; + } + } + + if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + radsum = radi + radius[j]; + cutsq = (radsum+skin) * (radsum+skin); + + if (rsq <= cutsq) neighptr[n++] = j; + } + + // loop over all atoms in other bins in stencil, store every pair + + ibin = coord2bin(x[i]); + for (k = 0; k < nstencil; k++) { + for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { + if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + radsum = radi + radius[j]; + cutsq = (radsum+skin) * (radsum+skin); + + if (rsq <= cutsq) neighptr[n++] = j; + } + } + + ilist[i] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + NEIGH_OMP_CLOSE; + list->inum = nlocal; +} + +/* ---------------------------------------------------------------------- + granular particles + binned neighbor list construction with Newton's 3rd law for triclinic + no shear history is allowed for this option + each owned atom i checks its own bin and other bins in triclinic stencil + every pair stored exactly once by some processor +------------------------------------------------------------------------- */ + +void Neighbor::granular_bin_newton_tri_omp(NeighList *list) +{ + // bin local & ghost atoms + + bin_atoms(); + + const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; + + NEIGH_OMP_INIT; +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list) +#endif + NEIGH_OMP_SETUP(nlocal); + + int i,j,k,n,ibin; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + double radi,radsum,cutsq; + int *neighptr; + + // loop over each atom, storing neighbors + + double **x = atom->x; + double *radius = atom->radius; + int *type = atom->type; + int *mask = atom->mask; + int *molecule = atom->molecule; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + int nstencil = list->nstencil; + int *stencil = list->stencil; + + int npage = tid; + int npnt = 0; + + for (i = ifrom; i < ito; i++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage >= list->maxpage) list->add_pages(nthreads); + } + + n = 0; + neighptr = &(list->pages[npage][npnt]); + + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + radi = radius[i]; + + // loop over all atoms in bins in stencil + // pairs for atoms j "below" i are excluded + // below = lower z or (equal z and lower y) or (equal zy and lower x) + // (equal zyx and j <= i) + // latter excludes self-self interaction but allows superposed atoms + + ibin = coord2bin(x[i]); + for (k = 0; k < nstencil; k++) { + for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { + if (x[j][2] < ztmp) continue; + if (x[j][2] == ztmp) { + if (x[j][1] < ytmp) continue; + if (x[j][1] == ytmp) { + if (x[j][0] < xtmp) continue; + if (x[j][0] == xtmp && j <= i) continue; + } + } + + if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + radsum = radi + radius[j]; + cutsq = (radsum+skin) * (radsum+skin); + + if (rsq <= cutsq) neighptr[n++] = j; + } + } + + ilist[i] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + NEIGH_OMP_CLOSE; + list->inum = nlocal; +} diff --git a/src/USER-OMP/neigh_half_bin_omp.cpp b/src/USER-OMP/neigh_half_bin_omp.cpp new file mode 100644 index 0000000000..ba0a180410 --- /dev/null +++ b/src/USER-OMP/neigh_half_bin_omp.cpp @@ -0,0 +1,522 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "neighbor.h" +#include "neighbor_omp.h" +#include "neigh_list.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "error.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- + binned neighbor list construction with partial Newton's 3rd law + each owned atom i checks own bin and other bins in stencil + pair stored once if i,j are both owned and i < j + pair stored by me if j is ghost (also stored by proc owning j) +------------------------------------------------------------------------- */ + +void Neighbor::half_bin_no_newton_omp(NeighList *list) +{ + // bin local & ghost atoms + + bin_atoms(); + + const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; + + NEIGH_OMP_INIT; +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list) +#endif + NEIGH_OMP_SETUP(nlocal); + + int i,j,k,n,itype,jtype,ibin,which; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + int *neighptr; + + // loop over each atom, storing neighbors + + int **special = atom->special; + int **nspecial = atom->nspecial; + int *tag = atom->tag; + + double **x = atom->x; + int *type = atom->type; + int *mask = atom->mask; + int *molecule = atom->molecule; + int molecular = atom->molecular; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + int nstencil = list->nstencil; + int *stencil = list->stencil; + + // each thread works on its own page + int npage = tid; + int npnt = 0; + + for (i = ifrom; i < ito; i++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage >= list->maxpage) list->add_pages(nthreads); + } + + neighptr = &(list->pages[npage][npnt]); + n = 0; + + itype = type[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // loop over all atoms in other bins in stencil including self + // only store pair if i < j + // stores own/own pairs only once + // stores own/ghost pairs on both procs + + ibin = coord2bin(x[i]); + + for (k = 0; k < nstencil; k++) { + for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { + if (j <= i) continue; + + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + which = find_special(special[i],nspecial[i],tag[j]); + if (which == 0) neighptr[n++] = j; + else if (domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + } + } + } + + ilist[i] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + NEIGH_OMP_CLOSE; + list->inum = nlocal; +} + +/* ---------------------------------------------------------------------- + binned neighbor list construction with partial Newton's 3rd law + include neighbors of ghost atoms, but no "special neighbors" for ghosts + owned and ghost atoms check own bin and other bins in stencil + pair stored once if i,j are both owned and i < j + pair stored by me if i owned and j ghost (also stored by proc owning j) + pair stored once if i,j are both ghost and i < j +------------------------------------------------------------------------- */ + +void Neighbor::half_bin_no_newton_ghost_omp(NeighList *list) +{ + const int nlocal = atom->nlocal; + const int nall = nlocal + atom->nghost; + + NEIGH_OMP_INIT; +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list) +#endif + NEIGH_OMP_SETUP(nall); + + int i,j,k,n,itype,jtype,ibin,which; + int xbin,ybin,zbin,xbin2,ybin2,zbin2; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + int *neighptr; + + // bin local & ghost atoms + + bin_atoms(); + + // loop over each atom, storing neighbors + + int **special = atom->special; + int **nspecial = atom->nspecial; + int *tag = atom->tag; + + double **x = atom->x; + int *type = atom->type; + int *mask = atom->mask; + int *molecule = atom->molecule; + int molecular = atom->molecular; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + int nstencil = list->nstencil; + int *stencil = list->stencil; + int **stencilxyz = list->stencilxyz; + + // each thread works on its own page + int npage = tid; + int npnt = 0; + + for (i = ifrom; i < ito; i++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage >= list->maxpage) list->add_pages(nthreads); + } + + neighptr = &(list->pages[npage][npnt]); + n = 0; + + itype = type[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // loop over all atoms in other bins in stencil including self + // when i is a ghost atom, must check if stencil bin is out of bounds + // only store pair if i < j + // stores own/own pairs only once + // stores own/ghost pairs with owned atom only, on both procs + // stores ghost/ghost pairs only once + // no molecular test when i = ghost atom + + if (i < nlocal) { + ibin = coord2bin(x[i]); + + for (k = 0; k < nstencil; k++) { + for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { + if (j <= i) continue; + + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + which = find_special(special[i],nspecial[i],tag[j]); + if (which == 0) neighptr[n++] = j; + else if (domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + } + } + } + + } else { + ibin = coord2bin(x[i],xbin,ybin,zbin); + for (k = 0; k < nstencil; k++) { + xbin2 = xbin + stencilxyz[k][0]; + ybin2 = ybin + stencilxyz[k][1]; + zbin2 = zbin + stencilxyz[k][2]; + if (xbin2 < 0 || xbin2 >= mbinx || + ybin2 < 0 || ybin2 >= mbiny || + zbin2 < 0 || zbin2 >= mbinz) continue; + for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { + if (j <= i) continue; + + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighghostsq[itype][jtype]) neighptr[n++] = j; + } + } + } + + ilist[i] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + NEIGH_OMP_CLOSE; + list->inum = nlocal; + list->gnum = nall - atom->nlocal; +} + +/* ---------------------------------------------------------------------- + binned neighbor list construction with full Newton's 3rd law + each owned atom i checks its own bin and other bins in Newton stencil + every pair stored exactly once by some processor +------------------------------------------------------------------------- */ + +void Neighbor::half_bin_newton_omp(NeighList *list) +{ + // bin local & ghost atoms + + bin_atoms(); + + const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; + + NEIGH_OMP_INIT; +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list) +#endif + NEIGH_OMP_SETUP(nlocal); + + int i,j,k,n,itype,jtype,ibin,which; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + int *neighptr; + + // loop over each atom, storing neighbors + + int **special = atom->special; + int **nspecial = atom->nspecial; + int *tag = atom->tag; + + double **x = atom->x; + int *type = atom->type; + int *mask = atom->mask; + int *molecule = atom->molecule; + int molecular = atom->molecular; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + int nstencil = list->nstencil; + int *stencil = list->stencil; + + // each thread works on its own page + int npage = tid; + int npnt = 0; + + for (i = ifrom; i < ito; i++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage >= list->maxpage) list->add_pages(nthreads); + } + + neighptr = &(list->pages[npage][npnt]); + n = 0; + + itype = type[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // loop over rest of atoms in i's bin, ghosts are at end of linked list + // if j is owned atom, store it, since j is beyond i in linked list + // if j is ghost, only store if j coords are "above and to the right" of i + + for (j = bins[i]; j >= 0; j = bins[j]) { + if (j >= nlocal) { + if (x[j][2] < ztmp) continue; + if (x[j][2] == ztmp) { + if (x[j][1] < ytmp) continue; + if (x[j][1] == ytmp && x[j][0] < xtmp) continue; + } + } + + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + which = find_special(special[i],nspecial[i],tag[j]); + if (which == 0) neighptr[n++] = j; + else if (domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + // OLD: if (which >= 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + } + } + + // loop over all atoms in other bins in stencil, store every pair + + ibin = coord2bin(x[i]); + for (k = 0; k < nstencil; k++) { + for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + which = find_special(special[i],nspecial[i],tag[j]); + if (which == 0) neighptr[n++] = j; + else if (domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + // OLD: if (which >= 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + } + } + } + + ilist[i] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + NEIGH_OMP_CLOSE; + list->inum = nlocal; +} + +/* ---------------------------------------------------------------------- + binned neighbor list construction with Newton's 3rd law for triclinic + each owned atom i checks its own bin and other bins in triclinic stencil + every pair stored exactly once by some processor +------------------------------------------------------------------------- */ + +void Neighbor::half_bin_newton_tri_omp(NeighList *list) +{ + // bin local & ghost atoms + + bin_atoms(); + + const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; + + NEIGH_OMP_INIT; +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list) +#endif + NEIGH_OMP_SETUP(nlocal); + + int i,j,k,n,itype,jtype,ibin,which; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + int *neighptr; + + // loop over each atom, storing neighbors + + int **special = atom->special; + int **nspecial = atom->nspecial; + int *tag = atom->tag; + + double **x = atom->x; + int *type = atom->type; + int *mask = atom->mask; + int *molecule = atom->molecule; + int molecular = atom->molecular; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + int nstencil = list->nstencil; + int *stencil = list->stencil; + + // each thread works on its own page + int npage = tid; + int npnt = 0; + + for (i = ifrom; i < ito; i++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage >= list->maxpage) list->add_pages(nthreads); + } + + neighptr = &(list->pages[npage][npnt]); + n = 0; + + itype = type[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // loop over all atoms in bins in stencil + // pairs for atoms j "below" i are excluded + // below = lower z or (equal z and lower y) or (equal zy and lower x) + // (equal zyx and j <= i) + // latter excludes self-self interaction but allows superposed atoms + + ibin = coord2bin(x[i]); + for (k = 0; k < nstencil; k++) { + for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { + if (x[j][2] < ztmp) continue; + if (x[j][2] == ztmp) { + if (x[j][1] < ytmp) continue; + if (x[j][1] == ytmp) { + if (x[j][0] < xtmp) continue; + if (x[j][0] == xtmp && j <= i) continue; + } + } + + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + which = find_special(special[i],nspecial[i],tag[j]); + if (which == 0) neighptr[n++] = j; + else if (domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + } + } + } + + ilist[i] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + NEIGH_OMP_CLOSE; + list->inum = nlocal; +} diff --git a/src/USER-OMP/neigh_half_multi_omp.cpp b/src/USER-OMP/neigh_half_multi_omp.cpp new file mode 100644 index 0000000000..5cc6ba1401 --- /dev/null +++ b/src/USER-OMP/neigh_half_multi_omp.cpp @@ -0,0 +1,405 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "neighbor.h" +#include "neighbor_omp.h" +#include "neigh_list.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "error.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- + binned neighbor list construction with partial Newton's 3rd law + each owned atom i checks own bin and other bins in stencil + multi-type stencil is itype dependent and is distance checked + pair stored once if i,j are both owned and i < j + pair stored by me if j is ghost (also stored by proc owning j) +------------------------------------------------------------------------- */ + +void Neighbor::half_multi_no_newton_omp(NeighList *list) +{ + // bin local & ghost atoms + + bin_atoms(); + + const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; + + NEIGH_OMP_INIT; +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list) +#endif + NEIGH_OMP_SETUP(nlocal); + + int i,j,k,n,itype,jtype,ibin,which,ns; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + int *neighptr,*s; + double *cutsq,*distsq; + + // loop over each atom, storing neighbors + + int **special = atom->special; + int **nspecial = atom->nspecial; + int *tag = atom->tag; + + double **x = atom->x; + int *type = atom->type; + int *mask = atom->mask; + int *molecule = atom->molecule; + int molecular = atom->molecular; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + int *nstencil_multi = list->nstencil_multi; + int **stencil_multi = list->stencil_multi; + double **distsq_multi = list->distsq_multi; + + // each thread works on its own page + int npage = tid; + int npnt = 0; + + for (i = ifrom; i < ito; i++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage >= list->maxpage) list->add_pages(nthreads); + } + + neighptr = &(list->pages[npage][npnt]); + n = 0; + + itype = type[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // loop over all atoms in other bins in stencil including self + // only store pair if i < j + // skip if i,j neighbor cutoff is less than bin distance + // stores own/own pairs only once + // stores own/ghost pairs on both procs + + ibin = coord2bin(x[i]); + s = stencil_multi[itype]; + distsq = distsq_multi[itype]; + cutsq = cutneighsq[itype]; + ns = nstencil_multi[itype]; + for (k = 0; k < ns; k++) { + for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) { + if (j <= i) continue; + jtype = type[j]; + if (cutsq[jtype] < distsq[k]) continue; + + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + which = find_special(special[i],nspecial[i],tag[j]); + if (which == 0) neighptr[n++] = j; + else if (domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + } + } + } + + ilist[i] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + NEIGH_OMP_CLOSE; + list->inum = nlocal; +} + +/* ---------------------------------------------------------------------- + binned neighbor list construction with full Newton's 3rd law + each owned atom i checks its own bin and other bins in Newton stencil + multi-type stencil is itype dependent and is distance checked + every pair stored exactly once by some processor +------------------------------------------------------------------------- */ + +void Neighbor::half_multi_newton_omp(NeighList *list) +{ + // bin local & ghost atoms + + bin_atoms(); + + const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; + + NEIGH_OMP_INIT; +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list) +#endif + NEIGH_OMP_SETUP(nlocal); + + int i,j,k,n,itype,jtype,ibin,which,ns; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + int *neighptr,*s; + double *cutsq,*distsq; + + // loop over each atom, storing neighbors + + int **special = atom->special; + int **nspecial = atom->nspecial; + int *tag = atom->tag; + + double **x = atom->x; + int *type = atom->type; + int *mask = atom->mask; + int *molecule = atom->molecule; + int molecular = atom->molecular; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + int *nstencil_multi = list->nstencil_multi; + int **stencil_multi = list->stencil_multi; + double **distsq_multi = list->distsq_multi; + + // each thread works on its own page + int npage = tid; + int npnt = 0; + + for (i = ifrom; i < ito; i++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage >= list->maxpage) list->add_pages(nthreads); + } + + neighptr = &(list->pages[npage][npnt]); + n = 0; + + itype = type[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // loop over rest of atoms in i's bin, ghosts are at end of linked list + // if j is owned atom, store it, since j is beyond i in linked list + // if j is ghost, only store if j coords are "above and to the right" of i + + for (j = bins[i]; j >= 0; j = bins[j]) { + if (j >= nlocal) { + if (x[j][2] < ztmp) continue; + if (x[j][2] == ztmp) { + if (x[j][1] < ytmp) continue; + if (x[j][1] == ytmp && x[j][0] < xtmp) continue; + } + } + + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + which = find_special(special[i],nspecial[i],tag[j]); + if (which == 0) neighptr[n++] = j; + else if (domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + } + } + + // loop over all atoms in other bins in stencil, store every pair + // skip if i,j neighbor cutoff is less than bin distance + + ibin = coord2bin(x[i]); + s = stencil_multi[itype]; + distsq = distsq_multi[itype]; + cutsq = cutneighsq[itype]; + ns = nstencil_multi[itype]; + for (k = 0; k < ns; k++) { + for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) { + jtype = type[j]; + if (cutsq[jtype] < distsq[k]) continue; + + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + which = find_special(special[i],nspecial[i],tag[j]); + if (which == 0) neighptr[n++] = j; + else if (domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + } + } + } + + ilist[i] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + NEIGH_OMP_CLOSE; + list->inum = nlocal; +} + +/* ---------------------------------------------------------------------- + binned neighbor list construction with Newton's 3rd law for triclinic + each owned atom i checks its own bin and other bins in triclinic stencil + multi-type stencil is itype dependent and is distance checked + every pair stored exactly once by some processor +------------------------------------------------------------------------- */ + +void Neighbor::half_multi_newton_tri_omp(NeighList *list) +{ + // bin local & ghost atoms + + bin_atoms(); + + const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; + + NEIGH_OMP_INIT; +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list) +#endif + NEIGH_OMP_SETUP(nlocal); + + int i,j,k,n,itype,jtype,ibin,which,ns; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + int *neighptr,*s; + double *cutsq,*distsq; + + // loop over each atom, storing neighbors + + int **special = atom->special; + int **nspecial = atom->nspecial; + int *tag = atom->tag; + + double **x = atom->x; + int *type = atom->type; + int *mask = atom->mask; + int *molecule = atom->molecule; + int molecular = atom->molecular; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + int *nstencil_multi = list->nstencil_multi; + int **stencil_multi = list->stencil_multi; + double **distsq_multi = list->distsq_multi; + + // each thread works on its own page + int npage = tid; + int npnt = 0; + + for (i = ifrom; i < ito; i++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage >= list->maxpage) list->add_pages(nthreads); + } + + neighptr = &(list->pages[npage][npnt]); + n = 0; + + itype = type[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // loop over all atoms in bins, including self, in stencil + // skip if i,j neighbor cutoff is less than bin distance + // bins below self are excluded from stencil + // pairs for atoms j "below" i are excluded + // below = lower z or (equal z and lower y) or (equal zy and lower x) + // (equal zyx and j <= i) + // latter excludes self-self interaction but allows superposed atoms + + ibin = coord2bin(x[i]); + s = stencil_multi[itype]; + distsq = distsq_multi[itype]; + cutsq = cutneighsq[itype]; + ns = nstencil_multi[itype]; + for (k = 0; k < ns; k++) { + for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) { + jtype = type[j]; + if (cutsq[jtype] < distsq[k]) continue; + if (x[j][2] < ztmp) continue; + if (x[j][2] == ztmp) { + if (x[j][1] < ytmp) continue; + if (x[j][1] == ytmp) { + if (x[j][0] < xtmp) continue; + if (x[j][0] == xtmp && j <= i) continue; + } + } + + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + which = find_special(special[i],nspecial[i],tag[j]); + if (which == 0) neighptr[n++] = j; + else if (domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + } + } + } + + ilist[i] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + NEIGH_OMP_CLOSE; + list->inum = nlocal; +} diff --git a/src/USER-OMP/neigh_half_nsq_omp.cpp b/src/USER-OMP/neigh_half_nsq_omp.cpp new file mode 100644 index 0000000000..6a8bee491a --- /dev/null +++ b/src/USER-OMP/neigh_half_nsq_omp.cpp @@ -0,0 +1,350 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "neighbor.h" +#include "neighbor_omp.h" +#include "neigh_list.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "group.h" +#include "error.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- + N^2 / 2 search for neighbor pairs with partial Newton's 3rd law + pair stored once if i,j are both owned and i < j + pair stored by me if j is ghost (also stored by proc owning j) +------------------------------------------------------------------------- */ + +void Neighbor::half_nsq_no_newton_omp(NeighList *list) +{ + const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; + const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0; + const int nall = atom->nlocal + atom->nghost; + + NEIGH_OMP_INIT; +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list) +#endif + NEIGH_OMP_SETUP(nlocal); + + int i,j,n,itype,jtype,which; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + int *neighptr; + + int **special = atom->special; + int **nspecial = atom->nspecial; + int *tag = atom->tag; + + double **x = atom->x; + int *type = atom->type; + int *mask = atom->mask; + int *molecule = atom->molecule; + int molecular = atom->molecular; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + + // each thread works on its own page + int npage = tid; + int npnt = 0; + + // loop over owned atoms, storing neighbors + + for (i = ifrom; i < ito; i++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage >= list->maxpage) list->add_pages(nthreads); + } + + neighptr = &(list->pages[npage][npnt]); + n = 0; + + itype = type[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // loop over remaining atoms, owned and ghost + // only store pair if i < j + + for (j = i+1; j < nall; j++) { + if (includegroup && !(mask[j] & bitmask)) continue; + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + which = find_special(special[i],nspecial[i],tag[j]); + if (which == 0) neighptr[n++] = j; + else if (domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + } + } + + ilist[i] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + NEIGH_OMP_CLOSE; + list->inum = nlocal; +} + +/* ---------------------------------------------------------------------- + N^2 / 2 search for neighbor pairs with partial Newton's 3rd law + include neighbors of ghost atoms, but no "special neighbors" for ghosts + pair stored once if i,j are both owned and i < j + pair stored by me if i owned and j ghost (also stored by proc owning j) + pair stored once if i,j are both ghost and i < j +------------------------------------------------------------------------- */ + +void Neighbor::half_nsq_no_newton_ghost_omp(NeighList *list) +{ + const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; + const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0; + const int nall = nlocal + atom->nghost; + + NEIGH_OMP_INIT; +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list) +#endif + NEIGH_OMP_SETUP(nall); + + int i,j,n,itype,jtype,which; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + int *neighptr; + + int **special = atom->special; + int **nspecial = atom->nspecial; + int *tag = atom->tag; + + double **x = atom->x; + int *type = atom->type; + int *mask = atom->mask; + int *molecule = atom->molecule; + int molecular = atom->molecular; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + + // each thread works on its own page + int npage = tid; + int npnt = 0; + + // loop over owned & ghost atoms, storing neighbors + + for (i = ifrom; i < ito; i++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage >= list->maxpage) list->add_pages(nthreads); + } + + neighptr = &(list->pages[npage][npnt]); + n = 0; + + itype = type[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // loop over remaining atoms, owned and ghost + // only store pair if i < j + // stores own/own pairs only once + // stores own/ghost pairs with owned atom only, on both procs + // stores ghost/ghost pairs only once + // no molecular test when i = ghost atom + + if (i < nlocal) { + for (j = i+1; j < nall; j++) { + if (includegroup && !(mask[j] & bitmask)) continue; + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + which = find_special(special[i],nspecial[i],tag[j]); + if (which == 0) neighptr[n++] = j; + else if (domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + } + } + + } else { + for (j = i+1; j < nall; j++) { + if (includegroup && !(mask[j] & bitmask)) continue; + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) neighptr[n++] = j; + } + } + + ilist[i] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + NEIGH_OMP_CLOSE; + list->inum = atom->nlocal; + list->gnum = nall - atom->nlocal; +} + +/* ---------------------------------------------------------------------- + N^2 / 2 search for neighbor pairs with full Newton's 3rd law + every pair stored exactly once by some processor + decision on ghost atoms based on itag,jtag tests +------------------------------------------------------------------------- */ + +void Neighbor::half_nsq_newton_omp(NeighList *list) +{ + const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; + const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0; + + NEIGH_OMP_INIT; +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list) +#endif + NEIGH_OMP_SETUP(nlocal); + + int i,j,n,itype,jtype,itag,jtag,which; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + int *neighptr; + + // loop over each atom, storing neighbors + + int **special = atom->special; + int **nspecial = atom->nspecial; + int *tag = atom->tag; + + double **x = atom->x; + int *type = atom->type; + int *mask = atom->mask; + int *molecule = atom->molecule; + int nall = atom->nlocal + atom->nghost; + int molecular = atom->molecular; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + + int npage = tid; + int npnt = 0; + + for (i = ifrom; i < ito; i++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage >= list->maxpage) list->add_pages(nthreads); + } + + neighptr = &(list->pages[npage][npnt]); + n = 0; + + itag = tag[i]; + itype = type[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // loop over remaining atoms, owned and ghost + // itag = jtag is possible for long cutoffs that include images of self + + for (j = i+1; j < nall; j++) { + if (includegroup && !(mask[j] & bitmask)) continue; + + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (x[j][2] < ztmp) continue; + if (x[j][2] == ztmp) { + if (x[j][1] < ytmp) continue; + if (x[j][1] == ytmp && x[j][0] < xtmp) continue; + } + } + } + + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + which = find_special(special[i],nspecial[i],tag[j]); + if (which == 0) neighptr[n++] = j; + else if (domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + } + } + + ilist[i] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + NEIGH_OMP_CLOSE; + list->inum = nlocal; +} diff --git a/src/USER-OMP/neigh_respa_omp.cpp b/src/USER-OMP/neigh_respa_omp.cpp new file mode 100644 index 0000000000..8c4b15d505 --- /dev/null +++ b/src/USER-OMP/neigh_respa_omp.cpp @@ -0,0 +1,1031 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "neighbor.h" +#include "neighbor_omp.h" +#include "neigh_list.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "group.h" +#include "error.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- + multiple respa lists + N^2 / 2 search for neighbor pairs with partial Newton's 3rd law + pair added to list if atoms i and j are both owned and i < j + pair added if j is ghost (also stored by proc owning j) +------------------------------------------------------------------------- */ + +void Neighbor::respa_nsq_no_newton_omp(NeighList *list) +{ + const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; + const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0; + + NEIGH_OMP_INIT; + + NeighList *listinner = list->listinner; + if (nthreads > listinner->maxpage) + listinner->add_pages(nthreads - listinner->maxpage); + + NeighList *listmiddle; + const int respamiddle = list->respamiddle; + if (respamiddle) { + listmiddle = list->listmiddle; + if (nthreads > listmiddle->maxpage) + listmiddle->add_pages(nthreads - listmiddle->maxpage); + } + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list,listinner,listmiddle) +#endif + NEIGH_OMP_SETUP(nlocal); + + int i,j,n,itype,jtype,n_inner,n_middle; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + int *neighptr,*neighptr_inner,*neighptr_middle; + + // loop over each atom, storing neighbors + + int **special = atom->special; + int **nspecial = atom->nspecial; + int *tag = atom->tag; + + double **x = atom->x; + int *type = atom->type; + int *mask = atom->mask; + int *molecule = atom->molecule; + int nall = atom->nlocal + atom->nghost; + int molecular = atom->molecular; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + + int *ilist_inner = listinner->ilist; + int *numneigh_inner = listinner->numneigh; + int **firstneigh_inner = listinner->firstneigh; + + int *ilist_middle,*numneigh_middle,**firstneigh_middle; + if (respamiddle) { + ilist_middle = listmiddle->ilist; + numneigh_middle = listmiddle->numneigh; + firstneigh_middle = listmiddle->firstneigh; + } + + int npage = tid; + int npnt = 0; + int npage_inner = tid; + int npnt_inner = 0; + int npage_middle = tid; + int npnt_middle = 0; + + int which = 0; + int minchange = 0; + + for (i = ifrom; i < ito; i++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage == list->maxpage) list->add_pages(nthreads); + } + neighptr = &(list->pages[npage][npnt]); + n = 0; + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt_inner < oneatom) { + npnt_inner = 0; + npage_inner += nthreads; + if (npage_inner == listinner->maxpage) listinner->add_pages(nthreads); + } + neighptr_inner = &(listinner->pages[npage_inner][npnt_inner]); + n_inner = 0; + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (respamiddle) { + if (pgsize - npnt_middle < oneatom) { + npnt_middle = 0; + npage_middle += nthreads; + if (npage_middle == listmiddle->maxpage) listmiddle->add_pages(nthreads); + } + neighptr_middle = &(listmiddle->pages[npage_middle][npnt_middle]); + n_middle = 0; + } + + itype = type[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // loop over remaining atoms, owned and ghost + + for (j = i+1; j < nall; j++) { + if (includegroup && !(mask[j] & bitmask)) continue; + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + which = find_special(special[i],nspecial[i],tag[j]); + if (which == 0) neighptr[n++] = j; + else if (minchange = domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + + if (rsq < cut_inner_sq) { + if (which == 0) neighptr_inner[n_inner++] = j; + else if (minchange) neighptr_inner[n_inner++] = j; + else if (which > 0) neighptr_inner[n_inner++] = j ^ (which << SBBITS); + } + + if (respamiddle && rsq < cut_middle_sq && rsq > cut_middle_inside_sq) { + if (which == 0) neighptr_middle[n_middle++] = j; + else if (minchange) neighptr_middle[n_middle++] = j; + else if (which > 0) + neighptr_middle[n_middle++] = j ^ (which << SBBITS); + } + } + } + + ilist[i] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + + ilist_inner[i] = i; + firstneigh_inner[i] = neighptr_inner; + numneigh_inner[i] = n_inner; + npnt_inner += n_inner; + if (n_inner > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + + if (respamiddle) { + ilist_middle[i] = i; + firstneigh_middle[i] = neighptr_middle; + numneigh_middle[i] = n_middle; + npnt_middle += n_middle; + if (n_middle > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + } + NEIGH_OMP_CLOSE; + list->inum = nlocal; + listinner->inum = nlocal; + if (respamiddle) listmiddle->inum = nlocal; +} + +/* ---------------------------------------------------------------------- + multiple respa lists + N^2 / 2 search for neighbor pairs with full Newton's 3rd law + pair added to list if atoms i and j are both owned and i < j + if j is ghost only me or other proc adds pair + decision based on itag,jtag tests +------------------------------------------------------------------------- */ + +void Neighbor::respa_nsq_newton_omp(NeighList *list) +{ + const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; + const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0; + + NEIGH_OMP_INIT; + + NeighList *listinner = list->listinner; + if (nthreads > listinner->maxpage) + listinner->add_pages(nthreads - listinner->maxpage); + + NeighList *listmiddle; + const int respamiddle = list->respamiddle; + if (respamiddle) { + listmiddle = list->listmiddle; + if (nthreads > listmiddle->maxpage) + listmiddle->add_pages(nthreads - listmiddle->maxpage); + } + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list,listinner,listmiddle) +#endif + NEIGH_OMP_SETUP(nlocal); + + int i,j,n,itype,jtype,itag,jtag,n_inner,n_middle; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + int *neighptr,*neighptr_inner,*neighptr_middle; + + // loop over each atom, storing neighbors + + int **special = atom->special; + int **nspecial = atom->nspecial; + int *tag = atom->tag; + + double **x = atom->x; + int *type = atom->type; + int *mask = atom->mask; + int *molecule = atom->molecule; + int nall = atom->nlocal + atom->nghost; + int molecular = atom->molecular; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + + int *ilist_inner = listinner->ilist; + int *numneigh_inner = listinner->numneigh; + int **firstneigh_inner = listinner->firstneigh; + + int *ilist_middle,*numneigh_middle,**firstneigh_middle; + if (respamiddle) { + ilist_middle = listmiddle->ilist; + numneigh_middle = listmiddle->numneigh; + firstneigh_middle = listmiddle->firstneigh; + } + + int npage = tid; + int npnt = 0; + int npage_inner = tid; + int npnt_inner = 0; + int npage_middle = tid; + int npnt_middle = 0; + + int which = 0; + int minchange = 0; + + for (i = ifrom; i < ito; i++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage == list->maxpage) list->add_pages(nthreads); + } + neighptr = &(list->pages[npage][npnt]); + n = 0; + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt_inner < oneatom) { + npnt_inner = 0; + npage_inner += nthreads; + if (npage_inner == listinner->maxpage) listinner->add_pages(nthreads); + } + neighptr_inner = &(listinner->pages[npage_inner][npnt_inner]); + n_inner = 0; + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (respamiddle) { + if (pgsize - npnt_middle < oneatom) { + npnt_middle = 0; + npage_middle += nthreads; + if (npage_middle == listmiddle->maxpage) listmiddle->add_pages(nthreads); + } + neighptr_middle = &(listmiddle->pages[npage_middle][npnt_middle]); + n_middle = 0; + } + + itag = tag[i]; + itype = type[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // loop over remaining atoms, owned and ghost + + for (j = i+1; j < nall; j++) { + if (includegroup && !(mask[j] & bitmask)) continue; + + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (x[j][2] < ztmp) continue; + if (x[j][2] == ztmp) { + if (x[j][1] < ytmp) continue; + if (x[j][1] == ytmp && x[j][0] < xtmp) continue; + } + } + } + + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + which = find_special(special[i],nspecial[i],tag[j]); + if (which == 0) neighptr[n++] = j; + else if (minchange = domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + + if (rsq < cut_inner_sq) { + if (which == 0) neighptr_inner[n_inner++] = j; + else if (minchange) neighptr_inner[n_inner++] = j; + else if (which > 0) neighptr_inner[n_inner++] = j ^ (which << SBBITS); + } + + if (respamiddle && + rsq < cut_middle_sq && rsq > cut_middle_inside_sq) { + if (which == 0) neighptr_middle[n_middle++] = j; + else if (minchange) neighptr_middle[n_middle++] = j; + else if (which > 0) + neighptr_middle[n_middle++] = j ^ (which << SBBITS); + } + } + } + + ilist[i] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + + ilist_inner[i] = i; + firstneigh_inner[i] = neighptr_inner; + numneigh_inner[i] = n_inner; + npnt_inner += n_inner; + if (n_inner > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + + if (respamiddle) { + ilist_middle[i] = i; + firstneigh_middle[i] = neighptr_middle; + numneigh_middle[i] = n_middle; + npnt_middle += n_middle; + if (n_middle > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + } + NEIGH_OMP_CLOSE; + list->inum = nlocal; + listinner->inum = nlocal; + if (respamiddle) listmiddle->inum = nlocal; +} + +/* ---------------------------------------------------------------------- + multiple respa lists + binned neighbor list construction with partial Newton's 3rd law + each owned atom i checks own bin and surrounding bins in non-Newton stencil + pair stored once if i,j are both owned and i < j + pair stored by me if j is ghost (also stored by proc owning j) +------------------------------------------------------------------------- */ + +void Neighbor::respa_bin_no_newton_omp(NeighList *list) +{ + // bin local & ghost atoms + + bin_atoms(); + + const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; + + NEIGH_OMP_INIT; + + NeighList *listinner = list->listinner; + if (nthreads > listinner->maxpage) + listinner->add_pages(nthreads - listinner->maxpage); + + NeighList *listmiddle; + const int respamiddle = list->respamiddle; + if (respamiddle) { + listmiddle = list->listmiddle; + if (nthreads > listmiddle->maxpage) + listmiddle->add_pages(nthreads - listmiddle->maxpage); + } + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list,listinner,listmiddle) +#endif + NEIGH_OMP_SETUP(nlocal); + + int i,j,k,n,itype,jtype,ibin,n_inner,n_middle; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + int *neighptr,*neighptr_inner,*neighptr_middle; + + // loop over each atom, storing neighbors + + int **special = atom->special; + int **nspecial = atom->nspecial; + int *tag = atom->tag; + + double **x = atom->x; + int *type = atom->type; + int *mask = atom->mask; + int *molecule = atom->molecule; + int molecular = atom->molecular; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + int nstencil = list->nstencil; + int *stencil = list->stencil; + + int *ilist_inner = listinner->ilist; + int *numneigh_inner = listinner->numneigh; + int **firstneigh_inner = listinner->firstneigh; + + int *ilist_middle,*numneigh_middle,**firstneigh_middle; + if (respamiddle) { + ilist_middle = listmiddle->ilist; + numneigh_middle = listmiddle->numneigh; + firstneigh_middle = listmiddle->firstneigh; + } + + int npage = tid; + int npnt = 0; + int npage_inner = tid; + int npnt_inner = 0; + int npage_middle = tid; + int npnt_middle = 0; + + int which = 0; + int minchange = 0; + + for (i = ifrom; i < ito; i++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage == list->maxpage) list->add_pages(nthreads); + } + neighptr = &(list->pages[npage][npnt]); + n = 0; + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt_inner < oneatom) { + npnt_inner = 0; + npage_inner += nthreads; + if (npage_inner == listinner->maxpage) listinner->add_pages(nthreads); + } + neighptr_inner = &(listinner->pages[npage_inner][npnt_inner]); + n_inner = 0; + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (respamiddle) { + if (pgsize - npnt_middle < oneatom) { + npnt_middle = 0; + npage_middle += nthreads; + if (npage_middle == listmiddle->maxpage) listmiddle->add_pages(nthreads); + } + neighptr_middle = &(listmiddle->pages[npage_middle][npnt_middle]); + n_middle = 0; + } + + itype = type[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + ibin = coord2bin(x[i]); + + // loop over all atoms in surrounding bins in stencil including self + // only store pair if i < j + // stores own/own pairs only once + // stores own/ghost pairs on both procs + + for (k = 0; k < nstencil; k++) { + for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { + if (j <= i) continue; + + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + which = find_special(special[i],nspecial[i],tag[j]); + if (which == 0) neighptr[n++] = j; + else if (minchange = domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + + if (rsq < cut_inner_sq) { + if (which == 0) neighptr_inner[n_inner++] = j; + else if (minchange) neighptr_inner[n_inner++] = j; + else if (which > 0) + neighptr_inner[n_inner++] = j ^ (which << SBBITS); + } + + if (respamiddle && + rsq < cut_middle_sq && rsq > cut_middle_inside_sq) { + if (which == 0) neighptr_middle[n_middle++] = j; + else if (minchange) neighptr_middle[n_middle++] = j; + else if (which > 0) + neighptr_middle[n_middle++] = j ^ (which << SBBITS); + } + } + } + } + + ilist[i] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + + ilist_inner[i] = i; + firstneigh_inner[i] = neighptr_inner; + numneigh_inner[i] = n_inner; + npnt_inner += n_inner; + if (n_inner > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + + if (respamiddle) { + ilist_middle[i] = i; + firstneigh_middle[i] = neighptr_middle; + numneigh_middle[i] = n_middle; + npnt_middle += n_middle; + if (n_middle > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + } + NEIGH_OMP_CLOSE; + list->inum = nlocal; + listinner->inum = nlocal; + if (respamiddle) listmiddle->inum = nlocal; +} + +/* ---------------------------------------------------------------------- + multiple respa lists + binned neighbor list construction with full Newton's 3rd law + each owned atom i checks its own bin and other bins in Newton stencil + every pair stored exactly once by some processor +------------------------------------------------------------------------- */ + +void Neighbor::respa_bin_newton_omp(NeighList *list) +{ + // bin local & ghost atoms + + bin_atoms(); + + const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; + + NEIGH_OMP_INIT; + + NeighList *listinner = list->listinner; + if (nthreads > listinner->maxpage) + listinner->add_pages(nthreads - listinner->maxpage); + + NeighList *listmiddle; + const int respamiddle = list->respamiddle; + if (respamiddle) { + listmiddle = list->listmiddle; + if (nthreads > listmiddle->maxpage) + listmiddle->add_pages(nthreads - listmiddle->maxpage); + } + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list,listinner,listmiddle) +#endif + NEIGH_OMP_SETUP(nlocal); + + int i,j,k,n,itype,jtype,ibin,n_inner,n_middle; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + int *neighptr,*neighptr_inner,*neighptr_middle; + + // loop over each atom, storing neighbors + + int **special = atom->special; + int **nspecial = atom->nspecial; + int *tag = atom->tag; + + double **x = atom->x; + int *type = atom->type; + int *mask = atom->mask; + int *molecule = atom->molecule; + int molecular = atom->molecular; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + int nstencil = list->nstencil; + int *stencil = list->stencil; + + int *ilist_inner = listinner->ilist; + int *numneigh_inner = listinner->numneigh; + int **firstneigh_inner = listinner->firstneigh; + + int *ilist_middle,*numneigh_middle,**firstneigh_middle; + if (respamiddle) { + ilist_middle = listmiddle->ilist; + numneigh_middle = listmiddle->numneigh; + firstneigh_middle = listmiddle->firstneigh; + } + + int npage = tid; + int npnt = 0; + int npage_inner = tid; + int npnt_inner = 0; + int npage_middle = tid; + int npnt_middle = 0; + + int which = 0; + int minchange = 0; + + for (i = ifrom; i < ito; i++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage == list->maxpage) list->add_pages(nthreads); + } + neighptr = &(list->pages[npage][npnt]); + n = 0; + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt_inner < oneatom) { + npnt_inner = 0; + npage_inner += nthreads; + if (npage_inner == listinner->maxpage) listinner->add_pages(nthreads); + } + neighptr_inner = &(listinner->pages[npage_inner][npnt_inner]); + n_inner = 0; + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (respamiddle) { + if (pgsize - npnt_middle < oneatom) { + npnt_middle = 0; + npage_middle += nthreads; + if (npage_middle == listmiddle->maxpage) listmiddle->add_pages(nthreads); + } + neighptr_middle = &(listmiddle->pages[npage_middle][npnt_middle]); + n_middle = 0; + } + + itype = type[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // loop over rest of atoms in i's bin, ghosts are at end of linked list + // if j is owned atom, store it, since j is beyond i in linked list + // if j is ghost, only store if j coords are "above and to the right" of i + + for (j = bins[i]; j >= 0; j = bins[j]) { + if (j >= nlocal) { + if (x[j][2] < ztmp) continue; + if (x[j][2] == ztmp) { + if (x[j][1] < ytmp) continue; + if (x[j][1] == ytmp && x[j][0] < xtmp) continue; + } + } + + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + which = find_special(special[i],nspecial[i],tag[j]); + if (which == 0) neighptr[n++] = j; + else if (minchange = domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + + if (rsq < cut_inner_sq) { + if (which == 0) neighptr_inner[n_inner++] = j; + else if (minchange) neighptr_inner[n_inner++] = j; + else if (which > 0) neighptr_inner[n_inner++] = j ^ (which << SBBITS); + } + + if (respamiddle && + rsq < cut_middle_sq && rsq > cut_middle_inside_sq) { + if (which == 0) neighptr_middle[n_middle++] = j; + else if (minchange) neighptr_middle[n_middle++] = j; + else if (which > 0) + neighptr_middle[n_middle++] = j ^ (which << SBBITS); + } + } + } + + // loop over all atoms in other bins in stencil, store every pair + + ibin = coord2bin(x[i]); + for (k = 0; k < nstencil; k++) { + for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + which = find_special(special[i],nspecial[i],tag[j]); + if (which == 0) neighptr[n++] = j; + else if (minchange = domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + + if (rsq < cut_inner_sq) { + if (which == 0) neighptr_inner[n_inner++] = j; + else if (minchange) neighptr_inner[n_inner++] = j; + else if (which > 0) + neighptr_inner[n_inner++] = j ^ (which << SBBITS); + } + + if (respamiddle && + rsq < cut_middle_sq && rsq > cut_middle_inside_sq) { + if (which == 0) neighptr_middle[n_middle++] = j; + else if (minchange) neighptr_middle[n_middle++] = j; + else if (which > 0) + neighptr_middle[n_middle++] = j ^ (which << SBBITS); + } + } + } + } + + ilist[i] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + + ilist_inner[i] = i; + firstneigh_inner[i] = neighptr_inner; + numneigh_inner[i] = n_inner; + npnt_inner += n_inner; + if (n_inner > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + + if (respamiddle) { + ilist_middle[i] = i; + firstneigh_middle[i] = neighptr_middle; + numneigh_middle[i] = n_middle; + npnt_middle += n_middle; + if (n_middle > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + } + NEIGH_OMP_CLOSE; + list->inum = nlocal; + listinner->inum = nlocal; + if (respamiddle) listmiddle->inum = nlocal; +} + +/* ---------------------------------------------------------------------- + multiple respa lists + binned neighbor list construction with Newton's 3rd law for triclinic + each owned atom i checks its own bin and other bins in triclinic stencil + every pair stored exactly once by some processor +------------------------------------------------------------------------- */ + +void Neighbor::respa_bin_newton_tri_omp(NeighList *list) +{ + // bin local & ghost atoms + + bin_atoms(); + + const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; + + NEIGH_OMP_INIT; + + NeighList *listinner = list->listinner; + if (nthreads > listinner->maxpage) + listinner->add_pages(nthreads - listinner->maxpage); + + NeighList *listmiddle; + const int respamiddle = list->respamiddle; + if (respamiddle) { + listmiddle = list->listmiddle; + if (nthreads > listmiddle->maxpage) + listmiddle->add_pages(nthreads - listmiddle->maxpage); + } + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list,listinner,listmiddle) +#endif + NEIGH_OMP_SETUP(nlocal); + + int i,j,k,n,itype,jtype,ibin,n_inner,n_middle; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + int *neighptr,*neighptr_inner,*neighptr_middle; + + // loop over each atom, storing neighbors + + int **special = atom->special; + int **nspecial = atom->nspecial; + int *tag = atom->tag; + + double **x = atom->x; + int *type = atom->type; + int *mask = atom->mask; + int *molecule = atom->molecule; + int molecular = atom->molecular; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + int nstencil = list->nstencil; + int *stencil = list->stencil; + + int *ilist_inner = listinner->ilist; + int *numneigh_inner = listinner->numneigh; + int **firstneigh_inner = listinner->firstneigh; + + int *ilist_middle,*numneigh_middle,**firstneigh_middle; + if (respamiddle) { + ilist_middle = listmiddle->ilist; + numneigh_middle = listmiddle->numneigh; + firstneigh_middle = listmiddle->firstneigh; + } + + int npage = tid; + int npnt = 0; + int npage_inner = tid; + int npnt_inner = 0; + int npage_middle = tid; + int npnt_middle = 0; + + int which = 0; + int minchange = 0; + + for (i = ifrom; i < ito; i++) { + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage == list->maxpage) list->add_pages(nthreads); + } + neighptr = &(list->pages[npage][npnt]); + n = 0; + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt_inner < oneatom) { + npnt_inner = 0; + npage_inner += nthreads; + if (npage_inner == listinner->maxpage) listinner->add_pages(nthreads); + } + neighptr_inner = &(listinner->pages[npage_inner][npnt_inner]); + n_inner = 0; + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (respamiddle) { + if (pgsize - npnt_middle < oneatom) { + npnt_middle = 0; + npage_middle += nthreads; + if (npage_middle == listmiddle->maxpage) listmiddle->add_pages(nthreads); + } + neighptr_middle = &(listmiddle->pages[npage_middle][npnt_middle]); + n_middle = 0; + } + + itype = type[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // loop over all atoms in bins in stencil + // pairs for atoms j "below" i are excluded + // below = lower z or (equal z and lower y) or (equal zy and lower x) + // (equal zyx and j <= i) + // latter excludes self-self interaction but allows superposed atoms + + ibin = coord2bin(x[i]); + for (k = 0; k < nstencil; k++) { + for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { + if (x[j][2] < ztmp) continue; + if (x[j][2] == ztmp) { + if (x[j][1] < ytmp) continue; + if (x[j][1] == ytmp) { + if (x[j][0] < xtmp) continue; + if (x[j][0] == xtmp && j <= i) continue; + } + } + + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular) { + which = find_special(special[i],nspecial[i],tag[j]); + if (which == 0) neighptr[n++] = j; + else if (minchange = domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + + if (rsq < cut_inner_sq) { + if (which == 0) neighptr_inner[n_inner++] = j; + else if (minchange) neighptr_inner[n_inner++] = j; + else if (which > 0) + neighptr_inner[n_inner++] = j ^ (which << SBBITS); + } + + if (respamiddle && + rsq < cut_middle_sq && rsq > cut_middle_inside_sq) { + if (which == 0) neighptr_middle[n_middle++] = j; + else if (minchange) neighptr_middle[n_middle++] = j; + else if (which > 0) + neighptr_middle[n_middle++] = j ^ (which << SBBITS); + } + } + } + } + + ilist[i] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + npnt += n; + if (n > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + + ilist_inner[i] = i; + firstneigh_inner[i] = neighptr_inner; + numneigh_inner[i] = n_inner; + npnt_inner += n_inner; + if (n_inner > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + + if (respamiddle) { + ilist_middle[i] = i; + firstneigh_middle[i] = neighptr_middle; + numneigh_middle[i] = n_middle; + npnt_middle += n_middle; + if (n_middle > oneatom) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + } + NEIGH_OMP_CLOSE; + list->inum = nlocal; + listinner->inum = nlocal; + if (respamiddle) listmiddle->inum = nlocal; +} diff --git a/src/USER-OMP/neighbor_omp.h b/src/USER-OMP/neighbor_omp.h new file mode 100644 index 0000000000..fd27ba8b61 --- /dev/null +++ b/src/USER-OMP/neighbor_omp.h @@ -0,0 +1,59 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifndef LMP_NEIGHBOR_OMP_H +#define LMP_NEIGHBOR_OMP_H + +#if defined(_OPENMP) +#include +#endif + +namespace LAMMPS_NS { + +// these macros hide some ugly and redundant OpenMP related stuff +#if defined(_OPENMP) + +// make sure we have at least one page for each thread +#define NEIGH_OMP_INIT \ + const int nthreads = comm->nthreads; \ + if (nthreads > list->maxpage) \ + list->add_pages(nthreads - list->maxpage) + +// get thread id and then assign each thread a fixed chunk of atoms +#define NEIGH_OMP_SETUP(num) \ + { \ + const int tid = omp_get_thread_num(); \ + const int idelta = 1 + num/nthreads; \ + const int ifrom = tid*idelta; \ + const int ito = ((ifrom + idelta) > num) \ + ? num : (ifrom+idelta); \ + +#define NEIGH_OMP_CLOSE } + +#else /* !defined(_OPENMP) */ + +#define NEIGH_OMP_INIT \ + const int nthreads = comm->nthreads; + +#define NEIGH_OMP_SETUP(num) \ + const int tid = 0; \ + const int ifrom = 0; \ + const int ito = num + +#define NEIGH_OMP_CLOSE + +#endif + +} + +#endif diff --git a/src/USER-OMP/pair_adp_omp.cpp b/src/USER-OMP/pair_adp_omp.cpp new file mode 100644 index 0000000000..4d05b16c70 --- /dev/null +++ b/src/USER-OMP/pair_adp_omp.cpp @@ -0,0 +1,390 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "string.h" + +#include "pair_adp_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "memory.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairADPOMP::PairADPOMP(LAMMPS *lmp) : + PairADP(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairADPOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0; + + const int nlocal = atom->nlocal; + const int nall = nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + // grow energy and fp arrays if necessary + // need to be atom->nmax in length + + if (atom->nmax > nmax) { + memory->destroy(rho); + memory->destroy(fp); + memory->destroy(mu); + memory->destroy(lambda); + nmax = atom->nmax; + memory->create(rho,nthreads*nmax,"pair:rho"); + memory->create(fp,nmax,"pair:fp"); + memory->create(mu,nthreads*nmax,3,"pair:mu"); + memory->create(lambda,nthreads*nmax,6,"pair:lambda"); + } + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (force->newton_pair) + thr->init_adp(nall, rho, mu, lambda); + else + thr->init_adp(nlocal, rho, mu, lambda); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairADPOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,m,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r,p,rhoip,rhojp,z2,z2p,recip,phip,psip,phi; + double u2,u2p,w2,w2p,nu; + double *coeff; + int *ilist,*jlist,*numneigh,**firstneigh; + double delmux,delmuy,delmuz,trdelmu,tradellam; + double adpx,adpy,adpz,fx,fy,fz; + double sumlamxx,sumlamyy,sumlamzz,sumlamyz,sumlamxz,sumlamxy; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const rho_t = thr->get_rho(); + double * const * const mu_t = thr->get_mu(); + double * const * const lambda_t = thr->get_lambda(); + const int tid = thr->get_tid(); + + int *type = atom->type; + int nlocal = atom->nlocal; + int nall = nlocal + atom->nghost; + + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // rho = density at each atom + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cutforcesq) { + jtype = type[j]; + p = sqrt(rsq)*rdr + 1.0; + m = static_cast (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + coeff = rhor_spline[type2rhor[jtype][itype]][m]; + rho_t[i] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + coeff = u2r_spline[type2u2r[jtype][itype]][m]; + u2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + mu_t[i][0] += u2*delx; + mu_t[i][1] += u2*dely; + mu_t[i][2] += u2*delz; + coeff = w2r_spline[type2w2r[jtype][itype]][m]; + w2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + lambda_t[i][0] += w2*delx*delx; + lambda_t[i][1] += w2*dely*dely; + lambda_t[i][2] += w2*delz*delz; + lambda_t[i][3] += w2*dely*delz; + lambda_t[i][4] += w2*delx*delz; + lambda_t[i][5] += w2*delx*dely; + + if (NEWTON_PAIR || j < nlocal) { + // verify sign difference for mu and lambda + coeff = rhor_spline[type2rhor[itype][jtype]][m]; + rho_t[j] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + coeff = u2r_spline[type2u2r[itype][jtype]][m]; + u2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + mu_t[j][0] -= u2*delx; + mu_t[j][1] -= u2*dely; + mu_t[j][2] -= u2*delz; + coeff = w2r_spline[type2w2r[itype][jtype]][m]; + w2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + lambda_t[j][0] += w2*delx*delx; + lambda_t[j][1] += w2*dely*dely; + lambda_t[j][2] += w2*delz*delz; + lambda_t[j][3] += w2*dely*delz; + lambda_t[j][4] += w2*delx*delz; + lambda_t[j][5] += w2*delx*dely; + } + } + } + } + + // wait until all threads are done with computation + sync_threads(); + + // communicate and sum densities + + if (NEWTON_PAIR) { + // reduce per thread density + data_reduce_thr(&(rho[0]), nall, comm->nthreads, 1, tid); + data_reduce_thr(&(mu[0][0]), nall, comm->nthreads, 3, tid); + data_reduce_thr(&(lambda[0][0]), nall, comm->nthreads, 6, tid); + + // wait until reduction is complete + sync_threads(); + +#if defined(_OPENMP) +#pragma omp master +#endif + { comm->reverse_comm_pair(this); } + + // wait until master thread is done with communication + sync_threads(); + + } else { + // reduce per thread density + data_reduce_thr(&(rho[0]), nlocal, comm->nthreads, 1, tid); + data_reduce_thr(&(mu[0][0]), nlocal, comm->nthreads, 3, tid); + data_reduce_thr(&(lambda[0][0]), nlocal, comm->nthreads, 6, tid); + + // wait until reduction is complete + sync_threads(); + } + + // fp = derivative of embedding energy at each atom + // phi = embedding energy at each atom + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + p = rho[i]*rdrho + 1.0; + m = static_cast (p); + m = MAX(1,MIN(m,nrho-1)); + p -= m; + p = MIN(p,1.0); + coeff = frho_spline[type2frho[type[i]]][m]; + fp[i] = (coeff[0]*p + coeff[1])*p + coeff[2]; + if (EFLAG) { + phi = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + phi += 0.5*(mu[i][0]*mu[i][0]+mu[i][1]*mu[i][1]+mu[i][2]*mu[i][2]); + phi += 0.5*(lambda[i][0]*lambda[i][0]+lambda[i][1]* + lambda[i][1]+lambda[i][2]*lambda[i][2]); + phi += 1.0*(lambda[i][3]*lambda[i][3]+lambda[i][4]* + lambda[i][4]+lambda[i][5]*lambda[i][5]); + phi -= 1.0/6.0*(lambda[i][0]+lambda[i][1]+lambda[i][2])* + (lambda[i][0]+lambda[i][1]+lambda[i][2]); + e_tally_thr(this,i,i,nlocal,/* newton_pair */ 1, phi, 0.0, thr); + } + } + + // wait until all theads are done with computation + sync_threads(); + + // communicate derivative of embedding function + // MPI communication only on master thread +#if defined(_OPENMP) +#pragma omp master +#endif + { comm->forward_comm_pair(this); } + + // wait until master thread is done with communication + sync_threads(); + + // compute forces on each atom + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + fxtmp = fytmp = fztmp = 0.0; + + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cutforcesq) { + jtype = type[j]; + r = sqrt(rsq); + p = r*rdr + 1.0; + m = static_cast (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + + // rhoip = derivative of (density at atom j due to atom i) + // rhojp = derivative of (density at atom i due to atom j) + // phi = pair potential energy + // phip = phi' + // z2 = phi * r + // z2p = (phi * r)' = (phi' r) + phi + // u2 = u + // u2p = u' + // w2 = w + // w2p = w' + // psip needs both fp[i] and fp[j] terms since r_ij appears in two + // terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji) + // hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip + + coeff = rhor_spline[type2rhor[itype][jtype]][m]; + rhoip = (coeff[0]*p + coeff[1])*p + coeff[2]; + coeff = rhor_spline[type2rhor[jtype][itype]][m]; + rhojp = (coeff[0]*p + coeff[1])*p + coeff[2]; + coeff = z2r_spline[type2z2r[itype][jtype]][m]; + z2p = (coeff[0]*p + coeff[1])*p + coeff[2]; + z2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + coeff = u2r_spline[type2u2r[itype][jtype]][m]; + u2p = (coeff[0]*p + coeff[1])*p + coeff[2]; + u2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + coeff = w2r_spline[type2w2r[itype][jtype]][m]; + w2p = (coeff[0]*p + coeff[1])*p + coeff[2]; + w2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + + recip = 1.0/r; + phi = z2*recip; + phip = z2p*recip - phi*recip; + psip = fp[i]*rhojp + fp[j]*rhoip + phip; + fpair = -psip*recip; + + delmux = mu[i][0]-mu[j][0]; + delmuy = mu[i][1]-mu[j][1]; + delmuz = mu[i][2]-mu[j][2]; + trdelmu = delmux*delx+delmuy*dely+delmuz*delz; + sumlamxx = lambda[i][0]+lambda[j][0]; + sumlamyy = lambda[i][1]+lambda[j][1]; + sumlamzz = lambda[i][2]+lambda[j][2]; + sumlamyz = lambda[i][3]+lambda[j][3]; + sumlamxz = lambda[i][4]+lambda[j][4]; + sumlamxy = lambda[i][5]+lambda[j][5]; + tradellam = sumlamxx*delx*delx+sumlamyy*dely*dely+ + sumlamzz*delz*delz+2.0*sumlamxy*delx*dely+ + 2.0*sumlamxz*delx*delz+2.0*sumlamyz*dely*delz; + nu = sumlamxx+sumlamyy+sumlamzz; + + adpx = delmux*u2 + trdelmu*u2p*delx*recip + + 2.0*w2*(sumlamxx*delx+sumlamxy*dely+sumlamxz*delz) + + w2p*delx*recip*tradellam - 1.0/3.0*nu*(w2p*r+2.0*w2)*delx; + adpy = delmuy*u2 + trdelmu*u2p*dely*recip + + 2.0*w2*(sumlamxy*delx+sumlamyy*dely+sumlamyz*delz) + + w2p*dely*recip*tradellam - 1.0/3.0*nu*(w2p*r+2.0*w2)*dely; + adpz = delmuz*u2 + trdelmu*u2p*delz*recip + + 2.0*w2*(sumlamxz*delx+sumlamyz*dely+sumlamzz*delz) + + w2p*delz*recip*tradellam - 1.0/3.0*nu*(w2p*r+2.0*w2)*delz; + adpx*=-1.0; adpy*=-1.0; adpz*=-1.0; + + fx = delx*fpair+adpx; + fy = dely*fpair+adpy; + fz = delz*fpair+adpz; + + fxtmp += fx; + fytmp += fy; + fztmp += fz; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= fx; + f[j][1] -= fy; + f[j][2] -= fz; + } + + if (EFLAG) evdwl = phi; + if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0, + fx,fy,fz,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairADPOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairADP::memory_usage(); + bytes += (comm->nthreads-1) * nmax * (10*sizeof(double) + 3*sizeof(double *)); + return bytes; +} diff --git a/src/USER-OMP/pair_adp_omp.h b/src/USER-OMP/pair_adp_omp.h new file mode 100644 index 0000000000..e5c4c4af9f --- /dev/null +++ b/src/USER-OMP/pair_adp_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(adp/omp,PairADPOMP) + +#else + +#ifndef LMP_PAIR_ADP_OMP_H +#define LMP_PAIR_ADP_OMP_H + +#include "pair_adp.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairADPOMP : public PairADP, public ThrOMP { + + public: + PairADPOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int iifrom, int iito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_airebo_omp.cpp b/src/USER-OMP/pair_airebo_omp.cpp new file mode 100644 index 0000000000..5000ea2a09 --- /dev/null +++ b/src/USER-OMP/pair_airebo_omp.cpp @@ -0,0 +1,2767 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_airebo_omp.h" +#include "atom.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "memory.h" +#include "neighbor.h" +#include "neigh_list.h" + +#if defined(_OPENMP) +#include +#endif + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define TOL 1.0e-9 + +/* ---------------------------------------------------------------------- */ + +PairAIREBOOMP::PairAIREBOOMP(LAMMPS *lmp) : + PairAIREBO(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairAIREBOOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = vflag_atom = 0; + + REBO_neigh_thr(); + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + FREBO_thr(ifrom,ito,evflag,eflag,vflag_atom,thr); + if (ljflag) FLJ_thr(ifrom,ito,evflag,eflag,vflag_atom,thr); + if (torflag) TORSION_thr(ifrom,ito,evflag,eflag,thr); + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- + Bij function +------------------------------------------------------------------------- */ + +double PairAIREBOOMP::bondorder_thr(int i, int j, double rij[3], double rijmag, + double VA, int vflag_atom, ThrData * const thr) +{ + int atomi,atomj,k,n,l,atomk,atoml,atomn,atom1,atom2,atom3,atom4; + int itype,jtype,ktype,ltype,ntype; + double rik[3],rjl[3],rkn[3],rji[3],rki[3],rlj[3],rknmag,dNki,dwjl,bij; + double NijC,NijH,NjiC,NjiH,wik,dwik,dwkn,wjl; + double rikmag,rjlmag,cosjik,cosijl,g,tmp2,tmp3; + double Etmp,pij,tmp,wij,dwij,NconjtmpI,NconjtmpJ,Nki,Nlj,dS; + double lamdajik,lamdaijl,dgdc,dgdN,pji,Nijconj,piRC; + double dcosjikdri[3],dcosijldri[3],dcosjikdrk[3]; + double dN2[2],dN3[3]; + double dcosjikdrj[3],dcosijldrj[3],dcosijldrl[3]; + double Tij; + double r32[3],r32mag,cos321,r43[3],r13[3]; + double dNlj; + double om1234,rln[3]; + double rlnmag,dwln,r23[3],r23mag,r21[3],r21mag; + double w21,dw21,r34[3],r34mag,cos234,w34,dw34; + double cross321[3],cross234[3],prefactor,SpN; + double fcijpc,fcikpc,fcjlpc,fcjkpc,fcilpc; + double dt2dik[3],dt2djl[3],dt2dij[3],aa,aaa1,aaa2,at2,cw,cwnum,cwnom; + double sin321,sin234,rr,rijrik,rijrjl,rjk2,rik2,ril2,rjl2; + double dctik,dctjk,dctjl,dctij,dctji,dctil,rik2i,rjl2i,sink2i,sinl2i; + double rjk[3],ril[3],dt1dik,dt1djk,dt1djl,dt1dil,dt1dij; + double F23[3],F12[3],F34[3],F31[3],F24[3],fi[3],fj[3],fk[3],fl[3]; + double f1[3],f2[3],f3[3],f4[4]; + double dcut321,PijS,PjiS; + double rij2,tspjik,dtsjik,tspijl,dtsijl,costmp; + int *REBO_neighs,*REBO_neighs_i,*REBO_neighs_j,*REBO_neighs_k,*REBO_neighs_l; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + int *type = atom->type; + + atomi = i; + atomj = j; + itype = map[type[i]]; + jtype = map[type[j]]; + wij = Sp(rijmag,rcmin[itype][jtype],rcmax[itype][jtype],dwij); + NijC = nC[i]-(wij*kronecker(jtype,0)); + NijH = nH[i]-(wij*kronecker(jtype,1)); + NjiC = nC[j]-(wij*kronecker(itype,0)); + NjiH = nH[j]-(wij*kronecker(itype,1)); + bij = 0.0; + tmp = 0.0; + tmp2 = 0.0; + tmp3 = 0.0; + dgdc = 0.0; + dgdN = 0.0; + NconjtmpI = 0.0; + NconjtmpJ = 0.0; + Etmp = 0.0; + + REBO_neighs = REBO_firstneigh[i]; + for (k = 0; k < REBO_numneigh[i]; k++) { + atomk = REBO_neighs[k]; + if (atomk != atomj) { + ktype = map[type[atomk]]; + rik[0] = x[atomi][0]-x[atomk][0]; + rik[1] = x[atomi][1]-x[atomk][1]; + rik[2] = x[atomi][2]-x[atomk][2]; + rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2])); + lamdajik = 4.0*kronecker(itype,1) * + ((rho[ktype][1]-rikmag)-(rho[jtype][1]-rijmag)); + wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dS); + Nki = nC[atomk]-(wik*kronecker(itype,0))+nH[atomk] - + (wik*kronecker(itype,1)); + cosjik = ((rij[0]*rik[0])+(rij[1]*rik[1])+(rij[2]*rik[2])) / + (rijmag*rikmag); + cosjik = MIN(cosjik,1.0); + cosjik = MAX(cosjik,-1.0); + + // evaluate splines g and derivatives dg + + g = gSpline(cosjik,(NijC+NijH),itype,&dgdc,&dgdN); + Etmp = Etmp+(wik*g*exp(lamdajik)); + tmp3 = tmp3+(wik*dgdN*exp(lamdajik)); + NconjtmpI = NconjtmpI+(kronecker(ktype,0)*wik*Sp(Nki,Nmin,Nmax,dS)); + } + } + + PijS = 0.0; + dN2[0] = 0.0; + dN2[1] = 0.0; + PijS = PijSpline(NijC,NijH,itype,jtype,dN2); + pij = 1.0/sqrt(1.0+Etmp+PijS); + tmp = -0.5*pij*pij*pij; + + const double invrijm = 1.0/rijmag; + const double invrijm2 = invrijm*invrijm; + + // pij forces + + REBO_neighs = REBO_firstneigh[i]; + for (k = 0; k < REBO_numneigh[i]; k++) { + atomk = REBO_neighs[k]; + if (atomk != atomj) { + ktype = map[type[atomk]]; + rik[0] = x[atomi][0]-x[atomk][0]; + rik[1] = x[atomi][1]-x[atomk][1]; + rik[2] = x[atomi][2]-x[atomk][2]; + rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2])); + lamdajik = 4.0*kronecker(itype,1) * + ((rho[ktype][1]-rikmag)-(rho[jtype][1]-rijmag)); + wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dwik); + + const double invrikm = 1.0/rikmag; + const double invrijkm = invrijm*invrikm; + const double invrikm2 = invrikm*invrikm; + + cosjik = (rij[0]*rik[0] + rij[1]*rik[1] + rij[2]*rik[2]) * invrijkm; + cosjik = MIN(cosjik,1.0); + cosjik = MAX(cosjik,-1.0); + + dcosjikdri[0] = ((rij[0]+rik[0])*invrijkm) - + (cosjik*((rij[0]*invrijm2)+(rik[0]*invrikm2))); + dcosjikdri[1] = ((rij[1]+rik[1])*invrijkm) - + (cosjik*((rij[1]*invrijm2)+(rik[1]*invrikm2))); + dcosjikdri[2] = ((rij[2]+rik[2])*invrijkm) - + (cosjik*((rij[2]*invrijm2)+(rik[2]*invrikm2))); + dcosjikdrk[0] = (-rij[0]*invrijkm) + (cosjik*(rik[0]*invrikm2)); + dcosjikdrk[1] = (-rij[1]*invrijkm) + (cosjik*(rik[1]*invrikm2)); + dcosjikdrk[2] = (-rij[2]*invrijkm) + (cosjik*(rik[2]*invrikm2)); + dcosjikdrj[0] = (-rik[0]*invrijkm) + (cosjik*(rij[0]*invrijm2)); + dcosjikdrj[1] = (-rik[1]*invrijkm) + (cosjik*(rij[1]*invrijm2)); + dcosjikdrj[2] = (-rik[2]*invrijkm) + (cosjik*(rij[2]*invrijm2)); + + g = gSpline(cosjik,(NijC+NijH),itype,&dgdc,&dgdN); + tmp2 = VA*.5*(tmp*wik*dgdc*exp(lamdajik)); + fj[0] = -tmp2*dcosjikdrj[0]; + fj[1] = -tmp2*dcosjikdrj[1]; + fj[2] = -tmp2*dcosjikdrj[2]; + fi[0] = -tmp2*dcosjikdri[0]; + fi[1] = -tmp2*dcosjikdri[1]; + fi[2] = -tmp2*dcosjikdri[2]; + fk[0] = -tmp2*dcosjikdrk[0]; + fk[1] = -tmp2*dcosjikdrk[1]; + fk[2] = -tmp2*dcosjikdrk[2]; + + tmp2 = VA*.5*(tmp*wik*g*exp(lamdajik)*4.0*kronecker(itype,1)); + fj[0] -= tmp2*(-rij[0]*invrijm); + fj[1] -= tmp2*(-rij[1]*invrijm); + fj[2] -= tmp2*(-rij[2]*invrijm); + fi[0] -= tmp2*((-rik[0]*invrikm)+(rij[0]*invrijm)); + fi[1] -= tmp2*((-rik[1]*invrikm)+(rij[1]*invrijm)); + fi[2] -= tmp2*((-rik[2]*invrikm)+(rij[2]*invrijm)); + fk[0] -= tmp2*(rik[0]*invrikm); + fk[1] -= tmp2*(rik[1]*invrikm); + fk[2] -= tmp2*(rik[2]*invrikm); + + // coordination forces + + // dwik forces + + tmp2 = VA*.5*(tmp*dwik*g*exp(lamdajik))*invrikm; + fi[0] -= tmp2*rik[0]; + fi[1] -= tmp2*rik[1]; + fi[2] -= tmp2*rik[2]; + fk[0] += tmp2*rik[0]; + fk[1] += tmp2*rik[1]; + fk[2] += tmp2*rik[2]; + + // PIJ forces + + tmp2 = VA*.5*(tmp*dN2[ktype]*dwik)*invrikm; + fi[0] -= tmp2*rik[0]; + fi[1] -= tmp2*rik[1]; + fi[2] -= tmp2*rik[2]; + fk[0] += tmp2*rik[0]; + fk[1] += tmp2*rik[1]; + fk[2] += tmp2*rik[2]; + + // dgdN forces + + tmp2 = VA*.5*(tmp*tmp3*dwik)*invrikm; + fi[0] -= tmp2*rik[0]; + fi[1] -= tmp2*rik[1]; + fi[2] -= tmp2*rik[2]; + fk[0] += tmp2*rik[0]; + fk[1] += tmp2*rik[1]; + fk[2] += tmp2*rik[2]; + + f[atomi][0] += fi[0]; f[atomi][1] += fi[1]; f[atomi][2] += fi[2]; + f[atomj][0] += fj[0]; f[atomj][1] += fj[1]; f[atomj][2] += fj[2]; + f[atomk][0] += fk[0]; f[atomk][1] += fk[1]; f[atomk][2] += fk[2]; + + if (vflag_atom) { + rji[0] = -rij[0]; rji[1] = -rij[1]; rji[2] = -rij[2]; + rki[0] = -rik[0]; rki[1] = -rik[1]; rki[2] = -rik[2]; + v_tally3_thr(atomi,atomj,atomk,fj,fk,rji,rki,thr); + } + } + } + + tmp = 0.0; + tmp2 = 0.0; + tmp3 = 0.0; + Etmp = 0.0; + + REBO_neighs = REBO_firstneigh[j]; + for (l = 0; l < REBO_numneigh[j]; l++) { + atoml = REBO_neighs[l]; + if (atoml != atomi) { + ltype = map[type[atoml]]; + rjl[0] = x[atomj][0]-x[atoml][0]; + rjl[1] = x[atomj][1]-x[atoml][1]; + rjl[2] = x[atomj][2]-x[atoml][2]; + rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2])); + lamdaijl = 4.0*kronecker(jtype,1) * + ((rho[ltype][1]-rjlmag)-(rho[itype][1]-rijmag)); + wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dS); + Nlj = nC[atoml]-(wjl*kronecker(jtype,0)) + + nH[atoml]-(wjl*kronecker(jtype,1)); + cosijl = -1.0*((rij[0]*rjl[0])+(rij[1]*rjl[1])+(rij[2]*rjl[2])) / + (rijmag*rjlmag); + cosijl = MIN(cosijl,1.0); + cosijl = MAX(cosijl,-1.0); + + // evaluate splines g and derivatives dg + + g = gSpline(cosijl,NjiC+NjiH,jtype,&dgdc,&dgdN); + Etmp = Etmp+(wjl*g*exp(lamdaijl)); + tmp3 = tmp3+(wjl*dgdN*exp(lamdaijl)); + NconjtmpJ = NconjtmpJ+(kronecker(ltype,0)*wjl*Sp(Nlj,Nmin,Nmax,dS)); + } + } + + PjiS = 0.0; + dN2[0] = 0.0; + dN2[1] = 0.0; + PjiS = PijSpline(NjiC,NjiH,jtype,itype,dN2); + pji = 1.0/sqrt(1.0+Etmp+PjiS); + tmp = -0.5*pji*pji*pji; + + REBO_neighs = REBO_firstneigh[j]; + for (l = 0; l < REBO_numneigh[j]; l++) { + atoml = REBO_neighs[l]; + if (atoml != atomi) { + ltype = map[type[atoml]]; + rjl[0] = x[atomj][0]-x[atoml][0]; + rjl[1] = x[atomj][1]-x[atoml][1]; + rjl[2] = x[atomj][2]-x[atoml][2]; + rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2])); + lamdaijl = 4.0*kronecker(jtype,1) * + ((rho[ltype][1]-rjlmag)-(rho[itype][1]-rijmag)); + wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dwjl); + + const double invrjlm = 1.0/rjlmag; + const double invrijlm = invrijm*invrjlm; + const double invrjlm2 = invrjlm*invrjlm; + + cosijl = (-1.0*((rij[0]*rjl[0])+(rij[1]*rjl[1])+(rij[2]*rjl[2]))) + * invrijlm; + + cosijl = MIN(cosijl,1.0); + cosijl = MAX(cosijl,-1.0); + + dcosijldri[0] = (-rjl[0]*invrijlm) - (cosijl*rij[0]*invrijm2); + dcosijldri[1] = (-rjl[1]*invrijlm) - (cosijl*rij[1]*invrijm2); + dcosijldri[2] = (-rjl[2]*invrijlm) - (cosijl*rij[2]*invrijm2); + dcosijldrj[0] = ((-rij[0]+rjl[0])*invrijlm) + + (cosijl*((rij[0]*invrijm2)-(rjl[0]*invrjlm2))); + dcosijldrj[1] = ((-rij[1]+rjl[1])*invrijlm) + + (cosijl*((rij[1]*invrijm2)-(rjl[1]*invrjlm2))); + dcosijldrj[2] = ((-rij[2]+rjl[2])*invrijlm) + + (cosijl*((rij[2]*invrijm2)-(rjl[2]*invrjlm2))); + dcosijldrl[0] = (rij[0]*invrijlm)+(cosijl*rjl[0]*invrjlm2); + dcosijldrl[1] = (rij[1]*invrijlm)+(cosijl*rjl[1]*invrjlm2); + dcosijldrl[2] = (rij[2]*invrijlm)+(cosijl*rjl[2]*invrjlm2); + + // evaluate splines g and derivatives dg + + g = gSpline(cosijl,NjiC+NjiH,jtype,&dgdc,&dgdN); + tmp2 = VA*.5*(tmp*wjl*dgdc*exp(lamdaijl)); + fi[0] = -tmp2*dcosijldri[0]; + fi[1] = -tmp2*dcosijldri[1]; + fi[2] = -tmp2*dcosijldri[2]; + fj[0] = -tmp2*dcosijldrj[0]; + fj[1] = -tmp2*dcosijldrj[1]; + fj[2] = -tmp2*dcosijldrj[2]; + fl[0] = -tmp2*dcosijldrl[0]; + fl[1] = -tmp2*dcosijldrl[1]; + fl[2] = -tmp2*dcosijldrl[2]; + + tmp2 = VA*.5*(tmp*wjl*g*exp(lamdaijl)*4.0*kronecker(jtype,1)); + fi[0] -= tmp2*(rij[0]*invrijm); + fi[1] -= tmp2*(rij[1]*invrijm); + fi[2] -= tmp2*(rij[2]*invrijm); + fj[0] -= tmp2*((-rjl[0]*invrjlm)-(rij[0]*invrijm)); + fj[1] -= tmp2*((-rjl[1]*invrjlm)-(rij[1]*invrijm)); + fj[2] -= tmp2*((-rjl[2]*invrjlm)-(rij[2]*invrijm)); + fl[0] -= tmp2*(rjl[0]*invrjlm); + fl[1] -= tmp2*(rjl[1]*invrjlm); + fl[2] -= tmp2*(rjl[2]*invrjlm); + + // coordination forces + + // dwik forces + + tmp2 = VA*.5*(tmp*dwjl*g*exp(lamdaijl))*invrjlm; + fj[0] -= tmp2*rjl[0]; + fj[1] -= tmp2*rjl[1]; + fj[2] -= tmp2*rjl[2]; + fl[0] += tmp2*rjl[0]; + fl[1] += tmp2*rjl[1]; + fl[2] += tmp2*rjl[2]; + + // PIJ forces + + tmp2 = VA*.5*(tmp*dN2[ltype]*dwjl)*invrjlm; + fj[0] -= tmp2*rjl[0]; + fj[1] -= tmp2*rjl[1]; + fj[2] -= tmp2*rjl[2]; + fl[0] += tmp2*rjl[0]; + fl[1] += tmp2*rjl[1]; + fl[2] += tmp2*rjl[2]; + + // dgdN forces + + tmp2 = VA*.5*(tmp*tmp3*dwjl)*invrjlm; + fj[0] -= tmp2*rjl[0]; + fj[1] -= tmp2*rjl[1]; + fj[2] -= tmp2*rjl[2]; + fl[0] += tmp2*rjl[0]; + fl[1] += tmp2*rjl[1]; + fl[2] += tmp2*rjl[2]; + + f[atomi][0] += fi[0]; f[atomi][1] += fi[1]; f[atomi][2] += fi[2]; + f[atomj][0] += fj[0]; f[atomj][1] += fj[1]; f[atomj][2] += fj[2]; + f[atoml][0] += fl[0]; f[atoml][1] += fl[1]; f[atoml][2] += fl[2]; + + if (vflag_atom) { + rlj[0] = -rjl[0]; rlj[1] = -rjl[1]; rlj[2] = -rjl[2]; + v_tally3_thr(atomi,atomj,atoml,fi,fl,rij,rlj,thr); + } + } + } + + // evaluate Nij conj + + Nijconj = 1.0+(NconjtmpI*NconjtmpI)+(NconjtmpJ*NconjtmpJ); + piRC = piRCSpline(NijC+NijH,NjiC+NjiH,Nijconj,itype,jtype,dN3); + + // piRC forces + + REBO_neighs_i = REBO_firstneigh[i]; + for (k = 0; k < REBO_numneigh[i]; k++) { + atomk = REBO_neighs_i[k]; + if (atomk !=atomj) { + ktype = map[type[atomk]]; + rik[0] = x[atomi][0]-x[atomk][0]; + rik[1] = x[atomi][1]-x[atomk][1]; + rik[2] = x[atomi][2]-x[atomk][2]; + rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2])); + wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dwik); + Nki = nC[atomk]-(wik*kronecker(itype,0))+nH[atomk] - + (wik*kronecker(itype,1)); + SpN = Sp(Nki,Nmin,Nmax,dNki); + + tmp2 = VA*dN3[0]*dwik/rikmag; + f[atomi][0] -= tmp2*rik[0]; + f[atomi][1] -= tmp2*rik[1]; + f[atomi][2] -= tmp2*rik[2]; + f[atomk][0] += tmp2*rik[0]; + f[atomk][1] += tmp2*rik[1]; + f[atomk][2] += tmp2*rik[2]; + + if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpI*dwik*SpN)/rikmag; + f[atomi][0] -= tmp2*rik[0]; + f[atomi][1] -= tmp2*rik[1]; + f[atomi][2] -= tmp2*rik[2]; + f[atomk][0] += tmp2*rik[0]; + f[atomk][1] += tmp2*rik[1]; + f[atomk][2] += tmp2*rik[2]; + + if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr); + + if (fabs(dNki) > TOL) { + REBO_neighs_k = REBO_firstneigh[atomk]; + for (n = 0; n < REBO_numneigh[atomk]; n++) { + atomn = REBO_neighs_k[n]; + if (atomn != atomi) { + ntype = map[type[atomn]]; + rkn[0] = x[atomk][0]-x[atomn][0]; + rkn[1] = x[atomk][1]-x[atomn][1]; + rkn[2] = x[atomk][2]-x[atomn][2]; + rknmag = sqrt((rkn[0]*rkn[0])+(rkn[1]*rkn[1])+(rkn[2]*rkn[2])); + Sp(rknmag,rcmin[ktype][ntype],rcmax[ktype][ntype],dwkn); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpI*wik*dNki*dwkn)/rknmag; + f[atomk][0] -= tmp2*rkn[0]; + f[atomk][1] -= tmp2*rkn[1]; + f[atomk][2] -= tmp2*rkn[2]; + f[atomn][0] += tmp2*rkn[0]; + f[atomn][1] += tmp2*rkn[1]; + f[atomn][2] += tmp2*rkn[2]; + + if (vflag_atom) v_tally2_thr(atomk,atomn,-tmp2,rkn,thr); + } + } + } + } + } + + // piRC forces + + REBO_neighs = REBO_firstneigh[atomj]; + for (l = 0; l < REBO_numneigh[atomj]; l++) { + atoml = REBO_neighs[l]; + if (atoml !=atomi) { + ltype = map[type[atoml]]; + rjl[0] = x[atomj][0]-x[atoml][0]; + rjl[1] = x[atomj][1]-x[atoml][1]; + rjl[2] = x[atomj][2]-x[atoml][2]; + rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2])); + wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dwjl); + Nlj = nC[atoml]-(wjl*kronecker(jtype,0))+nH[atoml] - + (wjl*kronecker(jtype,1)); + SpN = Sp(Nlj,Nmin,Nmax,dNlj); + + tmp2 = VA*dN3[1]*dwjl/rjlmag; + f[atomj][0] -= tmp2*rjl[0]; + f[atomj][1] -= tmp2*rjl[1]; + f[atomj][2] -= tmp2*rjl[2]; + f[atoml][0] += tmp2*rjl[0]; + f[atoml][1] += tmp2*rjl[1]; + f[atoml][2] += tmp2*rjl[2]; + + if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*dwjl*SpN)/rjlmag; + f[atomj][0] -= tmp2*rjl[0]; + f[atomj][1] -= tmp2*rjl[1]; + f[atomj][2] -= tmp2*rjl[2]; + f[atoml][0] += tmp2*rjl[0]; + f[atoml][1] += tmp2*rjl[1]; + f[atoml][2] += tmp2*rjl[2]; + + if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr); + + if (fabs(dNlj) > TOL) { + REBO_neighs_l = REBO_firstneigh[atoml]; + for (n = 0; n < REBO_numneigh[atoml]; n++) { + atomn = REBO_neighs_l[n]; + if (atomn != atomj) { + ntype = map[type[atomn]]; + rln[0] = x[atoml][0]-x[atomn][0]; + rln[1] = x[atoml][1]-x[atomn][1]; + rln[2] = x[atoml][2]-x[atomn][2]; + rlnmag = sqrt((rln[0]*rln[0])+(rln[1]*rln[1])+(rln[2]*rln[2])); + Sp(rlnmag,rcmin[ltype][ntype],rcmax[ltype][ntype],dwln); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*wjl*dNlj*dwln)/rlnmag; + f[atoml][0] -= tmp2*rln[0]; + f[atoml][1] -= tmp2*rln[1]; + f[atoml][2] -= tmp2*rln[2]; + f[atomn][0] += tmp2*rln[0]; + f[atomn][1] += tmp2*rln[1]; + f[atomn][2] += tmp2*rln[2]; + + if (vflag_atom) v_tally2_thr(atoml,atomn,-tmp2,rln,thr); + } + } + } + } + } + + Tij = 0.0; + dN3[0] = 0.0; + dN3[1] = 0.0; + dN3[2] = 0.0; + if (itype == 0 && jtype == 0) + Tij=TijSpline((NijC+NijH),(NjiC+NjiH),Nijconj,dN3); + Etmp = 0.0; + + if (fabs(Tij) > TOL) { + atom2 = atomi; + atom3 = atomj; + r32[0] = x[atom3][0]-x[atom2][0]; + r32[1] = x[atom3][1]-x[atom2][1]; + r32[2] = x[atom3][2]-x[atom2][2]; + r32mag = sqrt((r32[0]*r32[0])+(r32[1]*r32[1])+(r32[2]*r32[2])); + r23[0] = -r32[0]; + r23[1] = -r32[1]; + r23[2] = -r32[2]; + r23mag = r32mag; + REBO_neighs_i = REBO_firstneigh[i]; + for (k = 0; k < REBO_numneigh[i]; k++) { + atomk = REBO_neighs_i[k]; + atom1 = atomk; + ktype = map[type[atomk]]; + if (atomk != atomj) { + r21[0] = x[atom2][0]-x[atom1][0]; + r21[1] = x[atom2][1]-x[atom1][1]; + r21[2] = x[atom2][2]-x[atom1][2]; + r21mag = sqrt(r21[0]*r21[0] + r21[1]*r21[1] + r21[2]*r21[2]); + cos321 = -1.0*((r21[0]*r32[0])+(r21[1]*r32[1])+(r21[2]*r32[2])) / + (r21mag*r32mag); + cos321 = MIN(cos321,1.0); + cos321 = MAX(cos321,-1.0); + Sp2(cos321,thmin,thmax,dcut321); + sin321 = sqrt(1.0 - cos321*cos321); + sink2i = 1.0/(sin321*sin321); + rik2i = 1.0/(r21mag*r21mag); + if (sin321 != 0.0) { + rr = (r23mag*r23mag)-(r21mag*r21mag); + rjk[0] = r21[0]-r23[0]; + rjk[1] = r21[1]-r23[1]; + rjk[2] = r21[2]-r23[2]; + rjk2 = (rjk[0]*rjk[0])+(rjk[1]*rjk[1])+(rjk[2]*rjk[2]); + rijrik = 2.0*r23mag*r21mag; + rik2 = r21mag*r21mag; + dctik = (-rr+rjk2)/(rijrik*rik2); + dctij = (rr+rjk2)/(rijrik*r23mag*r23mag); + dctjk = -2.0/rijrik; + w21 = Sp(r21mag,rcmin[itype][ktype],rcmaxp[itype][ktype],dw21); + rijmag = r32mag; + rikmag = r21mag; + rij2 = r32mag*r32mag; + rik2 = r21mag*r21mag; + costmp = 0.5*(rij2+rik2-rjk2)/rijmag/rikmag; + tspjik = Sp2(costmp,thmin,thmax,dtsjik); + dtsjik = -dtsjik; + + REBO_neighs_j = REBO_firstneigh[j]; + for (l = 0; l < REBO_numneigh[j]; l++) { + atoml = REBO_neighs_j[l]; + atom4 = atoml; + ltype = map[type[atoml]]; + if (!(atoml == atomi || atoml == atomk)) { + r34[0] = x[atom3][0]-x[atom4][0]; + r34[1] = x[atom3][1]-x[atom4][1]; + r34[2] = x[atom3][2]-x[atom4][2]; + r34mag = sqrt((r34[0]*r34[0])+(r34[1]*r34[1])+(r34[2]*r34[2])); + cos234 = (r32[0]*r34[0] + r32[1]*r34[1] + r32[2]*r34[2]) / + (r32mag*r34mag); + cos234 = MIN(cos234,1.0); + cos234 = MAX(cos234,-1.0); + sin234 = sqrt(1.0 - cos234*cos234); + sinl2i = 1.0/(sin234*sin234); + rjl2i = 1.0/(r34mag*r34mag); + + if (sin234 != 0.0) { + w34 = Sp(r34mag,rcmin[jtype][ltype],rcmaxp[jtype][ltype],dw34); + rr = (r23mag*r23mag)-(r34mag*r34mag); + ril[0] = r23[0]+r34[0]; + ril[1] = r23[1]+r34[1]; + ril[2] = r23[2]+r34[2]; + ril2 = (ril[0]*ril[0])+(ril[1]*ril[1])+(ril[2]*ril[2]); + rijrjl = 2.0*r23mag*r34mag; + rjl2 = r34mag*r34mag; + dctjl = (-rr+ril2)/(rijrjl*rjl2); + dctji = (rr+ril2)/(rijrjl*r23mag*r23mag); + dctil = -2.0/rijrjl; + rjlmag = r34mag; + rjl2 = r34mag*r34mag; + costmp = 0.5*(rij2+rjl2-ril2)/rijmag/rjlmag; + tspijl = Sp2(costmp,thmin,thmax,dtsijl); + dtsijl = -dtsijl; + prefactor = VA*Tij; + + cross321[0] = (r32[1]*r21[2])-(r32[2]*r21[1]); + cross321[1] = (r32[2]*r21[0])-(r32[0]*r21[2]); + cross321[2] = (r32[0]*r21[1])-(r32[1]*r21[0]); + cross234[0] = (r23[1]*r34[2])-(r23[2]*r34[1]); + cross234[1] = (r23[2]*r34[0])-(r23[0]*r34[2]); + cross234[2] = (r23[0]*r34[1])-(r23[1]*r34[0]); + + cwnum = (cross321[0]*cross234[0]) + + (cross321[1]*cross234[1]) + (cross321[2]*cross234[2]); + cwnom = r21mag*r34mag*r23mag*r23mag*sin321*sin234; + om1234 = cwnum/cwnom; + cw = om1234; + Etmp += ((1.0-(om1234*om1234))*w21*w34) * + (1.0-tspjik)*(1.0-tspijl); + + dt1dik = (rik2i)-(dctik*sink2i*cos321); + dt1djk = (-dctjk*sink2i*cos321); + dt1djl = (rjl2i)-(dctjl*sinl2i*cos234); + dt1dil = (-dctil*sinl2i*cos234); + dt1dij = (2.0/(r23mag*r23mag))-(dctij*sink2i*cos321) - + (dctji*sinl2i*cos234); + + dt2dik[0] = (-r23[2]*cross234[1])+(r23[1]*cross234[2]); + dt2dik[1] = (-r23[0]*cross234[2])+(r23[2]*cross234[0]); + dt2dik[2] = (-r23[1]*cross234[0])+(r23[0]*cross234[1]); + + dt2djl[0] = (-r23[1]*cross321[2])+(r23[2]*cross321[1]); + dt2djl[1] = (-r23[2]*cross321[0])+(r23[0]*cross321[2]); + dt2djl[2] = (-r23[0]*cross321[1])+(r23[1]*cross321[0]); + + dt2dij[0] = (r21[2]*cross234[1])-(r34[2]*cross321[1]) - + (r21[1]*cross234[2])+(r34[1]*cross321[2]); + dt2dij[1] = (r21[0]*cross234[2])-(r34[0]*cross321[2]) - + (r21[2]*cross234[0])+(r34[2]*cross321[0]); + dt2dij[2] = (r21[1]*cross234[0])-(r34[1]*cross321[0]) - + (r21[0]*cross234[1])+(r34[0]*cross321[1]); + + aa = (prefactor*2.0*cw/cwnom)*w21*w34 * + (1.0-tspjik)*(1.0-tspijl); + aaa1 = -prefactor*(1.0-(om1234*om1234)) * + (1.0-tspjik)*(1.0-tspijl); + aaa2 = aaa1*w21*w34; + at2 = aa*cwnum; + + fcijpc = (-dt1dij*at2)+(aaa2*dtsjik*dctij*(1.0-tspijl)) + + (aaa2*dtsijl*dctji*(1.0-tspjik)); + fcikpc = (-dt1dik*at2)+(aaa2*dtsjik*dctik*(1.0-tspijl)); + fcjlpc = (-dt1djl*at2)+(aaa2*dtsijl*dctjl*(1.0-tspjik)); + fcjkpc = (-dt1djk*at2)+(aaa2*dtsjik*dctjk*(1.0-tspijl)); + fcilpc = (-dt1dil*at2)+(aaa2*dtsijl*dctil*(1.0-tspjik)); + + F23[0] = (fcijpc*r23[0])+(aa*dt2dij[0]); + F23[1] = (fcijpc*r23[1])+(aa*dt2dij[1]); + F23[2] = (fcijpc*r23[2])+(aa*dt2dij[2]); + + F12[0] = (fcikpc*r21[0])+(aa*dt2dik[0]); + F12[1] = (fcikpc*r21[1])+(aa*dt2dik[1]); + F12[2] = (fcikpc*r21[2])+(aa*dt2dik[2]); + + F34[0] = (fcjlpc*r34[0])+(aa*dt2djl[0]); + F34[1] = (fcjlpc*r34[1])+(aa*dt2djl[1]); + F34[2] = (fcjlpc*r34[2])+(aa*dt2djl[2]); + + F31[0] = (fcjkpc*rjk[0]); + F31[1] = (fcjkpc*rjk[1]); + F31[2] = (fcjkpc*rjk[2]); + + F24[0] = (fcilpc*ril[0]); + F24[1] = (fcilpc*ril[1]); + F24[2] = (fcilpc*ril[2]); + + f1[0] = -F12[0]-F31[0]; + f1[1] = -F12[1]-F31[1]; + f1[2] = -F12[2]-F31[2]; + f2[0] = F23[0]+F12[0]+F24[0]; + f2[1] = F23[1]+F12[1]+F24[1]; + f2[2] = F23[2]+F12[2]+F24[2]; + f3[0] = -F23[0]+F34[0]+F31[0]; + f3[1] = -F23[1]+F34[1]+F31[1]; + f3[2] = -F23[2]+F34[2]+F31[2]; + f4[0] = -F34[0]-F24[0]; + f4[1] = -F34[1]-F24[1]; + f4[2] = -F34[2]-F24[2]; + + // coordination forces + + tmp2 = VA*Tij*((1.0-(om1234*om1234))) * + (1.0-tspjik)*(1.0-tspijl)*dw21*w34/r21mag; + f2[0] -= tmp2*r21[0]; + f2[1] -= tmp2*r21[1]; + f2[2] -= tmp2*r21[2]; + f1[0] += tmp2*r21[0]; + f1[1] += tmp2*r21[1]; + f1[2] += tmp2*r21[2]; + + tmp2 = VA*Tij*((1.0-(om1234*om1234))) * + (1.0-tspjik)*(1.0-tspijl)*w21*dw34/r34mag; + f3[0] -= tmp2*r34[0]; + f3[1] -= tmp2*r34[1]; + f3[2] -= tmp2*r34[2]; + f4[0] += tmp2*r34[0]; + f4[1] += tmp2*r34[1]; + f4[2] += tmp2*r34[2]; + + f[atom1][0] += f1[0]; f[atom1][1] += f1[1]; + f[atom1][2] += f1[2]; + f[atom2][0] += f2[0]; f[atom2][1] += f2[1]; + f[atom2][2] += f2[2]; + f[atom3][0] += f3[0]; f[atom3][1] += f3[1]; + f[atom3][2] += f3[2]; + f[atom4][0] += f4[0]; f[atom4][1] += f4[1]; + f[atom4][2] += f4[2]; + + if (vflag_atom) { + r13[0] = -rjk[0]; r13[1] = -rjk[1]; r13[2] = -rjk[2]; + r43[0] = -r34[0]; r43[1] = -r34[1]; r43[2] = -r34[2]; + v_tally4_thr(atom1,atom2,atom3,atom4,f1,f2,f4,r13,r23,r43,thr); + } + } + } + } + } + } + } + + // Tij forces now that we have Etmp + + REBO_neighs = REBO_firstneigh[i]; + for (k = 0; k < REBO_numneigh[i]; k++) { + atomk = REBO_neighs[k]; + if (atomk != atomj) { + ktype = map[type[atomk]]; + rik[0] = x[atomi][0]-x[atomk][0]; + rik[1] = x[atomi][1]-x[atomk][1]; + rik[2] = x[atomi][2]-x[atomk][2]; + rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2])); + wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dwik); + Nki = nC[atomk]-(wik*kronecker(itype,0))+nH[atomk] - + (wik*kronecker(itype,1)); + SpN = Sp(Nki,Nmin,Nmax,dNki); + + tmp2 = VA*dN3[0]*dwik*Etmp/rikmag; + f[atomi][0] -= tmp2*rik[0]; + f[atomi][1] -= tmp2*rik[1]; + f[atomi][2] -= tmp2*rik[2]; + f[atomk][0] += tmp2*rik[0]; + f[atomk][1] += tmp2*rik[1]; + f[atomk][2] += tmp2*rik[2]; + + if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpI*dwik*SpN)*Etmp/rikmag; + f[atomi][0] -= tmp2*rik[0]; + f[atomi][1] -= tmp2*rik[1]; + f[atomi][2] -= tmp2*rik[2]; + f[atomk][0] += tmp2*rik[0]; + f[atomk][1] += tmp2*rik[1]; + f[atomk][2] += tmp2*rik[2]; + + if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr); + + if (fabs(dNki) > TOL) { + REBO_neighs_k = REBO_firstneigh[atomk]; + for (n = 0; n < REBO_numneigh[atomk]; n++) { + atomn = REBO_neighs_k[n]; + ntype = map[type[atomn]]; + if (atomn != atomi) { + rkn[0] = x[atomk][0]-x[atomn][0]; + rkn[1] = x[atomk][1]-x[atomn][1]; + rkn[2] = x[atomk][2]-x[atomn][2]; + rknmag = sqrt((rkn[0]*rkn[0])+(rkn[1]*rkn[1])+(rkn[2]*rkn[2])); + Sp(rknmag,rcmin[ktype][ntype],rcmax[ktype][ntype],dwkn); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpI*wik*dNki*dwkn)*Etmp/rknmag; + f[atomk][0] -= tmp2*rkn[0]; + f[atomk][1] -= tmp2*rkn[1]; + f[atomk][2] -= tmp2*rkn[2]; + f[atomn][0] += tmp2*rkn[0]; + f[atomn][1] += tmp2*rkn[1]; + f[atomn][2] += tmp2*rkn[2]; + + if (vflag_atom) v_tally2_thr(atomk,atomn,-tmp2,rkn,thr); + } + } + } + } + } + + // Tij forces + + REBO_neighs = REBO_firstneigh[j]; + for (l = 0; l < REBO_numneigh[j]; l++) { + atoml = REBO_neighs[l]; + if (atoml != atomi) { + ltype = map[type[atoml]]; + rjl[0] = x[atomj][0]-x[atoml][0]; + rjl[1] = x[atomj][1]-x[atoml][1]; + rjl[2] = x[atomj][2]-x[atoml][2]; + rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2])); + wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dwjl); + Nlj = nC[atoml]-(wjl*kronecker(jtype,0))+nH[atoml] - + (wjl*kronecker(jtype,1)); + SpN = Sp(Nlj,Nmin,Nmax,dNlj); + + tmp2 = VA*dN3[1]*dwjl*Etmp/rjlmag; + f[atomj][0] -= tmp2*rjl[0]; + f[atomj][1] -= tmp2*rjl[1]; + f[atomj][2] -= tmp2*rjl[2]; + f[atoml][0] += tmp2*rjl[0]; + f[atoml][1] += tmp2*rjl[1]; + f[atoml][2] += tmp2*rjl[2]; + + if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*dwjl*SpN)*Etmp/rjlmag; + f[atomj][0] -= tmp2*rjl[0]; + f[atomj][1] -= tmp2*rjl[1]; + f[atomj][2] -= tmp2*rjl[2]; + f[atoml][0] += tmp2*rjl[0]; + f[atoml][1] += tmp2*rjl[1]; + f[atoml][2] += tmp2*rjl[2]; + + if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr); + + if (fabs(dNlj) > TOL) { + REBO_neighs_l = REBO_firstneigh[atoml]; + for (n = 0; n < REBO_numneigh[atoml]; n++) { + atomn = REBO_neighs_l[n]; + ntype = map[type[atomn]]; + if (atomn !=atomj) { + rln[0] = x[atoml][0]-x[atomn][0]; + rln[1] = x[atoml][1]-x[atomn][1]; + rln[2] = x[atoml][2]-x[atomn][2]; + rlnmag = sqrt((rln[0]*rln[0])+(rln[1]*rln[1])+(rln[2]*rln[2])); + Sp(rlnmag,rcmin[ltype][ntype],rcmax[ltype][ntype],dwln); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*wjl*dNlj*dwln)*Etmp/rlnmag; + f[atoml][0] -= tmp2*rln[0]; + f[atoml][1] -= tmp2*rln[1]; + f[atoml][2] -= tmp2*rln[2]; + f[atomn][0] += tmp2*rln[0]; + f[atomn][1] += tmp2*rln[1]; + f[atomn][2] += tmp2*rln[2]; + + if (vflag_atom) v_tally2_thr(atoml,atomn,-tmp2,rln,thr); + } + } + } + } + } + } + + bij = (0.5*(pij+pji))+piRC+(Tij*Etmp); + return bij; +} + +/* ---------------------------------------------------------------------- + Bij* function +------------------------------------------------------------------------- */ + +double PairAIREBOOMP::bondorderLJ_thr(int i, int j, double rij[3], double rijmag, + double VA, double rij0[3], double rij0mag, + int vflag_atom, ThrData * const thr) +{ + int k,n,l,atomk,atoml,atomn,atom1,atom2,atom3,atom4; + int atomi,atomj,itype,jtype,ktype,ltype,ntype; + double rik[3], rjl[3], rkn[3],rknmag,dNki; + double NijC,NijH,NjiC,NjiH,wik,dwik,dwkn,wjl; + double rikmag,rjlmag,cosjik,cosijl,g,tmp2,tmp3; + double Etmp,pij,tmp,wij,dwij,NconjtmpI,NconjtmpJ; + double Nki,Nlj,dS,lamdajik,lamdaijl,dgdc,dgdN,pji,Nijconj,piRC; + double dcosjikdri[3],dcosijldri[3],dcosjikdrk[3]; + double dN2[2],dN3[3]; + double dcosijldrj[3],dcosijldrl[3],dcosjikdrj[3],dwjl; + double Tij,crosskij[3],crosskijmag; + double crossijl[3],crossijlmag,omkijl; + double tmppij,tmppji,dN2PIJ[2],dN2PJI[2],dN3piRC[3],dN3Tij[3]; + double bij,tmp3pij,tmp3pji,Stb,dStb; + double r32[3],r32mag,cos321; + double om1234,rln[3]; + double rlnmag,dwln,r23[3],r23mag,r21[3],r21mag; + double w21,dw21,r34[3],r34mag,cos234,w34,dw34; + double cross321[3],cross234[3],prefactor,SpN; + double fcijpc,fcikpc,fcjlpc,fcjkpc,fcilpc; + double dt2dik[3],dt2djl[3],dt2dij[3],aa,aaa1,aaa2,at2,cw,cwnum,cwnom; + double sin321,sin234,rr,rijrik,rijrjl,rjk2,rik2,ril2,rjl2; + double dctik,dctjk,dctjl,dctij,dctji,dctil,rik2i,rjl2i,sink2i,sinl2i; + double rjk[3],ril[3],dt1dik,dt1djk,dt1djl,dt1dil,dt1dij; + double dNlj; + double PijS,PjiS; + double rij2,tspjik,dtsjik,tspijl,dtsijl,costmp; + int *REBO_neighs,*REBO_neighs_i,*REBO_neighs_j,*REBO_neighs_k,*REBO_neighs_l; + double F12[3],F23[3],F34[3],F31[3],F24[3]; + double fi[3],fj[3],fk[3],fl[3],f1[3],f2[3],f3[3],f4[4]; + double rji[3],rki[3],rlj[3],r13[3],r43[3]; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + + atomi = i; + atomj = j; + itype = map[type[atomi]]; + jtype = map[type[atomj]]; + wij = Sp(rij0mag,rcmin[itype][jtype],rcmax[itype][jtype],dwij); + NijC = nC[atomi]-(wij*kronecker(jtype,0)); + NijH = nH[atomi]-(wij*kronecker(jtype,1)); + NjiC = nC[atomj]-(wij*kronecker(itype,0)); + NjiH = nH[atomj]-(wij*kronecker(itype,1)); + + bij = 0.0; + tmp = 0.0; + tmp2 = 0.0; + tmp3 = 0.0; + dgdc = 0.0; + dgdN = 0.0; + NconjtmpI = 0.0; + NconjtmpJ = 0.0; + Etmp = 0.0; + + REBO_neighs = REBO_firstneigh[i]; + for (k = 0; k < REBO_numneigh[i]; k++) { + atomk = REBO_neighs[k]; + if (atomk != atomj) { + ktype = map[type[atomk]]; + rik[0] = x[atomi][0]-x[atomk][0]; + rik[1] = x[atomi][1]-x[atomk][1]; + rik[2] = x[atomi][2]-x[atomk][2]; + rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2])); + lamdajik = 4.0*kronecker(itype,1) * + ((rho[ktype][1]-rikmag)-(rho[jtype][1]-rijmag)); + wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dS); + Nki = nC[atomk]-(wik*kronecker(itype,0)) + + nH[atomk]-(wik*kronecker(itype,1)); + cosjik = ((rij[0]*rik[0])+(rij[1]*rik[1])+(rij[2]*rik[2])) / + (rijmag*rikmag); + cosjik = MIN(cosjik,1.0); + cosjik = MAX(cosjik,-1.0); + + // evaluate splines g and derivatives dg + + g = gSpline(cosjik,(NijC+NijH),itype,&dgdc,&dgdN); + Etmp += (wik*g*exp(lamdajik)); + tmp3 += (wik*dgdN*exp(lamdajik)); + NconjtmpI = NconjtmpI+(kronecker(ktype,0)*wik*Sp(Nki,Nmin,Nmax,dS)); + } + } + + PijS = 0.0; + dN2PIJ[0] = 0.0; + dN2PIJ[1] = 0.0; + PijS = PijSpline(NijC,NijH,itype,jtype,dN2PIJ); + pij = 1.0/sqrt(1.0+Etmp+PijS); + tmppij = -.5*pij*pij*pij; + tmp3pij = tmp3; + tmp = 0.0; + tmp2 = 0.0; + tmp3 = 0.0; + Etmp = 0.0; + + REBO_neighs = REBO_firstneigh[j]; + for (l = 0; l < REBO_numneigh[j]; l++) { + atoml = REBO_neighs[l]; + if (atoml != atomi) { + ltype = map[type[atoml]]; + rjl[0] = x[atomj][0]-x[atoml][0]; + rjl[1] = x[atomj][1]-x[atoml][1]; + rjl[2] = x[atomj][2]-x[atoml][2]; + rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2])); + lamdaijl = 4.0*kronecker(jtype,1) * + ((rho[ltype][1]-rjlmag)-(rho[itype][1]-rijmag)); + wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dS); + Nlj = nC[atoml]-(wjl*kronecker(jtype,0))+nH[atoml] - + (wjl*kronecker(jtype,1)); + cosijl = -1.0*((rij[0]*rjl[0])+(rij[1]*rjl[1])+(rij[2]*rjl[2])) / + (rijmag*rjlmag); + cosijl = MIN(cosijl,1.0); + cosijl = MAX(cosijl,-1.0); + + // evaluate splines g and derivatives dg + + g = gSpline(cosijl,NjiC+NjiH,jtype,&dgdc,&dgdN); + Etmp = Etmp+(wjl*g*exp(lamdaijl)); + tmp3 = tmp3+(wjl*dgdN*exp(lamdaijl)); + NconjtmpJ = NconjtmpJ+(kronecker(ltype,0)*wjl*Sp(Nlj,Nmin,Nmax,dS)); + } + } + + PjiS = 0.0; + dN2PJI[0] = 0.0; + dN2PJI[1] = 0.0; + PjiS = PijSpline(NjiC,NjiH,jtype,itype,dN2PJI); + pji = 1.0/sqrt(1.0+Etmp+PjiS); + tmppji = -.5*pji*pji*pji; + tmp3pji = tmp3; + + // evaluate Nij conj + + Nijconj = 1.0+(NconjtmpI*NconjtmpI)+(NconjtmpJ*NconjtmpJ); + piRC = piRCSpline(NijC+NijH,NjiC+NjiH,Nijconj,itype,jtype,dN3piRC); + Tij = 0.0; + dN3Tij[0] = 0.0; + dN3Tij[1] = 0.0; + dN3Tij[2] = 0.0; + if (itype == 0 && jtype == 0) + Tij=TijSpline((NijC+NijH),(NjiC+NjiH),Nijconj,dN3Tij); + + Etmp = 0.0; + if (fabs(Tij) > TOL) { + REBO_neighs_i = REBO_firstneigh[i]; + for (k = 0; k < REBO_numneigh[i]; k++) { + atomk = REBO_neighs_i[k]; + ktype = map[type[atomk]]; + if (atomk != atomj) { + rik[0] = x[atomi][0]-x[atomk][0]; + rik[1] = x[atomi][1]-x[atomk][1]; + rik[2] = x[atomi][2]-x[atomk][2]; + rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2])); + cos321 = ((rij[0]*rik[0])+(rij[1]*rik[1])+(rij[2]*rik[2])) / + (rijmag*rikmag); + cos321 = MIN(cos321,1.0); + cos321 = MAX(cos321,-1.0); + + rjk[0] = rik[0]-rij[0]; + rjk[1] = rik[1]-rij[1]; + rjk[2] = rik[2]-rij[2]; + rjk2 = (rjk[0]*rjk[0])+(rjk[1]*rjk[1])+(rjk[2]*rjk[2]); + rij2 = rijmag*rijmag; + rik2 = rikmag*rikmag; + costmp = 0.5*(rij2+rik2-rjk2)/rijmag/rikmag; + tspjik = Sp2(costmp,thmin,thmax,dtsjik); + + if (sqrt(1.0 - cos321*cos321) > sqrt(TOL)) { + wik = Sp(rikmag,rcmin[itype][ktype],rcmaxp[itype][ktype],dwik); + REBO_neighs_j = REBO_firstneigh[j]; + for (l = 0; l < REBO_numneigh[j]; l++) { + atoml = REBO_neighs_j[l]; + ltype = map[type[atoml]]; + if (!(atoml == atomi || atoml == atomk)) { + rjl[0] = x[atomj][0]-x[atoml][0]; + rjl[1] = x[atomj][1]-x[atoml][1]; + rjl[2] = x[atomj][2]-x[atoml][2]; + rjlmag = sqrt(rjl[0]*rjl[0] + rjl[1]*rjl[1] + rjl[2]*rjl[2]); + cos234 = -((rij[0]*rjl[0])+(rij[1]*rjl[1])+(rij[2]*rjl[2])) / + (rijmag*rjlmag); + cos234 = MIN(cos234,1.0); + cos234 = MAX(cos234,-1.0); + + ril[0] = rij[0]+rjl[0]; + ril[1] = rij[1]+rjl[1]; + ril[2] = rij[2]+rjl[2]; + ril2 = (ril[0]*ril[0])+(ril[1]*ril[1])+(ril[2]*ril[2]); + rijrjl = 2.0*rijmag*rjlmag; + rjl2 = rjlmag*rjlmag; + costmp = 0.5*(rij2+rjl2-ril2)/rijmag/rjlmag; + tspijl = Sp2(costmp,thmin,thmax,dtsijl); + + if (sqrt(1.0 - cos234*cos234) > sqrt(TOL)) { + wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmaxp[jtype][ltype],dS); + crosskij[0] = (rij[1]*rik[2]-rij[2]*rik[1]); + crosskij[1] = (rij[2]*rik[0]-rij[0]*rik[2]); + crosskij[2] = (rij[0]*rik[1]-rij[1]*rik[0]); + crosskijmag = sqrt(crosskij[0]*crosskij[0] + + crosskij[1]*crosskij[1] + + crosskij[2]*crosskij[2]); + crossijl[0] = (rij[1]*rjl[2]-rij[2]*rjl[1]); + crossijl[1] = (rij[2]*rjl[0]-rij[0]*rjl[2]); + crossijl[2] = (rij[0]*rjl[1]-rij[1]*rjl[0]); + crossijlmag = sqrt(crossijl[0]*crossijl[0] + + crossijl[1]*crossijl[1] + + crossijl[2]*crossijl[2]); + omkijl = -1.0*(((crosskij[0]*crossijl[0]) + + (crosskij[1]*crossijl[1]) + + (crosskij[2]*crossijl[2])) / + (crosskijmag*crossijlmag)); + Etmp += ((1.0-(omkijl*omkijl))*wik*wjl) * + (1.0-tspjik)*(1.0-tspijl); + } + } + } + } + } + } + } + + bij = (.5*(pij+pji))+piRC+(Tij*Etmp); + Stb = Sp2(bij,bLJmin[itype][jtype],bLJmax[itype][jtype],dStb); + VA = VA*dStb; + + if (dStb != 0.0) { + tmp = tmppij; + dN2[0] = dN2PIJ[0]; + dN2[1] = dN2PIJ[1]; + tmp3 = tmp3pij; + + const double invrijm = 1.0/rijmag; + const double invrijm2 = invrijm*invrijm; + + // pij forces + + REBO_neighs_i = REBO_firstneigh[i]; + for (k = 0; k < REBO_numneigh[i]; k++) { + atomk = REBO_neighs_i[k]; + if (atomk != atomj) { + lamdajik = 0.0; + rik[0] = x[atomi][0]-x[atomk][0]; + rik[1] = x[atomi][1]-x[atomk][1]; + rik[2] = x[atomi][2]-x[atomk][2]; + rikmag = sqrt(rik[0]*rik[0] + rik[1]*rik[1] + rik[2]*rik[2]); + lamdajik = 4.0*kronecker(itype,1) * + ((rho[ktype][1]-rikmag)-(rho[jtype][1]-rijmag)); + wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dwik); + + const double invrikm = 1.0/rikmag; + const double invrijkm = invrijm*invrikm; + const double invrikm2 = invrikm*invrikm; + + cosjik = ((rij[0]*rik[0])+(rij[1]*rik[1])+(rij[2]*rik[2])) + * invrijkm; + cosjik = MIN(cosjik,1.0); + cosjik = MAX(cosjik,-1.0); + + dcosjikdri[0] = ((rij[0]+rik[0])*invrijkm) - + (cosjik*((rij[0]*invrijm2)+(rik[0]*invrikm2))); + dcosjikdri[1] = ((rij[1]+rik[1])*invrijkm) - + (cosjik*((rij[1]*invrijm2)+(rik[1]*invrikm2))); + dcosjikdri[2] = ((rij[2]+rik[2])*invrijkm) - + (cosjik*((rij[2]*invrijm2)+(rik[2]*invrikm2))); + dcosjikdrk[0] = (-rij[0]*invrijkm) + (cosjik*(rik[0]*invrikm2)); + dcosjikdrk[1] = (-rij[1]*invrijkm) + (cosjik*(rik[1]*invrikm2)); + dcosjikdrk[2] = (-rij[2]*invrijkm) + (cosjik*(rik[2]*invrikm2)); + dcosjikdrj[0] = (-rik[0]*invrijkm) + (cosjik*(rij[0]*invrijm2)); + dcosjikdrj[1] = (-rik[1]*invrijkm) + (cosjik*(rij[1]*invrijm2)); + dcosjikdrj[2] = (-rik[2]*invrijkm) + (cosjik*(rij[2]*invrijm2)); + + g = gSpline(cosjik,(NijC+NijH),itype,&dgdc,&dgdN); + + tmp2 = VA*.5*(tmp*wik*dgdc*exp(lamdajik)); + fj[0] = -tmp2*dcosjikdrj[0]; + fj[1] = -tmp2*dcosjikdrj[1]; + fj[2] = -tmp2*dcosjikdrj[2]; + fi[0] = -tmp2*dcosjikdri[0]; + fi[1] = -tmp2*dcosjikdri[1]; + fi[2] = -tmp2*dcosjikdri[2]; + fk[0] = -tmp2*dcosjikdrk[0]; + fk[1] = -tmp2*dcosjikdrk[1]; + fk[2] = -tmp2*dcosjikdrk[2]; + + tmp2 = VA*.5*(tmp*wik*g*exp(lamdajik)*4.0*kronecker(itype,1)); + fj[0] -= tmp2*(-rij[0]*invrijm); + fj[1] -= tmp2*(-rij[1]*invrijm); + fj[2] -= tmp2*(-rij[2]*invrijm); + fi[0] -= tmp2*((-rik[0]/rikmag)+(rij[0]*invrijm)); + fi[1] -= tmp2*((-rik[1]/rikmag)+(rij[1]*invrijm)); + fi[2] -= tmp2*((-rik[2]/rikmag)+(rij[2]*invrijm)); + fk[0] -= tmp2*(rik[0]/rikmag); + fk[1] -= tmp2*(rik[1]/rikmag); + fk[2] -= tmp2*(rik[2]/rikmag); + + // coordination forces + + // dwik forces + + tmp2 = VA*.5*(tmp*dwik*g*exp(lamdajik))/rikmag; + fi[0] -= tmp2*rik[0]; + fi[1] -= tmp2*rik[1]; + fi[2] -= tmp2*rik[2]; + fk[0] += tmp2*rik[0]; + fk[1] += tmp2*rik[1]; + fk[2] += tmp2*rik[2]; + + // PIJ forces + + tmp2 = VA*.5*(tmp*dN2[ktype]*dwik)/rikmag; + fi[0] -= tmp2*rik[0]; + fi[1] -= tmp2*rik[1]; + fi[2] -= tmp2*rik[2]; + fk[0] += tmp2*rik[0]; + fk[1] += tmp2*rik[1]; + fk[2] += tmp2*rik[2]; + + // dgdN forces + + tmp2 = VA*.5*(tmp*tmp3*dwik)/rikmag; + fi[0] -= tmp2*rik[0]; + fi[1] -= tmp2*rik[1]; + fi[2] -= tmp2*rik[2]; + fk[0] += tmp2*rik[0]; + fk[1] += tmp2*rik[1]; + fk[2] += tmp2*rik[2]; + + f[atomi][0] += fi[0]; f[atomi][1] += fi[1]; f[atomi][2] += fi[2]; + f[atomj][0] += fj[0]; f[atomj][1] += fj[1]; f[atomj][2] += fj[2]; + f[atomk][0] += fk[0]; f[atomk][1] += fk[1]; f[atomk][2] += fk[2]; + + if (vflag_atom) { + rji[0] = -rij[0]; rji[1] = -rij[1]; rji[2] = -rij[2]; + rki[0] = -rik[0]; rki[1] = -rik[1]; rki[2] = -rik[2]; + v_tally3_thr(atomi,atomj,atomk,fj,fk,rji,rki,thr); + } + } + } + + tmp = tmppji; + tmp3 = tmp3pji; + dN2[0] = dN2PJI[0]; + dN2[1] = dN2PJI[1]; + REBO_neighs = REBO_firstneigh[j]; + for (l = 0; l < REBO_numneigh[j]; l++) { + atoml = REBO_neighs[l]; + if (atoml !=atomi) { + ltype = map[type[atoml]]; + rjl[0] = x[atomj][0]-x[atoml][0]; + rjl[1] = x[atomj][1]-x[atoml][1]; + rjl[2] = x[atomj][2]-x[atoml][2]; + rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2])); + lamdaijl = 4.0*kronecker(jtype,1) * + ((rho[ltype][1]-rjlmag)-(rho[itype][1]-rijmag)); + wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dwjl); + + const double invrjlm = 1.0/rjlmag; + const double invrijlm = invrijm*invrjlm; + const double invrjlm2 = invrjlm*invrjlm; + + cosijl = (-1.0*((rij[0]*rjl[0])+(rij[1]*rjl[1])+(rij[2]*rjl[2]))) * + invrijlm; + cosijl = MIN(cosijl,1.0); + cosijl = MAX(cosijl,-1.0); + + dcosijldri[0] = (-rjl[0]*invrijlm) - (cosijl*rij[0]*invrijm2); + dcosijldri[1] = (-rjl[1]*invrijlm) - (cosijl*rij[1]*invrijm2); + dcosijldri[2] = (-rjl[2]*invrijlm) - (cosijl*rij[2]*invrijm2); + dcosijldrj[0] = ((-rij[0]+rjl[0])*invrijlm) + + (cosijl*((rij[0]*invrijm2)-(rjl[0]*invrjlm2))); + dcosijldrj[1] = ((-rij[1]+rjl[1])*invrijlm) + + (cosijl*((rij[1]*invrijm2)-(rjl[1]*invrjlm2))); + dcosijldrj[2] = ((-rij[2]+rjl[2])*invrijlm) + + (cosijl*((rij[2]*invrijm2)-(rjl[2]*invrjlm2))); + dcosijldrl[0] = (rij[0]*invrijlm) + (cosijl*rjl[0]*invrjlm2); + dcosijldrl[1] = (rij[1]*invrijlm) + (cosijl*rjl[1]*invrjlm2); + dcosijldrl[2] = (rij[2]*invrijlm) + (cosijl*rjl[2]*invrjlm2); + + // evaluate splines g and derivatives dg + + g = gSpline(cosijl,NjiC+NjiH,jtype,&dgdc,&dgdN); + tmp2 = VA*.5*(tmp*wjl*dgdc*exp(lamdaijl)); + fi[0] = -tmp2*dcosijldri[0]; + fi[1] = -tmp2*dcosijldri[1]; + fi[2] = -tmp2*dcosijldri[2]; + fj[0] = -tmp2*dcosijldrj[0]; + fj[1] = -tmp2*dcosijldrj[1]; + fj[2] = -tmp2*dcosijldrj[2]; + fl[0] = -tmp2*dcosijldrl[0]; + fl[1] = -tmp2*dcosijldrl[1]; + fl[2] = -tmp2*dcosijldrl[2]; + + tmp2 = VA*.5*(tmp*wjl*g*exp(lamdaijl)*4.0*kronecker(jtype,1)); + fi[0] -= tmp2*(rij[0]*invrijm); + fi[1] -= tmp2*(rij[1]*invrijm); + fi[2] -= tmp2*(rij[2]*invrijm); + fj[0] -= tmp2*((-rjl[0]*invrjlm)-(rij[0]*invrijm)); + fj[1] -= tmp2*((-rjl[1]*invrjlm)-(rij[1]*invrijm)); + fj[2] -= tmp2*((-rjl[2]*invrjlm)-(rij[2]*invrijm)); + fl[0] -= tmp2*(rjl[0]*invrjlm); + fl[1] -= tmp2*(rjl[1]*invrjlm); + fl[2] -= tmp2*(rjl[2]*invrjlm); + + // coordination forces + // dwik forces + + tmp2 = VA*.5*(tmp*dwjl*g*exp(lamdaijl))*invrjlm; + fj[0] -= tmp2*rjl[0]; + fj[1] -= tmp2*rjl[1]; + fj[2] -= tmp2*rjl[2]; + fl[0] += tmp2*rjl[0]; + fl[1] += tmp2*rjl[1]; + fl[2] += tmp2*rjl[2]; + + // PIJ forces + + tmp2 = VA*.5*(tmp*dN2[ltype]*dwjl)*invrjlm; + fj[0] -= tmp2*rjl[0]; + fj[1] -= tmp2*rjl[1]; + fj[2] -= tmp2*rjl[2]; + fl[0] += tmp2*rjl[0]; + fl[1] += tmp2*rjl[1]; + fl[2] += tmp2*rjl[2]; + + // dgdN forces + + tmp2=VA*.5*(tmp*tmp3*dwjl)*invrjlm; + fj[0] -= tmp2*rjl[0]; + fj[1] -= tmp2*rjl[1]; + fj[2] -= tmp2*rjl[2]; + fl[0] += tmp2*rjl[0]; + fl[1] += tmp2*rjl[1]; + fl[2] += tmp2*rjl[2]; + + f[atomi][0] += fi[0]; f[atomi][1] += fi[1]; f[atomi][2] += fi[2]; + f[atomj][0] += fj[0]; f[atomj][1] += fj[1]; f[atomj][2] += fj[2]; + f[atoml][0] += fl[0]; f[atoml][1] += fl[1]; f[atoml][2] += fl[2]; + + if (vflag_atom) { + rlj[0] = -rjl[0]; rlj[1] = -rjl[1]; rlj[2] = -rjl[2]; + v_tally3_thr(atomi,atomj,atoml,fi,fl,rij,rlj,thr); + } + } + } + + // piRC forces + + dN3[0] = dN3piRC[0]; + dN3[1] = dN3piRC[1]; + dN3[2] = dN3piRC[2]; + + REBO_neighs_i = REBO_firstneigh[i]; + for (k = 0; k < REBO_numneigh[i]; k++) { + atomk = REBO_neighs_i[k]; + if (atomk != atomj) { + ktype = map[type[atomk]]; + rik[0] = x[atomi][0]-x[atomk][0]; + rik[1] = x[atomi][1]-x[atomk][1]; + rik[2] = x[atomi][2]-x[atomk][2]; + rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2])); + wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dwik); + Nki = nC[atomk]-(wik*kronecker(itype,0))+nH[atomk] - + (wik*kronecker(itype,1)); + SpN = Sp(Nki,Nmin,Nmax,dNki); + + tmp2 = VA*dN3[0]*dwik/rikmag; + f[atomi][0] -= tmp2*rik[0]; + f[atomi][1] -= tmp2*rik[1]; + f[atomi][2] -= tmp2*rik[2]; + f[atomk][0] += tmp2*rik[0]; + f[atomk][1] += tmp2*rik[1]; + f[atomk][2] += tmp2*rik[2]; + + if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpI*dwik*SpN)/rikmag; + f[atomi][0] -= tmp2*rik[0]; + f[atomi][1] -= tmp2*rik[1]; + f[atomi][2] -= tmp2*rik[2]; + f[atomk][0] += tmp2*rik[0]; + f[atomk][1] += tmp2*rik[1]; + f[atomk][2] += tmp2*rik[2]; + + if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr); + + if (fabs(dNki) > TOL) { + REBO_neighs_k = REBO_firstneigh[atomk]; + for (n = 0; n < REBO_numneigh[atomk]; n++) { + atomn = REBO_neighs_k[n]; + if (atomn != atomi) { + ntype = map[type[atomn]]; + rkn[0] = x[atomk][0]-x[atomn][0]; + rkn[1] = x[atomk][1]-x[atomn][1]; + rkn[2] = x[atomk][2]-x[atomn][2]; + rknmag = sqrt((rkn[0]*rkn[0])+(rkn[1]*rkn[1])+(rkn[2]*rkn[2])); + Sp(rknmag,rcmin[ktype][ntype],rcmax[ktype][ntype],dwkn); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpI*wik*dNki*dwkn)/rknmag; + f[atomk][0] -= tmp2*rkn[0]; + f[atomk][1] -= tmp2*rkn[1]; + f[atomk][2] -= tmp2*rkn[2]; + f[atomn][0] += tmp2*rkn[0]; + f[atomn][1] += tmp2*rkn[1]; + f[atomn][2] += tmp2*rkn[2]; + + if (vflag_atom) v_tally2_thr(atomk,atomn,-tmp2,rkn,thr); + } + } + } + } + } + + // piRC forces to J side + + REBO_neighs = REBO_firstneigh[j]; + for (l = 0; l < REBO_numneigh[j]; l++) { + atoml = REBO_neighs[l]; + if (atoml != atomi) { + ltype = map[type[atoml]]; + rjl[0] = x[atomj][0]-x[atoml][0]; + rjl[1] = x[atomj][1]-x[atoml][1]; + rjl[2] = x[atomj][2]-x[atoml][2]; + rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2])); + wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dwjl); + Nlj = nC[atoml]-(wjl*kronecker(jtype,0))+nH[atoml] - + (wjl*kronecker(jtype,1)); + SpN = Sp(Nlj,Nmin,Nmax,dNlj); + + tmp2 = VA*dN3[1]*dwjl/rjlmag; + f[atomj][0] -= tmp2*rjl[0]; + f[atomj][1] -= tmp2*rjl[1]; + f[atomj][2] -= tmp2*rjl[2]; + f[atoml][0] += tmp2*rjl[0]; + f[atoml][1] += tmp2*rjl[1]; + f[atoml][2] += tmp2*rjl[2]; + + if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*dwjl*SpN)/rjlmag; + f[atomj][0] -= tmp2*rjl[0]; + f[atomj][1] -= tmp2*rjl[1]; + f[atomj][2] -= tmp2*rjl[2]; + f[atoml][0] += tmp2*rjl[0]; + f[atoml][1] += tmp2*rjl[1]; + f[atoml][2] += tmp2*rjl[2]; + + if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr); + + if (fabs(dNlj) > TOL) { + REBO_neighs_l = REBO_firstneigh[atoml]; + for (n = 0; n < REBO_numneigh[atoml]; n++) { + atomn = REBO_neighs_l[n]; + if (atomn != atomj) { + ntype = map[type[atomn]]; + rln[0] = x[atoml][0]-x[atomn][0]; + rln[1] = x[atoml][1]-x[atomn][1]; + rln[2] = x[atoml][2]-x[atomn][2]; + rlnmag = sqrt((rln[0]*rln[0])+(rln[1]*rln[1])+(rln[2]*rln[2])); + Sp(rlnmag,rcmin[ltype][ntype],rcmax[ltype][ntype],dwln); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*wjl*dNlj*dwln)/rlnmag; + f[atoml][0] -= tmp2*rln[0]; + f[atoml][1] -= tmp2*rln[1]; + f[atoml][2] -= tmp2*rln[2]; + f[atomn][0] += tmp2*rln[0]; + f[atomn][1] += tmp2*rln[1]; + f[atomn][2] += tmp2*rln[2]; + + if (vflag_atom) v_tally2_thr(atoml,atomn,-tmp2,rln,thr); + } + } + } + } + } + + if (fabs(Tij) > TOL) { + dN3[0] = dN3Tij[0]; + dN3[1] = dN3Tij[1]; + dN3[2] = dN3Tij[2]; + atom2 = atomi; + atom3 = atomj; + r32[0] = x[atom3][0]-x[atom2][0]; + r32[1] = x[atom3][1]-x[atom2][1]; + r32[2] = x[atom3][2]-x[atom2][2]; + r32mag = sqrt((r32[0]*r32[0])+(r32[1]*r32[1])+(r32[2]*r32[2])); + r23[0] = -r32[0]; + r23[1] = -r32[1]; + r23[2] = -r32[2]; + r23mag = r32mag; + + REBO_neighs_i = REBO_firstneigh[i]; + for (k = 0; k < REBO_numneigh[i]; k++) { + atomk = REBO_neighs_i[k]; + atom1 = atomk; + ktype = map[type[atomk]]; + if (atomk != atomj) { + r21[0] = x[atom2][0]-x[atom1][0]; + r21[1] = x[atom2][1]-x[atom1][1]; + r21[2] = x[atom2][2]-x[atom1][2]; + r21mag = sqrt(r21[0]*r21[0] + r21[1]*r21[1] + r21[2]*r21[2]); + cos321 = ((r21[0]*rij[0])+(r21[1]*rij[1])+(r21[2]*rij[2])) / + (r21mag*rijmag); + cos321 = MIN(cos321,1.0); + cos321 = MAX(cos321,-1.0); + sin321 = sqrt(1.0 - cos321*cos321); + sink2i = 1.0/(sin321*sin321); + rik2i = 1.0/(r21mag*r21mag); + + if (sin321 != 0.0) { + rr = (rijmag*rijmag)-(r21mag*r21mag); + rjk[0] = r21[0]-rij[0]; + rjk[1] = r21[1]-rij[1]; + rjk[2] = r21[2]-rij[2]; + rjk2 = (rjk[0]*rjk[0])+(rjk[1]*rjk[1])+(rjk[2]*rjk[2]); + rijrik = 2.0*rijmag*r21mag; + rik2 = r21mag*r21mag; + dctik = (-rr+rjk2)/(rijrik*rik2); + dctij = (rr+rjk2)/(rijrik*rijmag*rijmag); + dctjk = -2.0/rijrik; + w21 = Sp(r21mag,rcmin[itype][ktype],rcmaxp[itype][ktype],dw21); + rikmag = r21mag; + rij2 = r32mag*r32mag; + rik2 = r21mag*r21mag; + costmp = 0.5*(rij2+rik2-rjk2)/rijmag/rikmag; + tspjik = Sp2(costmp,thmin,thmax,dtsjik); + dtsjik = -dtsjik; + + REBO_neighs_j = REBO_firstneigh[j]; + for (l = 0; l < REBO_numneigh[j]; l++) { + atoml = REBO_neighs_j[l]; + atom4 = atoml; + ltype = map[type[atoml]]; + if (!(atoml == atomi || atoml == atomk)) { + r34[0] = x[atom3][0]-x[atom4][0]; + r34[1] = x[atom3][1]-x[atom4][1]; + r34[2] = x[atom3][2]-x[atom4][2]; + r34mag = sqrt(r34[0]*r34[0] + r34[1]*r34[1] + r34[2]*r34[2]); + cos234 = -1.0*((rij[0]*r34[0])+(rij[1]*r34[1]) + + (rij[2]*r34[2]))/(rijmag*r34mag); + cos234 = MIN(cos234,1.0); + cos234 = MAX(cos234,-1.0); + sin234 = sqrt(1.0 - cos234*cos234); + sinl2i = 1.0/(sin234*sin234); + rjl2i = 1.0/(r34mag*r34mag); + + if (sin234 != 0.0) { + w34 = Sp(r34mag,rcmin[jtype][ltype], + rcmaxp[jtype][ltype],dw34); + rr = (r23mag*r23mag)-(r34mag*r34mag); + ril[0] = r23[0]+r34[0]; + ril[1] = r23[1]+r34[1]; + ril[2] = r23[2]+r34[2]; + ril2 = (ril[0]*ril[0])+(ril[1]*ril[1])+(ril[2]*ril[2]); + rijrjl = 2.0*r23mag*r34mag; + rjl2 = r34mag*r34mag; + dctjl = (-rr+ril2)/(rijrjl*rjl2); + dctji = (rr+ril2)/(rijrjl*r23mag*r23mag); + dctil = -2.0/rijrjl; + rjlmag = r34mag; + rjl2 = r34mag*r34mag; + costmp = 0.5*(rij2+rjl2-ril2)/rijmag/rjlmag; + tspijl = Sp2(costmp,thmin,thmax,dtsijl); + dtsijl = -dtsijl; //need minus sign + prefactor = VA*Tij; + + cross321[0] = (r32[1]*r21[2])-(r32[2]*r21[1]); + cross321[1] = (r32[2]*r21[0])-(r32[0]*r21[2]); + cross321[2] = (r32[0]*r21[1])-(r32[1]*r21[0]); + cross234[0] = (r23[1]*r34[2])-(r23[2]*r34[1]); + cross234[1] = (r23[2]*r34[0])-(r23[0]*r34[2]); + cross234[2] = (r23[0]*r34[1])-(r23[1]*r34[0]); + + cwnum = (cross321[0]*cross234[0]) + + (cross321[1]*cross234[1])+(cross321[2]*cross234[2]); + cwnom = r21mag*r34mag*r23mag*r23mag*sin321*sin234; + om1234 = cwnum/cwnom; + cw = om1234; + Etmp += ((1.0-(om1234*om1234))*w21*w34) * + (1.0-tspjik)*(1.0-tspijl); + + dt1dik = (rik2i)-(dctik*sink2i*cos321); + dt1djk = (-dctjk*sink2i*cos321); + dt1djl = (rjl2i)-(dctjl*sinl2i*cos234); + dt1dil = (-dctil*sinl2i*cos234); + dt1dij = (2.0/(r23mag*r23mag)) - + (dctij*sink2i*cos321)-(dctji*sinl2i*cos234); + + dt2dik[0] = (-r23[2]*cross234[1])+(r23[1]*cross234[2]); + dt2dik[1] = (-r23[0]*cross234[2])+(r23[2]*cross234[0]); + dt2dik[2] = (-r23[1]*cross234[0])+(r23[0]*cross234[1]); + + dt2djl[0] = (-r23[1]*cross321[2])+(r23[2]*cross321[1]); + dt2djl[1] = (-r23[2]*cross321[0])+(r23[0]*cross321[2]); + dt2djl[2] = (-r23[0]*cross321[1])+(r23[1]*cross321[0]); + + dt2dij[0] = (r21[2]*cross234[1]) - + (r34[2]*cross321[1])-(r21[1]*cross234[2]) + + (r34[1]*cross321[2]); + dt2dij[1] = (r21[0]*cross234[2]) - + (r34[0]*cross321[2])-(r21[2]*cross234[0]) + + (r34[2]*cross321[0]); + dt2dij[2] = (r21[1]*cross234[0]) - + (r34[1]*cross321[0])-(r21[0]*cross234[1]) + + (r34[0]*cross321[1]); + + aa = (prefactor*2.0*cw/cwnom)*w21*w34 * + (1.0-tspjik)*(1.0-tspijl); + aaa1 = -prefactor*(1.0-(om1234*om1234)) * + (1.0-tspjik)*(1.0-tspijl); + aaa2 = aaa1*w21*w34; + at2 = aa*cwnum; + + fcijpc = (-dt1dij*at2)+(aaa2*dtsjik*dctij*(1.0-tspijl)) + + (aaa2*dtsijl*dctji*(1.0-tspjik)); + fcikpc = (-dt1dik*at2)+(aaa2*dtsjik*dctik*(1.0-tspijl)); + fcjlpc = (-dt1djl*at2)+(aaa2*dtsijl*dctjl*(1.0-tspjik)); + fcjkpc = (-dt1djk*at2)+(aaa2*dtsjik*dctjk*(1.0-tspijl)); + fcilpc = (-dt1dil*at2)+(aaa2*dtsijl*dctil*(1.0-tspjik)); + + F23[0] = (fcijpc*r23[0])+(aa*dt2dij[0]); + F23[1] = (fcijpc*r23[1])+(aa*dt2dij[1]); + F23[2] = (fcijpc*r23[2])+(aa*dt2dij[2]); + + F12[0] = (fcikpc*r21[0])+(aa*dt2dik[0]); + F12[1] = (fcikpc*r21[1])+(aa*dt2dik[1]); + F12[2] = (fcikpc*r21[2])+(aa*dt2dik[2]); + + F34[0] = (fcjlpc*r34[0])+(aa*dt2djl[0]); + F34[1] = (fcjlpc*r34[1])+(aa*dt2djl[1]); + F34[2] = (fcjlpc*r34[2])+(aa*dt2djl[2]); + + F31[0] = (fcjkpc*rjk[0]); + F31[1] = (fcjkpc*rjk[1]); + F31[2] = (fcjkpc*rjk[2]); + + F24[0] = (fcilpc*ril[0]); + F24[1] = (fcilpc*ril[1]); + F24[2] = (fcilpc*ril[2]); + + f1[0] = -F12[0]-F31[0]; + f1[1] = -F12[1]-F31[1]; + f1[2] = -F12[2]-F31[2]; + f2[0] = F23[0]+F12[0]+F24[0]; + f2[1] = F23[1]+F12[1]+F24[1]; + f2[2] = F23[2]+F12[2]+F24[2]; + f3[0] = -F23[0]+F34[0]+F31[0]; + f3[1] = -F23[1]+F34[1]+F31[1]; + f3[2] = -F23[2]+F34[2]+F31[2]; + f4[0] = -F34[0]-F24[0]; + f4[1] = -F34[1]-F24[1]; + f4[2] = -F34[2]-F24[2]; + + // coordination forces + + tmp2 = VA*Tij*((1.0-(om1234*om1234))) * + (1.0-tspjik)*(1.0-tspijl)*dw21*w34/r21mag; + f2[0] -= tmp2*r21[0]; + f2[1] -= tmp2*r21[1]; + f2[2] -= tmp2*r21[2]; + f1[0] += tmp2*r21[0]; + f1[1] += tmp2*r21[1]; + f1[2] += tmp2*r21[2]; + + tmp2 = VA*Tij*((1.0-(om1234*om1234))) * + (1.0-tspjik)*(1.0-tspijl)*w21*dw34/r34mag; + f3[0] -= tmp2*r34[0]; + f3[1] -= tmp2*r34[1]; + f3[2] -= tmp2*r34[2]; + f4[0] += tmp2*r34[0]; + f4[1] += tmp2*r34[1]; + f4[2] += tmp2*r34[2]; + + f[atom1][0] += f1[0]; f[atom1][1] += f1[1]; + f[atom1][2] += f1[2]; + f[atom2][0] += f2[0]; f[atom2][1] += f2[1]; + f[atom2][2] += f2[2]; + f[atom3][0] += f3[0]; f[atom3][1] += f3[1]; + f[atom3][2] += f3[2]; + f[atom4][0] += f4[0]; f[atom4][1] += f4[1]; + f[atom4][2] += f4[2]; + + if (vflag_atom) { + r13[0] = -rjk[0]; r13[1] = -rjk[1]; r13[2] = -rjk[2]; + r43[0] = -r34[0]; r43[1] = -r34[1]; r43[2] = -r34[2]; + v_tally4_thr(atom1,atom2,atom3,atom4,f1,f2,f4,r13,r23,r43,thr); + } + } + } + } + } + } + } + + REBO_neighs = REBO_firstneigh[i]; + for (k = 0; k < REBO_numneigh[i]; k++) { + atomk = REBO_neighs[k]; + if (atomk != atomj) { + ktype = map[type[atomk]]; + rik[0] = x[atomi][0]-x[atomk][0]; + rik[1] = x[atomi][1]-x[atomk][1]; + rik[2] = x[atomi][2]-x[atomk][2]; + rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2])); + wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dwik); + Nki = nC[atomk]-(wik*kronecker(itype,0))+nH[atomk] - + (wik*kronecker(itype,1)); + SpN = Sp(Nki,Nmin,Nmax,dNki); + + tmp2 = VA*dN3[0]*dwik*Etmp/rikmag; + f[atomi][0] -= tmp2*rik[0]; + f[atomi][1] -= tmp2*rik[1]; + f[atomi][2] -= tmp2*rik[2]; + f[atomk][0] += tmp2*rik[0]; + f[atomk][1] += tmp2*rik[1]; + f[atomk][2] += tmp2*rik[2]; + + if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpI*dwik*SpN)*Etmp/rikmag; + f[atomi][0] -= tmp2*rik[0]; + f[atomi][1] -= tmp2*rik[1]; + f[atomi][2] -= tmp2*rik[2]; + f[atomk][0] += tmp2*rik[0]; + f[atomk][1] += tmp2*rik[1]; + f[atomk][2] += tmp2*rik[2]; + + if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr); + + if (fabs(dNki) >TOL) { + REBO_neighs_k = REBO_firstneigh[atomk]; + for (n = 0; n < REBO_numneigh[atomk]; n++) { + atomn = REBO_neighs_k[n]; + ntype = map[type[atomn]]; + if (atomn !=atomi) { + rkn[0] = x[atomk][0]-x[atomn][0]; + rkn[1] = x[atomk][1]-x[atomn][1]; + rkn[2] = x[atomk][2]-x[atomn][2]; + rknmag = sqrt((rkn[0]*rkn[0])+(rkn[1]*rkn[1])+(rkn[2]*rkn[2])); + Sp(rknmag,rcmin[ktype][ntype],rcmax[ktype][ntype],dwkn); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpI*wik*dNki*dwkn)*Etmp/rknmag; + f[atomk][0] -= tmp2*rkn[0]; + f[atomk][1] -= tmp2*rkn[1]; + f[atomk][2] -= tmp2*rkn[2]; + f[atomn][0] += tmp2*rkn[0]; + f[atomn][1] += tmp2*rkn[1]; + f[atomn][2] += tmp2*rkn[2]; + + if (vflag_atom) v_tally2_thr(atomk,atomn,-tmp2,rkn,thr); + } + } + } + } + } + + // Tij forces + + REBO_neighs = REBO_firstneigh[j]; + for (l = 0; l < REBO_numneigh[j]; l++) { + atoml = REBO_neighs[l]; + if (atoml != atomi) { + ltype = map[type[atoml]]; + rjl[0] = x[atomj][0]-x[atoml][0]; + rjl[1] = x[atomj][1]-x[atoml][1]; + rjl[2] = x[atomj][2]-x[atoml][2]; + rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2])); + wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dwjl); + Nlj = nC[atoml]-(wjl*kronecker(jtype,0))+nH[atoml] - + (wjl*kronecker(jtype,1)); + SpN = Sp(Nlj,Nmin,Nmax,dNlj); + + tmp2 = VA*dN3[1]*dwjl*Etmp/rjlmag; + f[atomj][0] -= tmp2*rjl[0]; + f[atomj][1] -= tmp2*rjl[1]; + f[atomj][2] -= tmp2*rjl[2]; + f[atoml][0] += tmp2*rjl[0]; + f[atoml][1] += tmp2*rjl[1]; + f[atoml][2] += tmp2*rjl[2]; + + if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*dwjl*SpN)*Etmp/rjlmag; + f[atomj][0] -= tmp2*rjl[0]; + f[atomj][1] -= tmp2*rjl[1]; + f[atomj][2] -= tmp2*rjl[2]; + f[atoml][0] += tmp2*rjl[0]; + f[atoml][1] += tmp2*rjl[1]; + f[atoml][2] += tmp2*rjl[2]; + + if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr); + + if (fabs(dNlj) > TOL) { + REBO_neighs_l = REBO_firstneigh[atoml]; + for (n = 0; n < REBO_numneigh[atoml]; n++) { + atomn = REBO_neighs_l[n]; + ntype = map[type[atomn]]; + if (atomn != atomj) { + rln[0] = x[atoml][0]-x[atomn][0]; + rln[1] = x[atoml][1]-x[atomn][1]; + rln[2] = x[atoml][2]-x[atomn][2]; + rlnmag = sqrt((rln[0]*rln[0])+(rln[1]*rln[1])+(rln[2]*rln[2])); + Sp(rlnmag,rcmin[ltype][ntype],rcmax[ltype][ntype],dwln); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*wjl*dNlj*dwln)*Etmp/rlnmag; + f[atoml][0] -= tmp2*rln[0]; + f[atoml][1] -= tmp2*rln[1]; + f[atoml][2] -= tmp2*rln[2]; + f[atomn][0] += tmp2*rln[0]; + f[atomn][1] += tmp2*rln[1]; + f[atomn][2] += tmp2*rln[2]; + + if (vflag_atom) v_tally2_thr(atoml,atomn,-tmp2,rln,thr); + } + } + } + } + } + } + } + + return Stb; +} + +/* ---------------------------------------------------------------------- + REBO forces and energy +------------------------------------------------------------------------- */ + +void PairAIREBOOMP::FREBO_thr(int ifrom, int ito, int evflag, int eflag, + int vflag_atom, ThrData * const thr) +{ + int i,j,k,m,ii,itype,jtype,itag,jtag; + double delx,dely,delz,evdwl,fpair,xtmp,ytmp,ztmp; + double rsq,rij,wij; + double Qij,Aij,alphaij,VR,pre,dVRdi,VA,term,bij,dVAdi,dVA; + double dwij,del[3]; + int *ilist,*REBO_neighs; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + int *type = atom->type; + int *tag = atom->tag; + int nlocal = atom->nlocal; + + ilist = list->ilist; + + // two-body interactions from REBO neighbor list, skip half of them + + for (ii = ifrom; ii < ito; ii++) { + i = ilist[ii]; + itag = tag[i]; + itype = map[type[i]]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + REBO_neighs = REBO_firstneigh[i]; + + for (k = 0; k < REBO_numneigh[i]; k++) { + j = REBO_neighs[k]; + jtag = tag[j]; + + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (x[j][2] < ztmp) continue; + if (x[j][2] == ztmp && x[j][1] < ytmp) continue; + if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue; + } + + jtype = map[type[j]]; + + delx = x[i][0] - x[j][0]; + dely = x[i][1] - x[j][1]; + delz = x[i][2] - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + rij = sqrt(rsq); + wij = Sp(rij,rcmin[itype][jtype],rcmax[itype][jtype],dwij); + if (wij <= TOL) continue; + + Qij = Q[itype][jtype]; + Aij = A[itype][jtype]; + alphaij = alpha[itype][jtype]; + + VR = wij*(1.0+(Qij/rij)) * Aij*exp(-alphaij*rij); + pre = wij*Aij * exp(-alphaij*rij); + dVRdi = pre * ((-alphaij)-(Qij/rsq)-(Qij*alphaij/rij)); + dVRdi += VR/wij * dwij; + + VA = dVA = 0.0; + for (m = 0; m < 3; m++) { + term = -wij * BIJc[itype][jtype][m] * exp(-Beta[itype][jtype][m]*rij); + VA += term; + dVA += -Beta[itype][jtype][m] * term; + } + dVA += VA/wij * dwij; + del[0] = delx; + del[1] = dely; + del[2] = delz; + bij = bondorder_thr(i,j,del,rij,VA,vflag_atom,thr); + dVAdi = bij*dVA; + + fpair = -(dVRdi+dVAdi) / rij; + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + + if (eflag) evdwl = VR + bij*VA; + if (evflag) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } +} + +/* ---------------------------------------------------------------------- + compute LJ forces and energy + find 3- and 4-step paths between atoms I,J via REBO neighbor lists +------------------------------------------------------------------------- */ + +void PairAIREBOOMP::FLJ_thr(int ifrom, int ito, int evflag, int eflag, + int vflag_atom, ThrData * const thr) +{ + int i,j,k,m,ii,jj,kk,mm,jnum,itype,jtype,ktype,mtype,itag,jtag; + int atomi,atomj,atomk,atomm; + int testpath,npath,done; + double evdwl,fpair,xtmp,ytmp,ztmp; + double rsq,best,wik,wkm,cij,rij,dwij,dwik,dwkj,dwkm,dwmj; + double delij[3],rijsq,delik[3],rik,deljk[3]; + double rkj,wkj,dC,VLJ,dVLJ,VA,Str,dStr,Stb; + double vdw,slw,dvdw,dslw,drij,swidth,tee,tee2; + double rljmin,rljmax,sigcut,sigmin,sigwid; + double delkm[3],rkm,deljm[3],rmj,wmj,r2inv,r6inv,scale,delscale[3]; + int *ilist,*jlist,*numneigh,**firstneigh; + int *REBO_neighs_i,*REBO_neighs_k; + double delikS[3],deljkS[3],delkmS[3],deljmS[3],delimS[3]; + double rikS,rkjS,rkmS,rmjS,wikS,dwikS; + double wkjS,dwkjS,wkmS,dwkmS,wmjS,dwmjS; + double fpair1,fpair2,fpair3; + double fi[3],fj[3],fk[3],fm[3]; + + // I-J interaction from full neighbor list + // skip 1/2 of interactions since only consider each pair once + + evdwl = 0.0; + rljmin = 0.0; + rljmax = 0.0; + sigcut = 0.0; + sigmin = 0.0; + sigwid = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + int *tag = atom->tag; + int *type = atom->type; + int nlocal = atom->nlocal; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = ifrom; ii < ito; ii++) { + i = ilist[ii]; + itag = tag[i]; + itype = map[type[i]]; + atomi = i; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtag = tag[j]; + + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (x[j][2] < ztmp) continue; + if (x[j][2] == ztmp && x[j][1] < ytmp) continue; + if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue; + } + + jtype = map[type[j]]; + atomj = j; + + delij[0] = xtmp - x[j][0]; + delij[1] = ytmp - x[j][1]; + delij[2] = ztmp - x[j][2]; + rijsq = delij[0]*delij[0] + delij[1]*delij[1] + delij[2]*delij[2]; + + // if outside of LJ cutoff, skip + // if outside of 4-path cutoff, best = 0.0, no need to test paths + // if outside of 2-path cutoff but inside 4-path cutoff, + // best = 0.0, test 3-,4-paths + // if inside 2-path cutoff, best = wij, only test 3-,4-paths if best < 1 + + if (rijsq >= cutljsq[itype][jtype]) continue; + rij = sqrt(rijsq); + if (rij >= cut3rebo) { + best = 0.0; + testpath = 0; + } else if (rij >= rcmax[itype][jtype]) { + best = 0.0; + testpath = 1; + } else { + best = Sp(rij,rcmin[itype][jtype],rcmax[itype][jtype],dwij); + npath = 2; + if (best < 1.0) testpath = 1; + else testpath = 0; + } + + done = 0; + if (testpath) { + + // test all 3-body paths = I-K-J + // I-K interactions come from atom I's REBO neighbors + // if wik > current best, compute wkj + // if best = 1.0, done + + REBO_neighs_i = REBO_firstneigh[i]; + for (kk = 0; kk < REBO_numneigh[i] && done==0; kk++) { + k = REBO_neighs_i[kk]; + if (k == j) continue; + ktype = map[type[k]]; + + delik[0] = x[i][0] - x[k][0]; + delik[1] = x[i][1] - x[k][1]; + delik[2] = x[i][2] - x[k][2]; + rsq = delik[0]*delik[0] + delik[1]*delik[1] + delik[2]*delik[2]; + if (rsq < rcmaxsq[itype][ktype]) { + rik = sqrt(rsq); + wik = Sp(rik,rcmin[itype][ktype],rcmax[itype][ktype],dwik); + } else wik = 0.0; + + if (wik > best) { + deljk[0] = x[j][0] - x[k][0]; + deljk[1] = x[j][1] - x[k][1]; + deljk[2] = x[j][2] - x[k][2]; + rsq = deljk[0]*deljk[0] + deljk[1]*deljk[1] + deljk[2]*deljk[2]; + if (rsq < rcmaxsq[ktype][jtype]) { + rkj = sqrt(rsq); + wkj = Sp(rkj,rcmin[ktype][jtype],rcmax[ktype][jtype],dwkj); + if (wik*wkj > best) { + best = wik*wkj; + npath = 3; + atomk = k; + delikS[0] = delik[0]; + delikS[1] = delik[1]; + delikS[2] = delik[2]; + rikS = rik; + wikS = wik; + dwikS = dwik; + deljkS[0] = deljk[0]; + deljkS[1] = deljk[1]; + deljkS[2] = deljk[2]; + rkjS = rkj; + wkjS = wkj; + dwkjS = dwkj; + if (best == 1.0) { + done = 1; + break; + } + } + } + + // test all 4-body paths = I-K-M-J + // K-M interactions come from atom K's REBO neighbors + // if wik*wkm > current best, compute wmj + // if best = 1.0, done + + REBO_neighs_k = REBO_firstneigh[k]; + for (mm = 0; mm < REBO_numneigh[k] && done==0; mm++) { + m = REBO_neighs_k[mm]; + if (m == i || m == j) continue; + mtype = map[type[m]]; + delkm[0] = x[k][0] - x[m][0]; + delkm[1] = x[k][1] - x[m][1]; + delkm[2] = x[k][2] - x[m][2]; + rsq = delkm[0]*delkm[0] + delkm[1]*delkm[1] + delkm[2]*delkm[2]; + if (rsq < rcmaxsq[ktype][mtype]) { + rkm = sqrt(rsq); + wkm = Sp(rkm,rcmin[ktype][mtype],rcmax[ktype][mtype],dwkm); + } else wkm = 0.0; + + if (wik*wkm > best) { + deljm[0] = x[j][0] - x[m][0]; + deljm[1] = x[j][1] - x[m][1]; + deljm[2] = x[j][2] - x[m][2]; + rsq = deljm[0]*deljm[0] + deljm[1]*deljm[1] + + deljm[2]*deljm[2]; + if (rsq < rcmaxsq[mtype][jtype]) { + rmj = sqrt(rsq); + wmj = Sp(rmj,rcmin[mtype][jtype],rcmax[mtype][jtype],dwmj); + if (wik*wkm*wmj > best) { + best = wik*wkm*wmj; + npath = 4; + atomk = k; + delikS[0] = delik[0]; + delikS[1] = delik[1]; + delikS[2] = delik[2]; + rikS = rik; + wikS = wik; + dwikS = dwik; + atomm = m; + delkmS[0] = delkm[0]; + delkmS[1] = delkm[1]; + delkmS[2] = delkm[2]; + rkmS = rkm; + wkmS = wkm; + dwkmS = dwkm; + deljmS[0] = deljm[0]; + deljmS[1] = deljm[1]; + deljmS[2] = deljm[2]; + rmjS = rmj; + wmjS = wmj; + dwmjS = dwmj; + if (best == 1.0) { + done = 1; + break; + } + } + } + } + } + } + } + } + + cij = 1.0 - best; + if (cij == 0.0) continue; + + // compute LJ forces and energy + + sigwid = 0.84; + sigcut = 3.0; + sigmin = sigcut - sigwid; + + rljmin = sigma[itype][jtype]; + rljmax = sigcut * rljmin; + rljmin = sigmin * rljmin; + + if (rij > rljmax) { + slw = 0.0; + dslw = 0.0; + } else if (rij > rljmin) { + drij = rij - rljmin; + swidth = rljmax - rljmin; + tee = drij / swidth; + tee2 = tee*tee; + slw = 1.0 - tee2 * (3.0 - 2.0 * tee); + dslw = 6.0 * tee * (1.0 - tee) / rij / swidth; + } else { + slw = 1.0; + dslw = 0.0; + } + + r2inv = 1.0/rijsq; + r6inv = r2inv*r2inv*r2inv; + + vdw = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); + dvdw = -r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]) / rij; + + // VLJ now becomes vdw * slw, derivaties, etc. + + VLJ = vdw * slw; + dVLJ = dvdw * slw + vdw * dslw; + + Str = Sp2(rij,rcLJmin[itype][jtype],rcLJmax[itype][jtype],dStr); + VA = Str*cij*VLJ; + if (Str > 0.0) { + scale = rcmin[itype][jtype] / rij; + delscale[0] = scale * delij[0]; + delscale[1] = scale * delij[1]; + delscale[2] = scale * delij[2]; + Stb = bondorderLJ_thr(i,j,delscale,rcmin[itype][jtype],VA, + delij,rij,vflag_atom,thr); + } else Stb = 0.0; + + fpair = -(dStr * (Stb*cij*VLJ - cij*VLJ) + + dVLJ * (Str*Stb*cij + cij - Str*cij)) / rij; + + f[i][0] += delij[0]*fpair; + f[i][1] += delij[1]*fpair; + f[i][2] += delij[2]*fpair; + f[j][0] -= delij[0]*fpair; + f[j][1] -= delij[1]*fpair; + f[j][2] -= delij[2]*fpair; + + if (eflag) evdwl = VA*Stb + (1.0-Str)*cij*VLJ; + if (evflag) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1, + evdwl,0.0,fpair,delij[0],delij[1],delij[2],thr); + + if (cij < 1.0) { + dC = Str*Stb*VLJ + (1.0-Str)*VLJ; + if (npath == 2) { + fpair = dC*dwij / rij; + f[atomi][0] += delij[0]*fpair; + f[atomi][1] += delij[1]*fpair; + f[atomi][2] += delij[2]*fpair; + f[atomj][0] -= delij[0]*fpair; + f[atomj][1] -= delij[1]*fpair; + f[atomj][2] -= delij[2]*fpair; + + if (vflag_atom) v_tally2_thr(atomi,atomj,fpair,delij,thr); + + } else if (npath == 3) { + fpair1 = dC*dwikS*wkjS / rikS; + fi[0] = delikS[0]*fpair1; + fi[1] = delikS[1]*fpair1; + fi[2] = delikS[2]*fpair1; + fpair2 = dC*wikS*dwkjS / rkjS; + fj[0] = deljkS[0]*fpair2; + fj[1] = deljkS[1]*fpair2; + fj[2] = deljkS[2]*fpair2; + + f[atomi][0] += fi[0]; + f[atomi][1] += fi[1]; + f[atomi][2] += fi[2]; + f[atomj][0] += fj[0]; + f[atomj][1] += fj[1]; + f[atomj][2] += fj[2]; + f[atomk][0] -= fi[0] + fj[0]; + f[atomk][1] -= fi[1] + fj[1]; + f[atomk][2] -= fi[2] + fj[2]; + + if (vflag_atom) + v_tally3_thr(atomi,atomj,atomk,fi,fj,delikS,deljkS,thr); + + } else { + fpair1 = dC*dwikS*wkmS*wmjS / rikS; + fi[0] = delikS[0]*fpair1; + fi[1] = delikS[1]*fpair1; + fi[2] = delikS[2]*fpair1; + + fpair2 = dC*wikS*dwkmS*wmjS / rkmS; + fk[0] = delkmS[0]*fpair2 - fi[0]; + fk[1] = delkmS[1]*fpair2 - fi[1]; + fk[2] = delkmS[2]*fpair2 - fi[2]; + + fpair3 = dC*wikS*wkmS*dwmjS / rmjS; + fj[0] = deljmS[0]*fpair3; + fj[1] = deljmS[1]*fpair3; + fj[2] = deljmS[2]*fpair3; + + fm[0] = -delkmS[0]*fpair2 - fj[0]; + fm[1] = -delkmS[1]*fpair2 - fj[1]; + fm[2] = -delkmS[2]*fpair2 - fj[2]; + + f[atomi][0] += fi[0]; + f[atomi][1] += fi[1]; + f[atomi][2] += fi[2]; + f[atomj][0] += fj[0]; + f[atomj][1] += fj[1]; + f[atomj][2] += fj[2]; + f[atomk][0] += fk[0]; + f[atomk][1] += fk[1]; + f[atomk][2] += fk[2]; + f[atomm][0] += fm[0]; + f[atomm][1] += fm[1]; + f[atomm][2] += fm[2]; + + if (vflag_atom) { + delimS[0] = delikS[0] + delkmS[0]; + delimS[1] = delikS[1] + delkmS[1]; + delimS[2] = delikS[2] + delkmS[2]; + v_tally4_thr(atomi,atomj,atomk,atomm,fi,fj,fk,delimS,deljmS,delkmS,thr); + } + } + } + } + } +} + +/* ---------------------------------------------------------------------- + torsional forces and energy +------------------------------------------------------------------------- */ + +void PairAIREBOOMP::TORSION_thr(int ifrom, int ito, + int evflag, int eflag, ThrData * const thr) +{ + int i,j,k,l,ii,itag,jtag; + double evdwl,fpair,xtmp,ytmp,ztmp; + double cos321; + double w21,dw21,cos234,w34,dw34; + double cross321[3],cross321mag,cross234[3],cross234mag; + double w23,dw23,cw2,ekijl,Ec; + double cw,cwnum,cwnom; + double rij,rij2,rik,rjl,tspjik,dtsjik,tspijl,dtsijl,costmp,fcpc; + double sin321,sin234,rjk2,rik2,ril2,rjl2; + double rjk,ril; + double Vtors; + double dndij[3],tmpvec[3],dndik[3],dndjl[3]; + double dcidij,dcidik,dcidjk,dcjdji,dcjdjl,dcjdil; + double dsidij,dsidik,dsidjk,dsjdji,dsjdjl,dsjdil; + double dxidij,dxidik,dxidjk,dxjdji,dxjdjl,dxjdil; + double ddndij,ddndik,ddndjk,ddndjl,ddndil,dcwddn,dcwdn,dvpdcw,Ftmp[3]; + double del32[3],rsq,r32,del23[3],del21[3],r21; + double deljk[3],del34[3],delil[3],delkl[3],r23,r34; + double fi[3],fj[3],fk[3],fl[3]; + int itype,jtype,ktype,ltype,kk,ll,jj; + int *ilist,*REBO_neighs_i,*REBO_neighs_j; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + int *type = atom->type; + int *tag = atom->tag; + + ilist = list->ilist; + + for (ii = ifrom; ii < ito; ii++) { + i = ilist[ii]; + itag = tag[i]; + itype = map[type[i]]; + if (itype != 0) continue; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + REBO_neighs_i = REBO_firstneigh[i]; + + for (jj = 0; jj < REBO_numneigh[i]; jj++) { + j = REBO_neighs_i[jj]; + jtag = tag[j]; + + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (x[j][2] < ztmp) continue; + if (x[j][2] == ztmp && x[j][1] < ytmp) continue; + if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue; + } + + jtype = map[type[j]]; + if (jtype != 0) continue; + + del32[0] = x[j][0]-x[i][0]; + del32[1] = x[j][1]-x[i][1]; + del32[2] = x[j][2]-x[i][2]; + rsq = del32[0]*del32[0] + del32[1]*del32[1] + del32[2]*del32[2]; + r32 = sqrt(rsq); + del23[0] = -del32[0]; + del23[1] = -del32[1]; + del23[2] = -del32[2]; + r23 = r32; + w23 = Sp(r23,rcmin[itype][jtype],rcmax[itype][jtype],dw23); + + for (kk = 0; kk < REBO_numneigh[i]; kk++) { + k = REBO_neighs_i[kk]; + ktype = map[type[k]]; + if (k == j) continue; + del21[0] = x[i][0]-x[k][0]; + del21[1] = x[i][1]-x[k][1]; + del21[2] = x[i][2]-x[k][2]; + rsq = del21[0]*del21[0] + del21[1]*del21[1] + del21[2]*del21[2]; + r21 = sqrt(rsq); + cos321 = - ((del21[0]*del32[0]) + (del21[1]*del32[1]) + + (del21[2]*del32[2])) / (r21*r32); + cos321 = MIN(cos321,1.0); + cos321 = MAX(cos321,-1.0); + sin321 = sqrt(1.0 - cos321*cos321); + if (sin321 < TOL) continue; + + deljk[0] = del21[0]-del23[0]; + deljk[1] = del21[1]-del23[1]; + deljk[2] = del21[2]-del23[2]; + rjk2 = deljk[0]*deljk[0] + deljk[1]*deljk[1] + deljk[2]*deljk[2]; + rjk=sqrt(rjk2); + rik2 = r21*r21; + w21 = Sp(r21,rcmin[itype][ktype],rcmax[itype][ktype],dw21); + + rij = r32; + rik = r21; + rij2 = r32*r32; + rik2 = r21*r21; + costmp = 0.5*(rij2+rik2-rjk2)/rij/rik; + tspjik = Sp2(costmp,thmin,thmax,dtsjik); + dtsjik = -dtsjik; + + REBO_neighs_j = REBO_firstneigh[j]; + for (ll = 0; ll < REBO_numneigh[j]; ll++) { + l = REBO_neighs_j[ll]; + ltype = map[type[l]]; + if (l == i || l == k) continue; + del34[0] = x[j][0]-x[l][0]; + del34[1] = x[j][1]-x[l][1]; + del34[2] = x[j][2]-x[l][2]; + rsq = del34[0]*del34[0] + del34[1]*del34[1] + del34[2]*del34[2]; + r34 = sqrt(rsq); + cos234 = (del32[0]*del34[0] + del32[1]*del34[1] + + del32[2]*del34[2]) / (r32*r34); + cos234 = MIN(cos234,1.0); + cos234 = MAX(cos234,-1.0); + sin234 = sqrt(1.0 - cos234*cos234); + if (sin234 < TOL) continue; + w34 = Sp(r34,rcmin[jtype][ltype],rcmax[jtype][ltype],dw34); + delil[0] = del23[0] + del34[0]; + delil[1] = del23[1] + del34[1]; + delil[2] = del23[2] + del34[2]; + ril2 = delil[0]*delil[0] + delil[1]*delil[1] + delil[2]*delil[2]; + ril=sqrt(ril2); + rjl2 = r34*r34; + + rjl = r34; + rjl2 = r34*r34; + costmp = 0.5*(rij2+rjl2-ril2)/rij/rjl; + tspijl = Sp2(costmp,thmin,thmax,dtsijl); + dtsijl = -dtsijl; //need minus sign + cross321[0] = (del32[1]*del21[2])-(del32[2]*del21[1]); + cross321[1] = (del32[2]*del21[0])-(del32[0]*del21[2]); + cross321[2] = (del32[0]*del21[1])-(del32[1]*del21[0]); + cross321mag = sqrt(cross321[0]*cross321[0]+ + cross321[1]*cross321[1]+ + cross321[2]*cross321[2]); + cross234[0] = (del23[1]*del34[2])-(del23[2]*del34[1]); + cross234[1] = (del23[2]*del34[0])-(del23[0]*del34[2]); + cross234[2] = (del23[0]*del34[1])-(del23[1]*del34[0]); + cross234mag = sqrt(cross234[0]*cross234[0]+ + cross234[1]*cross234[1]+ + cross234[2]*cross234[2]); + cwnum = (cross321[0]*cross234[0]) + + (cross321[1]*cross234[1])+(cross321[2]*cross234[2]); + cwnom = r21*r34*r32*r32*sin321*sin234; + cw = cwnum/cwnom; + + cw2 = (.5*(1.0-cw)); + ekijl = epsilonT[ktype][ltype]; + Ec = 256.0*ekijl/405.0; + Vtors = (Ec*(pow(cw2,5.0)))-(ekijl/10.0); + + if (eflag) evdwl = Vtors*w21*w23*w34*(1.0-tspjik)*(1.0-tspijl); + + dndij[0] = (cross234[1]*del21[2])-(cross234[2]*del21[1]); + dndij[1] = (cross234[2]*del21[0])-(cross234[0]*del21[2]); + dndij[2] = (cross234[0]*del21[1])-(cross234[1]*del21[0]); + + tmpvec[0] = (del34[1]*cross321[2])-(del34[2]*cross321[1]); + tmpvec[1] = (del34[2]*cross321[0])-(del34[0]*cross321[2]); + tmpvec[2] = (del34[0]*cross321[1])-(del34[1]*cross321[0]); + + dndij[0] = dndij[0]+tmpvec[0]; + dndij[1] = dndij[1]+tmpvec[1]; + dndij[2] = dndij[2]+tmpvec[2]; + + dndik[0] = (del23[1]*cross234[2])-(del23[2]*cross234[1]); + dndik[1] = (del23[2]*cross234[0])-(del23[0]*cross234[2]); + dndik[2] = (del23[0]*cross234[1])-(del23[1]*cross234[0]); + + dndjl[0] = (cross321[1]*del23[2])-(cross321[2]*del23[1]); + dndjl[1] = (cross321[2]*del23[0])-(cross321[0]*del23[2]); + dndjl[2] = (cross321[0]*del23[1])-(cross321[1]*del23[0]); + + dcidij = ((r23*r23)-(r21*r21)+(rjk*rjk))/(2.0*r23*r23*r21); + dcidik = ((r21*r21)-(r23*r23)+(rjk*rjk))/(2.0*r23*r21*r21); + dcidjk = (-rjk)/(r23*r21); + dcjdji = ((r23*r23)-(r34*r34)+(ril*ril))/(2.0*r23*r23*r34); + dcjdjl = ((r34*r34)-(r23*r23)+(ril*ril))/(2.0*r23*r34*r34); + dcjdil = (-ril)/(r23*r34); + + dsidij = (-cos321/sin321)*dcidij; + dsidik = (-cos321/sin321)*dcidik; + dsidjk = (-cos321/sin321)*dcidjk; + + dsjdji = (-cos234/sin234)*dcjdji; + dsjdjl = (-cos234/sin234)*dcjdjl; + dsjdil = (-cos234/sin234)*dcjdil; + + dxidij = (r21*sin321)+(r23*r21*dsidij); + dxidik = (r23*sin321)+(r23*r21*dsidik); + dxidjk = (r23*r21*dsidjk); + + dxjdji = (r34*sin234)+(r23*r34*dsjdji); + dxjdjl = (r23*sin234)+(r23*r34*dsjdjl); + dxjdil = (r23*r34*dsjdil); + + ddndij = (dxidij*cross234mag)+(cross321mag*dxjdji); + ddndik = dxidik*cross234mag; + ddndjk = dxidjk*cross234mag; + ddndjl = cross321mag*dxjdjl; + ddndil = cross321mag*dxjdil; + dcwddn = -cwnum/(cwnom*cwnom); + dcwdn = 1.0/cwnom; + dvpdcw = (-1.0)*Ec*(-.5)*5.0*pow(cw2,4.0) * + w23*w21*w34*(1.0-tspjik)*(1.0-tspijl); + + Ftmp[0] = dvpdcw*((dcwdn*dndij[0])+(dcwddn*ddndij*del23[0]/r23)); + Ftmp[1] = dvpdcw*((dcwdn*dndij[1])+(dcwddn*ddndij*del23[1]/r23)); + Ftmp[2] = dvpdcw*((dcwdn*dndij[2])+(dcwddn*ddndij*del23[2]/r23)); + fi[0] = Ftmp[0]; + fi[1] = Ftmp[1]; + fi[2] = Ftmp[2]; + fj[0] = -Ftmp[0]; + fj[1] = -Ftmp[1]; + fj[2] = -Ftmp[2]; + + Ftmp[0] = dvpdcw*((dcwdn*dndik[0])+(dcwddn*ddndik*del21[0]/r21)); + Ftmp[1] = dvpdcw*((dcwdn*dndik[1])+(dcwddn*ddndik*del21[1]/r21)); + Ftmp[2] = dvpdcw*((dcwdn*dndik[2])+(dcwddn*ddndik*del21[2]/r21)); + fi[0] += Ftmp[0]; + fi[1] += Ftmp[1]; + fi[2] += Ftmp[2]; + fk[0] = -Ftmp[0]; + fk[1] = -Ftmp[1]; + fk[2] = -Ftmp[2]; + + Ftmp[0] = (dvpdcw*dcwddn*ddndjk*deljk[0])/rjk; + Ftmp[1] = (dvpdcw*dcwddn*ddndjk*deljk[1])/rjk; + Ftmp[2] = (dvpdcw*dcwddn*ddndjk*deljk[2])/rjk; + fj[0] += Ftmp[0]; + fj[1] += Ftmp[1]; + fj[2] += Ftmp[2]; + fk[0] -= Ftmp[0]; + fk[1] -= Ftmp[1]; + fk[2] -= Ftmp[2]; + + Ftmp[0] = dvpdcw*((dcwdn*dndjl[0])+(dcwddn*ddndjl*del34[0]/r34)); + Ftmp[1] = dvpdcw*((dcwdn*dndjl[1])+(dcwddn*ddndjl*del34[1]/r34)); + Ftmp[2] = dvpdcw*((dcwdn*dndjl[2])+(dcwddn*ddndjl*del34[2]/r34)); + fj[0] += Ftmp[0]; + fj[1] += Ftmp[1]; + fj[2] += Ftmp[2]; + fl[0] = -Ftmp[0]; + fl[1] = -Ftmp[1]; + fl[2] = -Ftmp[2]; + + Ftmp[0] = (dvpdcw*dcwddn*ddndil*delil[0])/ril; + Ftmp[1] = (dvpdcw*dcwddn*ddndil*delil[1])/ril; + Ftmp[2] = (dvpdcw*dcwddn*ddndil*delil[2])/ril; + fi[0] += Ftmp[0]; + fi[1] += Ftmp[1]; + fi[2] += Ftmp[2]; + fl[0] -= Ftmp[0]; + fl[1] -= Ftmp[1]; + fl[2] -= Ftmp[2]; + + // coordination forces + + fpair = Vtors*dw21*w23*w34*(1.0-tspjik)*(1.0-tspijl) / r21; + fi[0] -= del21[0]*fpair; + fi[1] -= del21[1]*fpair; + fi[2] -= del21[2]*fpair; + fk[0] += del21[0]*fpair; + fk[1] += del21[1]*fpair; + fk[2] += del21[2]*fpair; + + fpair = Vtors*w21*dw23*w34*(1.0-tspjik)*(1.0-tspijl) / r23; + fi[0] -= del23[0]*fpair; + fi[1] -= del23[1]*fpair; + fi[2] -= del23[2]*fpair; + fj[0] += del23[0]*fpair; + fj[1] += del23[1]*fpair; + fj[2] += del23[2]*fpair; + + fpair = Vtors*w21*w23*dw34*(1.0-tspjik)*(1.0-tspijl) / r34; + fj[0] -= del34[0]*fpair; + fj[1] -= del34[1]*fpair; + fj[2] -= del34[2]*fpair; + fl[0] += del34[0]*fpair; + fl[1] += del34[1]*fpair; + fl[2] += del34[2]*fpair; + + // additional cut off function forces + + fcpc = -Vtors*w21*w23*w34*dtsjik*(1.0-tspijl); + fpair = fcpc*dcidij/rij; + fi[0] += fpair*del23[0]; + fi[1] += fpair*del23[1]; + fi[2] += fpair*del23[2]; + fj[0] -= fpair*del23[0]; + fj[1] -= fpair*del23[1]; + fj[2] -= fpair*del23[2]; + + fpair = fcpc*dcidik/rik; + fi[0] += fpair*del21[0]; + fi[1] += fpair*del21[1]; + fi[2] += fpair*del21[2]; + fk[0] -= fpair*del21[0]; + fk[1] -= fpair*del21[1]; + fk[2] -= fpair*del21[2]; + + fpair = fcpc*dcidjk/rjk; + fj[0] += fpair*deljk[0]; + fj[1] += fpair*deljk[1]; + fj[2] += fpair*deljk[2]; + fk[0] -= fpair*deljk[0]; + fk[1] -= fpair*deljk[1]; + fk[2] -= fpair*deljk[2]; + + fcpc = -Vtors*w21*w23*w34*(1.0-tspjik)*dtsijl; + fpair = fcpc*dcjdji/rij; + fi[0] += fpair*del23[0]; + fi[1] += fpair*del23[1]; + fi[2] += fpair*del23[2]; + fj[0] -= fpair*del23[0]; + fj[1] -= fpair*del23[1]; + fj[2] -= fpair*del23[2]; + + fpair = fcpc*dcjdjl/rjl; + fj[0] += fpair*del34[0]; + fj[1] += fpair*del34[1]; + fj[2] += fpair*del34[2]; + fl[0] -= fpair*del34[0]; + fl[1] -= fpair*del34[1]; + fl[2] -= fpair*del34[2]; + + fpair = fcpc*dcjdil/ril; + fi[0] += fpair*delil[0]; + fi[1] += fpair*delil[1]; + fi[2] += fpair*delil[2]; + fl[0] -= fpair*delil[0]; + fl[1] -= fpair*delil[1]; + fl[2] -= fpair*delil[2]; + + // sum per-atom forces into atom force array + + f[i][0] += fi[0]; f[i][1] += fi[1]; f[i][2] += fi[2]; + f[j][0] += fj[0]; f[j][1] += fj[1]; f[j][2] += fj[2]; + f[k][0] += fk[0]; f[k][1] += fk[1]; f[k][2] += fk[2]; + f[l][0] += fl[0]; f[l][1] += fl[1]; f[l][2] += fl[2]; + + if (evflag) { + delkl[0] = delil[0] - del21[0]; + delkl[1] = delil[1] - del21[1]; + delkl[2] = delil[2] - del21[2]; + ev_tally4_thr(this,i,j,k,l,evdwl,fi,fj,fk,delil,del34,delkl,thr); + } + } + } + } + } +} + +/* ---------------------------------------------------------------------- + create REBO neighbor list from main neighbor list + REBO neighbor list stores neighbors of ghost atoms +------------------------------------------------------------------------- */ + +void PairAIREBOOMP::REBO_neigh_thr() +{ + const int nlocal = atom->nlocal; + const int nthreads = comm->nthreads; + + if (atom->nmax > maxlocal) { + maxlocal = atom->nmax; + memory->destroy(REBO_numneigh); + memory->sfree(REBO_firstneigh); + memory->destroy(nC); + memory->destroy(nH); + memory->create(REBO_numneigh,maxlocal,"AIREBO:numneigh"); + REBO_firstneigh = (int **) memory->smalloc(maxlocal*sizeof(int *), + "AIREBO:firstneigh"); + memory->create(nC,maxlocal,"AIREBO:nC"); + memory->create(nH,maxlocal,"AIREBO:nH"); + } + + if (nthreads > maxpage) + add_pages(nthreads - maxpage); + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + int i,j,ii,jj,n,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq,dS; + int *ilist,*jlist,*numneigh,**firstneigh; + int *neighptr; + + double **x = atom->x; + int *type = atom->type; + + const int allnum = list->inum + list->gnum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + + const int iidelta = 1 + allnum/nthreads; + const int iifrom = tid*iidelta; + const int iito = ((iifrom+iidelta)>allnum) ? allnum : (iifrom+iidelta); + + // store all REBO neighs of owned and ghost atoms + // scan full neighbor list of I + + int npage = tid; + int npnt = 0; + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage >= maxpage) add_pages(nthreads); + } + neighptr = &(pages[npage][npnt]); + n = 0; + + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = map[type[i]]; + nC[i] = nH[i] = 0.0; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtype = map[type[j]]; + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < rcmaxsq[itype][jtype]) { + neighptr[n++] = j; + if (jtype == 0) + nC[i] += Sp(sqrt(rsq),rcmin[itype][jtype],rcmax[itype][jtype],dS); + else + nH[i] += Sp(sqrt(rsq),rcmin[itype][jtype],rcmax[itype][jtype],dS); + } + } + + REBO_firstneigh[i] = neighptr; + REBO_numneigh[i] = n; + npnt += n; + + if (npnt >= pgsize) + error->one(FLERR,"REBO list overflow, boost neigh_modify one or page"); + } + } +} + + +/* ---------------------------------------------------------------------- */ + +double PairAIREBOOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairAIREBO::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_airebo_omp.h b/src/USER-OMP/pair_airebo_omp.h new file mode 100644 index 0000000000..03bc5e5f8c --- /dev/null +++ b/src/USER-OMP/pair_airebo_omp.h @@ -0,0 +1,53 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(airebo/omp,PairAIREBOOMP) + +#else + +#ifndef LMP_PAIR_AIREBO_OMP_H +#define LMP_PAIR_AIREBO_OMP_H + +#include "pair_airebo.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairAIREBOOMP : public PairAIREBO, public ThrOMP { + + public: + PairAIREBOOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + protected: + double bondorder_thr(int i, int j, double rij[3], double rijmag, + double VA, int vflag_atom, ThrData * const thr); + double bondorderLJ_thr(int i, int j, double rij[3], double rijmag, + double VA, double rij0[3], double rijmag0, + int vflag_atom, ThrData * const thr); + + void FREBO_thr(int ifrom, int ito, int evflag, int eflag, + int vflag_atom, ThrData * const thr); + void FLJ_thr(int ifrom, int ito, int evflag, int eflag, + int vflag_atom, ThrData * const thr); + void TORSION_thr(int ifrom, int ito, int evflag, int eflag, ThrData * const thr); + void REBO_neigh_thr(); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_beck_omp.cpp b/src/USER-OMP/pair_beck_omp.cpp new file mode 100644 index 0000000000..9bec6c0db2 --- /dev/null +++ b/src/USER-OMP/pair_beck_omp.cpp @@ -0,0 +1,173 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_beck_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairBeckOMP::PairBeckOMP(LAMMPS *lmp) : + PairBeck(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairBeckOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairBeckOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r5,force_beck,factor_lj; + double r,rinv; + double aaij,alphaij,betaij; + double term1,term1inv,term2,term3,term4,term5,term6; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + int *type = atom->type; + int nlocal = atom->nlocal; + double *special_lj = force->special_lj; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r = sqrt(rsq); + r5 = rsq*rsq*r; + aaij = aa[itype][jtype]; + alphaij = alpha[itype][jtype]; + betaij = beta[itype][jtype]; + term1 = aaij*aaij + rsq; + term2 = 1.0/pow(term1,5.0); + term3 = 21.672 + 30.0*aaij*aaij + 6.0*rsq; + term4 = alphaij + r5*betaij; + term5 = alphaij + 6.0*r5*betaij; + rinv = 1.0/r; + force_beck = AA[itype][jtype]*exp(-1.0*r*term4)*term5; + force_beck -= BB[itype][jtype]*r*term2*term3; + + fpair = factor_lj*force_beck*rinv; + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + term6 = 1.0/pow(term1,3.0); + term1inv = 1.0/term1; + evdwl = AA[itype][jtype]*exp(-1.0*r*term4); + evdwl -= BB[itype][jtype]*term6*(1.0+(2.709+3.0*aaij*aaij)*term1inv); + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairBeckOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairBeck::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_beck_omp.h b/src/USER-OMP/pair_beck_omp.h new file mode 100644 index 0000000000..18a4401bbc --- /dev/null +++ b/src/USER-OMP/pair_beck_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(beck/omp,PairBeckOMP) + +#else + +#ifndef LMP_PAIR_BECK_OMP_H +#define LMP_PAIR_BECK_OMP_H + +#include "pair_beck.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairBeckOMP : public PairBeck, public ThrOMP { + + public: + PairBeckOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_born_coul_long_omp.cpp b/src/USER-OMP/pair_born_coul_long_omp.cpp new file mode 100644 index 0000000000..f627f19bac --- /dev/null +++ b/src/USER-OMP/pair_born_coul_long_omp.cpp @@ -0,0 +1,197 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_born_coul_long_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + +/* ---------------------------------------------------------------------- */ + +PairBornCoulLongOMP::PairBornCoulLongOMP(LAMMPS *lmp) : + PairBornCoulLong(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairBornCoulLongOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairBornCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double rsq,r2inv,r6inv,r,rexp,forcecoul,forceborn,factor_coul,factor_lj; + double grij,expm2,prefactor,t,erfc; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + int nlocal = atom->nlocal; + double *special_coul = force->special_coul; + double *special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + r = sqrt(rsq); + + if (rsq < cut_coulsq) { + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + prefactor = qqrd2e * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + } else forcecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]); + forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv + + born3[itype][jtype]*r2inv*r6inv; + } else forceborn = 0.0; + + fpair = (forcecoul + factor_lj*forceborn)*r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + if (rsq < cut_coulsq) { + ecoul = prefactor*erfc; + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv + + d[itype][jtype]*r6inv*r2inv - offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairBornCoulLongOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairBornCoulLong::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_born_coul_long_omp.h b/src/USER-OMP/pair_born_coul_long_omp.h new file mode 100644 index 0000000000..bdf81e514e --- /dev/null +++ b/src/USER-OMP/pair_born_coul_long_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(born/coul/long/omp,PairBornCoulLongOMP) + +#else + +#ifndef LMP_PAIR_BORN_COUL_LONG_OMP_H +#define LMP_PAIR_BORN_COUL_LONG_OMP_H + +#include "pair_born_coul_long.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairBornCoulLongOMP : public PairBornCoulLong, public ThrOMP { + + public: + PairBornCoulLongOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_born_coul_wolf_omp.cpp b/src/USER-OMP/pair_born_coul_wolf_omp.cpp new file mode 100644 index 0000000000..35e0ac7443 --- /dev/null +++ b/src/USER-OMP/pair_born_coul_wolf_omp.cpp @@ -0,0 +1,204 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_born_coul_wolf_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "math_const.h" + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +/* ---------------------------------------------------------------------- */ + +PairBornCoulWolfOMP::PairBornCoulWolfOMP(LAMMPS *lmp) : + PairBornCoulWolf(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairBornCoulWolfOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairBornCoulWolfOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double rsq,r2inv,r6inv,forcecoul,forceborn,factor_coul,factor_lj; + double prefactor; + double r,rexp; + int *ilist,*jlist,*numneigh,**firstneigh; + double erfcc,erfcd,v_sh,dvdrr,e_self,e_shift,f_shift,qisq; + + evdwl = ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + int nlocal = atom->nlocal; + double *special_coul = force->special_coul; + double *special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp; + + // self and shifted coulombic energy + + e_self = v_sh = 0.0; + e_shift = erfc(alf*cut_coul)/cut_coul; + f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) / + cut_coul; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + qisq = qtmp*qtmp; + e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e; + if (EVFLAG) ev_tally_thr(this,i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0,thr); + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + r = sqrt(rsq); + + if (rsq < cut_coulsq) { + prefactor = qqrd2e*qtmp*q[j]/r; + erfcc = erfc(alf*r); + erfcd = exp(-alf*alf*r*r); + v_sh = (erfcc - e_shift*r) * prefactor; + dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift; + forcecoul = dvdrr*rsq*prefactor; + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + } else forcecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]); + forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv + + born3[itype][jtype]*r2inv*r6inv; + } else forceborn = 0.0; + + fpair = (forcecoul + factor_lj*forceborn)*r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + if (rsq < cut_coulsq) { + ecoul = v_sh; + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv + + d[itype][jtype]*r6inv*r2inv - offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairBornCoulWolfOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairBornCoulWolf::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_born_coul_wolf_omp.h b/src/USER-OMP/pair_born_coul_wolf_omp.h new file mode 100644 index 0000000000..d714b32d29 --- /dev/null +++ b/src/USER-OMP/pair_born_coul_wolf_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(born/coul/wolf/omp,PairBornCoulWolfOMP) + +#else + +#ifndef LMP_PAIR_BORN_COUL_WOLF_OMP_H +#define LMP_PAIR_BORN_COUL_WOLF_OMP_H + +#include "pair_born_coul_wolf.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairBornCoulWolfOMP : public PairBornCoulWolf, public ThrOMP { + + public: + PairBornCoulWolfOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_born_omp.cpp b/src/USER-OMP/pair_born_omp.cpp new file mode 100644 index 0000000000..847efaa013 --- /dev/null +++ b/src/USER-OMP/pair_born_omp.cpp @@ -0,0 +1,161 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_born_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairBornOMP::PairBornOMP(LAMMPS *lmp) : + PairBorn(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairBornOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairBornOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r2inv,r6inv,r,rexp,forceborn,factor_lj; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + int *type = atom->type; + int nlocal = atom->nlocal; + double *special_lj = force->special_lj; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + r = sqrt(rsq); + rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]); + forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv + + born3[itype][jtype]*r2inv*r6inv; + fpair = factor_lj*forceborn*r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv + + d[itype][jtype]*r6inv*r2inv - offset[itype][jtype]; + evdwl *= factor_lj; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairBornOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairBorn::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_born_omp.h b/src/USER-OMP/pair_born_omp.h new file mode 100644 index 0000000000..db3c768f7a --- /dev/null +++ b/src/USER-OMP/pair_born_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(born/omp,PairBornOMP) + +#else + +#ifndef LMP_PAIR_BORN_OMP_H +#define LMP_PAIR_BORN_OMP_H + +#include "pair_born.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairBornOMP : public PairBorn, public ThrOMP { + + public: + PairBornOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_brownian_omp.cpp b/src/USER-OMP/pair_brownian_omp.cpp new file mode 100644 index 0000000000..561c82abf5 --- /dev/null +++ b/src/USER-OMP/pair_brownian_omp.cpp @@ -0,0 +1,398 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_brownian_omp.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "force.h" +#include "input.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "update.h" +#include "variable.h" +#include "random_mars.h" +#include "math_const.h" + +#include "fix_wall.h" + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define EPSILON 1.0e-10 + +// same as fix_wall.cpp + +enum{EDGE,CONSTANT,VARIABLE}; + +/* ---------------------------------------------------------------------- */ + +PairBrownianOMP::PairBrownianOMP(LAMMPS *lmp) : + PairBrownian(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; + random_thr = NULL; +} + +/* ---------------------------------------------------------------------- */ + +PairBrownianOMP::~PairBrownianOMP() +{ + if (random_thr) { + for (int i=1; i < comm->nthreads; ++i) + delete random_thr[i]; + + delete[] random_thr; + random_thr = NULL; + } +} + +/* ---------------------------------------------------------------------- */ + +void PairBrownianOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + // This section of code adjusts R0/RT0/RS0 if necessary due to changes + // in the volume fraction as a result of fix deform or moving walls + + double dims[3], wallcoord; + if (flagVF) // Flag for volume fraction corrections + if (flagdeform || flagwall == 2){ // Possible changes in volume fraction + if (flagdeform && !flagwall) + for (int j = 0; j < 3; j++) + dims[j] = domain->prd[j]; + else if (flagwall == 2 || (flagdeform && flagwall == 1)){ + double wallhi[3], walllo[3]; + for (int j = 0; j < 3; j++){ + wallhi[j] = domain->prd[j]; + walllo[j] = 0; + } + for (int m = 0; m < wallfix->nwall; m++){ + int dim = wallfix->wallwhich[m] / 2; + int side = wallfix->wallwhich[m] % 2; + if (wallfix->wallstyle[m] == VARIABLE){ + wallcoord = input->variable->compute_equal(wallfix->varindex[m]); + } + else wallcoord = wallfix->coord0[m]; + if (side == 0) walllo[dim] = wallcoord; + else wallhi[dim] = wallcoord; + } + for (int j = 0; j < 3; j++) + dims[j] = wallhi[j] - walllo[j]; + } + double vol_T = dims[0]*dims[1]*dims[2]; + double vol_f = vol_P/vol_T; + if (flaglog == 0) { + R0 = 6*MY_PI*mu*rad*(1.0 + 2.16*vol_f); + RT0 = 8*MY_PI*mu*pow(rad,3.0); + //RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3)*(1.0 + 3.33*vol_f + 2.80*vol_f*vol_f); + } else { + R0 = 6*MY_PI*mu*rad*(1.0 + 2.725*vol_f - 6.583*vol_f*vol_f); + RT0 = 8*MY_PI*mu*pow(rad,3.0)*(1.0 + 0.749*vol_f - 2.469*vol_f*vol_f); + //RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3)*(1.0 + 3.64*vol_f - 6.95*vol_f*vol_f); + } + } + + + if (!random_thr) + random_thr = new RanMars*[nthreads]; + + // to ensure full compatibility with the serial Brownian style + // we use is random number generator instance for thread 0 + random_thr[0] = random; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + // generate a random number generator instance for + // all threads != 0. make sure we use unique seeds. + if (random_thr && tid > 0) + random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me + + comm->nprocs*tid); + if (flaglog) { + if (evflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (evflag) { + if (force->newton_pair) eval<0,1,1>(ifrom, ito, thr); + else eval<0,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairBrownianOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz,tx,ty,tz; + double rsq,r,h_sep,radi; + int *ilist,*jlist,*numneigh,**firstneigh; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const * const torque = thr->get_torque(); + const double * const radius = atom->radius; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + + RanMars &rng = *random_thr[thr->get_tid()]; + + double vxmu2f = force->vxmu2f; + double randr; + double prethermostat; + double xl[3],a_sq,a_sh,a_pu,Fbmag; + double p1[3],p2[3],p3[3]; + int overlaps = 0; + + // scale factor for Brownian moments + + prethermostat = sqrt(24.0*force->boltz*t_target/update->dt); + prethermostat *= sqrt(force->vxmu2f/force->ftm2v/force->mvv2e); + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + radi = radius[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + // FLD contribution to force and torque due to isotropic terms + + if (flagfld) { + f[i][0] += prethermostat*sqrt(R0)*(rng.uniform()-0.5); + f[i][1] += prethermostat*sqrt(R0)*(rng.uniform()-0.5); + f[i][2] += prethermostat*sqrt(R0)*(rng.uniform()-0.5); + if (FLAGLOG) { + torque[i][0] += prethermostat*sqrt(RT0)*(rng.uniform()-0.5); + torque[i][1] += prethermostat*sqrt(RT0)*(rng.uniform()-0.5); + torque[i][2] += prethermostat*sqrt(RT0)*(rng.uniform()-0.5); + } + } + + if (!flagHI) continue; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r = sqrt(rsq); + + // scalar resistances a_sq and a_sh + + h_sep = r - 2.0*radi; + + // check for overlaps + + if (h_sep < 0.0) overlaps++; + + // if less than minimum gap, use minimum gap instead + + if (r < cut_inner[itype][jtype]) + h_sep = cut_inner[itype][jtype] - 2.0*radi; + + // scale h_sep by radi + + h_sep = h_sep/radi; + + // scalar resistances + + if (FLAGLOG) { + a_sq = 6.0*MY_PI*mu*radi*(1.0/4.0/h_sep + 9.0/40.0*log(1.0/h_sep)); + a_sh = 6.0*MY_PI*mu*radi*(1.0/6.0*log(1.0/h_sep)); + a_pu = 8.0*MY_PI*mu*pow(radi,3.0)*(3.0/160.0*log(1.0/h_sep)); + } else + a_sq = 6.0*MY_PI*mu*radi*(1.0/4.0/h_sep); + + // generate the Pairwise Brownian Force: a_sq + + Fbmag = prethermostat*sqrt(a_sq); + + // generate a random number + + randr = rng.uniform()-0.5; + + // contribution due to Brownian motion + + fx = Fbmag*randr*delx/r; + fy = Fbmag*randr*dely/r; + fz = Fbmag*randr*delz/r; + + // add terms due to a_sh + + if (FLAGLOG) { + + // generate two orthogonal vectors to the line of centers + + p1[0] = delx/r; p1[1] = dely/r; p1[2] = delz/r; + set_3_orthogonal_vectors(p1,p2,p3); + + // magnitude + + Fbmag = prethermostat*sqrt(a_sh); + + // force in each of the two directions + + randr = rng.uniform()-0.5; + fx += Fbmag*randr*p2[0]; + fy += Fbmag*randr*p2[1]; + fz += Fbmag*randr*p2[2]; + + randr = rng.uniform()-0.5; + fx += Fbmag*randr*p3[0]; + fy += Fbmag*randr*p3[1]; + fz += Fbmag*randr*p3[2]; + } + + // scale forces to appropriate units + + fx = vxmu2f*fx; + fy = vxmu2f*fy; + fz = vxmu2f*fz; + + // sum to total force + + f[i][0] -= fx; + f[i][1] -= fy; + f[i][2] -= fz; + + if (NEWTON_PAIR || j < nlocal) { + //randr = rng.uniform()-0.5; + //fx = Fbmag*randr*delx/r; + //fy = Fbmag*randr*dely/r; + //fz = Fbmag*randr*delz/r; + + f[j][0] += fx; + f[j][1] += fy; + f[j][2] += fz; + } + + // torque due to the Brownian Force + + if (FLAGLOG) { + + // location of the point of closest approach on I from its center + + xl[0] = -delx/r*radi; + xl[1] = -dely/r*radi; + xl[2] = -delz/r*radi; + + // torque = xl_cross_F + + tx = xl[1]*fz - xl[2]*fy; + ty = xl[2]*fx - xl[0]*fz; + tz = xl[0]*fy - xl[1]*fx; + + // torque is same on both particles + + torque[i][0] -= tx; + torque[i][1] -= ty; + torque[i][2] -= tz; + + if (NEWTON_PAIR || j < nlocal) { + torque[j][0] -= tx; + torque[j][1] -= ty; + torque[j][2] -= tz; + } + + // torque due to a_pu + + Fbmag = prethermostat*sqrt(a_pu); + + // force in each direction + + randr = rng.uniform()-0.5; + tx = Fbmag*randr*p2[0]; + ty = Fbmag*randr*p2[1]; + tz = Fbmag*randr*p2[2]; + + randr = rng.uniform()-0.5; + tx += Fbmag*randr*p3[0]; + ty += Fbmag*randr*p3[1]; + tz += Fbmag*randr*p3[2]; + + // torque has opposite sign on two particles + + torque[i][0] -= tx; + torque[i][1] -= ty; + torque[i][2] -= tz; + + if (NEWTON_PAIR || j < nlocal) { + torque[j][0] += tx; + torque[j][1] += ty; + torque[j][2] += tz; + } + } + + if (EVFLAG) ev_tally_xyz(i,j,nlocal,NEWTON_PAIR, + 0.0,0.0,-fx,-fy,-fz,delx,dely,delz); + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +double PairBrownianOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairBrownian::memory_usage(); + bytes += comm->nthreads * sizeof(RanMars*); + bytes += comm->nthreads * sizeof(RanMars); + + return bytes; +} diff --git a/src/USER-OMP/pair_brownian_omp.h b/src/USER-OMP/pair_brownian_omp.h new file mode 100644 index 0000000000..cdabe08ba1 --- /dev/null +++ b/src/USER-OMP/pair_brownian_omp.h @@ -0,0 +1,52 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(brownian/omp,PairBrownianOMP) + +#else + +#ifndef LMP_PAIR_BROWNIAN_OMP_H +#define LMP_PAIR_BROWNIAN_OMP_H + +#include "pair_brownian.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairBrownianOMP : public PairBrownian, public ThrOMP { + + public: + PairBrownianOMP(class LAMMPS *); + virtual ~PairBrownianOMP(); + + virtual void compute(int, int); + virtual double memory_usage(); + + protected: + class RanMars **random_thr; + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_brownian_poly_omp.cpp b/src/USER-OMP/pair_brownian_poly_omp.cpp new file mode 100644 index 0000000000..6affbf96cf --- /dev/null +++ b/src/USER-OMP/pair_brownian_poly_omp.cpp @@ -0,0 +1,387 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_brownian_poly_omp.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "force.h" +#include "input.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "update.h" +#include "variable.h" +#include "random_mars.h" +#include "fix_wall.h" + +#include "math_const.h" + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define EPSILON 1.0e-10 + +// same as fix_wall.cpp + +enum{EDGE,CONSTANT,VARIABLE}; + +/* ---------------------------------------------------------------------- */ + +PairBrownianPolyOMP::PairBrownianPolyOMP(LAMMPS *lmp) : + PairBrownianPoly(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; + random_thr = NULL; +} + +/* ---------------------------------------------------------------------- */ + +PairBrownianPolyOMP::~PairBrownianPolyOMP() +{ + if (random_thr) { + for (int i=1; i < comm->nthreads; ++i) + delete random_thr[i]; + + delete[] random_thr; + random_thr = NULL; + } +} + +/* ---------------------------------------------------------------------- */ + +void PairBrownianPolyOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + // This section of code adjusts R0/RT0/RS0 if necessary due to changes + // in the volume fraction as a result of fix deform or moving walls + + double dims[3], wallcoord; + if (flagVF) // Flag for volume fraction corrections + if (flagdeform || flagwall == 2){ // Possible changes in volume fraction + if (flagdeform && !flagwall) + for (int j = 0; j < 3; j++) + dims[j] = domain->prd[j]; + else if (flagwall == 2 || (flagdeform && flagwall == 1)){ + double wallhi[3], walllo[3]; + for (int j = 0; j < 3; j++){ + wallhi[j] = domain->prd[j]; + walllo[j] = 0; + } + for (int m = 0; m < wallfix->nwall; m++){ + int dim = wallfix->wallwhich[m] / 2; + int side = wallfix->wallwhich[m] % 2; + if (wallfix->wallstyle[m] == VARIABLE){ + wallcoord = input->variable->compute_equal(wallfix->varindex[m]); + } + else wallcoord = wallfix->coord0[m]; + if (side == 0) walllo[dim] = wallcoord; + else wallhi[dim] = wallcoord; + } + for (int j = 0; j < 3; j++) + dims[j] = wallhi[j] - walllo[j]; + } + double vol_T = dims[0]*dims[1]*dims[2]; + double vol_f = vol_P/vol_T; + if (flaglog == 0) { + R0 = 6*MY_PI*mu*rad*(1.0 + 2.16*vol_f); + RT0 = 8*MY_PI*mu*pow(rad,3.0); + //RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3)*(1.0 + 3.33*vol_f + 2.80*vol_f*vol_f); + } else { + R0 = 6*MY_PI*mu*rad*(1.0 + 2.725*vol_f - 6.583*vol_f*vol_f); + RT0 = 8*MY_PI*mu*pow(rad,3.0)*(1.0 + 0.749*vol_f - 2.469*vol_f*vol_f); + //RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3)*(1.0 + 3.64*vol_f - 6.95*vol_f*vol_f); + } + } + + + if (!random_thr) + random_thr = new RanMars*[nthreads]; + + // to ensure full compatibility with the serial BrownianPoly style + // we use is random number generator instance for thread 0 + random_thr[0] = random; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + // generate a random number generator instance for + // all threads != 0. make sure we use unique seeds. + if (random_thr && tid > 0) + random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me + + comm->nprocs*tid); + if (flaglog) { + if (evflag) + eval<1,1>(ifrom, ito, thr); + else + eval<1,0>(ifrom, ito, thr); + } else { + if (evflag) + eval<0,1>(ifrom, ito, thr); + else eval<0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairBrownianPolyOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz,tx,ty,tz; + double rsq,r,h_sep,beta0,beta1,radi,radj; + int *ilist,*jlist,*numneigh,**firstneigh; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const * const torque = thr->get_torque(); + const double * const radius = atom->radius; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + + RanMars &rng = *random_thr[thr->get_tid()]; + + double vxmu2f = force->vxmu2f; + int overlaps = 0; + double randr; + double prethermostat; + double xl[3],a_sq,a_sh,a_pu,Fbmag; + double p1[3],p2[3],p3[3]; + + // scale factor for Brownian moments + + prethermostat = sqrt(24.0*force->boltz*t_target/update->dt); + prethermostat *= sqrt(force->vxmu2f/force->ftm2v/force->mvv2e); + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + radi = radius[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + // FLD contribution to force and torque due to isotropic terms + + if (flagfld) { + f[i][0] += prethermostat*sqrt(R0*radi)*(rng.uniform()-0.5); + f[i][1] += prethermostat*sqrt(R0*radi)*(rng.uniform()-0.5); + f[i][2] += prethermostat*sqrt(R0*radi)*(rng.uniform()-0.5); + if (FLAGLOG) { + const double rad3 = radi*radi*radi; + torque[i][0] += prethermostat*sqrt(RT0*rad3)*(rng.uniform()-0.5); + torque[i][1] += prethermostat*sqrt(RT0*rad3)*(rng.uniform()-0.5); + torque[i][2] += prethermostat*sqrt(RT0*rad3)*(rng.uniform()-0.5); + } + } + + if (!flagHI) continue; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + radj = radius[j]; + + if (rsq < cutsq[itype][jtype]) { + r = sqrt(rsq); + + // scalar resistances a_sq and a_sh + + h_sep = r - radi-radj; + + // check for overlaps + + if (h_sep < 0.0) overlaps++; + + // if less than minimum gap, use minimum gap instead + + if (r < cut_inner[itype][jtype]) + h_sep = cut_inner[itype][jtype] - radi-radj; + + // scale h_sep by radi + + h_sep = h_sep/radi; + beta0 = radj/radi; + beta1 = 1.0 + beta0; + + // scalar resistances + + if (FLAGLOG) { + a_sq = beta0*beta0/beta1/beta1/h_sep + + (1.0+7.0*beta0+beta0*beta0)/5.0/pow(beta1,3.0)*log(1.0/h_sep); + a_sq += (1.0+18.0*beta0-29.0*beta0*beta0+18.0*pow(beta0,3.0) + + pow(beta0,4.0))/21.0/pow(beta1,4.0)*h_sep*log(1.0/h_sep); + a_sq *= 6.0*MY_PI*mu*radi; + a_sh = 4.0*beta0*(2.0+beta0+2.0*beta0*beta0)/15.0/pow(beta1,3.0) * + log(1.0/h_sep); + a_sh += 4.0*(16.0-45.0*beta0+58.0*beta0*beta0-45.0*pow(beta0,3.0) + + 16.0*pow(beta0,4.0))/375.0/pow(beta1,4.0) * + h_sep*log(1.0/h_sep); + a_sh *= 6.0*MY_PI*mu*radi; + a_pu = beta0*(4.0+beta0)/10.0/beta1/beta1*log(1.0/h_sep); + a_pu += (32.0-33.0*beta0+83.0*beta0*beta0+43.0 * + pow(beta0,3.0))/250.0/pow(beta1,3.0)*h_sep*log(1.0/h_sep); + a_pu *= 8.0*MY_PI*mu*pow(radi,3.0); + + } else a_sq = 6.0*MY_PI*mu*radi*(beta0*beta0/beta1/beta1/h_sep); + + // generate the Pairwise Brownian Force: a_sq + + Fbmag = prethermostat*sqrt(a_sq); + + // generate a random number + + randr = rng.uniform()-0.5; + + // contribution due to Brownian motion + + fx = Fbmag*randr*delx/r; + fy = Fbmag*randr*dely/r; + fz = Fbmag*randr*delz/r; + + // add terms due to a_sh + + if (FLAGLOG) { + + // generate two orthogonal vectors to the line of centers + + p1[0] = delx/r; p1[1] = dely/r; p1[2] = delz/r; + set_3_orthogonal_vectors(p1,p2,p3); + + // magnitude + + Fbmag = prethermostat*sqrt(a_sh); + + // force in each of the two directions + + randr = rng.uniform()-0.5; + fx += Fbmag*randr*p2[0]; + fy += Fbmag*randr*p2[1]; + fz += Fbmag*randr*p2[2]; + + randr = rng.uniform()-0.5; + fx += Fbmag*randr*p3[0]; + fy += Fbmag*randr*p3[1]; + fz += Fbmag*randr*p3[2]; + } + + // scale forces to appropriate units + + fx = vxmu2f*fx; + fy = vxmu2f*fy; + fz = vxmu2f*fz; + + // sum to total force + + f[i][0] -= fx; + f[i][1] -= fy; + f[i][2] -= fz; + + // torque due to the Brownian Force + + if (FLAGLOG) { + + // location of the point of closest approach on I from its center + + xl[0] = -delx/r*radi; + xl[1] = -dely/r*radi; + xl[2] = -delz/r*radi; + + // torque = xl_cross_F + + tx = xl[1]*fz - xl[2]*fy; + ty = xl[2]*fx - xl[0]*fz; + tz = xl[0]*fy - xl[1]*fx; + + // torque is same on both particles + + torque[i][0] -= tx; + torque[i][1] -= ty; + torque[i][2] -= tz; + + // torque due to a_pu + + Fbmag = prethermostat*sqrt(a_pu); + + // force in each direction + + randr = rng.uniform()-0.5; + tx = Fbmag*randr*p2[0]; + ty = Fbmag*randr*p2[1]; + tz = Fbmag*randr*p2[2]; + + randr = rng.uniform()-0.5; + tx += Fbmag*randr*p3[0]; + ty += Fbmag*randr*p3[1]; + tz += Fbmag*randr*p3[2]; + + // torque has opposite sign on two particles + + torque[i][0] -= tx; + torque[i][1] -= ty; + torque[i][2] -= tz; + + } + + // set j = nlocal so that only I gets tallied + + if (EVFLAG) ev_tally_xyz(i,nlocal,nlocal,/* newton_pair */ 0, + 0.0,0.0,-fx,-fy,-fz,delx,dely,delz); + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +double PairBrownianPolyOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairBrownianPoly::memory_usage(); + bytes += comm->nthreads * sizeof(RanMars*); + bytes += comm->nthreads * sizeof(RanMars); + + return bytes; +} diff --git a/src/USER-OMP/pair_brownian_poly_omp.h b/src/USER-OMP/pair_brownian_poly_omp.h new file mode 100644 index 0000000000..663696c6c6 --- /dev/null +++ b/src/USER-OMP/pair_brownian_poly_omp.h @@ -0,0 +1,52 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(brownian/poly/omp,PairBrownianPolyOMP) + +#else + +#ifndef LMP_PAIR_BROWNIAN_POLY_OMP_H +#define LMP_PAIR_BROWNIAN_POLY_OMP_H + +#include "pair_brownian_poly.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairBrownianPolyOMP : public PairBrownianPoly, public ThrOMP { + + public: + PairBrownianPolyOMP(class LAMMPS *); + virtual ~PairBrownianPolyOMP(); + + virtual void compute(int, int); + virtual double memory_usage(); + + protected: + class RanMars **random_thr; + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_buck_coul_cut_omp.cpp b/src/USER-OMP/pair_buck_coul_cut_omp.cpp new file mode 100644 index 0000000000..8b570ff1a0 --- /dev/null +++ b/src/USER-OMP/pair_buck_coul_cut_omp.cpp @@ -0,0 +1,179 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_buck_coul_cut_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairBuckCoulCutOMP::PairBuckCoulCutOMP(LAMMPS *lmp) : + PairBuckCoulCut(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairBuckCoulCutOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairBuckCoulCutOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double rsq,r2inv,r6inv,r,rexp,forcecoul,forcebuck,factor_coul,factor_lj; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + int *type = atom->type; + int nlocal = atom->nlocal; + double *special_coul = force->special_coul; + double *special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + r = sqrt(rsq); + + if (rsq < cut_coulsq[itype][jtype]) + forcecoul = qqrd2e * qtmp*q[j]/r; + else forcecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + rexp = exp(-r*rhoinv[itype][jtype]); + forcebuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv; + } else forcebuck = 0.0; + + fpair = (forcecoul + factor_lj*forcebuck)*r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + if (rsq < cut_coulsq[itype][jtype]) + ecoul = factor_coul * qqrd2e * qtmp*q[j]/r; + else ecoul = 0.0; + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairBuckCoulCutOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairBuckCoulCut::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_buck_coul_cut_omp.h b/src/USER-OMP/pair_buck_coul_cut_omp.h new file mode 100644 index 0000000000..cf142b022a --- /dev/null +++ b/src/USER-OMP/pair_buck_coul_cut_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(buck/coul/cut/omp,PairBuckCoulCutOMP) + +#else + +#ifndef LMP_PAIR_BUCK_COUL_CUT_OMP_H +#define LMP_PAIR_BUCK_COUL_CUT_OMP_H + +#include "pair_buck_coul_cut.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairBuckCoulCutOMP : public PairBuckCoulCut, public ThrOMP { + + public: + PairBuckCoulCutOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_buck_coul_long_omp.cpp b/src/USER-OMP/pair_buck_coul_long_omp.cpp new file mode 100644 index 0000000000..85f2b48db9 --- /dev/null +++ b/src/USER-OMP/pair_buck_coul_long_omp.cpp @@ -0,0 +1,197 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_buck_coul_long_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + +/* ---------------------------------------------------------------------- */ + +PairBuckCoulLongOMP::PairBuckCoulLongOMP(LAMMPS *lmp) : + PairBuckCoulLong(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairBuckCoulLongOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + // reduce per thread forces into global force array. + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairBuckCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double rsq,r2inv,r6inv,r,rexp,forcecoul,forcebuck,factor_coul,factor_lj; + double grij,expm2,prefactor,t,erfc; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + int *type = atom->type; + int nlocal = atom->nlocal; + double *special_coul = force->special_coul; + double *special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + r = sqrt(rsq); + + if (rsq < cut_coulsq) { + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + prefactor = qqrd2e * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + } else forcecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + rexp = exp(-r*rhoinv[itype][jtype]); + forcebuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv; + } else forcebuck = 0.0; + + fpair = (forcecoul + factor_lj*forcebuck)*r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + if (rsq < cut_coulsq) { + ecoul = prefactor*erfc; + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairBuckCoulLongOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairBuckCoulLong::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_buck_coul_long_omp.h b/src/USER-OMP/pair_buck_coul_long_omp.h new file mode 100644 index 0000000000..1309bc4c4b --- /dev/null +++ b/src/USER-OMP/pair_buck_coul_long_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(buck/coul/long/omp,PairBuckCoulLongOMP) + +#else + +#ifndef LMP_PAIR_BUCK_COUL_LONG_OMP_H +#define LMP_PAIR_BUCK_COUL_LONG_OMP_H + +#include "pair_buck_coul_long.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairBuckCoulLongOMP : public PairBuckCoulLong, public ThrOMP { + + public: + PairBuckCoulLongOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_buck_coul_omp.cpp b/src/USER-OMP/pair_buck_coul_omp.cpp new file mode 100644 index 0000000000..d1e54f1b11 --- /dev/null +++ b/src/USER-OMP/pair_buck_coul_omp.cpp @@ -0,0 +1,229 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_buck_coul_omp.h" +#include "atom.h" +#include "comm.h" +#include "math_vector.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + +/* ---------------------------------------------------------------------- */ + +PairBuckCoulOMP::PairBuckCoulOMP(LAMMPS *lmp) : + PairBuckCoul(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; + cut_respa = NULL; +} + +/* ---------------------------------------------------------------------- */ + +void PairBuckCoulOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairBuckCoulOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + double evdwl,ecoul,fpair; + evdwl = ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + int *type = atom->type; + int nlocal = atom->nlocal; + double *special_coul = force->special_coul; + double *special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; + + const double *x0 = x[0]; + double *f0 = f[0], *fi = f0; + + int *ilist = list->ilist; + + // loop over neighbors of my atoms + + int i, ii, j, order1 = ewald_order&(1<<1), order6 = ewald_order&(1<<6); + int *jneigh, *jneighn, typei, typej, ni; + double qi, qri, *cutsqi, *cut_bucksqi, + *buck1i, *buck2i, *buckai, *buckci, *rhoinvi, *offseti; + double r, rsq, r2inv, force_coul, force_buck; + double g2 = g_ewald*g_ewald, g6 = g2*g2*g2, g8 = g6*g2; + vector xi, d; + + for (ii = iifrom; ii < iito; ++ii) { // loop over my atoms + i = ilist[ii]; fi = f0+3*i; + if (order1) qri = (qi = q[i])*qqrd2e; // initialize constants + offseti = offset[typei = type[i]]; + buck1i = buck1[typei]; buck2i = buck2[typei]; + buckai = buck_a[typei]; buckci = buck_c[typei], rhoinvi = rhoinv[typei]; + cutsqi = cutsq[typei]; cut_bucksqi = cut_bucksq[typei]; + memcpy(xi, x0+(i+(i<<1)), sizeof(vector)); + jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i]; + + for (; jneigh= cutsqi[typej = type[j]]) continue; + r2inv = 1.0/rsq; + r = sqrt(rsq); + + if (order1 && (rsq < cut_coulsq)) { // coulombic + if (!ncoultablebits || rsq <= tabinnersq) { // series real space + register double x = g_ewald*r; + register double s = qri*q[j], t = 1.0/(1.0+EWALD_P*x); + if (ni == 0) { + s *= g_ewald*exp(-x*x); + force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s; + if (EFLAG) ecoul = t; + } else { // special case + register double f = s*(1.0-special_coul[ni])/r; + s *= g_ewald*exp(-x*x); + force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s-f; + if (EFLAG) ecoul = t-f; + } // table real space + } else { + register union_int_float_t t; + t.f = rsq; + register const int k = (t.i & ncoulmask) >> ncoulshiftbits; + register double f = (rsq-rtable[k])*drtable[k], qiqj = qi*q[j]; + if (ni == 0) { + force_coul = qiqj*(ftable[k]+f*dftable[k]); + if (EFLAG) ecoul = qiqj*(etable[k]+f*detable[k]); + } + else { // special case + t.f = (1.0-special_coul[ni])*(ctable[k]+f*dctable[k]); + force_coul = qiqj*(ftable[k]+f*dftable[k]-t.f); + if (EFLAG) ecoul = qiqj*(etable[k]+f*detable[k]-t.f); + } + } + } else force_coul = ecoul = 0.0; + + if (rsq < cut_bucksqi[typej]) { // buckingham + register double rn = r2inv*r2inv*r2inv, + expr = exp(-r*rhoinvi[typej]); + if (order6) { // long-range + register double x2 = g2*rsq, a2 = 1.0/x2; + x2 = a2*exp(-x2)*buckci[typej]; + if (ni == 0) { + force_buck = + r*expr*buck1i[typej]-g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq; + if (EFLAG) evdwl = expr*buckai[typej]-g6*((a2+1.0)*a2+0.5)*x2; + } else { // special case + register double f = special_lj[ni], t = rn*(1.0-f); + force_buck = f*r*expr*buck1i[typej]- + g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*buck2i[typej]; + if (EFLAG) evdwl = f*expr*buckai[typej] - + g6*((a2+1.0)*a2+0.5)*x2+t*buckci[typej]; + } + } else { // cut + if (ni == 0) { + force_buck = r*expr*buck1i[typej]-rn*buck2i[typej]; + if (EFLAG) evdwl = expr*buckai[typej] - + rn*buckci[typej]-offseti[typej]; + } else { // special case + register double f = special_lj[ni]; + force_buck = f*(r*expr*buck1i[typej]-rn*buck2i[typej]); + if (EFLAG) + evdwl = f*(expr*buckai[typej]-rn*buckci[typej]-offseti[typej]); + } + } + } else force_buck = evdwl = 0.0; + + fpair = (force_coul+force_buck)*r2inv; + + if (NEWTON_PAIR || j < nlocal) { + register double *fj = f0+(j+(j<<1)), f; + fi[0] += f = d[0]*fpair; fj[0] -= f; + fi[1] += f = d[1]*fpair; fj[1] -= f; + fi[2] += f = d[2]*fpair; fj[2] -= f; + } else { + fi[0] += d[0]*fpair; + fi[1] += d[1]*fpair; + fi[2] += d[2]*fpair; + } + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,d[0],d[1],d[2],thr); + } + } +} + +/* ---------------------------------------------------------------------- */ + +double PairBuckCoulOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairBuckCoul::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_buck_coul_omp.h b/src/USER-OMP/pair_buck_coul_omp.h new file mode 100644 index 0000000000..9fc11818a6 --- /dev/null +++ b/src/USER-OMP/pair_buck_coul_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(buck/coul/omp,PairBuckCoulOMP) + +#else + +#ifndef LMP_PAIR_BUCK_COUL_OMP_H +#define LMP_PAIR_BUCK_COUL_OMP_H + +#include "pair_buck_coul.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairBuckCoulOMP : public PairBuckCoul, public ThrOMP { + + public: + PairBuckCoulOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_buck_omp.cpp b/src/USER-OMP/pair_buck_omp.cpp new file mode 100644 index 0000000000..8b32e2cfa5 --- /dev/null +++ b/src/USER-OMP/pair_buck_omp.cpp @@ -0,0 +1,163 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_buck_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairBuckOMP::PairBuckOMP(LAMMPS *lmp) : + PairBuck(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairBuckOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairBuckOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r2inv,r6inv,r,rexp,forcebuck,factor_lj; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + int *type = atom->type; + int nlocal = atom->nlocal; + double *special_lj = force->special_lj; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + r = sqrt(rsq); + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + r = sqrt(rsq); + rexp = exp(-r*rhoinv[itype][jtype]); + forcebuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv; + fpair = factor_lj*forcebuck*r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv - + offset[itype][jtype]; + evdwl *= factor_lj; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairBuckOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairBuck::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_buck_omp.h b/src/USER-OMP/pair_buck_omp.h new file mode 100644 index 0000000000..72e1178803 --- /dev/null +++ b/src/USER-OMP/pair_buck_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(buck/omp,PairBuckOMP) + +#else + +#ifndef LMP_PAIR_BUCK_OMP_H +#define LMP_PAIR_BUCK_OMP_H + +#include "pair_buck.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairBuckOMP : public PairBuck, public ThrOMP { + + public: + PairBuckOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_cdeam_omp.cpp b/src/USER-OMP/pair_cdeam_omp.cpp new file mode 100644 index 0000000000..bec3ce4b25 --- /dev/null +++ b/src/USER-OMP/pair_cdeam_omp.cpp @@ -0,0 +1,531 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "string.h" + +#include "pair_cdeam_omp.h" +#include "atom.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "memory.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +// This is for debugging purposes. The ASSERT() macro is used in the code to check +// if everything runs as expected. Change this to #if 0 if you don't need the checking. +#if 0 + #define ASSERT(cond) ((!(cond)) ? my_failure(error,__FILE__,__LINE__) : my_noop()) + + inline void my_noop() {} + inline void my_failure(Error* error, const char* file, int line) { + char str[1024]; + sprintf(str,"Assertion failure: File %s, line %i", file, line); + error->one(FLERR,str); + } +#else + #define ASSERT(cond) +#endif + +/* ---------------------------------------------------------------------- */ + +PairCDEAMOMP::PairCDEAMOMP(LAMMPS *lmp, int _cdeamVersion) : + PairEAM(lmp), PairCDEAM(lmp,_cdeamVersion), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairCDEAMOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + // grow energy and fp arrays if necessary + // need to be atom->nmax in length + + if (atom->nmax > nmax) { + memory->destroy(rho); + memory->destroy(rhoB); + memory->destroy(D_values); + memory->destroy(fp); + nmax = atom->nmax; + memory->create(rho,nthreads*nmax,"pair:rho"); + memory->create(rhoB,nthreads*nmax,"pair:mu"); + memory->create(D_values,nthreads*nmax,"pair:D_values"); + memory->create(fp,nmax,"pair:fp"); + } + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (force->newton_pair) + thr->init_cdeam(nall, rho, rhoB, D_values); + else + thr->init_cdeam(atom->nlocal, rho, rhoB, D_values); + + switch (cdeamVersion) { + + case 1: + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1,1>(ifrom, ito, thr); + else eval<1,1,0,1>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1,1>(ifrom, ito, thr); + else eval<1,0,0,1>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1,1>(ifrom, ito, thr); + else eval<0,0,0,1>(ifrom, ito, thr); + } + break; + + case 2: + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1,2>(ifrom, ito, thr); + else eval<1,1,0,2>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1,2>(ifrom, ito, thr); + else eval<1,0,0,2>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1,2>(ifrom, ito, thr); + else eval<0,0,0,2>(ifrom, ito, thr); + } + break; + + default: +#if defined(_OPENMP) +#pragma omp master +#endif + error->all(FLERR,"unsupported eam/cd pair style variant"); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairCDEAMOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,rhoip,rhojp,recip,phi; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const rho_t = thr->get_rho(); + double * const rhoB_t = thr->get_rhoB(); + double * const D_values_t = thr->get_D_values(); + const int tid = thr->get_tid(); + const int nthreads = comm->nthreads; + + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const int nall = nlocal + atom->nghost; + + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // Stage I + + // Compute rho and rhoB at each local atom site. + // Additionally calculate the D_i values here if we are using the one-site formulation. + // For the two-site formulation we have to calculate the D values in an extra loop (Stage II). + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if(rsq < cutforcesq) { + jtype = type[j]; + double r = sqrt(rsq); + const EAMTableIndex index = radiusToTableIndex(r); + double localrho = RhoOfR(index, jtype, itype); + rho_t[i] += localrho; + if(jtype == speciesB) rhoB_t[i] += localrho; + if(NEWTON_PAIR || j < nlocal) { + localrho = RhoOfR(index, itype, jtype); + rho_t[j] += localrho; + if(itype == speciesB) rhoB_t[j] += localrho; + } + + if(CDEAMVERSION == 1 && itype != jtype) { + // Note: if the i-j interaction is not concentration dependent (because either + // i or j are not species A or B) then its contribution to D_i and D_j should + // be ignored. + // This if-clause is only required for a ternary. + if((itype == speciesA && jtype == speciesB) + || (jtype == speciesA && itype == speciesB)) { + double Phi_AB = PhiOfR(index, itype, jtype, 1.0 / r); + D_values_t[i] += Phi_AB; + if(NEWTON_PAIR || j < nlocal) + D_values_t[j] += Phi_AB; + } + } + } + } + } + + // wait until all threads are done with computation + sync_threads(); + + // communicate and sum densities + + if (NEWTON_PAIR) { + // reduce per thread density + data_reduce_thr(rho, nall, nthreads, 1, tid); + data_reduce_thr(rhoB, nall, nthreads, 1, tid); + if (CDEAMVERSION==1) + data_reduce_thr(D_values, nall, nthreads, 1, tid); + + // wait until reduction is complete + sync_threads(); + +#if defined(_OPENMP) +#pragma omp master +#endif + { communicationStage = 1; + comm->reverse_comm_pair(this); } + + // wait until master thread is done with communication + sync_threads(); + + } else { + // reduce per thread density + data_reduce_thr(rho, nlocal, nthreads, 1, tid); + data_reduce_thr(rhoB, nlocal, nthreads, 1, tid); + if (CDEAMVERSION==1) + data_reduce_thr(D_values, nlocal, nthreads, 1, tid); + + // wait until reduction is complete + sync_threads(); + } + + // fp = derivative of embedding energy at each atom + // phi = embedding energy at each atom + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + EAMTableIndex index = rhoToTableIndex(rho[i]); + fp[i] = FPrimeOfRho(index, type[i]); + if(EFLAG) { + phi = FofRho(index, type[i]); + e_tally_thr(this, i, i, nlocal, NEWTON_PAIR, phi, 0.0, thr); + } + } + + // wait until all theads are done with computation + sync_threads(); + + // Communicate derivative of embedding function and densities + // and D_values (this for one-site formulation only). +#if defined(_OPENMP) +#pragma omp master +#endif + { communicationStage = 2; + comm->forward_comm_pair(this); } + + // wait until master thread is done with communication + sync_threads(); + + + // The electron densities may not drop to zero because then the concentration would no longer be defined. + // But the concentration is not needed anyway if there is no interaction with another atom, which is the case + // if the electron density is exactly zero. That's why the following lines have been commented out. + // + //for(i = 0; i < nlocal + atom->nghost; i++) { + // if(rho[i] == 0 && (type[i] == speciesA || type[i] == speciesB)) + // error->one(FLERR,"CD-EAM potential routine: Detected atom with zero electron density."); + //} + + // Stage II + // This is only required for the original two-site formulation of the CD-EAM potential. + + if(CDEAMVERSION == 2) { + // Compute intermediate value D_i for each atom. + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + // This code line is required for ternary alloys. + if(itype != speciesA && itype != speciesB) continue; + + double x_i = rhoB[i] / rho[i]; // Concentration at atom i. + + for(jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtype = type[j]; + if(itype == jtype) continue; + + // This code line is required for ternary alloys. + if(jtype != speciesA && jtype != speciesB) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if(rsq < cutforcesq) { + double r = sqrt(rsq); + const EAMTableIndex index = radiusToTableIndex(r); + + // The concentration independent part of the cross pair potential. + double Phi_AB = PhiOfR(index, itype, jtype, 1.0 / r); + + // Average concentration of two sites + double x_ij = 0.5 * (x_i + rhoB[j]/rho[j]); + + // Calculate derivative of h(x_ij) polynomial function. + double h_prime = evalHprime(x_ij); + + D_values_t[i] += h_prime * Phi_AB / (2.0 * rho[i] * rho[i]); + if(NEWTON_PAIR || j < nlocal) + D_values_t[j] += h_prime * Phi_AB / (2.0 * rho[j] * rho[j]); + } + } + } + + if (NEWTON_PAIR) { + data_reduce_thr(D_values, nall, nthreads, 1, tid); + + // wait until reduction is complete + sync_threads(); + +#if defined(_OPENMP) +#pragma omp master +#endif + { communicationStage = 3; + comm->reverse_comm_pair(this); } + + // wait until master thread is done with communication + sync_threads(); + + } else { + data_reduce_thr(D_values, nlocal, nthreads, 1, tid); + + // wait until reduction is complete + sync_threads(); + } + +#if defined(_OPENMP) +#pragma omp master +#endif + { communicationStage = 4; + comm->forward_comm_pair(this); } + + // wait until master thread is done with communication + sync_threads(); + } + + // Stage III + + // Compute force acting on each atom. + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + fxtmp = fytmp = fztmp = 0.0; + + jlist = firstneigh[i]; + jnum = numneigh[i]; + + // Concentration at site i + double x_i = -1.0; // The value -1 indicates: no concentration dependence for all interactions of atom i. + // It will be replaced by the concentration at site i if atom i is either A or B. + + double D_i, h_prime_i; + + // This if-clause is only required for ternary alloys. + if((itype == speciesA || itype == speciesB) && rho[i] != 0.0) { + + // Compute local concentration at site i. + x_i = rhoB[i]/rho[i]; + ASSERT(x_i >= 0 && x_i<=1.0); + + if(CDEAMVERSION == 1) { + // Calculate derivative of h(x_i) polynomial function. + h_prime_i = evalHprime(x_i); + D_i = D_values[i] * h_prime_i / (2.0 * rho[i] * rho[i]); + } else if(CDEAMVERSION == 2) { + D_i = D_values[i]; + } else ASSERT(false); + } + + for(jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if(rsq < cutforcesq) { + jtype = type[j]; + double r = sqrt(rsq); + const EAMTableIndex index = radiusToTableIndex(r); + + // rhoip = derivative of (density at atom j due to atom i) + // rhojp = derivative of (density at atom i due to atom j) + // psip needs both fp[i] and fp[j] terms since r_ij appears in two + // terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji) + // hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip + rhoip = RhoPrimeOfR(index, itype, jtype); + rhojp = RhoPrimeOfR(index, jtype, itype); + fpair = fp[i]*rhojp + fp[j]*rhoip; + recip = 1.0/r; + + double x_j = -1; // The value -1 indicates: no concentration dependence for this i-j pair + // because atom j is not of species A nor B. + + // This code line is required for ternary alloy. + if(jtype == speciesA || jtype == speciesB) { + ASSERT(rho[i] != 0.0); + ASSERT(rho[j] != 0.0); + + // Compute local concentration at site j. + x_j = rhoB[j]/rho[j]; + ASSERT(x_j >= 0 && x_j<=1.0); + + double D_j; + if(CDEAMVERSION == 1) { + // Calculate derivative of h(x_j) polynomial function. + double h_prime_j = evalHprime(x_j); + D_j = D_values[j] * h_prime_j / (2.0 * rho[j] * rho[j]); + } else if(CDEAMVERSION == 2) { + D_j = D_values[j]; + } else ASSERT(false); + + double t2 = -rhoB[j]; + if(itype == speciesB) t2 += rho[j]; + fpair += D_j * rhoip * t2; + } + + // This if-clause is only required for a ternary alloy. + // Actually we don't need it at all because D_i should be zero anyway if + // atom i has no concentration dependent interactions (because it is not species A or B). + if(x_i != -1.0) { + double t1 = -rhoB[i]; + if(jtype == speciesB) t1 += rho[i]; + fpair += D_i * rhojp * t1; + } + + double phip; + double phi = PhiOfR(index, itype, jtype, recip, phip); + if(itype == jtype || x_i == -1.0 || x_j == -1.0) { + // Case of no concentration dependence. + fpair += phip; + } else { + // We have a concentration dependence for the i-j interaction. + double h; + if(CDEAMVERSION == 1) { + // Calculate h(x_i) polynomial function. + double h_i = evalH(x_i); + // Calculate h(x_j) polynomial function. + double h_j = evalH(x_j); + h = 0.5 * (h_i + h_j); + } else if(CDEAMVERSION == 2) { + // Average concentration. + double x_ij = 0.5 * (x_i + x_j); + // Calculate h(x_ij) polynomial function. + h = evalH(x_ij); + } else ASSERT(false); + + fpair += h * phip; + phi *= h; + } + + // Divide by r_ij and negate to get forces from gradient. + fpair /= -r; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if(NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if(EFLAG) evdwl = phi; + if(EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0, + fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairCDEAMOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairCDEAM::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_cdeam_omp.h b/src/USER-OMP/pair_cdeam_omp.h new file mode 100644 index 0000000000..2839ab0ed0 --- /dev/null +++ b/src/USER-OMP/pair_cdeam_omp.h @@ -0,0 +1,65 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(eam/cd/omp,PairCDEAM_OneSiteOMP) +PairStyle(eam/cd/old/omp,PairCDEAM_TwoSiteOMP) + +#else + +#ifndef LMP_PAIR_CDEAM_OMP_H +#define LMP_PAIR_CDEAM_OMP_H + +#include "pair_cdeam.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairCDEAMOMP : public PairCDEAM, public ThrOMP { + + public: + PairCDEAMOMP(class LAMMPS *, int); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int iifrom, int iito, ThrData * const thr); +}; + + /// The one-site concentration formulation of CD-EAM. + class PairCDEAM_OneSiteOMP : public PairCDEAMOMP + { + public: + /// Constructor. + PairCDEAM_OneSiteOMP(class LAMMPS* lmp) : PairEAM(lmp), PairCDEAMOMP(lmp, 1) {} + }; + + /// The two-site concentration formulation of CD-EAM. + class PairCDEAM_TwoSiteOMP : public PairCDEAMOMP + { + public: + /// Constructor. + PairCDEAM_TwoSiteOMP(class LAMMPS* lmp) : PairEAM(lmp), PairCDEAMOMP(lmp, 2) {} + }; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_colloid_omp.cpp b/src/USER-OMP/pair_colloid_omp.cpp new file mode 100644 index 0000000000..e2f5c56d99 --- /dev/null +++ b/src/USER-OMP/pair_colloid_omp.cpp @@ -0,0 +1,226 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_colloid_omp.h" +#include "atom.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairColloidOMP::PairColloidOMP(LAMMPS *lmp) : + PairColloid(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairColloidOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairColloidOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r,r2inv,r6inv,forcelj,factor_lj; + double c1,c2,fR,dUR,dUA,K[9],h[4],g[4]; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const int tid = thr->get_tid(); + const double * const special_lj = force->special_lj; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq >= cutsq[itype][jtype]) continue; + + switch(form[itype][jtype]) { + case SMALL_SMALL: + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + fpair = factor_lj*forcelj*r2inv; + if (EFLAG) + evdwl = r6inv*(r6inv*lj3[itype][jtype]-lj4[itype][jtype]) - + offset[itype][jtype]; + break; + + case SMALL_LARGE: + c2 = a2[itype][jtype]; + K[1] = c2*c2; + K[2] = rsq; + K[0] = K[1] - rsq; + K[4] = rsq*rsq; + K[3] = K[1] - K[2]; + K[3] *= K[3]*K[3]; + K[6] = K[3]*K[3]; + fR = sigma3[itype][jtype]*a12[itype][jtype]*c2*K[1]/K[3]; + fpair = 4.0/15.0*fR*factor_lj * + (2.0*(K[1]+K[2]) * (K[1]*(5.0*K[1]+22.0*K[2])+5.0*K[4]) * + sigma6[itype][jtype]/K[6]-5.0) / K[0]; + if (EFLAG) + evdwl = 2.0/9.0*fR * + (1.0-(K[1]*(K[1]*(K[1]/3.0+3.0*K[2])+4.2*K[4])+K[2]*K[4]) * + sigma6[itype][jtype]/K[6]) - offset[itype][jtype]; + + if (check_error_thr((rsq <= K[1]),tid,FLERR, + "Overlapping small/large in pair colloid")) + return; + + break; + + case LARGE_LARGE: + r = sqrt(rsq); + c1 = a1[itype][jtype]; + c2 = a2[itype][jtype]; + K[0] = c1*c2; + K[1] = c1+c2; + K[2] = c1-c2; + K[3] = K[1]+r; + K[4] = K[1]-r; + K[5] = K[2]+r; + K[6] = K[2]-r; + K[7] = 1.0/(K[3]*K[4]); + K[8] = 1.0/(K[5]*K[6]); + g[0] = pow(K[3],-7.0); + g[1] = pow(K[4],-7.0); + g[2] = pow(K[5],-7.0); + g[3] = pow(K[6],-7.0); + h[0] = ((K[3]+5.0*K[1])*K[3]+30.0*K[0])*g[0]; + h[1] = ((K[4]+5.0*K[1])*K[4]+30.0*K[0])*g[1]; + h[2] = ((K[5]+5.0*K[2])*K[5]-30.0*K[0])*g[2]; + h[3] = ((K[6]+5.0*K[2])*K[6]-30.0*K[0])*g[3]; + g[0] *= 42.0*K[0]/K[3]+6.0*K[1]+K[3]; + g[1] *= 42.0*K[0]/K[4]+6.0*K[1]+K[4]; + g[2] *= -42.0*K[0]/K[5]+6.0*K[2]+K[5]; + g[3] *= -42.0*K[0]/K[6]+6.0*K[2]+K[6]; + + fR = a12[itype][jtype]*sigma6[itype][jtype]/r/37800.0; + evdwl = fR * (h[0]-h[1]-h[2]+h[3]); + dUR = evdwl/r + 5.0*fR*(g[0]+g[1]-g[2]-g[3]); + dUA = -a12[itype][jtype]/3.0*r*((2.0*K[0]*K[7]+1.0)*K[7] + + (2.0*K[0]*K[8]-1.0)*K[8]); + fpair = factor_lj * (dUR+dUA)/r; + if (EFLAG) + evdwl += a12[itype][jtype]/6.0 * + (2.0*K[0]*(K[7]+K[8])-log(K[8]/K[7])) - offset[itype][jtype]; + if (r <= K[1]) error->one(FLERR,"Overlapping large/large in pair colloid"); + break; + } + + if (EFLAG) evdwl *= factor_lj; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairColloidOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairColloid::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_colloid_omp.h b/src/USER-OMP/pair_colloid_omp.h new file mode 100644 index 0000000000..6605fbce00 --- /dev/null +++ b/src/USER-OMP/pair_colloid_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(colloid/omp,PairColloidOMP) + +#else + +#ifndef LMP_PAIR_COLLOID_OMP_H +#define LMP_PAIR_COLLOID_OMP_H + +#include "pair_colloid.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairColloidOMP : public PairColloid, public ThrOMP { + + public: + PairColloidOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_comb_omp.cpp b/src/USER-OMP/pair_comb_omp.cpp new file mode 100644 index 0000000000..eea2c6b610 --- /dev/null +++ b/src/USER-OMP/pair_comb_omp.cpp @@ -0,0 +1,638 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_comb_omp.h" +#include "atom.h" +#include "comm.h" +#include "group.h" +#include "force.h" +#include "memory.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define MAXNEIGH 24 + +/* ---------------------------------------------------------------------- */ + +PairCombOMP::PairCombOMP(LAMMPS *lmp) : + PairComb(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairCombOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = vflag_atom = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + // Build short range neighbor list + + Short_neigh_thr(); + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (vflag_atom) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (vflag_atom) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else eval<0,0,0>(ifrom, ito, thr); + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairCombOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,k,ii,jj,kk,jnum,iparam_i; + int itag,jtag,itype,jtype,ktype,iparam_ij,iparam_ijk; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double rsq,rsq1,rsq2; + double delr1[3],delr2[3],fi[3],fj[3],fk[3]; + double zeta_ij,prefactor; + int *ilist,*jlist,*numneigh,**firstneigh; + int mr1,mr2,mr3; + int rsc,inty; + double elp_ij,filp[3],fjlp[3],fklp[3]; + double iq,jq; + double yaself; + double potal,fac11,fac11e; + double vionij,fvionij,sr1,sr2,sr3,Eov,Fov; + int sht_jnum, *sht_jlist, nj; + + evdwl = ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const tag = atom->tag; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + yaself = vionij = fvionij = Eov = Fov = 0.0; + + double fxtmp,fytmp,fztmp; + double fjxtmp,fjytmp,fjztmp; + + // self energy correction term: potal + + potal_calc(potal,fac11,fac11e); + + // loop over full neighbor list of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + itag = tag[i]; + itype = map[type[i]]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + fxtmp = fytmp = fztmp = 0.0; + + iq = q[i]; + NCo[i] = 0; + nj = 0; + iparam_i = elem2param[itype][itype][itype]; + + // self energy, only on i atom + + yaself = self(¶ms[iparam_i],iq,potal); + + if (EVFLAG) ev_tally_thr(this,i,i,nlocal,0,yaself, + 0.0,0.0,0.0,0.0,0.0,thr); + + // two-body interactions (long and short repulsive) + + jlist = firstneigh[i]; + jnum = numneigh[i]; + sht_jlist = sht_first[i]; + sht_jnum = sht_num[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtag = tag[j]; + + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (x[j][2] < ztmp) continue; + if (x[j][2] == ztmp && x[j][1] < ytmp) continue; + if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue; + } + + // Qj calculates 2-body Coulombic + + jtype = map[type[j]]; + jq = q[j]; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + iparam_ij = elem2param[itype][jtype][jtype]; + + // long range q-dependent + + if (rsq > params[iparam_ij].lcutsq) continue; + + inty = intype[itype][jtype]; + + // polynomial three-point interpolation + + tri_point(rsq, mr1, mr2, mr3, sr1, sr2, sr3, itype); + + // 1/r energy and forces + + direct(inty,mr1,mr2,mr3,rsq,sr1,sr2,sr3,iq,jq, + potal,fac11,fac11e,vionij,fvionij); + + // field correction to self energy + + field(¶ms[iparam_ij],rsq,iq,jq,vionij,fvionij); + + // polarization field + // sums up long range forces + + fxtmp += delx*fvionij; + fytmp += dely*fvionij; + fztmp += delz*fvionij; + f[j][0] -= delx*fvionij; + f[j][1] -= dely*fvionij; + f[j][2] -= delz*fvionij; + + if (EVFLAG) + ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1, + 0.0,vionij,fvionij,delx,dely,delz,thr); + + // short range q-independent + + if (rsq > params[iparam_ij].cutsq) continue; + + repulsive(¶ms[iparam_ij],rsq,fpair,EFLAG,evdwl,iq,jq); + + // repulsion is pure two-body, sums up pair repulsive forces + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + + if (EVFLAG) + ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + + // accumulate coordination number information + + if (cor_flag) { + int numcoor = 0; + for (jj = 0; jj < sht_jnum; jj++) { + j = sht_jlist[jj]; + jtype = map[type[j]]; + iparam_ij = elem2param[itype][jtype][jtype]; + + if(params[iparam_ij].hfocor > 0.0 ) { + delr1[0] = x[j][0] - xtmp; + delr1[1] = x[j][1] - ytmp; + delr1[2] = x[j][2] - ztmp; + rsq1 = vec3_dot(delr1,delr1); + + if (rsq1 > params[iparam_ij].cutsq) continue; + ++numcoor; + } + } + NCo[i] = numcoor; + } + + // three-body interactions + // half i-j loop + + for (jj = 0; jj < sht_jnum; jj++) { + j = sht_jlist[jj]; + + jtype = map[type[j]]; + iparam_ij = elem2param[itype][jtype][jtype]; + + // this Qj for q-dependent BSi + + jq = q[j]; + + delr1[0] = x[j][0] - xtmp; + delr1[1] = x[j][1] - ytmp; + delr1[2] = x[j][2] - ztmp; + rsq1 = vec3_dot(delr1,delr1); + + if (rsq1 > params[iparam_ij].cutsq) continue; + nj ++; + + // accumulate bondorder zeta for each i-j interaction via loop over k + + fjxtmp = fjytmp = fjztmp = 0.0; + zeta_ij = 0.0; + cuo_flag1 = 0; cuo_flag2 = 0; + + for (kk = 0; kk < sht_jnum; kk++) { + k = sht_jlist[kk]; + if (j == k) continue; + ktype = map[type[k]]; + iparam_ijk = elem2param[itype][jtype][ktype]; + + delr2[0] = x[k][0] - xtmp; + delr2[1] = x[k][1] - ytmp; + delr2[2] = x[k][2] - ztmp; + rsq2 = vec3_dot(delr2,delr2); + + if (rsq2 > params[iparam_ijk].cutsq) continue; + + zeta_ij += zeta(¶ms[iparam_ijk],rsq1,rsq2,delr1,delr2); + + if (params[iparam_ijk].hfocor == -2.0) cuo_flag1 = 1; + if (params[iparam_ijk].hfocor == -1.0) cuo_flag2 = 1; + } + + if (cuo_flag1 && cuo_flag2) cuo_flag = 1; + else cuo_flag = 0; + + force_zeta(¶ms[iparam_ij],EFLAG,i,nj,rsq1,zeta_ij, + iq,jq,fpair,prefactor,evdwl); + + // over-coordination correction for HfO2 + + if (cor_flag && NCo[i] != 0) + Over_cor(¶ms[iparam_ij],rsq1,NCo[i],Eov, Fov); + evdwl += Eov; + fpair += Fov; + + fxtmp += delr1[0]*fpair; + fytmp += delr1[1]*fpair; + fztmp += delr1[2]*fpair; + fjxtmp -= delr1[0]*fpair; + fjytmp -= delr1[1]*fpair; + fjztmp -= delr1[2]*fpair; + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,evdwl,0.0, + -fpair,-delr1[0],-delr1[1],-delr1[2],thr); + + // attractive term via loop over k (3-body forces) + + for (kk = 0; kk < sht_jnum; kk++) { + k = sht_jlist[kk]; + if (j == k) continue; + ktype = map[type[k]]; + iparam_ijk = elem2param[itype][jtype][ktype]; + + delr2[0] = x[k][0] - xtmp; + delr2[1] = x[k][1] - ytmp; + delr2[2] = x[k][2] - ztmp; + rsq2 = vec3_dot(delr2,delr2); + if (rsq2 > params[iparam_ijk].cutsq) continue; + + for (rsc = 0; rsc < 3; rsc++) + fi[rsc] = fj[rsc] = fk[rsc] = 0.0; + + attractive(¶ms[iparam_ijk],prefactor, + rsq1,rsq2,delr1,delr2,fi,fj,fk); + + // 3-body LP and BB correction and forces + + elp_ij = elp(¶ms[iparam_ijk],rsq1,rsq2,delr1,delr2); + flp(¶ms[iparam_ijk],rsq1,rsq2,delr1,delr2,filp,fjlp,fklp); + + fxtmp += fi[0] + filp[0]; + fytmp += fi[1] + filp[1]; + fztmp += fi[2] + filp[2]; + fjxtmp += fj[0] + fjlp[0]; + fjytmp += fj[1] + fjlp[1]; + fjztmp += fj[2] + fjlp[2]; + f[k][0] += fk[0] + fklp[0]; + f[k][1] += fk[1] + fklp[1]; + f[k][2] += fk[2] + fklp[2]; + + if (EVFLAG) + ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1, + elp_ij,0.0,0.0,0.0,0.0,0.0, thr); + if (VFLAG_ATOM) v_tally3_thr(i,j,k,fj,fk,delr1,delr2,thr); + } + f[j][0] += fjxtmp; + f[j][1] += fjytmp; + f[j][2] += fjztmp; + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + + if (cuo_flag) params[iparam_i].cutsq *= 0.65; + } + cuo_flag = 0; +} + +/* ---------------------------------------------------------------------- */ + +double PairCombOMP::yasu_char(double *qf_fix, int &igroup) +{ + int ii; + double potal,fac11,fac11e; + + const double * const * const x = atom->x; + const double * const q = atom->q; + const int * const type = atom->type; + const int * const tag = atom->tag; + + const int inum = list->inum; + const int * const ilist = list->ilist; + const int * const numneigh = list->numneigh; + const int * const * const firstneigh = list->firstneigh; + + const int * const mask = atom->mask; + const int groupbit = group->bitmask[igroup]; + + qf = qf_fix; + for (ii = 0; ii < inum; ii++) { + const int i = ilist[ii]; + if (mask[i] & groupbit) + qf[i] = 0.0; + } + + // communicating charge force to all nodes, first forward then reverse + + comm->forward_comm_pair(this); + + // self energy correction term: potal + + potal_calc(potal,fac11,fac11e); + + // loop over full neighbor list of my atoms +#if defined(_OPENMP) +#pragma omp parallel for private(ii) default(none) shared(potal,fac11e) +#endif + for (ii = 0; ii < inum; ii ++) { + double fqi,fqj,fqij,fqji,fqjj,delr1[3]; + double sr1,sr2,sr3; + int mr1,mr2,mr3; + + const int i = ilist[ii]; + const int itag = tag[i]; + int nj = 0; + + if (mask[i] & groupbit) { + fqi = fqj = fqij = fqji = fqjj = 0.0; // should not be needed. + int itype = map[type[i]]; + const double xtmp = x[i][0]; + const double ytmp = x[i][1]; + const double ztmp = x[i][2]; + const double iq = q[i]; + const int iparam_i = elem2param[itype][itype][itype]; + + // charge force from self energy + + fqi = qfo_self(¶ms[iparam_i],iq,potal); + + // two-body interactions + + const int * const jlist = firstneigh[i]; + const int jnum = numneigh[i]; + + for (int jj = 0; jj < jnum; jj++) { + const int j = jlist[jj] & NEIGHMASK; + const int jtag = tag[j]; + + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (x[j][2] < ytmp) continue; + if (x[j][2] == ztmp && x[j][1] < ytmp) continue; + if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue; + } + + const int jtype = map[type[j]]; + double jq = q[j]; + + delr1[0] = x[j][0] - xtmp; + delr1[1] = x[j][1] - ytmp; + delr1[2] = x[j][2] - ztmp; + double rsq1 = vec3_dot(delr1,delr1); + + const int iparam_ij = elem2param[itype][jtype][jtype]; + + // long range q-dependent + + if (rsq1 > params[iparam_ij].lcutsq) continue; + + const int inty = intype[itype][jtype]; + + // polynomial three-point interpolation + + tri_point(rsq1,mr1,mr2,mr3,sr1,sr2,sr3,itype); + + // 1/r charge forces + + qfo_direct(inty,mr1,mr2,mr3,rsq1,sr1,sr2,sr3,fac11e,fqij); + + // field correction to self energy and charge force + + qfo_field(¶ms[iparam_ij],rsq1,iq,jq,fqji,fqjj); + fqi += jq * fqij + fqji; +#if defined(_OPENMP) +#pragma omp atomic +#endif + qf[j] += (iq * fqij + fqjj); + } + + // three-body interactions + + for (int jj = 0; jj < jnum; jj++) { + const int j = jlist[jj] & NEIGHMASK; + const int jtype = map[type[j]]; + const double jq = q[j]; + + delr1[0] = x[j][0] - xtmp; + delr1[1] = x[j][1] - ytmp; + delr1[2] = x[j][2] - ztmp; + double rsq1 = vec3_dot(delr1,delr1); + + const int iparam_ij = elem2param[itype][jtype][jtype]; + + if (rsq1 > params[iparam_ij].cutsq) continue; + nj ++; + + // charge force in Aij and Bij + + qfo_short(¶ms[iparam_ij],i,nj,rsq1,iq,jq,fqij,fqjj); + fqi += fqij; +#if defined(_OPENMP) +#pragma omp atomic +#endif + qf[j] += fqjj; + } + +#if defined(_OPENMP) +#pragma omp atomic +#endif + qf[i] += fqi; + } + } + + comm->reverse_comm_pair(this); + + // sum charge force on each node and return it + + double eneg = 0.0; + for (ii = 0; ii < inum; ii++) { + const int i = ilist[ii]; + if (mask[i] & groupbit) + eneg += qf[i]; + } + double enegtot; + MPI_Allreduce(&eneg,&enegtot,1,MPI_DOUBLE,MPI_SUM,world); + return enegtot; +} + +/* ---------------------------------------------------------------------- */ + +double PairCombOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairComb::memory_usage(); + + return bytes; +} +/* ---------------------------------------------------------------------- */ + +void PairCombOMP::Short_neigh_thr() +{ + + if (atom->nmax > nmax) { + nmax = atom->nmax; + memory->sfree(sht_first); + sht_first = (int **) memory->smalloc(nmax*sizeof(int *), + "pair:sht_first"); + memory->grow(sht_num,nmax,"pair:sht_num"); + memory->grow(NCo,nmax,"pair:NCo"); + memory->grow(bbij,nmax,nmax,"pair:bbij"); + } + + const int nthreads = comm->nthreads; + if (nthreads > maxpage) + add_pages(nthreads - maxpage); + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + int nj,npntj,*neighptrj; + int iparam_ij,*ilist,*jlist,*numneigh,**firstneigh; + int jnum,i,j,ii,jj; + double xtmp,ytmp,ztmp,rsq,delrj[3]; + double **x = atom->x; + int *type = atom->type; + + const int inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + + const int iidelta = 1 + inum/nthreads; + const int iifrom = tid*iidelta; + int iito = iifrom + iidelta; + if (iito > inum) iito = inum; + + int npage = tid; + npntj = 0; + + if (iifrom < inum) { + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + +#if defined(_OPENMP) +#pragma omp critical +#endif + if(pgsize - npntj < oneatom) { + npntj = 0; + npage += nthreads; + if (npage >= maxpage) add_pages(nthreads); + } + + neighptrj = &pages[npage][npntj]; + nj = 0; + + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delrj[0] = xtmp - x[j][0]; + delrj[1] = ytmp - x[j][1]; + delrj[2] = ztmp - x[j][2]; + rsq = vec3_dot(delrj,delrj); + + if (rsq > cutmin) continue; + neighptrj[nj++] = j; + } + sht_first[i] = neighptrj; + sht_num[i] = nj; + npntj += nj; + } + } + } +} diff --git a/src/USER-OMP/pair_comb_omp.h b/src/USER-OMP/pair_comb_omp.h new file mode 100644 index 0000000000..85011064e7 --- /dev/null +++ b/src/USER-OMP/pair_comb_omp.h @@ -0,0 +1,47 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(comb/omp,PairCombOMP) + +#else + +#ifndef LMP_PAIR_COMB_OMP_H +#define LMP_PAIR_COMB_OMP_H + +#include "pair_comb.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairCombOMP : public PairComb, public ThrOMP { + + public: + PairCombOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + virtual double yasu_char(double *, int &); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); + + void Short_neigh_thr(); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_coul_cut_omp.cpp b/src/USER-OMP/pair_coul_cut_omp.cpp new file mode 100644 index 0000000000..37b311f93e --- /dev/null +++ b/src/USER-OMP/pair_coul_cut_omp.cpp @@ -0,0 +1,160 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_coul_cut_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairCoulCutOMP::PairCoulCutOMP(LAMMPS *lmp) : + PairCoulCut(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairCoulCutOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairCoulCutOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair; + double rsq,r2inv,rinv,forcecoul,factor_coul; + int *ilist,*jlist,*numneigh,**firstneigh; + + ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + rinv = sqrt(r2inv); + forcecoul = qqrd2e * scale[itype][jtype] * qtmp*q[j]*rinv; + fpair = factor_coul*forcecoul * r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) + ecoul = factor_coul * qqrd2e * scale[itype][jtype] * qtmp*q[j]*rinv; + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + 0.0,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairCoulCutOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairCoulCut::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_coul_cut_omp.h b/src/USER-OMP/pair_coul_cut_omp.h new file mode 100644 index 0000000000..9e5fe81197 --- /dev/null +++ b/src/USER-OMP/pair_coul_cut_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(coul/cut/omp,PairCoulCutOMP) + +#else + +#ifndef LMP_PAIR_COUL_CUT_OMP_H +#define LMP_PAIR_COUL_CUT_OMP_H + +#include "pair_coul_cut.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairCoulCutOMP : public PairCoulCut, public ThrOMP { + + public: + PairCoulCutOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_coul_debye_omp.cpp b/src/USER-OMP/pair_coul_debye_omp.cpp new file mode 100644 index 0000000000..4c8aaba007 --- /dev/null +++ b/src/USER-OMP/pair_coul_debye_omp.cpp @@ -0,0 +1,161 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_coul_debye_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairCoulDebyeOMP::PairCoulDebyeOMP(LAMMPS *lmp) : + PairCoulDebye(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairCoulDebyeOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairCoulDebyeOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair; + double rsq,r2inv,r,rinv,forcecoul,factor_coul,screening; + int *ilist,*jlist,*numneigh,**firstneigh; + + ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + r = sqrt(rsq); + rinv = 1.0/r; + screening = exp(-kappa*r); + forcecoul = qqrd2e * qtmp*q[j] * screening * (kappa + rinv); + fpair = factor_coul*forcecoul * r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) + ecoul = factor_coul * qqrd2e * qtmp*q[j] * rinv * screening; + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + 0.0,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ +double PairCoulDebyeOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairCoulDebye::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_coul_debye_omp.h b/src/USER-OMP/pair_coul_debye_omp.h new file mode 100644 index 0000000000..dc08647553 --- /dev/null +++ b/src/USER-OMP/pair_coul_debye_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(coul/debye/omp,PairCoulDebyeOMP) + +#else + +#ifndef LMP_PAIR_COUL_DEBYE_OMP_H +#define LMP_PAIR_COUL_DEBYE_OMP_H + +#include "pair_coul_debye.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairCoulDebyeOMP : public PairCoulDebye, public ThrOMP { + + public: + PairCoulDebyeOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_coul_diel_omp.cpp b/src/USER-OMP/pair_coul_diel_omp.cpp new file mode 100644 index 0000000000..c583800004 --- /dev/null +++ b/src/USER-OMP/pair_coul_diel_omp.cpp @@ -0,0 +1,167 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_coul_diel_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairCoulDielOMP::PairCoulDielOMP(LAMMPS *lmp) : + PairCoulDiel(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairCoulDielOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairCoulDielOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair; + double rsq,r,rarg,th,depsdr,epsr,forcecoul,factor_coul; + int *ilist,*jlist,*numneigh,**firstneigh; + + ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r = sqrt(rsq); + rarg = (r-rme[itype][jtype])/sigmae[itype][jtype]; + th=tanh(rarg); + epsr=a_eps+b_eps*th; + depsdr=b_eps * (1.0 - th*th) / sigmae[itype][jtype]; + + forcecoul = qqrd2e*qtmp*q[j]*((eps_s*(epsr+r*depsdr)/epsr/epsr) -1.)/rsq; + fpair = factor_coul*forcecoul/r; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + ecoul = (qqrd2e*qtmp*q[j]*((eps_s/epsr) -1.)/r) - offset[itype][jtype]; + ecoul *= factor_coul; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + 0.0,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairCoulDielOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairCoulDiel::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_coul_diel_omp.h b/src/USER-OMP/pair_coul_diel_omp.h new file mode 100644 index 0000000000..68206e4c28 --- /dev/null +++ b/src/USER-OMP/pair_coul_diel_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(coul/diel/omp,PairCoulDielOMP) + +#else + +#ifndef LMP_PAIR_COUL_DIEL_OMP_H +#define LMP_PAIR_COUL_DIEL_OMP_H + +#include "pair_coul_diel.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairCoulDielOMP : public PairCoulDiel, public ThrOMP { + + public: + PairCoulDielOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_coul_long_omp.cpp b/src/USER-OMP/pair_coul_long_omp.cpp new file mode 100644 index 0000000000..5ae5478755 --- /dev/null +++ b/src/USER-OMP/pair_coul_long_omp.cpp @@ -0,0 +1,200 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_coul_long_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + +/* ---------------------------------------------------------------------- */ + +PairCoulLongOMP::PairCoulLongOMP(LAMMPS *lmp) : + PairCoulLong(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; + cut_respa = NULL; +} + +/* ---------------------------------------------------------------------- */ + +void PairCoulLongOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itable,itype,jtype; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair; + double fraction,table; + double r,r2inv,rsq,forcecoul,factor_coul; + double grij,expm2,prefactor,t,erfc; + int *ilist,*jlist,*numneigh,**firstneigh; + + ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cut_coulsq) { + r2inv = 1.0/rsq; + if (!ncoultablebits || rsq <= tabinnersq) { + r = sqrt(rsq); + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + prefactor = qqrd2e * scale[itype][jtype] * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + } else { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; + itable >>= ncoulshiftbits; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; + table = ftable[itable] + fraction*dftable[itable]; + forcecoul = scale[itype][jtype] * qtmp*q[j] * table; + if (factor_coul < 1.0) { + table = ctable[itable] + fraction*dctable[itable]; + prefactor = scale[itype][jtype] * qtmp*q[j] * table; + forcecoul -= (1.0-factor_coul)*prefactor; + } + } + + fpair = forcecoul * r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + if (!ncoultablebits || rsq <= tabinnersq) + ecoul = prefactor*erfc; + else { + table = etable[itable] + fraction*detable[itable]; + ecoul = scale[itype][jtype] * qtmp*q[j] * table; + } + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + 0.0,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairCoulLongOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairCoulLong::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_coul_long_omp.h b/src/USER-OMP/pair_coul_long_omp.h new file mode 100644 index 0000000000..d628d22b60 --- /dev/null +++ b/src/USER-OMP/pair_coul_long_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(coul/long/omp,PairCoulLongOMP) + +#else + +#ifndef LMP_PAIR_COUL_LONG_OMP_H +#define LMP_PAIR_COUL_LONG_OMP_H + +#include "pair_coul_long.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairCoulLongOMP : public PairCoulLong, public ThrOMP { + + public: + PairCoulLongOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_coul_wolf_omp.cpp b/src/USER-OMP/pair_coul_wolf_omp.cpp new file mode 100644 index 0000000000..16e0c4ed90 --- /dev/null +++ b/src/USER-OMP/pair_coul_wolf_omp.cpp @@ -0,0 +1,183 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_coul_wolf_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "math_const.h" + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +/* ---------------------------------------------------------------------- */ + +PairCoulWolfOMP::PairCoulWolfOMP(LAMMPS *lmp) : + PairCoulWolf(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairCoulWolfOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairCoulWolfOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair; + double rsq,forcecoul,factor_coul; + double prefactor; + double r,rexp; + int *ilist,*jlist,*numneigh,**firstneigh; + double erfcc,erfcd,v_sh,dvdrr,e_self,e_shift,f_shift,qisq; + + ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp; + + // self and shifted coulombic energy + + e_self = v_sh = 0.0; + e_shift = erfc(alf*cut_coul)/cut_coul; + f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) / + cut_coul; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + qisq = qtmp*qtmp; + e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e; + if (EVFLAG) ev_tally_thr(this,i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0,thr); + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cut_coulsq) { + r = sqrt(rsq); + prefactor = qqrd2e*qtmp*q[j]/r; + erfcc = erfc(alf*r); + erfcd = exp(-alf*alf*r*r); + v_sh = (erfcc - e_shift*r) * prefactor; + dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift; + forcecoul = dvdrr*rsq*prefactor; + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + fpair = forcecoul / rsq; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + if (rsq < cut_coulsq) { + ecoul = v_sh; + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + 0.0,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairCoulWolfOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairCoulWolf::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_coul_wolf_omp.h b/src/USER-OMP/pair_coul_wolf_omp.h new file mode 100644 index 0000000000..aef683d22c --- /dev/null +++ b/src/USER-OMP/pair_coul_wolf_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(coul/wolf/omp,PairCoulWolfOMP) + +#else + +#ifndef LMP_PAIR_COUL_WOLF_OMP_H +#define LMP_PAIR_COUL_WOLF_OMP_H + +#include "pair_coul_wolf.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairCoulWolfOMP : public PairCoulWolf, public ThrOMP { + + public: + PairCoulWolfOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_dipole_cut_omp.cpp b/src/USER-OMP/pair_dipole_cut_omp.cpp new file mode 100644 index 0000000000..34d7e6c17e --- /dev/null +++ b/src/USER-OMP/pair_dipole_cut_omp.cpp @@ -0,0 +1,286 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_dipole_cut_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairDipoleCutOMP::PairDipoleCutOMP(LAMMPS *lmp) : + PairDipoleCut(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairDipoleCutOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairDipoleCutOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul; + double rsq,rinv,r2inv,r6inv,r3inv,r5inv,r7inv,fx,fy,fz; + double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz; + double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul; + double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4; + double forcelj,factor_coul,factor_lj; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const * const torque = thr->get_torque(); + const double * const q = atom->q; + const double * const * const mu = atom->mu; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + qtmp = q[i]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_coul = special_coul[sbmask(j)]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + rinv = sqrt(r2inv); + + // atom can have both a charge and dipole + // i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole + + forcecoulx = forcecouly = forcecoulz = 0.0; + tixcoul = tiycoul = tizcoul = 0.0; + tjxcoul = tjycoul = tjzcoul = 0.0; + + if (rsq < cut_coulsq[itype][jtype]) { + + if (qtmp != 0.0 && q[j] != 0.0) { + r3inv = r2inv*rinv; + pre1 = qtmp*q[j]*r3inv; + + forcecoulx += pre1*delx; + forcecouly += pre1*dely; + forcecoulz += pre1*delz; + } + + if (mu[i][3] > 0.0 && mu[j][3] > 0.0) { + r3inv = r2inv*rinv; + r5inv = r3inv*r2inv; + r7inv = r5inv*r2inv; + + pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2]; + pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz; + pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz; + + pre1 = 3.0*r5inv*pdotp - 15.0*r7inv*pidotr*pjdotr; + pre2 = 3.0*r5inv*pjdotr; + pre3 = 3.0*r5inv*pidotr; + pre4 = -1.0*r3inv; + + forcecoulx += pre1*delx + pre2*mu[i][0] + pre3*mu[j][0]; + forcecouly += pre1*dely + pre2*mu[i][1] + pre3*mu[j][1]; + forcecoulz += pre1*delz + pre2*mu[i][2] + pre3*mu[j][2]; + + crossx = pre4 * (mu[i][1]*mu[j][2] - mu[i][2]*mu[j][1]); + crossy = pre4 * (mu[i][2]*mu[j][0] - mu[i][0]*mu[j][2]); + crossz = pre4 * (mu[i][0]*mu[j][1] - mu[i][1]*mu[j][0]); + + tixcoul += crossx + pre2 * (mu[i][1]*delz - mu[i][2]*dely); + tiycoul += crossy + pre2 * (mu[i][2]*delx - mu[i][0]*delz); + tizcoul += crossz + pre2 * (mu[i][0]*dely - mu[i][1]*delx); + tjxcoul += -crossx + pre3 * (mu[j][1]*delz - mu[j][2]*dely); + tjycoul += -crossy + pre3 * (mu[j][2]*delx - mu[j][0]*delz); + tjzcoul += -crossz + pre3 * (mu[j][0]*dely - mu[j][1]*delx); + } + + if (mu[i][3] > 0.0 && q[j] != 0.0) { + r3inv = r2inv*rinv; + r5inv = r3inv*r2inv; + pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz; + pre1 = 3.0*q[j]*r5inv * pidotr; + pre2 = q[j]*r3inv; + + forcecoulx += pre2*mu[i][0] - pre1*delx; + forcecouly += pre2*mu[i][1] - pre1*dely; + forcecoulz += pre2*mu[i][2] - pre1*delz; + tixcoul += pre2 * (mu[i][1]*delz - mu[i][2]*dely); + tiycoul += pre2 * (mu[i][2]*delx - mu[i][0]*delz); + tizcoul += pre2 * (mu[i][0]*dely - mu[i][1]*delx); + } + + if (mu[j][3] > 0.0 && qtmp != 0.0) { + r3inv = r2inv*rinv; + r5inv = r3inv*r2inv; + pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz; + pre1 = 3.0*qtmp*r5inv * pjdotr; + pre2 = qtmp*r3inv; + + forcecoulx += pre1*delx - pre2*mu[j][0]; + forcecouly += pre1*dely - pre2*mu[j][1]; + forcecoulz += pre1*delz - pre2*mu[j][2]; + tjxcoul += -pre2 * (mu[j][1]*delz - mu[j][2]*dely); + tjycoul += -pre2 * (mu[j][2]*delx - mu[j][0]*delz); + tjzcoul += -pre2 * (mu[j][0]*dely - mu[j][1]*delx); + } + } + + // LJ interaction + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + forcelj *= factor_lj * r2inv; + } else forcelj = 0.0; + + // total force + + fq = factor_coul*qqrd2e; + fx = fq*forcecoulx + delx*forcelj; + fy = fq*forcecouly + dely*forcelj; + fz = fq*forcecoulz + delz*forcelj; + + // force & torque accumulation + + fxtmp += fx; + fytmp += fy; + fztmp += fz; + t1tmp += fq*tixcoul; + t2tmp += fq*tiycoul; + t3tmp += fq*tizcoul; + + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= fx; + f[j][1] -= fy; + f[j][2] -= fz; + torque[j][0] += fq*tjxcoul; + torque[j][1] += fq*tjycoul; + torque[j][2] += fq*tjzcoul; + } + + if (EFLAG) { + if (rsq < cut_coulsq[itype][jtype]) { + ecoul = qtmp*q[j]*rinv; + if (mu[i][3] > 0.0 && mu[j][3] > 0.0) + ecoul += r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr; + if (mu[i][3] > 0.0 && q[j] != 0.0) + ecoul += -q[j]*r3inv*pidotr; + if (mu[j][3] > 0.0 && qtmp != 0.0) + ecoul += qtmp*r3inv*pjdotr; + ecoul *= factor_coul*qqrd2e; + } else ecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fx,fy,fz,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + torque[i][0] += t1tmp; + torque[i][1] += t2tmp; + torque[i][2] += t3tmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairDipoleCutOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairDipoleCut::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_dipole_cut_omp.h b/src/USER-OMP/pair_dipole_cut_omp.h new file mode 100644 index 0000000000..4e874b1ffb --- /dev/null +++ b/src/USER-OMP/pair_dipole_cut_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(dipole/cut/omp,PairDipoleCutOMP) + +#else + +#ifndef LMP_PAIR_DIPOLE_CUT_OMP_H +#define LMP_PAIR_DIPOLE_CUT_OMP_H + +#include "pair_dipole_cut.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairDipoleCutOMP : public PairDipoleCut, public ThrOMP { + + public: + PairDipoleCutOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_dipole_sf_omp.cpp b/src/USER-OMP/pair_dipole_sf_omp.cpp new file mode 100644 index 0000000000..4090bc3c02 --- /dev/null +++ b/src/USER-OMP/pair_dipole_sf_omp.cpp @@ -0,0 +1,318 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_dipole_sf_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairDipoleSFOMP::PairDipoleSFOMP(LAMMPS *lmp) : + PairDipoleSF(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairDipoleSFOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairDipoleSFOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul; + double rsq,rinv,r2inv,r6inv,r3inv,r5inv,fx,fy,fz; + double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz; + double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul; + double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4; + double forcelj,factor_coul,factor_lj; + double presf,afac,bfac,pqfac,qpfac,forceljcut,forceljsf; + double aforcecoulx,aforcecouly,aforcecoulz; + double bforcecoulx,bforcecouly,bforcecoulz; + double rcutlj2inv, rcutcoul2inv,rcutlj6inv; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const * const torque = thr->get_torque(); + const double * const q = atom->q; + const double * const * const mu = atom->mu; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + qtmp = q[i]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_coul = special_coul[sbmask(j)]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + rinv = sqrt(r2inv); + + // atom can have both a charge and dipole + // i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole + // atom can have both a charge and dipole + // i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole + + forcecoulx = forcecouly = forcecoulz = 0.0; + tixcoul = tiycoul = tizcoul = 0.0; + tjxcoul = tjycoul = tjzcoul = 0.0; + + if (rsq < cut_coulsq[itype][jtype]) { + + if (qtmp != 0.0 && q[j] != 0.0) { + pre1 = qtmp*q[j]*rinv*(r2inv-1.0/cut_coulsq[itype][jtype]); + + forcecoulx += pre1*delx; + forcecouly += pre1*dely; + forcecoulz += pre1*delz; + } + + if (mu[i][3] > 0.0 && mu[j][3] > 0.0) { + r3inv = r2inv*rinv; + r5inv = r3inv*r2inv; + rcutcoul2inv=1.0/cut_coulsq[itype][jtype]; + + pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2]; + pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz; + pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz; + + afac = 1.0 - rsq*rsq * rcutcoul2inv*rcutcoul2inv; + pre1 = afac * ( pdotp - 3.0 * r2inv * pidotr * pjdotr ); + aforcecoulx = pre1*delx; + aforcecouly = pre1*dely; + aforcecoulz = pre1*delz; + + bfac = 1.0 - 4.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv) + + 3.0*rsq*rsq*rcutcoul2inv*rcutcoul2inv; + presf = 2.0 * r2inv * pidotr * pjdotr; + bforcecoulx = bfac * (pjdotr*mu[i][0]+pidotr*mu[j][0]-presf*delx); + bforcecouly = bfac * (pjdotr*mu[i][1]+pidotr*mu[j][1]-presf*dely); + bforcecoulz = bfac * (pjdotr*mu[i][2]+pidotr*mu[j][2]-presf*delz); + + forcecoulx += 3.0 * r5inv * ( aforcecoulx + bforcecoulx ); + forcecouly += 3.0 * r5inv * ( aforcecouly + bforcecouly ); + forcecoulz += 3.0 * r5inv * ( aforcecoulz + bforcecoulz ); + + pre2 = 3.0 * bfac * r5inv * pjdotr; + pre3 = 3.0 * bfac * r5inv * pidotr; + pre4 = -bfac * r3inv; + + crossx = pre4 * (mu[i][1]*mu[j][2] - mu[i][2]*mu[j][1]); + crossy = pre4 * (mu[i][2]*mu[j][0] - mu[i][0]*mu[j][2]); + crossz = pre4 * (mu[i][0]*mu[j][1] - mu[i][1]*mu[j][0]); + + tixcoul += crossx + pre2 * (mu[i][1]*delz - mu[i][2]*dely); + tiycoul += crossy + pre2 * (mu[i][2]*delx - mu[i][0]*delz); + tizcoul += crossz + pre2 * (mu[i][0]*dely - mu[i][1]*delx); + tjxcoul += -crossx + pre3 * (mu[j][1]*delz - mu[j][2]*dely); + tjycoul += -crossy + pre3 * (mu[j][2]*delx - mu[j][0]*delz); + tjzcoul += -crossz + pre3 * (mu[j][0]*dely - mu[j][1]*delx); + } + + if (mu[i][3] > 0.0 && q[j] != 0.0) { + r3inv = r2inv*rinv; + r5inv = r3inv*r2inv; + pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz; + rcutcoul2inv=1.0/cut_coulsq[itype][jtype]; + pre1 = 3.0 * q[j] * r5inv * pidotr * (1-rsq*rcutcoul2inv); + pqfac = 1.0 - 3.0*rsq*rcutcoul2inv + + 2.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv); + pre2 = q[j] * r3inv * pqfac; + + forcecoulx += pre2*mu[i][0] - pre1*delx; + forcecouly += pre2*mu[i][1] - pre1*dely; + forcecoulz += pre2*mu[i][2] - pre1*delz; + tixcoul += pre2 * (mu[i][1]*delz - mu[i][2]*dely); + tiycoul += pre2 * (mu[i][2]*delx - mu[i][0]*delz); + tizcoul += pre2 * (mu[i][0]*dely - mu[i][1]*delx); + } + + if (mu[j][3] > 0.0 && qtmp != 0.0) { + r3inv = r2inv*rinv; + r5inv = r3inv*r2inv; + pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz; + rcutcoul2inv=1.0/cut_coulsq[itype][jtype]; + pre1 = 3.0 * qtmp * r5inv * pjdotr * (1-rsq*rcutcoul2inv); + qpfac = 1.0 - 3.0*rsq*rcutcoul2inv + + 2.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv); + pre2 = qtmp * r3inv * qpfac; + + forcecoulx += pre1*delx - pre2*mu[j][0]; + forcecouly += pre1*dely - pre2*mu[j][1]; + forcecoulz += pre1*delz - pre2*mu[j][2]; + tjxcoul += -pre2 * (mu[j][1]*delz - mu[j][2]*dely); + tjycoul += -pre2 * (mu[j][2]*delx - mu[j][0]*delz); + tjzcoul += -pre2 * (mu[j][0]*dely - mu[j][1]*delx); + } + } + + // LJ interaction + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + forceljcut = r6inv*(lj1[itype][jtype]*r6inv-lj2[itype][jtype])*r2inv; + + rcutlj2inv = 1.0 / cut_ljsq[itype][jtype]; + rcutlj6inv = rcutlj2inv * rcutlj2inv * rcutlj2inv; + forceljsf = (lj1[itype][jtype]*rcutlj6inv - lj2[itype][jtype]) * + rcutlj6inv * rcutlj2inv; + + forcelj = factor_lj * (forceljcut - forceljsf); + } else forcelj = 0.0; + + // total force + + fq = factor_coul*qqrd2e; + fx = fq*forcecoulx + delx*forcelj; + fy = fq*forcecouly + dely*forcelj; + fz = fq*forcecoulz + delz*forcelj; + + // force & torque accumulation + + fxtmp += fx; + fytmp += fy; + fztmp += fz; + t1tmp += fq*tixcoul; + t2tmp += fq*tiycoul; + t3tmp += fq*tizcoul; + + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= fx; + f[j][1] -= fy; + f[j][2] -= fz; + torque[j][0] += fq*tjxcoul; + torque[j][1] += fq*tjycoul; + torque[j][2] += fq*tjzcoul; + } + + if (EFLAG) { + if (rsq < cut_coulsq[itype][jtype]) { + ecoul = qtmp * q[j] * rinv * + pow((1.0-sqrt(rsq)/sqrt(cut_coulsq[itype][jtype])),2.0); + if (mu[i][3] > 0.0 && mu[j][3] > 0.0) + ecoul += bfac * (r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr); + if (mu[i][3] > 0.0 && q[j] != 0.0) + ecoul += -q[j] * r3inv * pqfac * pidotr; + if (mu[j][3] > 0.0 && qtmp != 0.0) + ecoul += qtmp * r3inv * qpfac * pjdotr; + ecoul *= factor_coul*qqrd2e; + } else ecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])+ + rcutlj6inv*(6*lj3[itype][jtype]*rcutlj6inv-3*lj4[itype][jtype])* + rsq*rcutlj2inv+ + rcutlj6inv*(-7*lj3[itype][jtype]*rcutlj6inv+4*lj4[itype][jtype]); + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fx,fy,fz,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + torque[i][0] += t1tmp; + torque[i][1] += t2tmp; + torque[i][2] += t3tmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairDipoleSFOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairDipoleSF::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_dipole_sf_omp.h b/src/USER-OMP/pair_dipole_sf_omp.h new file mode 100644 index 0000000000..3ff008cef5 --- /dev/null +++ b/src/USER-OMP/pair_dipole_sf_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(dipole/sf/omp,PairDipoleSFOMP) + +#else + +#ifndef LMP_PAIR_DIPOLE_SF_OMP_H +#define LMP_PAIR_DIPOLE_SF_OMP_H + +#include "pair_dipole_sf.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairDipoleSFOMP : public PairDipoleSF, public ThrOMP { + + public: + PairDipoleSFOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_dpd_omp.cpp b/src/USER-OMP/pair_dpd_omp.cpp new file mode 100644 index 0000000000..cb7f560e05 --- /dev/null +++ b/src/USER-OMP/pair_dpd_omp.cpp @@ -0,0 +1,214 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_dpd_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "update.h" +#include "random_mars.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define EPSILON 1.0e-10 + +/* ---------------------------------------------------------------------- */ + +PairDPDOMP::PairDPDOMP(LAMMPS *lmp) : + PairDPD(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; + random_thr = NULL; +} + +/* ---------------------------------------------------------------------- */ + +PairDPDOMP::~PairDPDOMP() +{ + if (random_thr) { + for (int i=1; i < comm->nthreads; ++i) + delete random_thr[i]; + + delete[] random_thr; + random_thr = NULL; + } +} + +/* ---------------------------------------------------------------------- */ + +void PairDPDOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + if (!random_thr) + random_thr = new RanMars*[nthreads]; + + // to ensure full compatibility with the serial DPD style + // we use is random number generator instance for thread 0 + random_thr[0] = random; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + // generate a random number generator instance for + // all threads != 0. make sure we use unique seeds. + if (random_thr && tid > 0) + random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me + + comm->nprocs*tid); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairDPDOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double vxtmp,vytmp,vztmp,delvx,delvy,delvz; + double rsq,r,rinv,dot,wd,randnum,factor_dpd; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + const double * const * const v = atom->v; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double *special_lj = force->special_lj; + const double dtinvsqrt = 1.0/sqrt(update->dt); + double fxtmp,fytmp,fztmp; + RanMars &rng = *random_thr[thr->get_tid()]; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + vxtmp = v[i][0]; + vytmp = v[i][1]; + vztmp = v[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_dpd = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r = sqrt(rsq); + if (r < EPSILON) continue; // r can be 0.0 in DPD systems + rinv = 1.0/r; + delvx = vxtmp - v[j][0]; + delvy = vytmp - v[j][1]; + delvz = vztmp - v[j][2]; + dot = delx*delvx + dely*delvy + delz*delvz; + wd = 1.0 - r/cut[itype][jtype]; + randnum = rng.gaussian(); + + // conservative force = a0 * wd + // drag force = -gamma * wd^2 * (delx dot delv) / r + // random force = sigma * wd * rnd * dtinvsqrt; + + fpair = a0[itype][jtype]*wd; + fpair -= gamma[itype][jtype]*wd*wd*dot*rinv; + fpair += sigma[itype][jtype]*wd*randnum*dtinvsqrt; + fpair *= factor_dpd*rinv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + // unshifted eng of conservative term: + // evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/cut[itype][jtype]); + // eng shifted to 0.0 at cutoff + evdwl = 0.5*a0[itype][jtype]*cut[itype][jtype] * wd*wd; + evdwl *= factor_dpd; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairDPDOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairDPD::memory_usage(); + bytes += comm->nthreads * sizeof(RanMars*); + bytes += comm->nthreads * sizeof(RanMars); + + return bytes; +} diff --git a/src/USER-OMP/pair_dpd_omp.h b/src/USER-OMP/pair_dpd_omp.h new file mode 100644 index 0000000000..470e6f1133 --- /dev/null +++ b/src/USER-OMP/pair_dpd_omp.h @@ -0,0 +1,52 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(dpd/omp,PairDPDOMP) + +#else + +#ifndef LMP_PAIR_DPD_OMP_H +#define LMP_PAIR_DPD_OMP_H + +#include "pair_dpd.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairDPDOMP : public PairDPD, public ThrOMP { + + public: + PairDPDOMP(class LAMMPS *); + virtual ~PairDPDOMP(); + + virtual void compute(int, int); + virtual double memory_usage(); + + protected: + class RanMars **random_thr; + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_dpd_tstat_omp.cpp b/src/USER-OMP/pair_dpd_tstat_omp.cpp new file mode 100644 index 0000000000..6965e69349 --- /dev/null +++ b/src/USER-OMP/pair_dpd_tstat_omp.cpp @@ -0,0 +1,214 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_dpd_tstat_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "update.h" +#include "random_mars.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define EPSILON 1.0e-10 + +/* ---------------------------------------------------------------------- */ + +PairDPDTstatOMP::PairDPDTstatOMP(LAMMPS *lmp) : + PairDPDTstat(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; + random_thr = NULL; +} + +/* ---------------------------------------------------------------------- */ + +PairDPDTstatOMP::~PairDPDTstatOMP() +{ + if (random_thr) { + for (int i=1; i < comm->nthreads; ++i) + delete random_thr[i]; + + delete[] random_thr; + random_thr = NULL; + } +} + +/* ---------------------------------------------------------------------- */ + +void PairDPDTstatOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + if (!random_thr) + random_thr = new RanMars*[nthreads]; + + // to ensure full compatibility with the serial DPD style + // we use is random number generator instance for thread 0 + random_thr[0] = random; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + // generate a random number generator instance for + // all threads != 0. make sure we use unique seeds. + if (random_thr && tid > 0) + random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me + + comm->nprocs*tid); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairDPDTstatOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,fpair; + double vxtmp,vytmp,vztmp,delvx,delvy,delvz; + double rsq,r,rinv,dot,wd,randnum,factor_dpd; + int *ilist,*jlist,*numneigh,**firstneigh; + + const double * const * const x = atom->x; + const double * const * const v = atom->v; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double *special_lj = force->special_lj; + const double dtinvsqrt = 1.0/sqrt(update->dt); + double fxtmp,fytmp,fztmp; + RanMars &rng = *random_thr[thr->get_tid()]; + + // adjust sigma if target T is changing + + if (t_start != t_stop) { + double delta = update->ntimestep - update->beginstep; + delta /= update->endstep - update->beginstep; + temperature = t_start + delta * (t_stop-t_start); + double boltz = force->boltz; + for (i = 1; i <= atom->ntypes; i++) + for (j = i; j <= atom->ntypes; j++) + sigma[i][j] = sigma[j][i] = sqrt(2.0*boltz*temperature*gamma[i][j]); + } + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + vxtmp = v[i][0]; + vytmp = v[i][1]; + vztmp = v[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_dpd = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r = sqrt(rsq); + if (r < EPSILON) continue; // r can be 0.0 in DPD systems + rinv = 1.0/r; + delvx = vxtmp - v[j][0]; + delvy = vytmp - v[j][1]; + delvz = vztmp - v[j][2]; + dot = delx*delvx + dely*delvy + delz*delvz; + wd = 1.0 - r/cut[itype][jtype]; + randnum = rng.gaussian(); + + // drag force = -gamma * wd^2 * (delx dot delv) / r + // random force = sigma * wd * rnd * dtinvsqrt; + + fpair = -gamma[itype][jtype]*wd*wd*dot*rinv; + fpair += sigma[itype][jtype]*wd*randnum*dtinvsqrt; + fpair *= factor_dpd*rinv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + 0.0,0.0,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairDPDTstatOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairDPDTstat::memory_usage(); + bytes += comm->nthreads * sizeof(RanMars*); + bytes += comm->nthreads * sizeof(RanMars); + + return bytes; +} diff --git a/src/USER-OMP/pair_dpd_tstat_omp.h b/src/USER-OMP/pair_dpd_tstat_omp.h new file mode 100644 index 0000000000..b04874efc2 --- /dev/null +++ b/src/USER-OMP/pair_dpd_tstat_omp.h @@ -0,0 +1,52 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(dpd/tstat/omp,PairDPDTstatOMP) + +#else + +#ifndef LMP_PAIR_DPD_TSTAT_OMP_H +#define LMP_PAIR_DPD_TSTAT_OMP_H + +#include "pair_dpd_tstat.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairDPDTstatOMP : public PairDPDTstat, public ThrOMP { + + public: + PairDPDTstatOMP(class LAMMPS *); + virtual ~PairDPDTstatOMP(); + + virtual void compute(int, int); + virtual double memory_usage(); + + protected: + class RanMars **random_thr; + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_eam_alloy_omp.cpp b/src/USER-OMP/pair_eam_alloy_omp.cpp new file mode 100644 index 0000000000..3dff868b6a --- /dev/null +++ b/src/USER-OMP/pair_eam_alloy_omp.cpp @@ -0,0 +1,323 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Stephen Foiles (SNL), Murray Daw (SNL) +------------------------------------------------------------------------- */ + +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "pair_eam_alloy_omp.h" +#include "atom.h" +#include "comm.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; + +#define MAXLINE 1024 + +/* ---------------------------------------------------------------------- */ + +PairEAMAlloyOMP::PairEAMAlloyOMP(LAMMPS *lmp) : PairEAMOMP(lmp) +{ + one_coeff = 1; +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs + read DYNAMO setfl file +------------------------------------------------------------------------- */ + +void PairEAMAlloyOMP::coeff(int narg, char **arg) +{ + int i,j; + + if (!allocated) allocate(); + + if (narg != 3 + atom->ntypes) + error->all(FLERR,"Incorrect args for pair coefficients"); + + // insure I,J args are * * + + if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0) + error->all(FLERR,"Incorrect args for pair coefficients"); + + // read EAM setfl file + + if (setfl) { + for (i = 0; i < setfl->nelements; i++) delete [] setfl->elements[i]; + delete [] setfl->elements; + delete [] setfl->mass; + memory->destroy(setfl->frho); + memory->destroy(setfl->rhor); + memory->destroy(setfl->z2r); + delete setfl; + } + setfl = new Setfl(); + read_file(arg[2]); + + // read args that map atom types to elements in potential file + // map[i] = which element the Ith atom type is, -1 if NULL + + for (i = 3; i < narg; i++) { + if (strcmp(arg[i],"NULL") == 0) { + map[i-2] = -1; + continue; + } + for (j = 0; j < setfl->nelements; j++) + if (strcmp(arg[i],setfl->elements[j]) == 0) break; + if (j < setfl->nelements) map[i-2] = j; + else error->all(FLERR,"No matching element in EAM potential file"); + } + + // clear setflag since coeff() called once with I,J = * * + + int n = atom->ntypes; + for (i = 1; i <= n; i++) + for (j = i; j <= n; j++) + setflag[i][j] = 0; + + // set setflag i,j for type pairs where both are mapped to elements + // set mass of atom type if i = j + + int count = 0; + for (i = 1; i <= n; i++) { + for (j = i; j <= n; j++) { + if (map[i] >= 0 && map[j] >= 0) { + setflag[i][j] = 1; + if (i == j) atom->set_mass(i,setfl->mass[map[i]]); + count++; + } + } + } + + if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); +} + +/* ---------------------------------------------------------------------- + read a multi-element DYNAMO setfl file +------------------------------------------------------------------------- */ + +void PairEAMAlloyOMP::read_file(char *filename) +{ + Setfl *file = setfl; + + // open potential file + + int me = comm->me; + FILE *fptr; + char line[MAXLINE]; + + if (me == 0) { + fptr = fopen(filename,"r"); + if (fptr == NULL) { + char str[128]; + sprintf(str,"Cannot open EAM potential file %s",filename); + error->one(FLERR,str); + } + } + + // read and broadcast header + // extract element names from nelements line + + int n; + if (me == 0) { + fgets(line,MAXLINE,fptr); + fgets(line,MAXLINE,fptr); + fgets(line,MAXLINE,fptr); + fgets(line,MAXLINE,fptr); + n = strlen(line) + 1; + } + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + + sscanf(line,"%d",&file->nelements); + int nwords = atom->count_words(line); + if (nwords != file->nelements + 1) + error->all(FLERR,"Incorrect element names in EAM potential file"); + + char **words = new char*[file->nelements+1]; + nwords = 0; + strtok(line," \t\n\r\f"); + while (words[nwords++] = strtok(NULL," \t\n\r\f")) continue; + + file->elements = new char*[file->nelements]; + for (int i = 0; i < file->nelements; i++) { + n = strlen(words[i]) + 1; + file->elements[i] = new char[n]; + strcpy(file->elements[i],words[i]); + } + delete [] words; + + if (me == 0) { + fgets(line,MAXLINE,fptr); + sscanf(line,"%d %lg %d %lg %lg", + &file->nrho,&file->drho,&file->nr,&file->dr,&file->cut); + } + + MPI_Bcast(&file->nrho,1,MPI_INT,0,world); + MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world); + MPI_Bcast(&file->nr,1,MPI_INT,0,world); + MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world); + MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world); + + file->mass = new double[file->nelements]; + memory->create(file->frho,file->nelements,file->nrho+1,"pair:frho"); + memory->create(file->rhor,file->nelements,file->nr+1,"pair:rhor"); + memory->create(file->z2r,file->nelements,file->nelements,file->nr+1, + "pair:z2r"); + + int i,j,tmp; + for (i = 0; i < file->nelements; i++) { + if (me == 0) { + fgets(line,MAXLINE,fptr); + sscanf(line,"%d %lg",&tmp,&file->mass[i]); + } + MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world); + + if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]); + MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world); + if (me == 0) grab(fptr,file->nr,&file->rhor[i][1]); + MPI_Bcast(&file->rhor[i][1],file->nr,MPI_DOUBLE,0,world); + } + + for (i = 0; i < file->nelements; i++) + for (j = 0; j <= i; j++) { + if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]); + MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world); + } + + // close the potential file + + if (me == 0) fclose(fptr); +} + +/* ---------------------------------------------------------------------- + copy read-in setfl potential to standard array format +------------------------------------------------------------------------- */ + +void PairEAMAlloyOMP::file2array() +{ + int i,j,m,n; + int ntypes = atom->ntypes; + + // set function params directly from setfl file + + nrho = setfl->nrho; + nr = setfl->nr; + drho = setfl->drho; + dr = setfl->dr; + + // ------------------------------------------------------------------ + // setup frho arrays + // ------------------------------------------------------------------ + + // allocate frho arrays + // nfrho = # of setfl elements + 1 for zero array + + nfrho = setfl->nelements + 1; + memory->destroy(frho); + memory->create(frho,nfrho,nrho+1,"pair:frho"); + + // copy each element's frho to global frho + + for (i = 0; i < setfl->nelements; i++) + for (m = 1; m <= nrho; m++) frho[i][m] = setfl->frho[i][m]; + + // add extra frho of zeroes for non-EAM types to point to (pair hybrid) + // this is necessary b/c fp is still computed for non-EAM atoms + + for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0; + + // type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to + // if atom type doesn't point to element (non-EAM atom in pair hybrid) + // then map it to last frho array of zeroes + + for (i = 1; i <= ntypes; i++) + if (map[i] >= 0) type2frho[i] = map[i]; + else type2frho[i] = nfrho-1; + + // ------------------------------------------------------------------ + // setup rhor arrays + // ------------------------------------------------------------------ + + // allocate rhor arrays + // nrhor = # of setfl elements + + nrhor = setfl->nelements; + memory->destroy(rhor); + memory->create(rhor,nrhor,nr+1,"pair:rhor"); + + // copy each element's rhor to global rhor + + for (i = 0; i < setfl->nelements; i++) + for (m = 1; m <= nr; m++) rhor[i][m] = setfl->rhor[i][m]; + + // type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to + // for setfl files, I,J mapping only depends on I + // OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used + + for (i = 1; i <= ntypes; i++) + for (j = 1; j <= ntypes; j++) + type2rhor[i][j] = map[i]; + + // ------------------------------------------------------------------ + // setup z2r arrays + // ------------------------------------------------------------------ + + // allocate z2r arrays + // nz2r = N*(N+1)/2 where N = # of setfl elements + + nz2r = setfl->nelements * (setfl->nelements+1) / 2; + memory->destroy(z2r); + memory->create(z2r,nz2r,nr+1,"pair:z2r"); + + // copy each element pair z2r to global z2r, only for I >= J + + n = 0; + for (i = 0; i < setfl->nelements; i++) + for (j = 0; j <= i; j++) { + for (m = 1; m <= nr; m++) z2r[n][m] = setfl->z2r[i][j][m]; + n++; + } + + // type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to + // set of z2r arrays only fill lower triangular Nelement matrix + // value = n = sum over rows of lower-triangular matrix until reach irow,icol + // swap indices when irow < icol to stay lower triangular + // if map = -1 (non-EAM atom in pair hybrid): + // type2z2r is not used by non-opt + // but set type2z2r to 0 since accessed by opt + + int irow,icol; + for (i = 1; i <= ntypes; i++) { + for (j = 1; j <= ntypes; j++) { + irow = map[i]; + icol = map[j]; + if (irow == -1 || icol == -1) { + type2z2r[i][j] = 0; + continue; + } + if (irow < icol) { + irow = map[j]; + icol = map[i]; + } + n = 0; + for (m = 0; m < irow; m++) n += m + 1; + n += icol; + type2z2r[i][j] = n; + } + } +} diff --git a/src/USER-OMP/pair_eam_alloy_omp.h b/src/USER-OMP/pair_eam_alloy_omp.h new file mode 100644 index 0000000000..a044b18f60 --- /dev/null +++ b/src/USER-OMP/pair_eam_alloy_omp.h @@ -0,0 +1,43 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(eam/alloy/omp,PairEAMAlloyOMP) + +#else + +#ifndef LMP_PAIR_EAM_ALLOY_OMP_H +#define LMP_PAIR_EAM_ALLOY_OMP_H + +#include "pair_eam_omp.h" + +namespace LAMMPS_NS { + +// need virtual public b/c of how eam/alloy/opt inherits from it + +class PairEAMAlloyOMP : virtual public PairEAMOMP { + public: + PairEAMAlloyOMP(class LAMMPS *); + virtual ~PairEAMAlloyOMP() {} + void coeff(int, char **); + + protected: + void read_file(char *); + void file2array(); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_eam_fs_omp.cpp b/src/USER-OMP/pair_eam_fs_omp.cpp new file mode 100644 index 0000000000..84101913c5 --- /dev/null +++ b/src/USER-OMP/pair_eam_fs_omp.cpp @@ -0,0 +1,332 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Tim Lau (MIT) +------------------------------------------------------------------------- */ + +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "pair_eam_fs_omp.h" +#include "atom.h" +#include "comm.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; + +#define MAXLINE 1024 + +/* ---------------------------------------------------------------------- */ + +PairEAMFSOMP::PairEAMFSOMP(LAMMPS *lmp) : PairEAMOMP(lmp) +{ + one_coeff = 1; +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs + read EAM Finnis-Sinclair file +------------------------------------------------------------------------- */ + +void PairEAMFSOMP::coeff(int narg, char **arg) +{ + int i,j; + + if (!allocated) allocate(); + + if (narg != 3 + atom->ntypes) + error->all(FLERR,"Incorrect args for pair coefficients"); + + // insure I,J args are * * + + if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0) + error->all(FLERR,"Incorrect args for pair coefficients"); + + // read EAM Finnis-Sinclair file + + if (fs) { + for (i = 0; i < fs->nelements; i++) delete [] fs->elements[i]; + delete [] fs->elements; + delete [] fs->mass; + memory->destroy(fs->frho); + memory->destroy(fs->rhor); + memory->destroy(fs->z2r); + delete fs; + } + fs = new Fs(); + read_file(arg[2]); + + // read args that map atom types to elements in potential file + // map[i] = which element the Ith atom type is, -1 if NULL + + for (i = 3; i < narg; i++) { + if (strcmp(arg[i],"NULL") == 0) { + map[i-2] = -1; + continue; + } + for (j = 0; j < fs->nelements; j++) + if (strcmp(arg[i],fs->elements[j]) == 0) break; + if (j < fs->nelements) map[i-2] = j; + else error->all(FLERR,"No matching element in EAM potential file"); + } + + // clear setflag since coeff() called once with I,J = * * + + int n = atom->ntypes; + for (i = 1; i <= n; i++) + for (j = i; j <= n; j++) + setflag[i][j] = 0; + + // set setflag i,j for type pairs where both are mapped to elements + // set mass of atom type if i = j + + int count = 0; + for (i = 1; i <= n; i++) { + for (j = i; j <= n; j++) { + if (map[i] >= 0 && map[j] >= 0) { + setflag[i][j] = 1; + if (i == j) atom->set_mass(i,fs->mass[map[i]]); + count++; + } + } + } + + if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); +} + +/* ---------------------------------------------------------------------- + read a multi-element DYNAMO setfl file +------------------------------------------------------------------------- */ + +void PairEAMFSOMP::read_file(char *filename) +{ + Fs *file = fs; + + // open potential file + + int me = comm->me; + FILE *fptr; + char line[MAXLINE]; + + if (me == 0) { + fptr = fopen(filename,"r"); + if (fptr == NULL) { + char str[128]; + sprintf(str,"Cannot open EAM potential file %s",filename); + error->one(FLERR,str); + } + } + + // read and broadcast header + // extract element names from nelements line + + int n; + if (me == 0) { + fgets(line,MAXLINE,fptr); + fgets(line,MAXLINE,fptr); + fgets(line,MAXLINE,fptr); + fgets(line,MAXLINE,fptr); + n = strlen(line) + 1; + } + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + + sscanf(line,"%d",&file->nelements); + int nwords = atom->count_words(line); + if (nwords != file->nelements + 1) + error->all(FLERR,"Incorrect element names in EAM potential file"); + + char **words = new char*[file->nelements+1]; + nwords = 0; + strtok(line," \t\n\r\f"); + while (words[nwords++] = strtok(NULL," \t\n\r\f")) continue; + + file->elements = new char*[file->nelements]; + for (int i = 0; i < file->nelements; i++) { + n = strlen(words[i]) + 1; + file->elements[i] = new char[n]; + strcpy(file->elements[i],words[i]); + } + delete [] words; + + if (me == 0) { + fgets(line,MAXLINE,fptr); + sscanf(line,"%d %lg %d %lg %lg", + &file->nrho,&file->drho,&file->nr,&file->dr,&file->cut); + } + + MPI_Bcast(&file->nrho,1,MPI_INT,0,world); + MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world); + MPI_Bcast(&file->nr,1,MPI_INT,0,world); + MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world); + MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world); + + file->mass = new double[file->nelements]; + memory->create(file->frho,file->nelements,file->nrho+1, + "pair:frho"); + memory->create(file->rhor,file->nelements,file->nelements, + file->nr+1,"pair:rhor"); + memory->create(file->z2r,file->nelements,file->nelements, + file->nr+1,"pair:z2r"); + + int i,j,tmp; + for (i = 0; i < file->nelements; i++) { + if (me == 0) { + fgets(line,MAXLINE,fptr); + sscanf(line,"%d %lg",&tmp,&file->mass[i]); + } + MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world); + + if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]); + MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world); + + for (j = 0; j < file->nelements; j++) { + if (me == 0) grab(fptr,file->nr,&file->rhor[i][j][1]); + MPI_Bcast(&file->rhor[i][j][1],file->nr,MPI_DOUBLE,0,world); + } + } + + for (i = 0; i < file->nelements; i++) + for (j = 0; j <= i; j++) { + if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]); + MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world); + } + + // close the potential file + + if (me == 0) fclose(fptr); +} + +/* ---------------------------------------------------------------------- + copy read-in setfl potential to standard array format +------------------------------------------------------------------------- */ + +void PairEAMFSOMP::file2array() +{ + int i,j,m,n; + int ntypes = atom->ntypes; + + // set function params directly from fs file + + nrho = fs->nrho; + nr = fs->nr; + drho = fs->drho; + dr = fs->dr; + + // ------------------------------------------------------------------ + // setup frho arrays + // ------------------------------------------------------------------ + + // allocate frho arrays + // nfrho = # of fs elements + 1 for zero array + + nfrho = fs->nelements + 1; + memory->destroy(frho); + memory->create(frho,nfrho,nrho+1,"pair:frho"); + + // copy each element's frho to global frho + + for (i = 0; i < fs->nelements; i++) + for (m = 1; m <= nrho; m++) frho[i][m] = fs->frho[i][m]; + + // add extra frho of zeroes for non-EAM types to point to (pair hybrid) + // this is necessary b/c fp is still computed for non-EAM atoms + + for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0; + + // type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to + // if atom type doesn't point to element (non-EAM atom in pair hybrid) + // then map it to last frho array of zeroes + + for (i = 1; i <= ntypes; i++) + if (map[i] >= 0) type2frho[i] = map[i]; + else type2frho[i] = nfrho-1; + + // ------------------------------------------------------------------ + // setup rhor arrays + // ------------------------------------------------------------------ + + // allocate rhor arrays + // nrhor = square of # of fs elements + + nrhor = fs->nelements * fs->nelements; + memory->destroy(rhor); + memory->create(rhor,nrhor,nr+1,"pair:rhor"); + + // copy each element pair rhor to global rhor + + n = 0; + for (i = 0; i < fs->nelements; i++) + for (j = 0; j < fs->nelements; j++) { + for (m = 1; m <= nr; m++) rhor[n][m] = fs->rhor[i][j][m]; + n++; + } + + // type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to + // for fs files, there is a full NxN set of rhor arrays + // OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used + + for (i = 1; i <= ntypes; i++) + for (j = 1; j <= ntypes; j++) + type2rhor[i][j] = map[i] * fs->nelements + map[j]; + + // ------------------------------------------------------------------ + // setup z2r arrays + // ------------------------------------------------------------------ + + // allocate z2r arrays + // nz2r = N*(N+1)/2 where N = # of fs elements + + nz2r = fs->nelements * (fs->nelements+1) / 2; + memory->destroy(z2r); + memory->create(z2r,nz2r,nr+1,"pair:z2r"); + + // copy each element pair z2r to global z2r, only for I >= J + + n = 0; + for (i = 0; i < fs->nelements; i++) + for (j = 0; j <= i; j++) { + for (m = 1; m <= nr; m++) z2r[n][m] = fs->z2r[i][j][m]; + n++; + } + + // type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to + // set of z2r arrays only fill lower triangular Nelement matrix + // value = n = sum over rows of lower-triangular matrix until reach irow,icol + // swap indices when irow < icol to stay lower triangular + // if map = -1 (non-EAM atom in pair hybrid): + // type2z2r is not used by non-opt + // but set type2z2r to 0 since accessed by opt + + int irow,icol; + for (i = 1; i <= ntypes; i++) { + for (j = 1; j <= ntypes; j++) { + irow = map[i]; + icol = map[j]; + if (irow == -1 || icol == -1) { + type2z2r[i][j] = 0; + continue; + } + if (irow < icol) { + irow = map[j]; + icol = map[i]; + } + n = 0; + for (m = 0; m < irow; m++) n += m + 1; + n += icol; + type2z2r[i][j] = n; + } + } +} diff --git a/src/USER-OMP/pair_eam_fs_omp.h b/src/USER-OMP/pair_eam_fs_omp.h new file mode 100644 index 0000000000..42d4d59eb6 --- /dev/null +++ b/src/USER-OMP/pair_eam_fs_omp.h @@ -0,0 +1,43 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(eam/fs/omp,PairEAMFSOMP) + +#else + +#ifndef LMP_PAIR_EAM_FS_OMP_H +#define LMP_PAIR_EAM_FS_OMP_H + +#include "pair_eam_omp.h" + +namespace LAMMPS_NS { + +// need virtual public b/c of how eam/fs/opt inherits from it + +class PairEAMFSOMP : virtual public PairEAMOMP { + public: + PairEAMFSOMP(class LAMMPS *); + virtual ~PairEAMFSOMP() {} + void coeff(int, char **); + + protected: + void read_file(char *); + void file2array(); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_eam_omp.cpp b/src/USER-OMP/pair_eam_omp.cpp new file mode 100644 index 0000000000..59e58d1b73 --- /dev/null +++ b/src/USER-OMP/pair_eam_omp.cpp @@ -0,0 +1,303 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "string.h" + +#include "pair_eam_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "memory.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairEAMOMP::PairEAMOMP(LAMMPS *lmp) : + PairEAM(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairEAMOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + // grow energy and fp arrays if necessary + // need to be atom->nmax in length + + if (atom->nmax > nmax) { + memory->destroy(rho); + memory->destroy(fp); + nmax = atom->nmax; + memory->create(rho,nthreads*nmax,"pair:rho"); + memory->create(fp,nmax,"pair:fp"); + } + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (force->newton_pair) + thr->init_eam(nall, rho); + else + thr->init_eam(atom->nlocal, rho); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairEAMOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,m,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r,p,rhoip,rhojp,z2,z2p,recip,phip,psip,phi; + double *coeff; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const rho_t = thr->get_rho(); + const int tid = thr->get_tid(); + const int nthreads = comm->nthreads; + + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const int nall = nlocal + atom->nghost; + + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // rho = density at each atom + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cutforcesq) { + jtype = type[j]; + p = sqrt(rsq)*rdr + 1.0; + m = static_cast (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + coeff = rhor_spline[type2rhor[jtype][itype]][m]; + rho_t[i] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + if (NEWTON_PAIR || j < nlocal) { + coeff = rhor_spline[type2rhor[itype][jtype]][m]; + rho_t[j] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + } + } + } + } + + // wait until all threads are done with computation + sync_threads(); + + // communicate and sum densities + + if (NEWTON_PAIR) { + // reduce per thread density + data_reduce_thr(rho, nall, nthreads, 1, tid); + + // wait until reduction is complete + sync_threads(); + +#if defined(_OPENMP) +#pragma omp master +#endif + { comm->reverse_comm_pair(this); } + + // wait until master thread is done with communication + sync_threads(); + + } else { + data_reduce_thr(rho, nlocal, nthreads, 1, tid); + + // wait until reduction is complete + sync_threads(); + } + + // fp = derivative of embedding energy at each atom + // phi = embedding energy at each atom + // if rho > rhomax (e.g. due to close approach of two atoms), + // will exceed table, so add linear term to conserve energy + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + p = rho[i]*rdrho + 1.0; + m = static_cast (p); + m = MAX(1,MIN(m,nrho-1)); + p -= m; + p = MIN(p,1.0); + coeff = frho_spline[type2frho[type[i]]][m]; + fp[i] = (coeff[0]*p + coeff[1])*p + coeff[2]; + if (EFLAG) { + phi = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + if (rho[i] > rhomax) phi += fp[i] * (rho[i]-rhomax); + e_tally_thr(this, i, i, nlocal, NEWTON_PAIR, phi, 0.0, thr); + } + } + + // wait until all theads are done with computation + sync_threads(); + + // communicate derivative of embedding function + // MPI communication only on master thread +#if defined(_OPENMP) +#pragma omp master +#endif + { comm->forward_comm_pair(this); } + + // wait until master thread is done with communication + sync_threads(); + + // compute forces on each atom + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + fxtmp = fytmp = fztmp = 0.0; + + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cutforcesq) { + jtype = type[j]; + r = sqrt(rsq); + p = r*rdr + 1.0; + m = static_cast (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + + // rhoip = derivative of (density at atom j due to atom i) + // rhojp = derivative of (density at atom i due to atom j) + // phi = pair potential energy + // phip = phi' + // z2 = phi * r + // z2p = (phi * r)' = (phi' r) + phi + // psip needs both fp[i] and fp[j] terms since r_ij appears in two + // terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji) + // hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip + + coeff = rhor_spline[type2rhor[itype][jtype]][m]; + rhoip = (coeff[0]*p + coeff[1])*p + coeff[2]; + coeff = rhor_spline[type2rhor[jtype][itype]][m]; + rhojp = (coeff[0]*p + coeff[1])*p + coeff[2]; + coeff = z2r_spline[type2z2r[itype][jtype]][m]; + z2p = (coeff[0]*p + coeff[1])*p + coeff[2]; + z2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + + recip = 1.0/r; + phi = z2*recip; + phip = z2p*recip - phi*recip; + psip = fp[i]*rhojp + fp[j]*rhoip + phip; + fpair = -psip*recip; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) evdwl = phi; + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairEAMOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairEAM::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_eam_omp.h b/src/USER-OMP/pair_eam_omp.h new file mode 100644 index 0000000000..0f9d3cd51e --- /dev/null +++ b/src/USER-OMP/pair_eam_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(eam/omp,PairEAMOMP) + +#else + +#ifndef LMP_PAIR_EAM_OMP_H +#define LMP_PAIR_EAM_OMP_H + +#include "pair_eam.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairEAMOMP : public PairEAM, public ThrOMP { + + public: + PairEAMOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int iifrom, int iito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_edip_omp.cpp b/src/USER-OMP/pair_edip_omp.cpp new file mode 100644 index 0000000000..d9d68d7a07 --- /dev/null +++ b/src/USER-OMP/pair_edip_omp.cpp @@ -0,0 +1,492 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_edip_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define GRIDDENSITY 8000 +#define GRIDSTART 0.1 + +// max number of interaction per atom for f(Z) environment potential + +#define leadDimInteractionList 64 + +/* ---------------------------------------------------------------------- */ + +PairEDIPOMP::PairEDIPOMP(LAMMPS *lmp) : + PairEDIP(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairEDIPOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = vflag_atom = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (vflag_atom) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (vflag_atom) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else eval<0,0,0>(ifrom, ito, thr); + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairEDIPOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,k,ii,inum,jnum; + int itype,jtype,ktype,ijparam,ikparam,ijkparam; + double xtmp,ytmp,ztmp,evdwl; + int *ilist,*jlist,*numneigh,**firstneigh; + register int preForceCoord_counter; + + double invR_ij; + double invR_ik; + double directorCos_ij_x; + double directorCos_ij_y; + double directorCos_ij_z; + double directorCos_ik_x; + double directorCos_ik_y; + double directorCos_ik_z; + double cosTeta; + + int interpolIDX; + double interpolTMP; + double interpolDeltaX; + double interpolY1; + double interpolY2; + + double invRMinusCutoffA; + double sigmaInvRMinusCutoffA; + double gammInvRMinusCutoffA; + double cosTetaDiff; + double cosTetaDiffCosTetaDiff; + double cutoffFunction_ij; + double exp2B_ij; + double exp2BDerived_ij; + double pow2B_ij; + double pow2BDerived_ij; + double exp3B_ij; + double exp3BDerived_ij; + double exp3B_ik; + double exp3BDerived_ik; + double qFunction; + double qFunctionDerived; + double tauFunction; + double tauFunctionDerived; + double expMinusBetaZeta_iZeta_i; + double qFunctionCosTetaDiffCosTetaDiff; + double expMinusQFunctionCosTetaDiffCosTetaDiff; + double zeta_i; + double zeta_iDerived; + double zeta_iDerivedInvR_ij; + + double forceModCoord_factor; + double forceModCoord; + double forceModCoord_ij; + double forceMod2B; + double forceMod3B_factor1_ij; + double forceMod3B_factor2_ij; + double forceMod3B_factor2; + double forceMod3B_factor1_ik; + double forceMod3B_factor2_ik; + double potentia3B_factor; + double potential2B_factor; + + const int tid = thr->get_tid(); + + double *pre_thrInvR_ij = preInvR_ij + tid * leadDimInteractionList; + double *pre_thrExp3B_ij = preExp3B_ij + tid * leadDimInteractionList; + double *pre_thrExp3BDerived_ij = preExp3BDerived_ij + tid * leadDimInteractionList; + double *pre_thrExp2B_ij = preExp2B_ij + tid * leadDimInteractionList; + double *pre_thrExp2BDerived_ij = preExp2BDerived_ij + tid * leadDimInteractionList; + double *pre_thrPow2B_ij = prePow2B_ij + tid * leadDimInteractionList; + double *pre_thrForceCoord = preForceCoord + tid * leadDimInteractionList; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over full neighbor list of my atoms + + for (ii = iifrom; ii < iito; ii++) { + zeta_i = 0.0; + int numForceCoordPairs = 0; + + i = ilist[ii]; + itype = map[type[i]]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + jlist = firstneigh[i]; + jnum = numneigh[i]; + + // pre-loop to compute environment coordination f(Z) + + for (int neighbor_j = 0; neighbor_j < jnum; neighbor_j++) { + j = jlist[neighbor_j]; + j &= NEIGHMASK; + + double dr_ij[3], r_ij; + + dr_ij[0] = xtmp - x[j][0]; + dr_ij[1] = ytmp - x[j][1]; + dr_ij[2] = ztmp - x[j][2]; + r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2]; + + jtype = map[type[j]]; + ijparam = elem2param[itype][jtype][jtype]; + if (r_ij > params[ijparam].cutsq) continue; + + r_ij = sqrt(r_ij); + + invR_ij = 1.0 / r_ij; + pre_thrInvR_ij[neighbor_j] = invR_ij; + + invRMinusCutoffA = 1.0 / (r_ij - cutoffA); + sigmaInvRMinusCutoffA = sigma * invRMinusCutoffA; + gammInvRMinusCutoffA = gamm * invRMinusCutoffA; + + interpolDeltaX = r_ij - GRIDSTART; + interpolTMP = (interpolDeltaX * GRIDDENSITY); + interpolIDX = (int) interpolTMP; + + interpolY1 = exp3B[interpolIDX]; + interpolY2 = exp3B[interpolIDX+1]; + exp3B_ij = interpolY1 + (interpolY2 - interpolY1) * + (interpolTMP-interpolIDX); + + exp3BDerived_ij = - exp3B_ij * gammInvRMinusCutoffA * invRMinusCutoffA; + + pre_thrExp3B_ij[neighbor_j] = exp3B_ij; + pre_thrExp3BDerived_ij[neighbor_j] = exp3BDerived_ij; + + interpolY1 = exp2B[interpolIDX]; + interpolY2 = exp2B[interpolIDX+1]; + exp2B_ij = interpolY1 + (interpolY2 - interpolY1) * + (interpolTMP-interpolIDX); + + exp2BDerived_ij = - exp2B_ij * sigmaInvRMinusCutoffA * invRMinusCutoffA; + + pre_thrExp2B_ij[neighbor_j] = exp2B_ij; + pre_thrExp2BDerived_ij[neighbor_j] = exp2BDerived_ij; + + interpolY1 = pow2B[interpolIDX]; + interpolY2 = pow2B[interpolIDX+1]; + pow2B_ij = interpolY1 + (interpolY2 - interpolY1) * + (interpolTMP-interpolIDX); + + pre_thrPow2B_ij[neighbor_j] = pow2B_ij; + + // zeta and its derivative + + if (r_ij < cutoffC) zeta_i += 1.0; + else { + interpolY1 = cutoffFunction[interpolIDX]; + interpolY2 = cutoffFunction[interpolIDX+1]; + cutoffFunction_ij = interpolY1 + (interpolY2 - interpolY1) * + (interpolTMP-interpolIDX); + + zeta_i += cutoffFunction_ij; + + interpolY1 = cutoffFunctionDerived[interpolIDX]; + interpolY2 = cutoffFunctionDerived[interpolIDX+1]; + zeta_iDerived = interpolY1 + (interpolY2 - interpolY1) * + (interpolTMP-interpolIDX); + + zeta_iDerivedInvR_ij = zeta_iDerived * invR_ij; + + preForceCoord_counter=numForceCoordPairs*5; + pre_thrForceCoord[preForceCoord_counter+0]=zeta_iDerivedInvR_ij; + pre_thrForceCoord[preForceCoord_counter+1]=dr_ij[0]; + pre_thrForceCoord[preForceCoord_counter+2]=dr_ij[1]; + pre_thrForceCoord[preForceCoord_counter+3]=dr_ij[2]; + pre_thrForceCoord[preForceCoord_counter+4]=j; + numForceCoordPairs++; + } + } + + // quantities depending on zeta_i + + interpolDeltaX = zeta_i; + interpolTMP = (interpolDeltaX * GRIDDENSITY); + interpolIDX = (int) interpolTMP; + + interpolY1 = expMinusBetaZeta_iZeta_iGrid[interpolIDX]; + interpolY2 = expMinusBetaZeta_iZeta_iGrid[interpolIDX+1]; + expMinusBetaZeta_iZeta_i = interpolY1 + (interpolY2 - interpolY1) * + (interpolTMP-interpolIDX); + + interpolY1 = qFunctionGrid[interpolIDX]; + interpolY2 = qFunctionGrid[interpolIDX+1]; + qFunction = interpolY1 + (interpolY2 - interpolY1) * + (interpolTMP-interpolIDX); + + interpolY1 = tauFunctionGrid[interpolIDX]; + interpolY2 = tauFunctionGrid[interpolIDX+1]; + tauFunction = interpolY1 + (interpolY2 - interpolY1) * + (interpolTMP-interpolIDX); + + interpolY1 = tauFunctionDerivedGrid[interpolIDX]; + interpolY2 = tauFunctionDerivedGrid[interpolIDX+1]; + tauFunctionDerived = interpolY1 + (interpolY2 - interpolY1) * + (interpolTMP-interpolIDX); + + qFunctionDerived = -mu * qFunction; + + forceModCoord_factor = 2.0 * beta * zeta_i * expMinusBetaZeta_iZeta_i; + + forceModCoord = 0.0; + + // two-body interactions, skip half of them + + for (int neighbor_j = 0; neighbor_j < jnum; neighbor_j++) { + double dr_ij[3], r_ij, f_ij[3]; + + j = jlist[neighbor_j]; + j &= NEIGHMASK; + + dr_ij[0] = x[j][0] - xtmp; + dr_ij[1] = x[j][1] - ytmp; + dr_ij[2] = x[j][2] - ztmp; + r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2]; + + jtype = map[type[j]]; + ijparam = elem2param[itype][jtype][jtype]; + if (r_ij > params[ijparam].cutsq) continue; + + r_ij = sqrt(r_ij); + + invR_ij = pre_thrInvR_ij[neighbor_j]; + pow2B_ij = pre_thrPow2B_ij[neighbor_j]; + + potential2B_factor = pow2B_ij - expMinusBetaZeta_iZeta_i; + + exp2B_ij = pre_thrExp2B_ij[neighbor_j]; + + pow2BDerived_ij = - rho * invR_ij * pow2B_ij; + + forceModCoord += (forceModCoord_factor*exp2B_ij); + + exp2BDerived_ij = pre_thrExp2BDerived_ij[neighbor_j]; + forceMod2B = exp2BDerived_ij * potential2B_factor + + exp2B_ij * pow2BDerived_ij; + + directorCos_ij_x = invR_ij * dr_ij[0]; + directorCos_ij_y = invR_ij * dr_ij[1]; + directorCos_ij_z = invR_ij * dr_ij[2]; + + exp3B_ij = pre_thrExp3B_ij[neighbor_j]; + exp3BDerived_ij = pre_thrExp3BDerived_ij[neighbor_j]; + + f_ij[0] = forceMod2B * directorCos_ij_x; + f_ij[1] = forceMod2B * directorCos_ij_y; + f_ij[2] = forceMod2B * directorCos_ij_z; + + f[j][0] -= f_ij[0]; + f[j][1] -= f_ij[1]; + f[j][2] -= f_ij[2]; + + f[i][0] += f_ij[0]; + f[i][1] += f_ij[1]; + f[i][2] += f_ij[2]; + + // potential energy + + evdwl = (exp2B_ij * potential2B_factor); + + if (EVFLAG) ev_tally_thr(this,i, j, nlocal, /* newton_pair */ 1, evdwl, 0.0, + -forceMod2B*invR_ij, dr_ij[0], dr_ij[1], dr_ij[2],thr); + + // three-body Forces + + for (int neighbor_k = neighbor_j + 1; neighbor_k < jnum; neighbor_k++) { + double dr_ik[3], r_ik, f_ik[3]; + + k = jlist[neighbor_k]; + k &= NEIGHMASK; + ktype = map[type[k]]; + ikparam = elem2param[itype][ktype][ktype]; + ijkparam = elem2param[itype][jtype][ktype]; + + dr_ik[0] = x[k][0] - xtmp; + dr_ik[1] = x[k][1] - ytmp; + dr_ik[2] = x[k][2] - ztmp; + r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2]; + + if (r_ik > params[ikparam].cutsq) continue; + + r_ik = sqrt(r_ik); + + invR_ik = pre_thrInvR_ij[neighbor_k]; + + directorCos_ik_x = invR_ik * dr_ik[0]; + directorCos_ik_y = invR_ik * dr_ik[1]; + directorCos_ik_z = invR_ik * dr_ik[2]; + + cosTeta = directorCos_ij_x * directorCos_ik_x + + directorCos_ij_y * directorCos_ik_y + + directorCos_ij_z * directorCos_ik_z; + + cosTetaDiff = cosTeta + tauFunction; + cosTetaDiffCosTetaDiff = cosTetaDiff * cosTetaDiff; + qFunctionCosTetaDiffCosTetaDiff = cosTetaDiffCosTetaDiff * qFunction; + expMinusQFunctionCosTetaDiffCosTetaDiff = + exp(-qFunctionCosTetaDiffCosTetaDiff); + + potentia3B_factor = lambda * + ((1.0 - expMinusQFunctionCosTetaDiffCosTetaDiff) + + eta * qFunctionCosTetaDiffCosTetaDiff); + + exp3B_ik = pre_thrExp3B_ij[neighbor_k]; + exp3BDerived_ik = pre_thrExp3BDerived_ij[neighbor_k]; + + forceMod3B_factor1_ij = - exp3BDerived_ij * exp3B_ik * + potentia3B_factor; + forceMod3B_factor2 = 2.0 * lambda * exp3B_ij * exp3B_ik * + qFunction * cosTetaDiff * + (eta + expMinusQFunctionCosTetaDiffCosTetaDiff); + forceMod3B_factor2_ij = forceMod3B_factor2 * invR_ij; + + f_ij[0] = forceMod3B_factor1_ij * directorCos_ij_x + + forceMod3B_factor2_ij * + (cosTeta * directorCos_ij_x - directorCos_ik_x); + f_ij[1] = forceMod3B_factor1_ij * directorCos_ij_y + + forceMod3B_factor2_ij * + (cosTeta * directorCos_ij_y - directorCos_ik_y); + f_ij[2] = forceMod3B_factor1_ij * directorCos_ij_z + + forceMod3B_factor2_ij * + (cosTeta * directorCos_ij_z - directorCos_ik_z); + + forceMod3B_factor1_ik = - exp3BDerived_ik * exp3B_ij * + potentia3B_factor; + forceMod3B_factor2_ik = forceMod3B_factor2 * invR_ik; + + f_ik[0] = forceMod3B_factor1_ik * directorCos_ik_x + + forceMod3B_factor2_ik * + (cosTeta * directorCos_ik_x - directorCos_ij_x); + f_ik[1] = forceMod3B_factor1_ik * directorCos_ik_y + + forceMod3B_factor2_ik * + (cosTeta * directorCos_ik_y - directorCos_ij_y); + f_ik[2] = forceMod3B_factor1_ik * directorCos_ik_z + + forceMod3B_factor2_ik * + (cosTeta * directorCos_ik_z - directorCos_ij_z); + + forceModCoord += (forceMod3B_factor2 * + (tauFunctionDerived - 0.5 * mu * cosTetaDiff)); + + f[j][0] += f_ij[0]; + f[j][1] += f_ij[1]; + f[j][2] += f_ij[2]; + + f[k][0] += f_ik[0]; + f[k][1] += f_ik[1]; + f[k][2] += f_ik[2]; + + f[i][0] -= f_ij[0] + f_ik[0]; + f[i][1] -= f_ij[1] + f_ik[1]; + f[i][2] -= f_ij[2] + f_ik[2]; + + // potential energy + + evdwl = (exp3B_ij * exp3B_ik * potentia3B_factor); + + if (evflag) ev_tally3_thr(this,i,j,k,evdwl,0.0,f_ij,f_ik,dr_ij,dr_ik,thr); + } + } + + // forces due to environment coordination f(Z) + + for (int idx = 0; idx < numForceCoordPairs; idx++) { + double dr_ij[3], f_ij[3]; + + preForceCoord_counter = idx * 5; + zeta_iDerivedInvR_ij=pre_thrForceCoord[preForceCoord_counter+0]; + dr_ij[0]=pre_thrForceCoord[preForceCoord_counter+1]; + dr_ij[1]=pre_thrForceCoord[preForceCoord_counter+2]; + dr_ij[2]=pre_thrForceCoord[preForceCoord_counter+3]; + j = static_cast (pre_thrForceCoord[preForceCoord_counter+4]); + + forceModCoord_ij = forceModCoord * zeta_iDerivedInvR_ij; + + f_ij[0] = forceModCoord_ij * dr_ij[0]; + f_ij[1] = forceModCoord_ij * dr_ij[1]; + f_ij[2] = forceModCoord_ij * dr_ij[2]; + + f[j][0] -= f_ij[0]; + f[j][1] -= f_ij[1]; + f[j][2] -= f_ij[2]; + + f[i][0] += f_ij[0]; + f[i][1] += f_ij[1]; + f[i][2] += f_ij[2]; + + // potential energy + + evdwl = 0.0; + if (EVFLAG) ev_tally_thr(this,i, j, nlocal, /* newton_pair */ 1, 0.0, 0.0, + forceModCoord_ij, dr_ij[0], dr_ij[1], dr_ij[2],thr); + } + } +} + +/* ---------------------------------------------------------------------- */ + +double PairEDIPOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairEDIP::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_edip_omp.h b/src/USER-OMP/pair_edip_omp.h new file mode 100644 index 0000000000..55e10c83bb --- /dev/null +++ b/src/USER-OMP/pair_edip_omp.h @@ -0,0 +1,43 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(edip/omp,PairEDIPOMP) + +#else + +#ifndef LMP_PAIR_EDIP_OMP_H +#define LMP_PAIR_EDIP_OMP_H + +#include "pair_edip.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairEDIPOMP : public PairEDIP, public ThrOMP { + + public: + PairEDIPOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_eim_omp.cpp b/src/USER-OMP/pair_eim_omp.cpp new file mode 100644 index 0000000000..ad0909f242 --- /dev/null +++ b/src/USER-OMP/pair_eim_omp.cpp @@ -0,0 +1,355 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "string.h" + +#include "pair_eim_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "memory.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairEIMOMP::PairEIMOMP(LAMMPS *lmp) : + PairEIM(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairEIMOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + // grow energy and fp arrays if necessary + // need to be atom->nmax in length + + if (atom->nmax > nmax) { + memory->destroy(rho); + memory->destroy(fp); + nmax = atom->nmax; + memory->create(rho,nthreads*nmax,"pair:rho"); + memory->create(fp,nthreads*nmax,"pair:fp"); + } + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (force->newton_pair) + thr->init_eim(nall, rho, fp); + else + thr->init_eim(atom->nlocal, rho, fp); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairEIMOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,m,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r,p,rhoip,rhojp,phip,phi,coul,coulp,recip,psip; + double *coeff; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const rho_t = thr->get_rho(); + double * const fp_t = thr->get_fp(); + const int tid = thr->get_tid(); + const int nthreads = comm->nthreads; + + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const int nall = nlocal + atom->nghost; + + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // rho = density at each atom + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtype = type[j]; + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cutforcesq[itype][jtype]) { + p = sqrt(rsq)*rdr + 1.0; + m = static_cast (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + coeff = Fij_spline[type2Fij[itype][jtype]][m]; + rho_t[i] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + if (NEWTON_PAIR || j < nlocal) { + coeff = Fij_spline[type2Fij[jtype][itype]][m]; + rho_t[j] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + } + } + } + } + + // wait until all threads are done with computation + sync_threads(); + + // communicate and sum densities + if (NEWTON_PAIR) { + // reduce per thread density + data_reduce_thr(rho, nall, nthreads, 1, tid); + + // wait until reduction is complete + sync_threads(); + +#if defined(_OPENMP) +#pragma omp master +#endif + { + rhofp = 1; + comm->reverse_comm_pair(this); + } + + } else { + data_reduce_thr(rho, nlocal, nthreads, 1, tid); + + // wait until reduction is complete + sync_threads(); + } + +#if defined(_OPENMP) +#pragma omp master +#endif + { + rhofp = 1; + comm->forward_comm_pair(this); + } + + // wait until master is finished communicating + sync_threads(); + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtype = type[j]; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cutforcesq[itype][jtype]) { + p = sqrt(rsq)*rdr + 1.0; + m = static_cast (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + coeff = Gij_spline[type2Gij[itype][jtype]][m]; + fp_t[i] += rho[j]*(((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]); + if (NEWTON_PAIR || j < nlocal) { + fp_t[j] += rho[i]*(((coeff[3]*p + coeff[4])*p + coeff[5])*p + + coeff[6]); + } + } + } + } + + // wait until all threads are done with computation + sync_threads(); + + // communicate and sum modified densities + if (NEWTON_PAIR) { + // reduce per thread density + data_reduce_thr(fp, nall, nthreads, 1, tid); + + // wait until reduction is complete + sync_threads(); + +#if defined(_OPENMP) +#pragma omp master +#endif + { + rhofp = 2; + comm->reverse_comm_pair(this); + } + + } else { + data_reduce_thr(fp, nlocal, nthreads, 1, tid); + + // wait until reduction is complete + sync_threads(); + } + +#if defined(_OPENMP) +#pragma omp master +#endif + { + rhofp = 2; + comm->forward_comm_pair(this); + } + + // wait until master is finished communicating + sync_threads(); + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + itype = type[i]; + if (EFLAG) { + phi = 0.5*rho[i]*fp[i]; + e_tally_thr(this, i, i, nlocal, NEWTON_PAIR, phi, 0.0, thr); + } + } + + // compute forces on each atom + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + fxtmp = fytmp = fztmp = 0.0; + + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtype = type[j]; + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cutforcesq[itype][jtype]) { + r = sqrt(rsq); + p = r*rdr + 1.0; + m = static_cast (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + + // rhoip = derivative of (density at atom j due to atom i) + // rhojp = derivative of (density at atom i due to atom j) + // phi = pair potential energy + // phip = phi' + + coeff = Fij_spline[type2Fij[jtype][itype]][m]; + rhoip = (coeff[0]*p + coeff[1])*p + coeff[2]; + coeff = Fij_spline[type2Fij[itype][jtype]][m]; + rhojp = (coeff[0]*p + coeff[1])*p + coeff[2]; + coeff = phiij_spline[type2phiij[itype][jtype]][m]; + phip = (coeff[0]*p + coeff[1])*p + coeff[2]; + phi = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + coeff = Gij_spline[type2Gij[itype][jtype]][m]; + coul = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]; + coulp = (coeff[0]*p + coeff[1])*p + coeff[2]; + psip = phip + (rho[i]*rho[j]-q0[itype]*q0[jtype])*coulp + + fp[i]*rhojp + fp[j]*rhoip; + recip = 1.0/r; + fpair = -psip*recip; + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) evdwl = phi-q0[itype]*q0[jtype]*coul; + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairEIMOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairEIM::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_eim_omp.h b/src/USER-OMP/pair_eim_omp.h new file mode 100644 index 0000000000..c63222d7fb --- /dev/null +++ b/src/USER-OMP/pair_eim_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(eim/omp,PairEIMOMP) + +#else + +#ifndef LMP_PAIR_EIM_OMP_H +#define LMP_PAIR_EIM_OMP_H + +#include "pair_eim.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairEIMOMP : public PairEIM, public ThrOMP { + + public: + PairEIMOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int iifrom, int iito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_gauss_cut_omp.cpp b/src/USER-OMP/pair_gauss_cut_omp.cpp new file mode 100644 index 0000000000..cb54785627 --- /dev/null +++ b/src/USER-OMP/pair_gauss_cut_omp.cpp @@ -0,0 +1,158 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_gauss_cut_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairGaussCutOMP::PairGaussCutOMP(LAMMPS *lmp) : + PairGaussCut(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairGaussCutOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairGaussCutOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r,rexp,ugauss,factor_lj; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r = sqrt(rsq); + rexp = (r-rmh[itype][jtype])/sigmah[itype][jtype]; + ugauss = pgauss[itype][jtype]*exp(-0.5*rexp*rexp); + fpair = factor_lj*rexp/r*ugauss/sigmah[itype][jtype]; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + evdwl = ugauss - offset[itype][jtype]; + evdwl *= factor_lj; + } + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairGaussCutOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairGaussCut::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_gauss_cut_omp.h b/src/USER-OMP/pair_gauss_cut_omp.h new file mode 100644 index 0000000000..2da2c14fdb --- /dev/null +++ b/src/USER-OMP/pair_gauss_cut_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(gauss/cut/omp,PairGaussCutOMP) + +#else + +#ifndef LMP_PAIR_GAUSS_CUT_OMP_H +#define LMP_PAIR_GAUSS_CUT_OMP_H + +#include "pair_gauss_cut.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairGaussCutOMP : public PairGaussCut, public ThrOMP { + + public: + PairGaussCutOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_gauss_omp.cpp b/src/USER-OMP/pair_gauss_omp.cpp new file mode 100644 index 0000000000..ae8af0d774 --- /dev/null +++ b/src/USER-OMP/pair_gauss_omp.cpp @@ -0,0 +1,170 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_gauss_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define EPSILON 1.0e-10 +/* ---------------------------------------------------------------------- */ + +PairGaussOMP::PairGaussOMP(LAMMPS *lmp) : + PairGauss(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairGaussOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + double occ = 0.0; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) reduction(+:occ) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) occ = eval<1,1,1>(ifrom, ito, thr); + else occ = eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) occ = eval<1,0,1>(ifrom, ito, thr); + else occ = eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) occ = eval<0,0,1>(ifrom, ito, thr); + else occ = eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region + + if (eflag_global) pvector[0] = occ; +} + +template +double PairGaussOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r2inv,forcelj,factor_lj; + int *ilist,*jlist,*numneigh,**firstneigh; + int occ = 0; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + // define a Gaussian well to be occupied if + // the site it interacts with is within the force maximum + + if (EFLAG) + if (eflag_global && rsq < 0.5/b[itype][jtype]) occ++; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + forcelj = - 2.0*a[itype][jtype]*b[itype][jtype] * rsq * + exp(-b[itype][jtype]*rsq); + fpair = factor_lj*forcelj*r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + evdwl = -(a[itype][jtype]*exp(-b[itype][jtype]*rsq) - + offset[itype][jtype]); + evdwl *= factor_lj; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } + return occ; +} + +/* ---------------------------------------------------------------------- */ + +double PairGaussOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairGauss::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_gauss_omp.h b/src/USER-OMP/pair_gauss_omp.h new file mode 100644 index 0000000000..a65a98730a --- /dev/null +++ b/src/USER-OMP/pair_gauss_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(gauss/omp,PairGaussOMP) + +#else + +#ifndef LMP_PAIR_GAUSS_OMP_H +#define LMP_PAIR_GAUSS_OMP_H + +#include "pair_gauss.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairGaussOMP : public PairGauss, public ThrOMP { + + public: + PairGaussOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + double eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_gayberne_omp.cpp b/src/USER-OMP/pair_gayberne_omp.cpp new file mode 100644 index 0000000000..98fa5428a2 --- /dev/null +++ b/src/USER-OMP/pair_gayberne_omp.cpp @@ -0,0 +1,231 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_gayberne_omp.h" +#include "math_extra.h" +#include "atom.h" +#include "comm.h" +#include "atom_vec_ellipsoid.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairGayBerneOMP::PairGayBerneOMP(LAMMPS *lmp) : + PairGayBerne(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairGayBerneOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairGayBerneOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double evdwl,one_eng,rsq,r2inv,r6inv,forcelj,factor_lj; + double fforce[3],ttor[3],rtor[3],r12[3]; + double a1[3][3],b1[3][3],g1[3][3],a2[3][3],b2[3][3],g2[3][3],temp[3][3]; + int *ilist,*jlist,*numneigh,**firstneigh; + double *iquat,*jquat; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const * const tor = thr->get_torque(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; + const int * const ellipsoid = atom->ellipsoid; + + AtomVecEllipsoid::Bonus *bonus = avec->bonus; + + double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + itype = type[i]; + fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0; + + if (form[itype][itype] == ELLIPSE_ELLIPSE) { + iquat = bonus[ellipsoid[i]].quat; + MathExtra::quat_to_mat_trans(iquat,a1); + MathExtra::diag_times3(well[itype],a1,temp); + MathExtra::transpose_times3(a1,temp,b1); + MathExtra::diag_times3(shape2[itype],a1,temp); + MathExtra::transpose_times3(a1,temp,g1); + } + + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + // r12 = center to center vector + + r12[0] = x[j][0]-x[i][0]; + r12[1] = x[j][1]-x[i][1]; + r12[2] = x[j][2]-x[i][2]; + rsq = MathExtra::dot3(r12,r12); + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + + switch (form[itype][jtype]) { + case SPHERE_SPHERE: + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + forcelj *= -r2inv; + if (EFLAG) + one_eng = r6inv*(r6inv*lj3[itype][jtype]-lj4[itype][jtype]) - + offset[itype][jtype]; + fforce[0] = r12[0]*forcelj; + fforce[1] = r12[1]*forcelj; + fforce[2] = r12[2]*forcelj; + ttor[0] = ttor[1] = ttor[2] = 0.0; + rtor[0] = rtor[1] = rtor[2] = 0.0; + break; + + case SPHERE_ELLIPSE: + jquat = bonus[ellipsoid[j]].quat; + MathExtra::quat_to_mat_trans(jquat,a2); + MathExtra::diag_times3(well[jtype],a2,temp); + MathExtra::transpose_times3(a2,temp,b2); + MathExtra::diag_times3(shape2[jtype],a2,temp); + MathExtra::transpose_times3(a2,temp,g2); + one_eng = gayberne_lj(j,i,a2,b2,g2,r12,rsq,fforce,rtor); + ttor[0] = ttor[1] = ttor[2] = 0.0; + break; + + case ELLIPSE_SPHERE: + one_eng = gayberne_lj(i,j,a1,b1,g1,r12,rsq,fforce,ttor); + rtor[0] = rtor[1] = rtor[2] = 0.0; + break; + + default: + jquat = bonus[ellipsoid[j]].quat; + MathExtra::quat_to_mat_trans(jquat,a2); + MathExtra::diag_times3(well[jtype],a2,temp); + MathExtra::transpose_times3(a2,temp,b2); + MathExtra::diag_times3(shape2[jtype],a2,temp); + MathExtra::transpose_times3(a2,temp,g2); + one_eng = gayberne_analytic(i,j,a1,a2,b1,b2,g1,g2,r12,rsq, + fforce,ttor,rtor); + break; + } + + fforce[0] *= factor_lj; + fforce[1] *= factor_lj; + fforce[2] *= factor_lj; + ttor[0] *= factor_lj; + ttor[1] *= factor_lj; + ttor[2] *= factor_lj; + + fxtmp += fforce[0]; + fytmp += fforce[1]; + fztmp += fforce[2]; + t1tmp += ttor[0]; + t2tmp += ttor[1]; + t3tmp += ttor[2]; + + if (NEWTON_PAIR || j < nlocal) { + rtor[0] *= factor_lj; + rtor[1] *= factor_lj; + rtor[2] *= factor_lj; + f[j][0] -= fforce[0]; + f[j][1] -= fforce[1]; + f[j][2] -= fforce[2]; + tor[j][0] += rtor[0]; + tor[j][1] += rtor[1]; + tor[j][2] += rtor[2]; + } + + if (EFLAG) evdwl = factor_lj*one_eng; + + if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fforce[0],fforce[1],fforce[2], + -r12[0],-r12[1],-r12[2],thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + tor[i][0] += t1tmp; + tor[i][1] += t2tmp; + tor[i][2] += t3tmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairGayBerneOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairGayBerne::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_gayberne_omp.h b/src/USER-OMP/pair_gayberne_omp.h new file mode 100644 index 0000000000..0e7b6708ff --- /dev/null +++ b/src/USER-OMP/pair_gayberne_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(gayberne/omp,PairGayBerneOMP) + +#else + +#ifndef LMP_PAIR_GAYBERNE_OMP_H +#define LMP_PAIR_GAYBERNE_OMP_H + +#include "pair_gayberne.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairGayBerneOMP : public PairGayBerne, public ThrOMP { + + public: + PairGayBerneOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_gran_hertz_history_omp.cpp b/src/USER-OMP/pair_gran_hertz_history_omp.cpp new file mode 100644 index 0000000000..1b49c2dfb9 --- /dev/null +++ b/src/USER-OMP/pair_gran_hertz_history_omp.cpp @@ -0,0 +1,313 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_gran_hertz_history_omp.h" +#include "atom.h" +#include "comm.h" +#include "fix.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "update.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairGranHertzHistoryOMP::PairGranHertzHistoryOMP(LAMMPS *lmp) : + PairGranHertzHistory(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairGranHertzHistoryOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + computeflag = 1; + const int shearupdate = (update->setupflag) ? 0 : 1; + + // update body ptr and values for ghost atoms if using FixRigid masses + + if (fix_rigid && neighbor->ago == 0) { + int tmp; + body = (int *) fix_rigid->extract("body",tmp); + mass_rigid = (double *) fix_rigid->extract("masstotal",tmp); + comm->forward_comm_pair(this); + } + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) + if (shearupdate) eval<1,1>(ifrom, ito, thr); + else eval<1,0>(ifrom, ito, thr); + else + if (shearupdate) eval<0,1>(ifrom, ito, thr); + else eval<0,0>(ifrom, ito, thr); + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairGranHertzHistoryOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz; + double radi,radj,radsum,rsq,r,rinv,rsqinv; + double vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3; + double wr1,wr2,wr3; + double vtr1,vtr2,vtr3,vrel; + double mi,mj,meff,damp,ccel,tor1,tor2,tor3; + double fn,fs,fs1,fs2,fs3; + double shrmag,rsht,polyhertz; + int *ilist,*jlist,*numneigh,**firstneigh; + int *touch,**firsttouch; + double *shear,*allshear,**firstshear; + + const double * const * const x = atom->x; + const double * const * const v = atom->v; + const double * const * const omega = atom->omega; + const double * const radius = atom->radius; + const double * const rmass = atom->rmass; + const double * const mass = atom->mass; + double * const * const f = thr->get_f(); + double * const * const torque = thr->get_torque(); + const int * const type = atom->type; + const int * const mask = atom->mask; + const int nlocal = atom->nlocal; + double fxtmp,fytmp,fztmp; + double t1tmp,t2tmp,t3tmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + firsttouch = list->listgranhistory->firstneigh; + firstshear = list->listgranhistory->firstdouble; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + radi = radius[i]; + touch = firsttouch[i]; + allshear = firstshear[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + radj = radius[j]; + radsum = radi + radj; + + if (rsq >= radsum*radsum) { + + // unset non-touching neighbors + + touch[jj] = 0; + shear = &allshear[3*jj]; + shear[0] = 0.0; + shear[1] = 0.0; + shear[2] = 0.0; + + } else { + r = sqrt(rsq); + rinv = 1.0/r; + rsqinv = 1.0/rsq; + + // relative translational velocity + + vr1 = v[i][0] - v[j][0]; + vr2 = v[i][1] - v[j][1]; + vr3 = v[i][2] - v[j][2]; + + // normal component + + vnnr = vr1*delx + vr2*dely + vr3*delz; + vn1 = delx*vnnr * rsqinv; + vn2 = dely*vnnr * rsqinv; + vn3 = delz*vnnr * rsqinv; + + // tangential component + + vt1 = vr1 - vn1; + vt2 = vr2 - vn2; + vt3 = vr3 - vn3; + + // relative rotational velocity + + wr1 = (radi*omega[i][0] + radj*omega[j][0]) * rinv; + wr2 = (radi*omega[i][1] + radj*omega[j][1]) * rinv; + wr3 = (radi*omega[i][2] + radj*omega[j][2]) * rinv; + + // meff = effective mass of pair of particles + // if I or J part of rigid body, use body mass + // if I or J is frozen, meff is other particle + + if (rmass) { + mi = rmass[i]; + mj = rmass[j]; + } else { + mi = mass[type[i]]; + mj = mass[type[j]]; + } + if (fix_rigid) { + if (body[i] >= 0) mi = mass_rigid[body[i]]; + if (body[j] >= 0) mj = mass_rigid[body[j]]; + } + + meff = mi*mj / (mi+mj); + if (mask[i] & freeze_group_bit) meff = mj; + if (mask[j] & freeze_group_bit) meff = mi; + + // normal force = Hertzian contact + normal velocity damping + + damp = meff*gamman*vnnr*rsqinv; + ccel = kn*(radsum-r)*rinv - damp; + polyhertz = sqrt((radsum-r)*radi*radj / radsum); + ccel *= polyhertz; + + // relative velocities + + vtr1 = vt1 - (delz*wr2-dely*wr3); + vtr2 = vt2 - (delx*wr3-delz*wr1); + vtr3 = vt3 - (dely*wr1-delx*wr2); + vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3; + vrel = sqrt(vrel); + + // shear history effects + + touch[jj] = 1; + shear = &allshear[3*jj]; + + if (SHEARUPDATE) { + shear[0] += vtr1*dt; + shear[1] += vtr2*dt; + shear[2] += vtr3*dt; + } + shrmag = sqrt(shear[0]*shear[0] + shear[1]*shear[1] + + shear[2]*shear[2]); + + // rotate shear displacements + + rsht = shear[0]*delx + shear[1]*dely + shear[2]*delz; + rsht *= rsqinv; + if (SHEARUPDATE) { + shear[0] -= rsht*delx; + shear[1] -= rsht*dely; + shear[2] -= rsht*delz; + } + + // tangential forces = shear + tangential velocity damping + + fs1 = -polyhertz * (kt*shear[0] + meff*gammat*vtr1); + fs2 = -polyhertz * (kt*shear[1] + meff*gammat*vtr2); + fs3 = -polyhertz * (kt*shear[2] + meff*gammat*vtr3); + + // rescale frictional displacements and forces if needed + + fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3); + fn = xmu * fabs(ccel*r); + + if (fs > fn) { + if (shrmag != 0.0) { + const double fnfs = fn/fs; + const double mgkt = meff*gammat/kt; + shear[0] = fnfs * (shear[0] + mgkt*vtr1) - mgkt*vtr1; + shear[1] = fnfs * (shear[1] + mgkt*vtr2) - mgkt*vtr2; + shear[2] = fnfs * (shear[2] + mgkt*vtr3) - mgkt*vtr3; + fs1 *= fnfs; + fs2 *= fnfs; + fs3 *= fnfs; + } else fs1 = fs2 = fs3 = 0.0; + } + + // forces & torques + + fx = delx*ccel + fs1; + fy = dely*ccel + fs2; + fz = delz*ccel + fs3; + fxtmp += fx; + fytmp += fy; + fztmp += fz; + + tor1 = rinv * (dely*fs3 - delz*fs2); + tor2 = rinv * (delz*fs1 - delx*fs3); + tor3 = rinv * (delx*fs2 - dely*fs1); + t1tmp -= radi*tor1; + t2tmp -= radi*tor2; + t3tmp -= radi*tor3; + + if (j < nlocal) { + f[j][0] -= fx; + f[j][1] -= fy; + f[j][2] -= fz; + torque[j][0] -= radj*tor1; + torque[j][1] -= radj*tor2; + torque[j][2] -= radj*tor3; + } + + if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,/* newton_pair */ 0, + 0.0,0.0,fx,fy,fz,delx,dely,delz,thr); + + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + torque[i][0] += t1tmp; + torque[i][1] += t2tmp; + torque[i][2] += t3tmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairGranHertzHistoryOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairGranHertzHistory::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_gran_hertz_history_omp.h b/src/USER-OMP/pair_gran_hertz_history_omp.h new file mode 100644 index 0000000000..1a8d819920 --- /dev/null +++ b/src/USER-OMP/pair_gran_hertz_history_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(gran/hertz/history/omp,PairGranHertzHistoryOMP) + +#else + +#ifndef LMP_PAIR_GRAN_HERTZ_HISTORY_OMP_H +#define LMP_PAIR_GRAN_HERTZ_HISTORY_OMP_H + +#include "pair_gran_hertz_history.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairGranHertzHistoryOMP : public PairGranHertzHistory, public ThrOMP { + + public: + PairGranHertzHistoryOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_gran_hooke_history_omp.cpp b/src/USER-OMP/pair_gran_hooke_history_omp.cpp new file mode 100644 index 0000000000..6362228b41 --- /dev/null +++ b/src/USER-OMP/pair_gran_hooke_history_omp.cpp @@ -0,0 +1,317 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_gran_hooke_history_omp.h" +#include "atom.h" +#include "comm.h" +#include "fix.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "update.h" + +#include "string.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairGranHookeHistoryOMP::PairGranHookeHistoryOMP(LAMMPS *lmp) : + PairGranHookeHistory(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; + // trigger use of OpenMP version of FixShearHistory + suffix = new char[4]; + memcpy(suffix,"omp",4); +} + +/* ---------------------------------------------------------------------- */ + +void PairGranHookeHistoryOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + computeflag = 1; + const int shearupdate = (update->setupflag) ? 0 : 1; + + // update body ptr and values for ghost atoms if using FixRigid masses + + if (fix_rigid && neighbor->ago == 0) { + int tmp; + body = (int *) fix_rigid->extract("body",tmp); + mass_rigid = (double *) fix_rigid->extract("masstotal",tmp); + comm->forward_comm_pair(this); + } + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) + if (shearupdate) eval<1,1>(ifrom, ito, thr); + else eval<1,0>(ifrom, ito, thr); + else + if (shearupdate) eval<0,1>(ifrom, ito, thr); + else eval<0,0>(ifrom, ito, thr); + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairGranHookeHistoryOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz; + double myshear[3]; + double radi,radj,radsum,rsq,r,rinv,rsqinv; + double vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3; + double wr1,wr2,wr3; + double vtr1,vtr2,vtr3,vrel; + double mi,mj,meff,damp,ccel,tor1,tor2,tor3; + double fn,fs,fs1,fs2,fs3; + double shrmag,rsht; + int *ilist,*jlist,*numneigh,**firstneigh; + int *touch,**firsttouch; + double *allshear,**firstshear; + + const double * const * const x = atom->x; + const double * const * const v = atom->v; + const double * const * const omega = atom->omega; + const double * const radius = atom->radius; + const double * const rmass = atom->rmass; + const double * const mass = atom->mass; + double * const * const f = thr->get_f(); + double * const * const torque = thr->get_torque(); + const int * const type = atom->type; + const int * const mask = atom->mask; + const int nlocal = atom->nlocal; + double fxtmp,fytmp,fztmp; + double t1tmp,t2tmp,t3tmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + firsttouch = listgranhistory->firstneigh; + firstshear = listgranhistory->firstdouble; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + radi = radius[i]; + touch = firsttouch[i]; + allshear = firstshear[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + radj = radius[j]; + radsum = radi + radj; + + if (rsq >= radsum*radsum) { + + // unset non-touching neighbors + + touch[jj] = 0; + myshear[0] = 0.0; + myshear[1] = 0.0; + myshear[2] = 0.0; + + } else { + r = sqrt(rsq); + rinv = 1.0/r; + rsqinv = 1.0/rsq; + + // relative translational velocity + + vr1 = v[i][0] - v[j][0]; + vr2 = v[i][1] - v[j][1]; + vr3 = v[i][2] - v[j][2]; + + // normal component + + vnnr = vr1*delx + vr2*dely + vr3*delz; + vn1 = delx*vnnr * rsqinv; + vn2 = dely*vnnr * rsqinv; + vn3 = delz*vnnr * rsqinv; + + // tangential component + + vt1 = vr1 - vn1; + vt2 = vr2 - vn2; + vt3 = vr3 - vn3; + + // relative rotational velocity + + wr1 = (radi*omega[i][0] + radj*omega[j][0]) * rinv; + wr2 = (radi*omega[i][1] + radj*omega[j][1]) * rinv; + wr3 = (radi*omega[i][2] + radj*omega[j][2]) * rinv; + + // meff = effective mass of pair of particles + // if I or J part of rigid body, use body mass + // if I or J is frozen, meff is other particle + + if (rmass) { + mi = rmass[i]; + mj = rmass[j]; + } else { + mi = mass[type[i]]; + mj = mass[type[j]]; + } + if (fix_rigid) { + if (body[i] >= 0) mi = mass_rigid[body[i]]; + if (body[j] >= 0) mj = mass_rigid[body[j]]; + } + + meff = mi*mj / (mi+mj); + if (mask[i] & freeze_group_bit) meff = mj; + if (mask[j] & freeze_group_bit) meff = mi; + + // normal forces = Hookian contact + normal velocity damping + + damp = meff*gamman*vnnr*rsqinv; + ccel = kn*(radsum-r)*rinv - damp; + + // relative velocities + + vtr1 = vt1 - (delz*wr2-dely*wr3); + vtr2 = vt2 - (delx*wr3-delz*wr1); + vtr3 = vt3 - (dely*wr1-delx*wr2); + vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3; + vrel = sqrt(vrel); + + // shear history effects + + touch[jj] = 1; + memcpy(myshear,allshear + 3*jj, 3*sizeof(double)); + + if (SHEARUPDATE) { + myshear[0] += vtr1*dt; + myshear[1] += vtr2*dt; + myshear[2] += vtr3*dt; + } + shrmag = sqrt(myshear[0]*myshear[0] + myshear[1]*myshear[1] + + myshear[2]*myshear[2]); + + // rotate shear displacements + + rsht = myshear[0]*delx + myshear[1]*dely + myshear[2]*delz; + rsht *= rsqinv; + if (SHEARUPDATE) { + myshear[0] -= rsht*delx; + myshear[1] -= rsht*dely; + myshear[2] -= rsht*delz; + } + + // tangential forces = shear + tangential velocity damping + + fs1 = - (kt*myshear[0] + meff*gammat*vtr1); + fs2 = - (kt*myshear[1] + meff*gammat*vtr2); + fs3 = - (kt*myshear[2] + meff*gammat*vtr3); + + // rescale frictional displacements and forces if needed + + fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3); + fn = xmu * fabs(ccel*r); + + if (fs > fn) { + if (shrmag != 0.0) { + const double fnfs = fn/fs; + const double mgkt = meff*gammat/kt; + myshear[0] = fnfs * (myshear[0] + mgkt*vtr1) - mgkt*vtr1; + myshear[1] = fnfs * (myshear[1] + mgkt*vtr2) - mgkt*vtr2; + myshear[2] = fnfs * (myshear[2] + mgkt*vtr3) - mgkt*vtr3; + fs1 *= fnfs; + fs2 *= fnfs; + fs3 *= fnfs; + } else fs1 = fs2 = fs3 = 0.0; + } + + // forces & torques + + fx = delx*ccel + fs1; + fy = dely*ccel + fs2; + fz = delz*ccel + fs3; + fxtmp += fx; + fytmp += fy; + fztmp += fz; + + tor1 = rinv * (dely*fs3 - delz*fs2); + tor2 = rinv * (delz*fs1 - delx*fs3); + tor3 = rinv * (delx*fs2 - dely*fs1); + t1tmp -= radi*tor1; + t2tmp -= radi*tor2; + t3tmp -= radi*tor3; + + if (j < nlocal) { + f[j][0] -= fx; + f[j][1] -= fy; + f[j][2] -= fz; + torque[j][0] -= radj*tor1; + torque[j][1] -= radj*tor2; + torque[j][2] -= radj*tor3; + } + + if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,/* newton_pair */ 0, + 0.0,0.0,fx,fy,fz,delx,dely,delz,thr); + + } + memcpy(allshear + 3*jj, myshear, 3*sizeof(double)); + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + torque[i][0] += t1tmp; + torque[i][1] += t2tmp; + torque[i][2] += t3tmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairGranHookeHistoryOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairGranHookeHistory::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_gran_hooke_history_omp.h b/src/USER-OMP/pair_gran_hooke_history_omp.h new file mode 100644 index 0000000000..9cdebf7104 --- /dev/null +++ b/src/USER-OMP/pair_gran_hooke_history_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(gran/hooke/history/omp,PairGranHookeHistoryOMP) + +#else + +#ifndef LMP_PAIR_GRAN_HOOKE_HISTORY_OMP_H +#define LMP_PAIR_GRAN_HOOKE_HISTORY_OMP_H + +#include "pair_gran_hooke_history.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairGranHookeHistoryOMP : public PairGranHookeHistory, public ThrOMP { + + public: + PairGranHookeHistoryOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_gran_hooke_omp.cpp b/src/USER-OMP/pair_gran_hooke_omp.cpp new file mode 100644 index 0000000000..a1a4b1fe03 --- /dev/null +++ b/src/USER-OMP/pair_gran_hooke_omp.cpp @@ -0,0 +1,256 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_gran_hooke_omp.h" +#include "atom.h" +#include "comm.h" +#include "fix.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairGranHookeOMP::PairGranHookeOMP(LAMMPS *lmp) : + PairGranHooke(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairGranHookeOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + // update body ptr and values for ghost atoms if using FixRigid masses + + if (fix_rigid && neighbor->ago == 0) { + int tmp; + body = (int *) fix_rigid->extract("body",tmp); + mass_rigid = (double *) fix_rigid->extract("masstotal",tmp); + comm->forward_comm_pair(this); + } + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) + if (force->newton_pair) eval<1,1>(ifrom, ito, thr); + else eval<1,0>(ifrom, ito, thr); + else + if (force->newton_pair) eval<0,1>(ifrom, ito, thr); + else eval<0,0>(ifrom, ito, thr); + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairGranHookeOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz; + double radi,radj,radsum,rsq,r,rinv,rsqinv; + double vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3; + double wr1,wr2,wr3; + double vtr1,vtr2,vtr3,vrel; + double mi,mj,meff,damp,ccel,tor1,tor2,tor3; + double fn,fs,ft,fs1,fs2,fs3; + int *ilist,*jlist,*numneigh,**firstneigh; + + const double * const * const x = atom->x; + const double * const * const v = atom->v; + const double * const * const omega = atom->omega; + const double * const radius = atom->radius; + const double * const rmass = atom->rmass; + const double * const mass = atom->mass; + double * const * const f = thr->get_f(); + double * const * const torque = thr->get_torque(); + const int * const type = atom->type; + const int * const mask = atom->mask; + const int nlocal = atom->nlocal; + double fxtmp,fytmp,fztmp; + double t1tmp,t2tmp,t3tmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + radi = radius[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + radj = radius[j]; + radsum = radi + radj; + + if (rsq < radsum*radsum) { + r = sqrt(rsq); + rinv = 1.0/r; + rsqinv = 1.0/rsq; + + // relative translational velocity + + vr1 = v[i][0] - v[j][0]; + vr2 = v[i][1] - v[j][1]; + vr3 = v[i][2] - v[j][2]; + + // normal component + + vnnr = vr1*delx + vr2*dely + vr3*delz; + vn1 = delx*vnnr * rsqinv; + vn2 = dely*vnnr * rsqinv; + vn3 = delz*vnnr * rsqinv; + + // tangential component + + vt1 = vr1 - vn1; + vt2 = vr2 - vn2; + vt3 = vr3 - vn3; + + // relative rotational velocity + + wr1 = (radi*omega[i][0] + radj*omega[j][0]) * rinv; + wr2 = (radi*omega[i][1] + radj*omega[j][1]) * rinv; + wr3 = (radi*omega[i][2] + radj*omega[j][2]) * rinv; + + // meff = effective mass of pair of particles + // if I or J part of rigid body, use body mass + // if I or J is frozen, meff is other particle + + if (rmass) { + mi = rmass[i]; + mj = rmass[j]; + } else { + mi = mass[type[i]]; + mj = mass[type[j]]; + } + if (fix_rigid) { + if (body[i] >= 0) mi = mass_rigid[body[i]]; + if (body[j] >= 0) mj = mass_rigid[body[j]]; + } + + meff = mi*mj / (mi+mj); + if (mask[i] & freeze_group_bit) meff = mj; + if (mask[j] & freeze_group_bit) meff = mi; + + // normal forces = Hookian contact + normal velocity damping + + damp = meff*gamman*vnnr*rsqinv; + ccel = kn*(radsum-r)*rinv - damp; + + // relative velocities + + vtr1 = vt1 - (delz*wr2-dely*wr3); + vtr2 = vt2 - (delx*wr3-delz*wr1); + vtr3 = vt3 - (dely*wr1-delx*wr2); + vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3; + vrel = sqrt(vrel); + + // force normalization + + fn = xmu * fabs(ccel*r); + fs = meff*gammat*vrel; + if (vrel != 0.0) ft = MIN(fn,fs) / vrel; + else ft = 0.0; + + // tangential force due to tangential velocity damping + + fs1 = -ft*vtr1; + fs2 = -ft*vtr2; + fs3 = -ft*vtr3; + + // forces & torques + + fx = delx*ccel + fs1; + fy = dely*ccel + fs2; + fz = delz*ccel + fs3; + fxtmp += fx; + fytmp += fy; + fztmp += fz; + + tor1 = rinv * (dely*fs3 - delz*fs2); + tor2 = rinv * (delz*fs1 - delx*fs3); + tor3 = rinv * (delx*fs2 - dely*fs1); + t1tmp -= radi*tor1; + t2tmp -= radi*tor2; + t3tmp -= radi*tor3; + + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= fx; + f[j][1] -= fy; + f[j][2] -= fz; + torque[j][0] -= radj*tor1; + torque[j][1] -= radj*tor2; + torque[j][2] -= radj*tor3; + } + + if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR, + 0.0,0.0,fx,fy,fz,delx,dely,delz,thr); + + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + torque[i][0] += t1tmp; + torque[i][1] += t2tmp; + torque[i][2] += t3tmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairGranHookeOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairGranHooke::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_gran_hooke_omp.h b/src/USER-OMP/pair_gran_hooke_omp.h new file mode 100644 index 0000000000..81f831fcea --- /dev/null +++ b/src/USER-OMP/pair_gran_hooke_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(gran/hooke/omp,PairGranHookeOMP) + +#else + +#ifndef LMP_PAIR_GRAN_HOOKE_OMP_H +#define LMP_PAIR_GRAN_HOOKE_OMP_H + +#include "pair_gran_hooke.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairGranHookeOMP : public PairGranHooke, public ThrOMP { + + public: + PairGranHookeOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp b/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp new file mode 100644 index 0000000000..8e4b8a7a90 --- /dev/null +++ b/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp @@ -0,0 +1,297 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_hbond_dreiding_lj_omp.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "math_const.h" + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +PairHbondDreidingLJOMP::PairHbondDreidingLJOMP(LAMMPS *lmp) : + PairHbondDreidingLJ(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; + hbcount_thr = hbeng_thr = NULL; +} + +/* ---------------------------------------------------------------------- */ + +PairHbondDreidingLJOMP::~PairHbondDreidingLJOMP() +{ + if (hbcount_thr) { + delete[] hbcount_thr; + delete[] hbeng_thr; + } +} + +/* ---------------------------------------------------------------------- */ + +void PairHbondDreidingLJOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + if (!hbcount_thr) { + hbcount_thr = new double[nthreads]; + hbeng_thr = new double[nthreads]; + } + + for (int i=0; i < nthreads; ++i) { + hbcount_thr[i] = 0.0; + hbeng_thr[i] = 0.0; + } + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region + + // reduce per thread hbond data + if (eflag_global) { + pvector[0] = 0.0; + pvector[1] = 0.0; + for (int i=0; i < nthreads; ++i) { + pvector[0] += hbcount_thr[i]; + pvector[1] += hbeng_thr[i]; + } + } +} + +template +void PairHbondDreidingLJOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,k,m,ii,jj,kk,jnum,itype,jtype,ktype; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq,rsq1,rsq2,r1,r2; + double factor_hb,force_angle,force_kernel,evdwl,eng_lj; + double c,s,a,b,ac,a11,a12,a22,vx1,vx2,vy1,vy2,vz1,vz2; + double fi[3],fj[3],delr1[3],delr2[3]; + double r2inv,r10inv; + double switch1,switch2; + int *ilist,*jlist,*numneigh,**firstneigh; + Param *pm; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const double * const special_lj = force->special_lj; + const int * const * const nspecial = atom->nspecial; + const int * const * const special = atom->special; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + + // ii = loop over donors + // jj = loop over acceptors + // kk = loop over hydrogens bonded to donor + + int hbcount = 0; + double hbeng = 0.0; + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + itype = type[i]; + if (!donor[itype]) continue; + + const int * const klist = special[i]; + const int knum = nspecial[i][0]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_hb = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + jtype = type[j]; + if (!acceptor[jtype]) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + for (kk = 0; kk < knum; kk++) { + k = atom->map(klist[kk]); + if (k < 0) continue; + ktype = type[k]; + m = type2param[itype][jtype][ktype]; + if (m < 0) continue; + pm = ¶ms[m]; + + if (rsq < pm->cut_outersq) { + delr1[0] = xtmp - x[k][0]; + delr1[1] = ytmp - x[k][1]; + delr1[2] = ztmp - x[k][2]; + domain->minimum_image(delr1); + rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2]; + r1 = sqrt(rsq1); + + delr2[0] = x[j][0] - x[k][0]; + delr2[1] = x[j][1] - x[k][1]; + delr2[2] = x[j][2] - x[k][2]; + domain->minimum_image(delr2); + rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2]; + r2 = sqrt(rsq2); + + // angle (cos and sin) + + c = delr1[0]*delr2[0] + delr1[1]*delr2[1] + delr1[2]*delr2[2]; + c /= r1*r2; + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + ac = acos(c); + + if (ac > pm->cut_angle && ac < (2.0*MY_PI - pm->cut_angle)) { + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + + // LJ-specific kernel + + r2inv = 1.0/rsq; + r10inv = r2inv*r2inv*r2inv*r2inv*r2inv; + force_kernel = r10inv*(pm->lj1*r2inv - pm->lj2)*r2inv * + pow(c,(double)pm->ap); + force_angle = pm->ap * r10inv*(pm->lj3*r2inv - pm->lj4) * + pow(c,pm->ap-1.0)*s; + + eng_lj = r10inv*(pm->lj3*r2inv - pm->lj4); + if (rsq > pm->cut_innersq) { + switch1 = (pm->cut_outersq-rsq) * (pm->cut_outersq-rsq) * + (pm->cut_outersq + 2.0*rsq - 3.0*pm->cut_innersq) / + pm->denom_vdw; + switch2 = 12.0*rsq * (pm->cut_outersq-rsq) * + (rsq-pm->cut_innersq) / pm->denom_vdw; + force_kernel = force_kernel*switch1 + eng_lj*switch2; + eng_lj *= switch1; + } + + if (EFLAG) { + evdwl = eng_lj * pow(c,(double)pm->ap); + evdwl *= factor_hb; + } + + a = factor_hb*force_angle/s; + b = factor_hb*force_kernel; + + a11 = a*c / rsq1; + a12 = -a / (r1*r2); + a22 = a*c / rsq2; + + vx1 = a11*delr1[0] + a12*delr2[0]; + vx2 = a22*delr2[0] + a12*delr1[0]; + vy1 = a11*delr1[1] + a12*delr2[1]; + vy2 = a22*delr2[1] + a12*delr1[1]; + vz1 = a11*delr1[2] + a12*delr2[2]; + vz2 = a22*delr2[2] + a12*delr1[2]; + + fi[0] = vx1 + b*delx; + fi[1] = vy1 + b*dely; + fi[2] = vz1 + b*delz; + fj[0] = vx2 - b*delx; + fj[1] = vy2 - b*dely; + fj[2] = vz2 - b*delz; + + fxtmp += fi[0]; + fytmp += fi[1]; + fztmp += fi[2]; + + f[j][0] += fj[0]; + f[j][1] += fj[1]; + f[j][2] += fj[2]; + + f[k][0] -= vx1 + vx2; + f[k][1] -= vy1 + vy2; + f[k][2] -= vz1 + vz2; + + // KIJ instead of IJK b/c delr1/delr2 are both with respect to k + + if (EVFLAG) ev_tally3_thr(this,k,i,j,evdwl,0.0,fi,fj,delr1,delr2,thr); + if (EFLAG) { + hbcount++; + hbeng += evdwl; + } + } + } + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } + const int tid = thr->get_tid(); + hbcount_thr[tid] = static_cast(hbcount); + hbeng_thr[tid] = hbeng; +} + +/* ---------------------------------------------------------------------- */ + +double PairHbondDreidingLJOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += comm->nthreads * 2 * sizeof(double); + bytes += PairHbondDreidingLJ::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_hbond_dreiding_lj_omp.h b/src/USER-OMP/pair_hbond_dreiding_lj_omp.h new file mode 100644 index 0000000000..dc5108b3d7 --- /dev/null +++ b/src/USER-OMP/pair_hbond_dreiding_lj_omp.h @@ -0,0 +1,52 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(hbond/dreiding/lj/omp,PairHbondDreidingLJOMP) + +#else + +#ifndef LMP_PAIR_HBOND_DREIDING_LJ_OMP_H +#define LMP_PAIR_HBOND_DREIDING_LJ_OMP_H + +#include "pair_hbond_dreiding_lj.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairHbondDreidingLJOMP : public PairHbondDreidingLJ, public ThrOMP { + + public: + PairHbondDreidingLJOMP(class LAMMPS *); + virtual ~PairHbondDreidingLJOMP(); + + virtual void compute(int, int); + virtual double memory_usage(); + + protected: + double *hbcount_thr, *hbeng_thr; + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp b/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp new file mode 100644 index 0000000000..3dccd98dbf --- /dev/null +++ b/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp @@ -0,0 +1,295 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_hbond_dreiding_morse_omp.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "math_const.h" + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +PairHbondDreidingMorseOMP::PairHbondDreidingMorseOMP(LAMMPS *lmp) : + PairHbondDreidingMorse(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; + hbcount_thr = hbeng_thr = NULL; +} + +/* ---------------------------------------------------------------------- */ + +PairHbondDreidingMorseOMP::~PairHbondDreidingMorseOMP() +{ + if (hbcount_thr) { + delete[] hbcount_thr; + delete[] hbeng_thr; + } +} + +/* ---------------------------------------------------------------------- */ + +void PairHbondDreidingMorseOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + if (!hbcount_thr) { + hbcount_thr = new double[nthreads]; + hbeng_thr = new double[nthreads]; + } + + for (int i=0; i < nthreads; ++i) { + hbcount_thr[i] = 0.0; + hbeng_thr[i] = 0.0; + } + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region + + // reduce per thread hbond data + if (eflag_global) { + pvector[0] = 0.0; + pvector[1] = 0.0; + for (int i=0; i < nthreads; ++i) { + pvector[0] += hbcount_thr[i]; + pvector[1] += hbeng_thr[i]; + } + } +} + +template +void PairHbondDreidingMorseOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,k,m,ii,jj,kk,jnum,itype,jtype,ktype; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq,rsq1,rsq2,r1,r2; + double factor_hb,force_angle,force_kernel,evdwl; + double c,s,a,b,ac,a11,a12,a22,vx1,vx2,vy1,vy2,vz1,vz2; + double fi[3],fj[3],delr1[3],delr2[3]; + double r,dr,dexp,eng_morse,switch1,switch2; + int *ilist,*jlist,*numneigh,**firstneigh; + Param *pm; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const double * const special_lj = force->special_lj; + const int * const * const nspecial = atom->nspecial; + const int * const * const special = atom->special; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + + // ii = loop over donors + // jj = loop over acceptors + // kk = loop over hydrogens bonded to donor + + int hbcount = 0; + double hbeng = 0.0; + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + itype = type[i]; + if (!donor[itype]) continue; + + const int * const klist = special[i]; + const int knum = nspecial[i][0]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_hb = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + jtype = type[j]; + if (!acceptor[jtype]) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + for (kk = 0; kk < knum; kk++) { + k = atom->map(klist[kk]); + if (k < 0) continue; + ktype = type[k]; + m = type2param[itype][jtype][ktype]; + if (m < 0) continue; + pm = ¶ms[m]; + + if (rsq < pm->cut_outersq) { + delr1[0] = xtmp - x[k][0]; + delr1[1] = ytmp - x[k][1]; + delr1[2] = ztmp - x[k][2]; + domain->minimum_image(delr1); + rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2]; + r1 = sqrt(rsq1); + + delr2[0] = x[j][0] - x[k][0]; + delr2[1] = x[j][1] - x[k][1]; + delr2[2] = x[j][2] - x[k][2]; + domain->minimum_image(delr2); + rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2]; + r2 = sqrt(rsq2); + + // angle (cos and sin) + + c = delr1[0]*delr2[0] + delr1[1]*delr2[1] + delr1[2]*delr2[2]; + c /= r1*r2; + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + ac = acos(c); + + if (ac > pm->cut_angle && ac < (2.0*MY_PI - pm->cut_angle)) { + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + + // Morse-specific kernel + + r = sqrt(rsq); + dr = r - pm->r0; + dexp = exp(-pm->alpha * dr); + force_kernel = pm->morse1*(dexp*dexp - dexp)/r * pow(c,(double)pm->ap); + force_angle = pm->ap * eng_morse * pow(c,(double)pm->ap-1.0)*s; + + eng_morse = pm->d0 * (dexp*dexp - 2.0*dexp); + if (rsq > pm->cut_innersq) { + switch1 = (pm->cut_outersq-rsq) * (pm->cut_outersq-rsq) * + (pm->cut_outersq + 2.0*rsq - 3.0*pm->cut_innersq) / + pm->denom_vdw; + switch2 = 12.0*rsq * (pm->cut_outersq-rsq) * + (rsq-pm->cut_innersq) / pm->denom_vdw; + force_kernel = force_kernel*switch1 + eng_morse*switch2; + eng_morse *= switch1; + } + + if (EFLAG) { + evdwl = eng_morse * pow(c,(double)params[m].ap); + evdwl *= factor_hb; + } + + a = factor_hb*force_angle/s; + b = factor_hb*force_kernel; + + a11 = a*c / rsq1; + a12 = -a / (r1*r2); + a22 = a*c / rsq2; + + vx1 = a11*delr1[0] + a12*delr2[0]; + vx2 = a22*delr2[0] + a12*delr1[0]; + vy1 = a11*delr1[1] + a12*delr2[1]; + vy2 = a22*delr2[1] + a12*delr1[1]; + vz1 = a11*delr1[2] + a12*delr2[2]; + vz2 = a22*delr2[2] + a12*delr1[2]; + + fi[0] = vx1 + b*delx; + fi[1] = vy1 + b*dely; + fi[2] = vz1 + b*delz; + fj[0] = vx2 - b*delx; + fj[1] = vy2 - b*dely; + fj[2] = vz2 - b*delz; + + fxtmp += fi[0]; + fytmp += fi[1]; + fztmp += fi[2]; + + f[j][0] += fj[0]; + f[j][1] += fj[1]; + f[j][2] += fj[2]; + + f[k][0] -= vx1 + vx2; + f[k][1] -= vy1 + vy2; + f[k][2] -= vz1 + vz2; + + // KIJ instead of IJK b/c delr1/delr2 are both with respect to k + + if (EVFLAG) ev_tally3_thr(this,k,i,j,evdwl,0.0,fi,fj,delr1,delr2,thr); + if (EFLAG) { + hbcount++; + hbeng += evdwl; + } + } + } + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } + const int tid = thr->get_tid(); + hbcount_thr[tid] = static_cast(hbcount); + hbeng_thr[tid] = hbeng; +} + +/* ---------------------------------------------------------------------- */ + +double PairHbondDreidingMorseOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += comm->nthreads * 2 * sizeof(double); + bytes += PairHbondDreidingMorse::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_hbond_dreiding_morse_omp.h b/src/USER-OMP/pair_hbond_dreiding_morse_omp.h new file mode 100644 index 0000000000..5a8aaf2c5a --- /dev/null +++ b/src/USER-OMP/pair_hbond_dreiding_morse_omp.h @@ -0,0 +1,52 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(hbond/dreiding/morse/omp,PairHbondDreidingMorseOMP) + +#else + +#ifndef LMP_PAIR_HBOND_DREIDING_MORSE_OMP_H +#define LMP_PAIR_HBOND_DREIDING_MORSE_OMP_H + +#include "pair_hbond_dreiding_morse.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairHbondDreidingMorseOMP : public PairHbondDreidingMorse, public ThrOMP { + + public: + PairHbondDreidingMorseOMP(class LAMMPS *); + virtual ~PairHbondDreidingMorseOMP(); + + virtual void compute(int, int); + virtual double memory_usage(); + + protected: + double *hbcount_thr, *hbeng_thr; + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_line_lj_omp.cpp b/src/USER-OMP/pair_line_lj_omp.cpp new file mode 100644 index 0000000000..27c630166e --- /dev/null +++ b/src/USER-OMP/pair_line_lj_omp.cpp @@ -0,0 +1,341 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_line_lj_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "memory.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairLineLJOMP::PairLineLJOMP(LAMMPS *lmp) : + PairLineLJ(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairLineLJOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + const int * const line = atom->line; + const int * const type = atom->type; + + // grow discrete list if necessary and initialize + + if (nall > nmax) { + nmax = nall; + memory->destroy(dnum); + memory->destroy(dfirst); + memory->create(dnum,nall,"pair:dnum"); + memory->create(dfirst,nall,"pair:dfirst"); + } + memset(dnum,0,nall*sizeof(int)); + ndiscrete = 0; + + // need to discretize the system ahead of time + // until we find a good way to multi-thread it. + for (int i = 0; i < nall; ++i) + if (line[i] >= 0) + if (dnum[i] == 0) + discretize(i,sigma[type[i]][type[i]]); + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairLineLJOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + int ni,nj,npi,npj,ifirst,jfirst; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r2inv,r6inv,term1,term2,sig,sig3,forcelj; + double xi[2],xj[2],fi[2],fj[2],dxi,dxj,dyi,dyj,ti,tj; + int *ilist,*jlist,*numneigh,**firstneigh; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const * const torque = thr->get_torque(); + const int * const line = atom->line; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq >= cutsq[itype][jtype]) continue; + + // line/line interactions = NxN particles + + evdwl = 0.0; + if (line[i] >= 0 && line[j] >= 0) { + npi = dnum[i]; + ifirst = dfirst[i]; + npj = dnum[j]; + jfirst = dfirst[j]; + + fi[0] = fi[1] = fj[0] = fj[1] = ti = tj = 0.0; + + for (ni = 0; ni < npi; ni++) { + dxi = discrete[ifirst+ni].dx; + dyi = discrete[ifirst+ni].dy; + + for (nj = 0; nj < npj; nj++) { + dxj = discrete[jfirst+nj].dx; + dyj = discrete[jfirst+nj].dy; + + xi[0] = x[i][0] + dxi; + xi[1] = x[i][1] + dyi; + xj[0] = x[j][0] + dxj; + xj[1] = x[j][1] + dyj; + + delx = xi[0] - xj[0]; + dely = xi[1] - xj[1]; + rsq = delx*delx + dely*dely; + + sig = 0.5 * (discrete[ifirst+ni].sigma+discrete[jfirst+nj].sigma); + sig3 = sig*sig*sig; + term2 = 24.0*epsilon[itype][jtype] * sig3*sig3; + term1 = 2.0 * term2 * sig3*sig3; + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (term1*r6inv - term2); + fpair = forcelj*r2inv; + + if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0); + + fi[0] += delx*fpair; + fi[1] += dely*fpair; + ti += fpair*(dxi*dely - dyi*delx); + + if (NEWTON_PAIR || j < nlocal) { + fj[0] -= delx*fpair; + fj[1] -= dely*fpair; + tj += fpair*(dxj*dely - dyj*delx); + } + } + } + + f[i][0] += fi[0]; + f[i][1] += fi[1]; + f[j][0] += fj[0]; + f[j][1] += fj[1]; + torque[i][2] += ti; + torque[j][2] += tj; + + // line/particle interaction = Nx1 particles + // convert line into Np particles based on sigma and line length + + } else if (line[i] >= 0) { + npi = dnum[i]; + ifirst = dfirst[i]; + + fi[0] = fi[1] = fj[0] = fj[1] = ti = tj = 0.0; + + for (ni = 0; ni < npi; ni++) { + dxi = discrete[ifirst+ni].dx; + dyi = discrete[ifirst+ni].dy; + + xi[0] = x[i][0] + dxi; + xi[1] = x[i][1] + dyi; + xj[0] = x[j][0]; + xj[1] = x[j][1]; + + delx = xi[0] - xj[0]; + dely = xi[1] - xj[1]; + rsq = delx*delx + dely*dely; + + sig = 0.5 * (discrete[ifirst+ni].sigma+sigma[jtype][jtype]); + sig3 = sig*sig*sig; + term2 = 24.0*epsilon[itype][jtype] * sig3*sig3; + term1 = 2.0 * term2 * sig3*sig3; + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (term1*r6inv - term2); + fpair = forcelj*r2inv; + + if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0); + + fi[0] += delx*fpair; + fi[1] += dely*fpair; + ti += fpair*(dxi*dely - dyi*delx); + + if (NEWTON_PAIR || j < nlocal) { + fj[0] -= delx*fpair; + fj[1] -= dely*fpair; + tj += fpair*(dxj*dely - dyj*delx); + } + } + + f[i][0] += fi[0]; + f[i][1] += fi[1]; + f[j][0] += fj[0]; + f[j][1] += fj[1]; + torque[i][2] += ti; + torque[j][2] += tj; + + // particle/line interaction = Nx1 particles + // convert line into Np particles based on sigma and line length + + } else if (line[j] >= 0) { + npj = dnum[j]; + jfirst = dfirst[j]; + + fi[0] = fi[1] = fj[0] = fj[1] = ti = tj = 0.0; + + for (nj = 0; nj < npj; nj++) { + dxj = discrete[jfirst+nj].dx; + dyj = discrete[jfirst+nj].dy; + + xi[0] = x[i][0]; + xi[1] = x[i][1]; + xj[0] = x[j][0] + dxj; + xj[1] = x[j][1] + dyj; + + delx = xi[0] - xj[0]; + dely = xi[1] - xj[1]; + rsq = delx*delx + dely*dely; + + sig = 0.5 * (sigma[itype][itype]+discrete[jfirst+nj].sigma); + sig3 = sig*sig*sig; + term2 = 24.0*epsilon[itype][jtype] * sig3*sig3; + term1 = 2.0 * term2 * sig3*sig3; + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (term1*r6inv - term2); + fpair = forcelj*r2inv; + + if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0); + + fi[0] += delx*fpair; + fi[1] += dely*fpair; + ti += fpair*(dxi*dely - dyi*delx); + + if (NEWTON_PAIR || j < nlocal) { + fj[0] -= delx*fpair; + fj[1] -= dely*fpair; + tj -= fpair*(dxj*dely - dyj*delx); + } + } + + f[i][0] += fi[0]; + f[i][1] += fi[1]; + f[j][0] += fj[0]; + f[j][1] += fj[1]; + torque[i][2] += ti; + torque[j][2] += tj; + + // particle/particle interaction = 1x1 particles + + } else { + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + fpair = forcelj*r2inv; + + if (EFLAG) + evdwl += r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + } + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLineLJOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLineLJ::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_line_lj_omp.h b/src/USER-OMP/pair_line_lj_omp.h new file mode 100644 index 0000000000..9a5e4333e0 --- /dev/null +++ b/src/USER-OMP/pair_line_lj_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(line/lj/omp,PairLineLJOMP) + +#else + +#ifndef LMP_PAIR_LINE_LJ_OMP_H +#define LMP_PAIR_LINE_LJ_OMP_H + +#include "pair_line_lj.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLineLJOMP : public PairLineLJ, public ThrOMP { + + public: + PairLineLJOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj96_cut_omp.cpp b/src/USER-OMP/pair_lj96_cut_omp.cpp new file mode 100644 index 0000000000..16827c7ee1 --- /dev/null +++ b/src/USER-OMP/pair_lj96_cut_omp.cpp @@ -0,0 +1,161 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj96_cut_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairLJ96CutOMP::PairLJ96CutOMP(LAMMPS *lmp) : + PairLJ96Cut(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; + cut_respa = NULL; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJ96CutOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairLJ96CutOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r2inv,r3inv,r6inv,forcelj,factor_lj; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + r3inv = sqrt(r6inv); + + forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]); + fpair = factor_lj*forcelj*r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) + - offset[itype][jtype]; + evdwl *= factor_lj; + } + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJ96CutOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJ96Cut::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj96_cut_omp.h b/src/USER-OMP/pair_lj96_cut_omp.h new file mode 100644 index 0000000000..037c68fc03 --- /dev/null +++ b/src/USER-OMP/pair_lj96_cut_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj96/cut/omp,PairLJ96CutOMP) + +#else + +#ifndef LMP_PAIR_LJ96_CUT_OMP_H +#define LMP_PAIR_LJ96_CUT_OMP_H + +#include "pair_lj96_cut.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJ96CutOMP : public PairLJ96Cut, public ThrOMP { + + public: + PairLJ96CutOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp new file mode 100644 index 0000000000..739f754a23 --- /dev/null +++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp @@ -0,0 +1,210 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_charmm_coul_charmm_implicit_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairLJCharmmCoulCharmmImplicitOMP::PairLJCharmmCoulCharmmImplicitOMP(LAMMPS *lmp) : + PairLJCharmmCoulCharmmImplicit(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCharmmCoulCharmmImplicitOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJCharmmCoulCharmmImplicitOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + double philj,switch1,switch2; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + const double invdenom_coul = (denom_coul != 0.0) ? 1.0/denom_coul : 0.0; + const double invdenom_lj = (denom_lj != 0.0) ? 1.0/denom_lj : 0.0; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cut_bothsq) { + r2inv = 1.0/rsq; + + if (rsq < cut_coulsq) { + forcecoul = 2.0 * qqrd2e * qtmp*q[j]*r2inv; + if (rsq > cut_coul_innersq) { + switch1 = (cut_coulsq-rsq) * (cut_coulsq-rsq) * + (cut_coulsq + 2.0*rsq - 3.0*cut_coul_innersq) * invdenom_coul; + switch2 = 12.0*rsq * (cut_coulsq-rsq) * + (rsq-cut_coul_innersq) * invdenom_coul; + forcecoul *= switch1 + switch2; + } + forcecoul *= factor_coul; + } else forcecoul = 0.0; + + if (rsq < cut_ljsq) { + r6inv = r2inv*r2inv*r2inv; + jtype = type[j]; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + if (rsq > cut_lj_innersq) { + switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * invdenom_lj; + switch2 = 12.0*rsq * (cut_ljsq-rsq) * + (rsq-cut_lj_innersq) * invdenom_lj; + philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + forcelj = forcelj*switch1 + philj*switch2; + } + forcelj *= factor_lj; + } else forcelj = 0.0; + + fpair = (forcecoul + forcelj) * r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + if (rsq < cut_coulsq) { + ecoul = qqrd2e * qtmp*q[j]*r2inv; + if (rsq > cut_coul_innersq) { + switch1 = (cut_coulsq-rsq) * (cut_coulsq-rsq) * + (cut_coulsq + 2.0*rsq - 3.0*cut_coul_innersq) * + invdenom_coul; + ecoul *= switch1; + } + ecoul *= factor_coul; + } else ecoul = 0.0; + if (rsq < cut_ljsq) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); + if (rsq > cut_lj_innersq) { + switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * invdenom_lj; + evdwl *= switch1; + } + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJCharmmCoulCharmmImplicitOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJCharmmCoulCharmmImplicit::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.h b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.h new file mode 100644 index 0000000000..9fb2835353 --- /dev/null +++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/charmm/coul/charmm/implicit/omp,PairLJCharmmCoulCharmmImplicitOMP) + +#else + +#ifndef LMP_PAIR_LJ_CHARMM_COUL_CHARMM_IMPLICIT_OMP_H +#define LMP_PAIR_LJ_CHARMM_COUL_CHARMM_IMPLICIT_OMP_H + +#include "pair_lj_charmm_coul_charmm_implicit.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJCharmmCoulCharmmImplicitOMP : public PairLJCharmmCoulCharmmImplicit, public ThrOMP { + + public: + PairLJCharmmCoulCharmmImplicitOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp new file mode 100644 index 0000000000..318cb3aaa8 --- /dev/null +++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp @@ -0,0 +1,210 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_charmm_coul_charmm_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairLJCharmmCoulCharmmOMP::PairLJCharmmCoulCharmmOMP(LAMMPS *lmp) : + PairLJCharmmCoulCharmm(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCharmmCoulCharmmOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJCharmmCoulCharmmOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + double philj,switch1,switch2; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + const double invdenom_coul = (denom_coul != 0.0) ? 1.0/denom_coul : 0.0; + const double invdenom_lj = (denom_lj != 0.0) ? 1.0/denom_lj : 0.0; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cut_bothsq) { + r2inv = 1.0/rsq; + + if (rsq < cut_coulsq) { + forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv); + if (rsq > cut_coul_innersq) { + switch1 = (cut_coulsq-rsq) * (cut_coulsq-rsq) * + (cut_coulsq + 2.0*rsq - 3.0*cut_coul_innersq) * invdenom_coul; + switch2 = 12.0*rsq * (cut_coulsq-rsq) * + (rsq-cut_coul_innersq) * invdenom_coul; + forcecoul *= switch1 + switch2; + } + forcecoul *= factor_coul; + } else forcecoul = 0.0; + + if (rsq < cut_ljsq) { + r6inv = r2inv*r2inv*r2inv; + jtype = type[j]; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + if (rsq > cut_lj_innersq) { + switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * invdenom_lj; + switch2 = 12.0*rsq * (cut_ljsq-rsq) * + (rsq-cut_lj_innersq) * invdenom_lj; + philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + forcelj = forcelj*switch1 + philj*switch2; + } + forcelj *= factor_lj; + } else forcelj = 0.0; + + fpair = (forcecoul + forcelj) * r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + if (rsq < cut_coulsq) { + ecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv); + if (rsq > cut_coul_innersq) { + switch1 = (cut_coulsq-rsq) * (cut_coulsq-rsq) * + (cut_coulsq + 2.0*rsq - 3.0*cut_coul_innersq) * + invdenom_coul; + ecoul *= switch1; + } + ecoul *= factor_coul; + } else ecoul = 0.0; + if (rsq < cut_ljsq) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); + if (rsq > cut_lj_innersq) { + switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * invdenom_lj; + evdwl *= switch1; + } + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJCharmmCoulCharmmOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJCharmmCoulCharmm::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h new file mode 100644 index 0000000000..9591d62c85 --- /dev/null +++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/charmm/coul/charmm/omp,PairLJCharmmCoulCharmmOMP) + +#else + +#ifndef LMP_PAIR_LJ_CHARMM_COUL_CHARMM_OMP_H +#define LMP_PAIR_LJ_CHARMM_COUL_CHARMM_OMP_H + +#include "pair_lj_charmm_coul_charmm.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJCharmmCoulCharmmOMP : public PairLJCharmmCoulCharmm, public ThrOMP { + + public: + PairLJCharmmCoulCharmmOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp new file mode 100644 index 0000000000..afdc527567 --- /dev/null +++ b/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp @@ -0,0 +1,223 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_charmm_coul_long_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairLJCharmmCoulLongOMP::PairLJCharmmCoulLongOMP(LAMMPS *lmp) : + PairLJCharmmCoulLong(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; + cut_respa = NULL; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCharmmCoulLongOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJCharmmCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + const double inv_denom_lj = 1.0/denom_lj; + + const int * const ilist = list->ilist; + const int * const numneigh = list->numneigh; + const int * const * const firstneigh = list->firstneigh; + const int nlocal = atom->nlocal; + + // loop over neighbors of my atoms + + for (int ii = iifrom; ii < iito; ++ii) { + + const int i = ilist[ii]; + const int itype = type[i]; + const double qtmp = q[i]; + const double xtmp = x[i][0]; + const double ytmp = x[i][1]; + const double ztmp = x[i][2]; + double fxtmp,fytmp,fztmp; + fxtmp=fytmp=fztmp=0.0; + + const int * const jlist = firstneigh[i]; + const int jnum = numneigh[i]; + + for (int jj = 0; jj < jnum; jj++) { + double forcecoul, forcelj, evdwl, ecoul; + forcecoul = forcelj = evdwl = ecoul = 0.0; + + const int sbindex = sbmask(jlist[jj]); + const int j = jlist[jj] & NEIGHMASK; + + const double delx = xtmp - x[j][0]; + const double dely = ytmp - x[j][1]; + const double delz = ztmp - x[j][2]; + const double rsq = delx*delx + dely*dely + delz*delz; + const int jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + const double r2inv = 1.0/rsq; + + if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) { + const double A1 = 0.254829592; + const double A2 = -0.284496736; + const double A3 = 1.421413741; + const double A4 = -1.453152027; + const double A5 = 1.061405429; + const double EWALD_F = 1.12837917; + const double INV_EWALD_P = 1.0/0.3275911; + + const double r = sqrt(rsq); + const double grij = g_ewald * r; + const double expm2 = exp(-grij*grij); + const double t = INV_EWALD_P / (INV_EWALD_P + grij); + const double erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + const double prefactor = qqrd2e * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (EFLAG) ecoul = prefactor*erfc; + if (sbindex) { + const double adjust = (1.0-special_coul[sbindex])*prefactor; + forcecoul -= adjust; + if (EFLAG) ecoul -= adjust; + } + } else { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits; + const double fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; + const double table = ftable[itable] + fraction*dftable[itable]; + forcecoul = qtmp*q[j] * table; + if (EFLAG) ecoul = qtmp*q[j] * (etable[itable] + fraction*detable[itable]); + if (sbindex) { + const double table2 = ctable[itable] + fraction*dctable[itable]; + const double prefactor = qtmp*q[j] * table2; + const double adjust = (1.0-special_coul[sbindex])*prefactor; + forcecoul -= adjust; + if (EFLAG) ecoul -= adjust; + } + } + } + + if (rsq < cut_ljsq) { + const double r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + if (EFLAG) evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); + + if (rsq > cut_lj_innersq) { + const double drsq = cut_ljsq - rsq; + const double cut2 = (rsq - cut_lj_innersq) * drsq; + const double switch1 = drsq * (drsq*drsq + 3.0*cut2) * inv_denom_lj; + const double switch2 = 12.0*rsq * cut2 * inv_denom_lj; + if (EFLAG) { + forcelj = forcelj*switch1 + evdwl*switch2; + evdwl *= switch1; + } else { + const double philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + forcelj = forcelj*switch1 + philj*switch2; + } + } + + if (sbindex) { + const double factor_lj = special_lj[sbindex]; + forcelj *= factor_lj; + if (EFLAG) evdwl *= factor_lj; + } + + } + const double fpair = (forcecoul + forcelj) * r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJCharmmCoulLongOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJCharmmCoulLong::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_charmm_coul_long_omp.h b/src/USER-OMP/pair_lj_charmm_coul_long_omp.h new file mode 100644 index 0000000000..ba28d05114 --- /dev/null +++ b/src/USER-OMP/pair_lj_charmm_coul_long_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/charmm/coul/long/omp,PairLJCharmmCoulLongOMP) + +#else + +#ifndef LMP_PAIR_LJ_CHARMM_COUL_LONG_OMP_H +#define LMP_PAIR_LJ_CHARMM_COUL_LONG_OMP_H + +#include "pair_lj_charmm_coul_long.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJCharmmCoulLongOMP : public PairLJCharmmCoulLong, public ThrOMP { + + public: + PairLJCharmmCoulLongOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_charmm_coul_pppm_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_pppm_omp.cpp new file mode 100644 index 0000000000..b5878dfe3d --- /dev/null +++ b/src/USER-OMP/pair_lj_charmm_coul_pppm_omp.cpp @@ -0,0 +1,261 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_charmm_coul_pppm_omp.h" +#include "pppm_proxy.h" +#include "atom.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "update.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + +/* ---------------------------------------------------------------------- */ + +PairLJCharmmCoulPPPMOMP::PairLJCharmmCoulPPPMOMP(LAMMPS *lmp) : + PairLJCharmmCoulLong(lmp), ThrOMP(lmp, THR_PAIR|THR_PROXY) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; + nproxy=1; + + kspace = NULL; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCharmmCoulPPPMOMP::init_style() +{ + if (comm->nthreads < 2) + error->all(FLERR,"need at least two threads per MPI task for this pair style"); + + if (strcmp(force->kspace_style,"pppm/proxy") != 0) + error->all(FLERR,"kspace style pppm/proxy is required with this pair style"); + + kspace = static_cast(force->kspace); + + PairLJCharmmCoulLong::init_style(); +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCharmmCoulPPPMOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads, nproxy); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + // thread id 0 runs pppm, the rest the pair style + if (tid < nproxy) { + kspace->compute_proxy(eflag,vflag); + } else { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + } + + sync_threads(); + reduce_thr(this, eflag, vflag, thr, nproxy); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJCharmmCoulPPPMOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype,itable; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double fraction,table; + double r,rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + double grij,expm2,prefactor,t,erfc; + double philj,switch1,switch2; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + + if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) { + r = sqrt(rsq); + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + prefactor = qqrd2e * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + } else { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; + itable >>= ncoulshiftbits; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; + table = ftable[itable] + fraction*dftable[itable]; + forcecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + table = ctable[itable] + fraction*dctable[itable]; + prefactor = qtmp*q[j] * table; + forcecoul -= (1.0-factor_coul)*prefactor; + } + } + } else forcecoul = 0.0; + + if (rsq < cut_ljsq) { + r6inv = r2inv*r2inv*r2inv; + jtype = type[j]; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + if (rsq > cut_lj_innersq) { + switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; + switch2 = 12.0*rsq * (cut_ljsq-rsq) * + (rsq-cut_lj_innersq) / denom_lj; + philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + forcelj = forcelj*switch1 + philj*switch2; + } + forcelj *= factor_lj; + } else forcelj = 0.0; + + fpair = (forcecoul + forcelj) * r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) + ecoul = prefactor*erfc; + else { + table = etable[itable] + fraction*detable[itable]; + ecoul = qtmp*q[j] * table; + } + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + + if (rsq < cut_ljsq) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); + if (rsq > cut_lj_innersq) { + switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; + evdwl *= switch1; + } + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJCharmmCoulPPPMOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJCharmmCoulLong::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_charmm_coul_pppm_omp.h b/src/USER-OMP/pair_lj_charmm_coul_pppm_omp.h new file mode 100644 index 0000000000..835f40389f --- /dev/null +++ b/src/USER-OMP/pair_lj_charmm_coul_pppm_omp.h @@ -0,0 +1,54 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/charmm/coul/pppm/omp,PairLJCharmmCoulPPPMOMP) + +#else + +#ifndef LMP_PAIR_LJ_CHARMM_COUL_PPPM_OMP_H +#define LMP_PAIR_LJ_CHARMM_COUL_PPPM_OMP_H + +#include "pair_lj_charmm_coul_long.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJCharmmCoulPPPMOMP : public PairLJCharmmCoulLong, public ThrOMP { + + public: + PairLJCharmmCoulPPPMOMP(class LAMMPS *); + virtual ~PairLJCharmmCoulPPPMOMP() {}; + + virtual void init_style(); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); + + class PPPMProxy *kspace; + int nproxy; +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp b/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp new file mode 100644 index 0000000000..361923cb54 --- /dev/null +++ b/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp @@ -0,0 +1,183 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_class2_coul_cut_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairLJClass2CoulCutOMP::PairLJClass2CoulCutOMP(LAMMPS *lmp) : + PairLJClass2CoulCut(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJClass2CoulCutOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJClass2CoulCutOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double rsq,rinv,r2inv,r3inv,r6inv,forcecoul,forcelj; + double factor_coul,factor_lj; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + rinv = sqrt(r2inv); + + if (rsq < cut_coulsq[itype][jtype]) { + forcecoul = qqrd2e * qtmp*q[j]*rinv; + forcecoul *= factor_coul; + } else forcecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + r3inv = r2inv*rinv; + r6inv = r3inv*r3inv; + forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]); + forcelj *= factor_lj; + } else forcelj = 0.0; + + fpair = (forcecoul + forcelj) * r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + if (rsq < cut_coulsq[itype][jtype]) + ecoul = factor_coul * qqrd2e * qtmp*q[j]*rinv; + else ecoul = 0.0; + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJClass2CoulCutOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJClass2CoulCut::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_class2_coul_cut_omp.h b/src/USER-OMP/pair_lj_class2_coul_cut_omp.h new file mode 100644 index 0000000000..f03da67a15 --- /dev/null +++ b/src/USER-OMP/pair_lj_class2_coul_cut_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/class2/coul/cut/omp,PairLJClass2CoulCutOMP) + +#else + +#ifndef LMP_PAIR_LJ_CLASS2_COUL_CUT_OMP_H +#define LMP_PAIR_LJ_CLASS2_COUL_CUT_OMP_H + +#include "pair_lj_class2_coul_cut.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJClass2CoulCutOMP : public PairLJClass2CoulCut, public ThrOMP { + + public: + PairLJClass2CoulCutOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp b/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp new file mode 100644 index 0000000000..6049d7a98c --- /dev/null +++ b/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp @@ -0,0 +1,199 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_class2_coul_long_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + +/* ---------------------------------------------------------------------- */ + +PairLJClass2CoulLongOMP::PairLJClass2CoulLongOMP(LAMMPS *lmp) : + PairLJClass2CoulLong(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJClass2CoulLongOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJClass2CoulLongOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double r,rsq,rinv,r2inv,r3inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + double grij,expm2,prefactor,t,erfc; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + + if (rsq < cut_coulsq) { + r = sqrt(rsq); + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + prefactor = qqrd2e * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + } else forcecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + rinv = sqrt(r2inv); + r3inv = r2inv*rinv; + r6inv = r3inv*r3inv; + forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]); + forcelj *= factor_lj; + } else forcelj = 0.0; + + fpair = (forcecoul + forcelj) * r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + if (rsq < cut_coulsq) { + ecoul = prefactor*erfc; + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJClass2CoulLongOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJClass2CoulLong::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_class2_coul_long_omp.h b/src/USER-OMP/pair_lj_class2_coul_long_omp.h new file mode 100644 index 0000000000..973850c25c --- /dev/null +++ b/src/USER-OMP/pair_lj_class2_coul_long_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/class2/coul/long/omp,PairLJClass2CoulLongOMP) + +#else + +#ifndef LMP_PAIR_LJ_CLASS2_COUL_LONG_OMP_H +#define LMP_PAIR_LJ_CLASS2_COUL_LONG_OMP_H + +#include "pair_lj_class2_coul_long.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJClass2CoulLongOMP : public PairLJClass2CoulLong, public ThrOMP { + + public: + PairLJClass2CoulLongOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_class2_coul_pppm_omp.cpp b/src/USER-OMP/pair_lj_class2_coul_pppm_omp.cpp new file mode 100644 index 0000000000..1b0ca0ec7d --- /dev/null +++ b/src/USER-OMP/pair_lj_class2_coul_pppm_omp.cpp @@ -0,0 +1,228 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_class2_coul_pppm_omp.h" +#include "pppm_proxy.h" +#include "atom.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "update.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + +/* ---------------------------------------------------------------------- */ + +PairLJClass2CoulPPPMOMP::PairLJClass2CoulPPPMOMP(LAMMPS *lmp) : + PairLJClass2CoulLong(lmp), ThrOMP(lmp, THR_PAIR|THR_PROXY) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; + nproxy=1; + + kspace = NULL; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJClass2CoulPPPMOMP::init_style() +{ + if (comm->nthreads < 2) + error->all(FLERR,"need at least two threads per MPI task for this pair style"); + + if (strcmp(force->kspace_style,"pppm/proxy") != 0) + error->all(FLERR,"kspace style pppm/proxy is required with this pair style"); + + kspace = static_cast(force->kspace); + + PairLJClass2CoulLong::init_style(); +} + +/* ---------------------------------------------------------------------- */ + +void PairLJClass2CoulPPPMOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads, nproxy); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + // thread id 0 runs pppm, the rest the pair style + if (tid < nproxy) { + kspace->compute_proxy(eflag,vflag); + } else { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + } + + sync_threads(); + reduce_thr(this, eflag, vflag, thr, nproxy); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJClass2CoulPPPMOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double r,rsq,rinv,r2inv,r3inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + double grij,expm2,prefactor,t,erfc; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + + if (rsq < cut_coulsq) { + r = sqrt(rsq); + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + prefactor = qqrd2e * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + } else forcecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + rinv = sqrt(r2inv); + r3inv = r2inv*rinv; + r6inv = r3inv*r3inv; + forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]); + forcelj *= factor_lj; + } else forcelj = 0.0; + + fpair = (forcecoul + forcelj) * r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + if (rsq < cut_coulsq) { + ecoul = prefactor*erfc; + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJClass2CoulPPPMOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJClass2CoulLong::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_class2_coul_pppm_omp.h b/src/USER-OMP/pair_lj_class2_coul_pppm_omp.h new file mode 100644 index 0000000000..8a5def6bcc --- /dev/null +++ b/src/USER-OMP/pair_lj_class2_coul_pppm_omp.h @@ -0,0 +1,54 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/class2/coul/pppm/omp,PairLJClass2CoulPPPMOMP) + +#else + +#ifndef LMP_PAIR_LJ_CLASS2_COUL_PPPM_OMP_H +#define LMP_PAIR_LJ_CLASS2_COUL_PPPM_OMP_H + +#include "pair_lj_class2_coul_long.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJClass2CoulPPPMOMP : public PairLJClass2CoulLong, public ThrOMP { + + public: + PairLJClass2CoulPPPMOMP(class LAMMPS *); + virtual ~PairLJClass2CoulPPPMOMP() {}; + + virtual void init_style(); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); + + class PPPMProxy *kspace; + int nproxy; +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_class2_omp.cpp b/src/USER-OMP/pair_lj_class2_omp.cpp new file mode 100644 index 0000000000..978335d21e --- /dev/null +++ b/src/USER-OMP/pair_lj_class2_omp.cpp @@ -0,0 +1,160 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_class2_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairLJClass2OMP::PairLJClass2OMP(LAMMPS *lmp) : + PairLJClass2(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJClass2OMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairLJClass2OMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r2inv,r3inv,r6inv,forcelj,factor_lj; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + r3inv = sqrt(r6inv); + + forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]); + fpair = factor_lj*forcelj*r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) + - offset[itype][jtype]; + evdwl *= factor_lj; + } + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJClass2OMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJClass2::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_class2_omp.h b/src/USER-OMP/pair_lj_class2_omp.h new file mode 100644 index 0000000000..5acd2cfb39 --- /dev/null +++ b/src/USER-OMP/pair_lj_class2_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/class2/omp,PairLJClass2OMP) + +#else + +#ifndef LMP_PAIR_LJ_CLASS2_OMP_H +#define LMP_PAIR_LJ_CLASS2_OMP_H + +#include "pair_lj_class2.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJClass2OMP : public PairLJClass2, public ThrOMP { + + public: + PairLJClass2OMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_coul_omp.cpp b/src/USER-OMP/pair_lj_coul_omp.cpp new file mode 100644 index 0000000000..21fd9ff8cd --- /dev/null +++ b/src/USER-OMP/pair_lj_coul_omp.cpp @@ -0,0 +1,233 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_coul_omp.h" +#include "atom.h" +#include "comm.h" +#include "math_vector.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + +/* ---------------------------------------------------------------------- */ + +PairLJCoulOMP::PairLJCoulOMP(LAMMPS *lmp) : + PairLJCoul(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; + cut_respa = NULL; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCoulOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJCoulOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + double evdwl,ecoul,fpair; + evdwl = ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + + const double *x0 = x[0]; + double *f0 = f[0], *fi = f0; + + int *ilist = list->ilist; + + // loop over neighbors of my atoms + + int i, ii, j, order1 = ewald_order&(1<<1), order6 = ewald_order&(1<<6); + int *jneigh, *jneighn, typei, typej, ni; + double qi, qri, *cutsqi, *cut_ljsqi, *lj1i, *lj2i, *lj3i, *lj4i, *offseti; + double rsq, r2inv, force_coul, force_lj; + double g2 = g_ewald*g_ewald, g6 = g2*g2*g2, g8 = g6*g2; + vector xi, d; + + for (ii = iifrom; ii < iito; ++ii) { // loop over my atoms + i = ilist[ii]; fi = f0+3*i; + if (order1) qri = (qi = q[i])*qqrd2e; // initialize constants + offseti = offset[typei = type[i]]; + lj1i = lj1[typei]; lj2i = lj2[typei]; lj3i = lj3[typei]; lj4i = lj4[typei]; + cutsqi = cutsq[typei]; cut_ljsqi = cut_ljsq[typei]; + memcpy(xi, x0+(i+(i<<1)), sizeof(vector)); + jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i]; + + for (; jneigh= cutsqi[typej = type[j]]) continue; + r2inv = 1.0/rsq; + + if (order1 && (rsq < cut_coulsq)) { // coulombic + if (!ncoultablebits || rsq <= tabinnersq) { // series real space + register double r = sqrt(rsq), x = g_ewald*r; + register double s = qri*q[j], t = 1.0/(1.0+EWALD_P*x); + if (ni == 0) { + s *= g_ewald*exp(-x*x); + force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s; + if (EFLAG) ecoul = t; + } + else { // special case + r = s*(1.0-special_coul[ni])/r; s *= g_ewald*exp(-x*x); + force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s-r; + if (EFLAG) ecoul = t-r; + } + } // table real space + else { + register union_int_float_t t; + t.f = rsq; + register const int k = (t.i & ncoulmask)>>ncoulshiftbits; + register double f = (rsq-rtable[k])*drtable[k], qiqj = qi*q[j]; + if (ni == 0) { + force_coul = qiqj*(ftable[k]+f*dftable[k]); + if (EFLAG) ecoul = qiqj*(etable[k]+f*detable[k]); + } + else { // special case + t.f = (1.0-special_coul[ni])*(ctable[k]+f*dctable[k]); + force_coul = qiqj*(ftable[k]+f*dftable[k]-t.f); + if (EFLAG) ecoul = qiqj*(etable[k]+f*detable[k]-t.f); + } + } + } + else force_coul = ecoul = 0.0; + + if (rsq < cut_ljsqi[typej]) { // lj + if (order6) { // long-range lj + register double rn = r2inv*r2inv*r2inv; + register double x2 = g2*rsq, a2 = 1.0/x2; + x2 = a2*exp(-x2)*lj4i[typej]; + if (ni == 0) { + force_lj = + (rn*=rn)*lj1i[typej]-g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq; + if (EFLAG) + evdwl = rn*lj3i[typej]-g6*((a2+1.0)*a2+0.5)*x2; + } + else { // special case + register double f = special_lj[ni], t = rn*(1.0-f); + force_lj = f*(rn *= rn)*lj1i[typej]- + g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*lj2i[typej]; + if (EFLAG) + evdwl = f*rn*lj3i[typej]-g6*((a2+1.0)*a2+0.5)*x2+t*lj4i[typej]; + } + } + else { // cut lj + register double rn = r2inv*r2inv*r2inv; + if (ni == 0) { + force_lj = rn*(rn*lj1i[typej]-lj2i[typej]); + if (EFLAG) evdwl = rn*(rn*lj3i[typej]-lj4i[typej])-offseti[typej]; + } + else { // special case + register double f = special_lj[ni]; + force_lj = f*rn*(rn*lj1i[typej]-lj2i[typej]); + if (EFLAG) + evdwl = f * (rn*(rn*lj3i[typej]-lj4i[typej])-offseti[typej]); + } + } + } + else force_lj = evdwl = 0.0; + + fpair = (force_coul+force_lj)*r2inv; + + if (NEWTON_PAIR || j < nlocal) { + register double *fj = f0+(j+(j<<1)), f; + fi[0] += f = d[0]*fpair; fj[0] -= f; + fi[1] += f = d[1]*fpair; fj[1] -= f; + fi[2] += f = d[2]*fpair; fj[2] -= f; + } + else { + fi[0] += d[0]*fpair; + fi[1] += d[1]*fpair; + fi[2] += d[2]*fpair; + } + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,d[0],d[1],d[2],thr); + } + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJCoulOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJCoul::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_coul_omp.h b/src/USER-OMP/pair_lj_coul_omp.h new file mode 100644 index 0000000000..6e988126a3 --- /dev/null +++ b/src/USER-OMP/pair_lj_coul_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/coul/omp,PairLJCoulOMP) + +#else + +#ifndef LMP_PAIR_LJ_COUL_OMP_H +#define LMP_PAIR_LJ_COUL_OMP_H + +#include "pair_lj_coul.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJCoulOMP : public PairLJCoul, public ThrOMP { + + public: + PairLJCoulOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cubic_omp.cpp b/src/USER-OMP/pair_lj_cubic_omp.cpp new file mode 100644 index 0000000000..bf4b2b2bc8 --- /dev/null +++ b/src/USER-OMP/pair_lj_cubic_omp.cpp @@ -0,0 +1,171 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_cubic_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace PairLJCubicConstants; + +/* ---------------------------------------------------------------------- */ + +PairLJCubicOMP::PairLJCubicOMP(LAMMPS *lmp) : + PairLJCubic(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCubicOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairLJCubicOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r2inv,r6inv,forcelj,factor_lj; + double r,t,rmin; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + if (rsq <= cut_inner_sq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + } else { + r = sqrt(rsq); + rmin = sigma[itype][jtype]*RT6TWO; + t = (r - cut_inner[itype][jtype])/rmin; + forcelj = epsilon[itype][jtype]*(-DPHIDS + A3*t*t/2.0)*r/rmin; + } + fpair = factor_lj*forcelj*r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + if (rsq <= cut_inner_sq[itype][jtype]) + evdwl = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + else + evdwl = epsilon[itype][jtype]* + (PHIS + DPHIDS*t - A3*t*t*t/6.0); + + evdwl *= factor_lj; + } + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJCubicOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJCubic::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_cubic_omp.h b/src/USER-OMP/pair_lj_cubic_omp.h new file mode 100644 index 0000000000..78b298bb35 --- /dev/null +++ b/src/USER-OMP/pair_lj_cubic_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/cubic/omp,PairLJCubicOMP) + +#else + +#ifndef LMP_PAIR_LJ_CUBIC_OMP_H +#define LMP_PAIR_LJ_CUBIC_OMP_H + +#include "pair_lj_cubic.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJCubicOMP : public PairLJCubic, public ThrOMP { + + public: + PairLJCubicOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp new file mode 100644 index 0000000000..6c1bdedef4 --- /dev/null +++ b/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp @@ -0,0 +1,181 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_cut_coul_cut_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairLJCutCoulCutOMP::PairLJCutCoulCutOMP(LAMMPS *lmp) : + PairLJCutCoulCut(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCutCoulCutOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJCutCoulCutOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double rsq,rinv,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + + if (rsq < cut_coulsq[itype][jtype]) { + rinv = sqrt(r2inv); + forcecoul = qqrd2e * qtmp*q[j]*rinv; + forcecoul *= factor_coul; + } else forcecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + forcelj *= factor_lj; + } else forcelj = 0.0; + + fpair = (forcecoul + forcelj) * r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + if (rsq < cut_coulsq[itype][jtype]) + ecoul = factor_coul * qqrd2e * qtmp*q[j]*rinv; + else ecoul = 0.0; + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJCutCoulCutOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJCutCoulCut::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_cut_coul_cut_omp.h b/src/USER-OMP/pair_lj_cut_coul_cut_omp.h new file mode 100644 index 0000000000..0d5de5b7b7 --- /dev/null +++ b/src/USER-OMP/pair_lj_cut_coul_cut_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/cut/coul/cut/omp,PairLJCutCoulCutOMP) + +#else + +#ifndef LMP_PAIR_LJ_CUT_COUL_CUT_OMP_H +#define LMP_PAIR_LJ_CUT_COUL_CUT_OMP_H + +#include "pair_lj_cut_coul_cut.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJCutCoulCutOMP : public PairLJCutCoulCut, public ThrOMP { + + public: + PairLJCutCoulCutOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp new file mode 100644 index 0000000000..81a73d78b1 --- /dev/null +++ b/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp @@ -0,0 +1,184 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_cut_coul_debye_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairLJCutCoulDebyeOMP::PairLJCutCoulDebyeOMP(LAMMPS *lmp) : + PairLJCutCoulDebye(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCutCoulDebyeOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJCutCoulDebyeOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + double r,rinv,screening; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + + if (rsq < cut_coulsq[itype][jtype]) { + r = sqrt(rsq); + rinv = 1.0/r; + screening = exp(-kappa*r); + forcecoul = qqrd2e * qtmp*q[j] * screening * (kappa + rinv); + forcecoul *= factor_coul; + } else forcecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + forcelj *= factor_lj; + } else forcelj = 0.0; + + fpair = (forcecoul + forcelj) * r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + if (rsq < cut_coulsq[itype][jtype]) + ecoul = factor_coul * qqrd2e * qtmp*q[j] * rinv * screening; + else ecoul = 0.0; + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJCutCoulDebyeOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJCutCoulDebye::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_cut_coul_debye_omp.h b/src/USER-OMP/pair_lj_cut_coul_debye_omp.h new file mode 100644 index 0000000000..b327fc665f --- /dev/null +++ b/src/USER-OMP/pair_lj_cut_coul_debye_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/cut/coul/debye/omp,PairLJCutCoulDebyeOMP) + +#else + +#ifndef LMP_PAIR_LJ_CUT_COUL_DEBYE_OMP_H +#define LMP_PAIR_LJ_CUT_COUL_DEBYE_OMP_H + +#include "pair_lj_cut_coul_debye.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJCutCoulDebyeOMP : public PairLJCutCoulDebye, public ThrOMP { + + public: + PairLJCutCoulDebyeOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp new file mode 100644 index 0000000000..19639ac731 --- /dev/null +++ b/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp @@ -0,0 +1,219 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_cut_coul_long_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + +/* ---------------------------------------------------------------------- */ + +PairLJCutCoulLongOMP::PairLJCutCoulLongOMP(LAMMPS *lmp) : + PairLJCutCoulLong(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; + cut_respa = NULL; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCutCoulLongOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJCutCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype,itable; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double fraction,table; + double r,rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + double grij,expm2,prefactor,t,erfc; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + + if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) { + r = sqrt(rsq); + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + prefactor = qqrd2e * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + } else { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; + itable >>= ncoulshiftbits; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; + table = ftable[itable] + fraction*dftable[itable]; + forcecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + table = ctable[itable] + fraction*dctable[itable]; + prefactor = qtmp*q[j] * table; + forcecoul -= (1.0-factor_coul)*prefactor; + } + } + } else forcecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + forcelj *= factor_lj; + } else forcelj = 0.0; + + fpair = (forcecoul + forcelj) * r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) + ecoul = prefactor*erfc; + else { + table = etable[itable] + fraction*detable[itable]; + ecoul = qtmp*q[j] * table; + } + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJCutCoulLongOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJCutCoulLong::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_cut_coul_long_omp.h b/src/USER-OMP/pair_lj_cut_coul_long_omp.h new file mode 100644 index 0000000000..cea3271af0 --- /dev/null +++ b/src/USER-OMP/pair_lj_cut_coul_long_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/cut/coul/long/omp,PairLJCutCoulLongOMP) + +#else + +#ifndef LMP_PAIR_LJ_CUT_COUL_LONG_OMP_H +#define LMP_PAIR_LJ_CUT_COUL_LONG_OMP_H + +#include "pair_lj_cut_coul_long.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJCutCoulLongOMP : public PairLJCutCoulLong, public ThrOMP { + + public: + PairLJCutCoulLongOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.cpp new file mode 100644 index 0000000000..640fa836b5 --- /dev/null +++ b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.cpp @@ -0,0 +1,513 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_cut_coul_long_tip4p_omp.h" +#include "atom.h" +#include "domain.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "error.h" +#include "memory.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + +/* ---------------------------------------------------------------------- */ + +PairLJCutCoulLongTIP4POMP::PairLJCutCoulLongTIP4POMP(LAMMPS *lmp) : + PairLJCutCoulLongTIP4P(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; + + // TIP4P cannot compute virial as F dot r + // due to finding bonded H atoms which are not near O atom + + no_virial_fdotr_compute = 1; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCutCoulLongTIP4POMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + const int nlocal = atom->nlocal; + const int nall = nlocal + atom->nghost; + + // reallocate hneigh & newsite if necessary + // initialize hneigh[0] to -1 on steps when reneighboring occurred + // initialize hneigh[2] to 0 every step + + if (atom->nmax > nmax) { + nmax = atom->nmax; + memory->destroy(hneigh); + memory->create(hneigh,nmax,3,"pair:hneigh"); + memory->destroy(newsite); + memory->create(newsite,nmax,3,"pair:newsite"); + } + + // XXX: this could be threaded, too. + int i; + if (neighbor->ago == 0) + for (i = 0; i < nall; i++) hneigh[i][0] = -1; + for (i = 0; i < nall; i++) hneigh[i][2] = 0; + + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (!ncoultablebits) { + if (evflag) { + if (eflag) { + if (vflag) eval<1,1,1,1>(ifrom, ito, thr); + else eval<1,1,1,0>(ifrom, ito, thr); + } else { + if (vflag) eval<1,1,0,1>(ifrom, ito, thr); + else eval<1,1,0,0>(ifrom, ito, thr); + } + } else eval<1,0,0,0>(ifrom, ito, thr); + } else { + if (evflag) { + if (eflag) { + if (vflag) eval<0,1,1,1>(ifrom, ito, thr); + else eval<0,1,1,0>(ifrom, ito, thr); + } else { + if (vflag) eval<0,1,0,1>(ifrom, ito, thr); + else eval<0,1,0,0>(ifrom, ito, thr); + } + } else eval<0,0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJCutCoulLongTIP4POMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype,itable; + int n,vlist[6]; + int iH1,iH2,jH1,jH2; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul; + double fraction,table; + double delxOM,delyOM,delzOM; + double r,rsq,r2inv,r6inv,forcecoul,forcelj,cforce; + double factor_coul,factor_lj; + double grij,expm2,prefactor,t,erfc,ddotf; + double v[6],xH1[3],xH2[3]; + double fdx,fdy,fdz,f1x,f1y,f1z,fOx,fOy,fOz,fHx,fHy,fHz; + const double *x1,*x2; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const int tid = thr->get_tid(); + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + const double cut_coulsqplus = (cut_coul+2.0*qdist) * (cut_coul+2.0*qdist); + + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + + // if atom I = water O, set x1 = offset charge site + // else x1 = x of atom I + // NOTE: to make this part thread safe, we need to + // make sure that the hneigh[][] entries only get + // updated, when all data is in place. worst case, + // some calculation is repeated, but since the results + // will be the same, there is no race condition. + if (itype == typeO) { + if (hneigh[i][0] < 0) { + iH1 = atom->map(atom->tag[i] + 1); + iH2 = atom->map(atom->tag[i] + 2); + if (iH1 == -1 || iH2 == -1) + error->one(FLERR,"TIP4P hydrogen is missing"); + if (atom->type[iH1] != typeH || atom->type[iH2] != typeH) + error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); + compute_newsite_thr(x[i],x[iH1],x[iH2],newsite[i]); + hneigh[i][2] = 1; + hneigh[i][1] = iH2; + hneigh[i][0] = iH1; + } else { + iH1 = hneigh[i][0]; + iH2 = hneigh[i][1]; + if (hneigh[i][2] == 0) { + compute_newsite_thr(x[i],x[iH1],x[iH2],newsite[i]); + hneigh[i][2] = 1; + } + } + x1 = newsite[i]; + } else x1 = x[i]; + + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + // LJ interaction based on true rsq + + if (rsq < cut_ljsq[itype][jtype]) { + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + forcelj *= factor_lj * r2inv; + + fxtmp += delx*forcelj; + fytmp += dely*forcelj; + fztmp += delz*forcelj; + f[j][0] -= delx*forcelj; + f[j][1] -= dely*forcelj; + f[j][2] -= delz*forcelj; + + if (EFLAG) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal, /* newton_pair = */ 1, + evdwl,0.0,forcelj,delx,dely,delz,thr); + } + + // adjust rsq and delxyz for off-site O charge(s) if necessary + // but only if they are within reach + // NOTE: to make this part thread safe, we need to + // make sure that the hneigh[][] entries only get + // updated, when all data is in place. worst case, + // some calculation is repeated, but since the results + // will be the same, there is no race condition. + if (rsq < cut_coulsqplus) { + if (itype == typeO || jtype == typeO) { + + // if atom J = water O, set x2 = offset charge site + // else x2 = x of atom J + + if (jtype == typeO) { + if (hneigh[j][0] < 0) { + jH1 = atom->map(atom->tag[j] + 1); + jH2 = atom->map(atom->tag[j] + 2); + if (jH1 == -1 || jH2 == -1) + error->one(FLERR,"TIP4P hydrogen is missing"); + if (atom->type[jH1] != typeH || atom->type[jH2] != typeH) + error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); + compute_newsite_thr(x[j],x[jH1],x[jH2],newsite[j]); + hneigh[j][2] = 1; + hneigh[j][1] = jH2; + hneigh[j][0] = jH1; + } else { + jH1 = hneigh[j][0]; + jH2 = hneigh[j][1]; + if (hneigh[j][2] == 0) { + compute_newsite_thr(x[j],x[jH1],x[jH2],newsite[j]); + hneigh[j][2] = 1; + } + } + x2 = newsite[j]; + } else x2 = x[j]; + + delx = x1[0] - x2[0]; + dely = x1[1] - x2[1]; + delz = x1[2] - x2[2]; + rsq = delx*delx + dely*dely + delz*delz; + } + + // Coulombic interaction based on modified rsq + + if (rsq < cut_coulsq) { + r2inv = 1 / rsq; + if (CTABLE || rsq <= tabinnersq) { + r = sqrt(rsq); + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + prefactor = qqrd2e * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) { + forcecoul -= (1.0-factor_coul)*prefactor; + } + } else { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; + itable >>= ncoulshiftbits; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; + table = ftable[itable] + fraction*dftable[itable]; + forcecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + table = ctable[itable] + fraction*dctable[itable]; + prefactor = qtmp*q[j] * table; + forcecoul -= (1.0-factor_coul)*prefactor; + } + } + + cforce = forcecoul * r2inv; + + // if i,j are not O atoms, force is applied directly + // if i or j are O atoms, force is on fictitious atom & partitioned + // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999) + // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f + // preserves total force and torque on water molecule + // virial = sum(r x F) where each water's atoms are near xi and xj + // vlist stores 2,4,6 atoms whose forces contribute to virial + + n = 0; + + if (itype != typeO) { + fxtmp += delx * cforce; + fytmp += dely * cforce; + fztmp += delz * cforce; + + if (VFLAG) { + v[0] = x[i][0] * delx * cforce; + v[1] = x[i][1] * dely * cforce; + v[2] = x[i][2] * delz * cforce; + v[3] = x[i][0] * dely * cforce; + v[4] = x[i][0] * delz * cforce; + v[5] = x[i][1] * delz * cforce; + vlist[n++] = i; + } + + } else { + + fdx = delx*cforce; + fdy = dely*cforce; + fdz = delz*cforce; + + delxOM = x[i][0] - x1[0]; + delyOM = x[i][1] - x1[1]; + delzOM = x[i][2] - x1[2]; + + ddotf = (delxOM * fdx + delyOM * fdy + delzOM * fdz) / + (qdist*qdist); + + f1x = alpha * (fdx - ddotf * delxOM); + f1y = alpha * (fdy - ddotf * delyOM); + f1z = alpha * (fdz - ddotf * delzOM); + + fOx = fdx - f1x; + fOy = fdy - f1y; + fOz = fdz - f1z; + + fHx = 0.5 * f1x; + fHy = 0.5 * f1y; + fHz = 0.5 * f1z; + + fxtmp += fOx; + fytmp += fOy; + fztmp += fOz; + + f[iH1][0] += fHx; + f[iH1][1] += fHy; + f[iH1][2] += fHz; + + f[iH2][0] += fHx; + f[iH2][1] += fHy; + f[iH2][2] += fHz; + + if (VFLAG) { + domain->closest_image(x[i],x[iH1],xH1); + domain->closest_image(x[i],x[iH2],xH2); + + v[0] = x[i][0]*fOx + xH1[0]*fHx + xH2[0]*fHx; + v[1] = x[i][1]*fOy + xH1[1]*fHy + xH2[1]*fHy; + v[2] = x[i][2]*fOz + xH1[2]*fHz + xH2[2]*fHz; + v[3] = x[i][0]*fOy + xH1[0]*fHy + xH2[0]*fHy; + v[4] = x[i][0]*fOz + xH1[0]*fHz + xH2[0]*fHz; + v[5] = x[i][1]*fOz + xH1[1]*fHz + xH2[1]*fHz; + + vlist[n++] = i; + vlist[n++] = iH1; + vlist[n++] = iH2; + } + } + + if (jtype != typeO) { + f[j][0] -= delx * cforce; + f[j][1] -= dely * cforce; + f[j][2] -= delz * cforce; + + if (VFLAG) { + v[0] -= x[j][0] * delx * cforce; + v[1] -= x[j][1] * dely * cforce; + v[2] -= x[j][2] * delz * cforce; + v[3] -= x[j][0] * dely * cforce; + v[4] -= x[j][0] * delz * cforce; + v[5] -= x[j][1] * delz * cforce; + vlist[n++] = j; + } + + } else { + + fdx = -delx*cforce; + fdy = -dely*cforce; + fdz = -delz*cforce; + + delxOM = x[j][0] - x2[0]; + delyOM = x[j][1] - x2[1]; + delzOM = x[j][2] - x2[2]; + + ddotf = (delxOM * fdx + delyOM * fdy + delzOM * fdz) / + (qdist*qdist); + + f1x = alpha * (fdx - ddotf * delxOM); + f1y = alpha * (fdy - ddotf * delyOM); + f1z = alpha * (fdz - ddotf * delzOM); + + fOx = fdx - f1x; + fOy = fdy - f1y; + fOz = fdz - f1z; + + fHx = 0.5 * f1x; + fHy = 0.5 * f1y; + fHz = 0.5 * f1z; + + f[j][0] += fOx; + f[j][1] += fOy; + f[j][2] += fOz; + + f[jH1][0] += fHx; + f[jH1][1] += fHy; + f[jH1][2] += fHz; + + f[jH2][0] += fHx; + f[jH2][1] += fHy; + f[jH2][2] += fHz; + + if (VFLAG) { + domain->closest_image(x[j],x[jH1],xH1); + domain->closest_image(x[j],x[jH2],xH2); + + v[0] += x[j][0]*fOx + xH1[0]*fHx + xH2[0]*fHx; + v[1] += x[j][1]*fOy + xH1[1]*fHy + xH2[1]*fHy; + v[2] += x[j][2]*fOz + xH1[2]*fHz + xH2[2]*fHz; + v[3] += x[j][0]*fOy + xH1[0]*fHy + xH2[0]*fHy; + v[4] += x[j][0]*fOz + xH1[0]*fHz + xH2[0]*fHz; + v[5] += x[j][1]*fOz + xH1[1]*fHz + xH2[1]*fHz; + + vlist[n++] = j; + vlist[n++] = jH1; + vlist[n++] = jH2; + } + } + + if (EFLAG) { + if (CTABLE || rsq <= tabinnersq) + ecoul = prefactor*erfc; + else { + table = etable[itable] + fraction*detable[itable]; + ecoul = qtmp*q[j] * table; + } + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + + if (EVFLAG) ev_tally_list_thr(this,n,vlist,ecoul,v,thr); + } + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- + compute position xM of fictitious charge site for O atom and 2 H atoms + return it as xM +------------------------------------------------------------------------- */ + +void PairLJCutCoulLongTIP4POMP::compute_newsite_thr(const double * xO, + const double * xH1, + const double * xH2, + double * xM) const +{ + double delx1 = xH1[0] - xO[0]; + double dely1 = xH1[1] - xO[1]; + double delz1 = xH1[2] - xO[2]; + domain->minimum_image(delx1,dely1,delz1); + + double delx2 = xH2[0] - xO[0]; + double dely2 = xH2[1] - xO[1]; + double delz2 = xH2[2] - xO[2]; + domain->minimum_image(delx2,dely2,delz2); + + const double prefac = alpha * 0.5; + xM[0] = xO[0] + prefac * (delx1 + delx2); + xM[1] = xO[1] + prefac * (dely1 + dely2); + xM[2] = xO[2] + prefac * (delz1 + delz2); +} + +/* ---------------------------------------------------------------------- */ + +double PairLJCutCoulLongTIP4POMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJCutCoulLongTIP4P::memory_usage(); + return bytes; +} diff --git a/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.h b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.h new file mode 100644 index 0000000000..abb9c46ba8 --- /dev/null +++ b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.h @@ -0,0 +1,51 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/cut/coul/long/tip4p/omp,PairLJCutCoulLongTIP4POMP) + +#else + +#ifndef LMP_PAIR_LJ_CUT_COUL_LONG_TIP4P_OMP_H +#define LMP_PAIR_LJ_CUT_COUL_LONG_TIP4P_OMP_H + +#include "pair_lj_cut_coul_long_tip4p.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJCutCoulLongTIP4POMP : public PairLJCutCoulLongTIP4P, public ThrOMP { + + public: + PairLJCutCoulLongTIP4POMP(class LAMMPS *); + virtual ~PairLJCutCoulLongTIP4POMP() {}; + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); + void compute_newsite_thr(const double *, const double *, + const double *, double *) const; +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_coul_pppm_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_pppm_omp.cpp new file mode 100644 index 0000000000..093f8d0216 --- /dev/null +++ b/src/USER-OMP/pair_lj_cut_coul_pppm_omp.cpp @@ -0,0 +1,246 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_cut_coul_pppm_omp.h" +#include "pppm_proxy.h" +#include "atom.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "update.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + +/* ---------------------------------------------------------------------- */ + +PairLJCutCoulPPPMOMP::PairLJCutCoulPPPMOMP(LAMMPS *lmp) : + PairLJCutCoulLong(lmp), ThrOMP(lmp, THR_PAIR|THR_PROXY) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; + nproxy=1; + + kspace = NULL; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCutCoulPPPMOMP::init_style() +{ + if (comm->nthreads < 2) + error->all(FLERR,"need at least two threads per MPI task for this pair style"); + + if (strcmp(force->kspace_style,"pppm/proxy") != 0) + error->all(FLERR,"kspace style pppm/proxy is required with this pair style"); + + kspace = static_cast(force->kspace); + + PairLJCutCoulLong::init_style(); +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCutCoulPPPMOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads, nproxy); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + // thread id 0 runs pppm, the rest the pair style + if (tid < nproxy) { + kspace->compute_proxy(eflag,vflag); + } else { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + } + + sync_threads(); + reduce_thr(this, eflag, vflag, thr, nproxy); + } // end of omp parallel region +} +/* ---------------------------------------------------------------------- */ + +template +void PairLJCutCoulPPPMOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype,itable; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double fraction,table; + double r,rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + double grij,expm2,prefactor,t,erfc; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + + if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) { + r = sqrt(rsq); + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + prefactor = qqrd2e * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + } else { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; + itable >>= ncoulshiftbits; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; + table = ftable[itable] + fraction*dftable[itable]; + forcecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + table = ctable[itable] + fraction*dctable[itable]; + prefactor = qtmp*q[j] * table; + forcecoul -= (1.0-factor_coul)*prefactor; + } + } + } else forcecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + forcelj *= factor_lj; + } else forcelj = 0.0; + + fpair = (forcecoul + forcelj) * r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) + ecoul = prefactor*erfc; + else { + table = etable[itable] + fraction*detable[itable]; + ecoul = qtmp*q[j] * table; + } + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJCutCoulPPPMOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJCutCoulLong::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_cut_coul_pppm_omp.h b/src/USER-OMP/pair_lj_cut_coul_pppm_omp.h new file mode 100644 index 0000000000..092996a21f --- /dev/null +++ b/src/USER-OMP/pair_lj_cut_coul_pppm_omp.h @@ -0,0 +1,54 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/cut/coul/pppm/omp,PairLJCutCoulPPPMOMP) + +#else + +#ifndef LMP_PAIR_LJ_CUT_COUL_PPPM_OMP_H +#define LMP_PAIR_LJ_CUT_COUL_PPPM_OMP_H + +#include "pair_lj_cut_coul_long.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJCutCoulPPPMOMP : public PairLJCutCoulLong, public ThrOMP { + + public: + PairLJCutCoulPPPMOMP(class LAMMPS *); + virtual ~PairLJCutCoulPPPMOMP() {}; + + virtual void init_style(); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); + + class PPPMProxy *kspace; + int nproxy; +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_coul_pppm_tip4p_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_pppm_tip4p_omp.cpp new file mode 100644 index 0000000000..ebdd8eeab7 --- /dev/null +++ b/src/USER-OMP/pair_lj_cut_coul_pppm_tip4p_omp.cpp @@ -0,0 +1,542 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_cut_coul_pppm_tip4p_omp.h" +#include "pppm_tip4p_proxy.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "error.h" +#include "force.h" +#include "memory.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "update.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + +/* ---------------------------------------------------------------------- */ + +PairLJCutCoulPPPMTIP4POMP::PairLJCutCoulPPPMTIP4POMP(LAMMPS *lmp) : + PairLJCutCoulLongTIP4P(lmp), ThrOMP(lmp, THR_PAIR|THR_PROXY) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; + nproxy=1; + + kspace = NULL; + + // TIP4P cannot compute virial as F dot r + // due to finding bonded H atoms which are not near O atom + + no_virial_fdotr_compute = 1; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCutCoulPPPMTIP4POMP::init_style() +{ + if (comm->nthreads < 2) + error->all(FLERR,"need at least two threads per MPI task for this pair style"); + + if (strcmp(force->kspace_style,"pppm/tip4p/proxy") != 0) + error->all(FLERR,"kspace style pppm/tip4p/proxy is required with this pair style"); + + kspace = static_cast(force->kspace); + + PairLJCutCoulLongTIP4P::init_style(); +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCutCoulPPPMTIP4POMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + const int nlocal = atom->nlocal; + const int nall = nlocal + atom->nghost; + + // reallocate hneigh & newsite if necessary + // initialize hneigh[0] to -1 on steps when reneighboring occurred + // initialize hneigh[2] to 0 every step + + if (atom->nmax > nmax) { + nmax = atom->nmax; + memory->destroy(hneigh); + memory->create(hneigh,nmax,3,"pair:hneigh"); + memory->destroy(newsite); + memory->create(newsite,nmax,3,"pair:newsite"); + } + + // XXX: this could be threaded, too. + int i; + if (neighbor->ago == 0) + for (i = 0; i < nall; i++) hneigh[i][0] = -1; + for (i = 0; i < nall; i++) hneigh[i][2] = 0; + + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads, nproxy); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + // thread id 0 runs pppm, the rest the pair style + if (tid < nproxy) { + kspace->compute_proxy(eflag,vflag); + } else { + if (!ncoultablebits) { + if (evflag) { + if (eflag) { + if (vflag) eval<1,1,1,1>(ifrom, ito, thr); + else eval<1,1,1,0>(ifrom, ito, thr); + } else { + if (vflag) eval<1,1,0,1>(ifrom, ito, thr); + else eval<1,1,0,0>(ifrom, ito, thr); + } + } else eval<1,0,0,0>(ifrom, ito, thr); + } else { + if (evflag) { + if (eflag) { + if (vflag) eval<0,1,1,1>(ifrom, ito, thr); + else eval<0,1,1,0>(ifrom, ito, thr); + } else { + if (vflag) eval<0,1,0,1>(ifrom, ito, thr); + else eval<0,1,0,0>(ifrom, ito, thr); + } + } else eval<0,0,0,0>(ifrom, ito, thr); + } + } + + sync_threads(); + reduce_thr(this, eflag, vflag, thr, nproxy); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJCutCoulPPPMTIP4POMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype,itable; + int n,vlist[6]; + int iH1,iH2,jH1,jH2; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul; + double fraction,table; + double delxOM,delyOM,delzOM; + double r,rsq,r2inv,r6inv,forcecoul,forcelj,cforce; + double factor_coul,factor_lj; + double grij,expm2,prefactor,t,erfc,ddotf; + double v[6],xH1[3],xH2[3]; + double fdx,fdy,fdz,f1x,f1y,f1z,fOx,fOy,fOz,fHx,fHy,fHz; + const double *x1,*x2; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const int tid = thr->get_tid(); + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + const double cut_coulsqplus = (cut_coul+2.0*qdist) * (cut_coul+2.0*qdist); + + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + + // if atom I = water O, set x1 = offset charge site + // else x1 = x of atom I + // NOTE: to make this part thread safe, we need to + // make sure that the hneigh[][] entries only get + // updated, when all data is in place. worst case, + // some calculation is repeated, but since the results + // will be the same, there is no race condition. + if (itype == typeO) { + if (hneigh[i][0] < 0) { + iH1 = atom->map(atom->tag[i] + 1); + iH2 = atom->map(atom->tag[i] + 2); + if (iH1 == -1 || iH2 == -1) + error->one(FLERR,"TIP4P hydrogen is missing"); + if (atom->type[iH1] != typeH || atom->type[iH2] != typeH) + error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); + compute_newsite_thr(x[i],x[iH1],x[iH2],newsite[i]); + hneigh[i][2] = 1; + hneigh[i][1] = iH2; + hneigh[i][0] = iH1; + } else { + iH1 = hneigh[i][0]; + iH2 = hneigh[i][1]; + if (hneigh[i][2] == 0) { + compute_newsite_thr(x[i],x[iH1],x[iH2],newsite[i]); + hneigh[i][2] = 1; + } + } + x1 = newsite[i]; + } else x1 = x[i]; + + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + // LJ interaction based on true rsq + + if (rsq < cut_ljsq[itype][jtype]) { + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + forcelj *= factor_lj * r2inv; + + fxtmp += delx*forcelj; + fytmp += dely*forcelj; + fztmp += delz*forcelj; + f[j][0] -= delx*forcelj; + f[j][1] -= dely*forcelj; + f[j][2] -= delz*forcelj; + + if (EFLAG) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal, /* newton_pair = */ 1, + evdwl,0.0,forcelj,delx,dely,delz,thr); + } + + // adjust rsq and delxyz for off-site O charge(s) if necessary + // but only if they are within reach + // NOTE: to make this part thread safe, we need to + // make sure that the hneigh[][] entries only get + // updated, when all data is in place. worst case, + // some calculation is repeated, but since the results + // will be the same, there is no race condition. + if (rsq < cut_coulsqplus) { + if (itype == typeO || jtype == typeO) { + + // if atom J = water O, set x2 = offset charge site + // else x2 = x of atom J + + if (jtype == typeO) { + if (hneigh[j][0] < 0) { + jH1 = atom->map(atom->tag[j] + 1); + jH2 = atom->map(atom->tag[j] + 2); + if (jH1 == -1 || jH2 == -1) + error->one(FLERR,"TIP4P hydrogen is missing"); + if (atom->type[jH1] != typeH || atom->type[jH2] != typeH) + error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); + compute_newsite_thr(x[j],x[jH1],x[jH2],newsite[j]); + hneigh[j][2] = 1; + hneigh[j][1] = jH2; + hneigh[j][0] = jH1; + } else { + jH1 = hneigh[j][0]; + jH2 = hneigh[j][1]; + if (hneigh[j][2] == 0) { + compute_newsite_thr(x[j],x[jH1],x[jH2],newsite[j]); + hneigh[j][2] = 1; + } + } + x2 = newsite[j]; + } else x2 = x[j]; + + delx = x1[0] - x2[0]; + dely = x1[1] - x2[1]; + delz = x1[2] - x2[2]; + rsq = delx*delx + dely*dely + delz*delz; + } + + // Coulombic interaction based on modified rsq + + if (rsq < cut_coulsq) { + r2inv = 1 / rsq; + if (CTABLE || rsq <= tabinnersq) { + r = sqrt(rsq); + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + prefactor = qqrd2e * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) { + forcecoul -= (1.0-factor_coul)*prefactor; + } + } else { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; + itable >>= ncoulshiftbits; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; + table = ftable[itable] + fraction*dftable[itable]; + forcecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + table = ctable[itable] + fraction*dctable[itable]; + prefactor = qtmp*q[j] * table; + forcecoul -= (1.0-factor_coul)*prefactor; + } + } + + cforce = forcecoul * r2inv; + + // if i,j are not O atoms, force is applied directly + // if i or j are O atoms, force is on fictitious atom & partitioned + // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999) + // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f + // preserves total force and torque on water molecule + // virial = sum(r x F) where each water's atoms are near xi and xj + // vlist stores 2,4,6 atoms whose forces contribute to virial + + n = 0; + + if (itype != typeO) { + fxtmp += delx * cforce; + fytmp += dely * cforce; + fztmp += delz * cforce; + + if (VFLAG) { + v[0] = x[i][0] * delx * cforce; + v[1] = x[i][1] * dely * cforce; + v[2] = x[i][2] * delz * cforce; + v[3] = x[i][0] * dely * cforce; + v[4] = x[i][0] * delz * cforce; + v[5] = x[i][1] * delz * cforce; + vlist[n++] = i; + } + + } else { + + fdx = delx*cforce; + fdy = dely*cforce; + fdz = delz*cforce; + + delxOM = x[i][0] - x1[0]; + delyOM = x[i][1] - x1[1]; + delzOM = x[i][2] - x1[2]; + + ddotf = (delxOM * fdx + delyOM * fdy + delzOM * fdz) / + (qdist*qdist); + + f1x = alpha * (fdx - ddotf * delxOM); + f1y = alpha * (fdy - ddotf * delyOM); + f1z = alpha * (fdz - ddotf * delzOM); + + fOx = fdx - f1x; + fOy = fdy - f1y; + fOz = fdz - f1z; + + fHx = 0.5 * f1x; + fHy = 0.5 * f1y; + fHz = 0.5 * f1z; + + fxtmp += fOx; + fytmp += fOy; + fztmp += fOz; + + f[iH1][0] += fHx; + f[iH1][1] += fHy; + f[iH1][2] += fHz; + + f[iH2][0] += fHx; + f[iH2][1] += fHy; + f[iH2][2] += fHz; + + if (VFLAG) { + domain->closest_image(x[i],x[iH1],xH1); + domain->closest_image(x[i],x[iH2],xH2); + + v[0] = x[i][0]*fOx + xH1[0]*fHx + xH2[0]*fHx; + v[1] = x[i][1]*fOy + xH1[1]*fHy + xH2[1]*fHy; + v[2] = x[i][2]*fOz + xH1[2]*fHz + xH2[2]*fHz; + v[3] = x[i][0]*fOy + xH1[0]*fHy + xH2[0]*fHy; + v[4] = x[i][0]*fOz + xH1[0]*fHz + xH2[0]*fHz; + v[5] = x[i][1]*fOz + xH1[1]*fHz + xH2[1]*fHz; + + vlist[n++] = i; + vlist[n++] = iH1; + vlist[n++] = iH2; + } + } + + if (jtype != typeO) { + f[j][0] -= delx * cforce; + f[j][1] -= dely * cforce; + f[j][2] -= delz * cforce; + + if (VFLAG) { + v[0] -= x[j][0] * delx * cforce; + v[1] -= x[j][1] * dely * cforce; + v[2] -= x[j][2] * delz * cforce; + v[3] -= x[j][0] * dely * cforce; + v[4] -= x[j][0] * delz * cforce; + v[5] -= x[j][1] * delz * cforce; + vlist[n++] = j; + } + + } else { + + fdx = -delx*cforce; + fdy = -dely*cforce; + fdz = -delz*cforce; + + delxOM = x[j][0] - x2[0]; + delyOM = x[j][1] - x2[1]; + delzOM = x[j][2] - x2[2]; + + ddotf = (delxOM * fdx + delyOM * fdy + delzOM * fdz) / + (qdist*qdist); + + f1x = alpha * (fdx - ddotf * delxOM); + f1y = alpha * (fdy - ddotf * delyOM); + f1z = alpha * (fdz - ddotf * delzOM); + + fOx = fdx - f1x; + fOy = fdy - f1y; + fOz = fdz - f1z; + + fHx = 0.5 * f1x; + fHy = 0.5 * f1y; + fHz = 0.5 * f1z; + + f[j][0] += fOx; + f[j][1] += fOy; + f[j][2] += fOz; + + f[jH1][0] += fHx; + f[jH1][1] += fHy; + f[jH1][2] += fHz; + + f[jH2][0] += fHx; + f[jH2][1] += fHy; + f[jH2][2] += fHz; + + if (VFLAG) { + domain->closest_image(x[j],x[jH1],xH1); + domain->closest_image(x[j],x[jH2],xH2); + + v[0] += x[j][0]*fOx + xH1[0]*fHx + xH2[0]*fHx; + v[1] += x[j][1]*fOy + xH1[1]*fHy + xH2[1]*fHy; + v[2] += x[j][2]*fOz + xH1[2]*fHz + xH2[2]*fHz; + v[3] += x[j][0]*fOy + xH1[0]*fHy + xH2[0]*fHy; + v[4] += x[j][0]*fOz + xH1[0]*fHz + xH2[0]*fHz; + v[5] += x[j][1]*fOz + xH1[1]*fHz + xH2[1]*fHz; + + vlist[n++] = j; + vlist[n++] = jH1; + vlist[n++] = jH2; + } + } + + if (EFLAG) { + if (CTABLE || rsq <= tabinnersq) + ecoul = prefactor*erfc; + else { + table = etable[itable] + fraction*detable[itable]; + ecoul = qtmp*q[j] * table; + } + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + + if (EVFLAG) ev_tally_list_thr(this,n,vlist,ecoul,v,thr); + } + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- + compute position xM of fictitious charge site for O atom and 2 H atoms + return it as xM +------------------------------------------------------------------------- */ + +void PairLJCutCoulPPPMTIP4POMP::compute_newsite_thr(const double * xO, + const double * xH1, + const double * xH2, + double * xM) const +{ + double delx1 = xH1[0] - xO[0]; + double dely1 = xH1[1] - xO[1]; + double delz1 = xH1[2] - xO[2]; + domain->minimum_image(delx1,dely1,delz1); + + double delx2 = xH2[0] - xO[0]; + double dely2 = xH2[1] - xO[1]; + double delz2 = xH2[2] - xO[2]; + domain->minimum_image(delx2,dely2,delz2); + + const double prefac = alpha * 0.5; + xM[0] = xO[0] + prefac * (delx1 + delx2); + xM[1] = xO[1] + prefac * (dely1 + dely2); + xM[2] = xO[2] + prefac * (delz1 + delz2); +} + +/* ---------------------------------------------------------------------- */ + +double PairLJCutCoulPPPMTIP4POMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJCutCoulLongTIP4P::memory_usage(); + return bytes; +} diff --git a/src/USER-OMP/pair_lj_cut_coul_pppm_tip4p_omp.h b/src/USER-OMP/pair_lj_cut_coul_pppm_tip4p_omp.h new file mode 100644 index 0000000000..b149020925 --- /dev/null +++ b/src/USER-OMP/pair_lj_cut_coul_pppm_tip4p_omp.h @@ -0,0 +1,56 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/cut/coul/pppm/tip4p/omp,PairLJCutCoulPPPMTIP4POMP) + +#else + +#ifndef LMP_PAIR_LJ_CUT_COUL_PPPM_TIP4P_OMP_H +#define LMP_PAIR_LJ_CUT_COUL_PPPM_TIP4P_OMP_H + +#include "pair_lj_cut_coul_long_tip4p.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJCutCoulPPPMTIP4POMP : public PairLJCutCoulLongTIP4P, public ThrOMP { + + public: + PairLJCutCoulPPPMTIP4POMP(class LAMMPS *); + virtual ~PairLJCutCoulPPPMTIP4POMP() {}; + + virtual void init_style(); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); + void compute_newsite_thr(const double *, const double *, + const double *, double *) const; + + class PPPMTIP4PProxy *kspace; + int nproxy; +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_omp.cpp new file mode 100644 index 0000000000..83e5244add --- /dev/null +++ b/src/USER-OMP/pair_lj_cut_omp.cpp @@ -0,0 +1,159 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_cut_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) : + PairLJCut(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; + cut_respa = NULL; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCutOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairLJCutOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r2inv,r6inv,forcelj,factor_lj; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + fpair = factor_lj*forcelj*r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) + - offset[itype][jtype]; + evdwl *= factor_lj; + } + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJCutOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJCut::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_cut_omp.h b/src/USER-OMP/pair_lj_cut_omp.h new file mode 100644 index 0000000000..419b0693b1 --- /dev/null +++ b/src/USER-OMP/pair_lj_cut_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/cut/omp,PairLJCutOMP) + +#else + +#ifndef LMP_PAIR_LJ_CUT_OMP_H +#define LMP_PAIR_LJ_CUT_OMP_H + +#include "pair_lj_cut.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJCutOMP : public PairLJCut, public ThrOMP { + + public: + PairLJCutOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_expand_omp.cpp b/src/USER-OMP/pair_lj_expand_omp.cpp new file mode 100644 index 0000000000..936c55c827 --- /dev/null +++ b/src/USER-OMP/pair_lj_expand_omp.cpp @@ -0,0 +1,162 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_expand_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairLJExpandOMP::PairLJExpandOMP(LAMMPS *lmp) : + PairLJExpand(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJExpandOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairLJExpandOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r2inv,r6inv,forcelj,factor_lj; + double r,rshift,rshiftsq; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r = sqrt(rsq); + rshift = r - shift[itype][jtype]; + rshiftsq = rshift*rshift; + r2inv = 1.0/rshiftsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + fpair = factor_lj*forcelj/rshift/r; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) + - offset[itype][jtype]; + evdwl *= factor_lj; + } + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJExpandOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJExpand::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_expand_omp.h b/src/USER-OMP/pair_lj_expand_omp.h new file mode 100644 index 0000000000..8ca3bd41b0 --- /dev/null +++ b/src/USER-OMP/pair_lj_expand_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/expand/omp,PairLJExpandOMP) + +#else + +#ifndef LMP_PAIR_LJ_EXPAND_OMP_H +#define LMP_PAIR_LJ_EXPAND_OMP_H + +#include "pair_lj_expand.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJExpandOMP : public PairLJExpand, public ThrOMP { + + public: + PairLJExpandOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp new file mode 100644 index 0000000000..4df4d334e7 --- /dev/null +++ b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp @@ -0,0 +1,208 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_gromacs_coul_gromacs_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairLJGromacsCoulGromacsOMP::PairLJGromacsCoulGromacsOMP(LAMMPS *lmp) : + PairLJGromacsCoulGromacs(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJGromacsCoulGromacsOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJGromacsCoulGromacsOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + double r,tlj,tc,fswitch,fswitchcoul,eswitch,ecoulswitch; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + + // skip if qi or qj = 0.0 since this potential may be used as + // coarse-grain model with many uncharged atoms + + if (rsq < cut_coulsq && qtmp != 0.0 && q[j] != 0.0) { + forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv); + if (rsq > cut_coul_innersq) { + r = sqrt(rsq); + tc = r - cut_coul_inner; + fswitchcoul = qqrd2e * qtmp*q[j]*r*tc*tc*(coulsw1 + coulsw2*tc); + forcecoul += fswitchcoul; + } + forcecoul *= factor_coul; + } else forcecoul = 0.0; + + if (rsq < cut_ljsq) { + r6inv = r2inv*r2inv*r2inv; + jtype = type[j]; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + if (rsq > cut_lj_innersq) { + r = sqrt(rsq); + tlj = r - cut_lj_inner; + fswitch = r*tlj*tlj*(ljsw1[itype][jtype] + + ljsw2[itype][jtype]*tlj); + forcelj += fswitch; + } + forcelj *= factor_lj; + } else forcelj = 0.0; + + fpair = (forcecoul + forcelj) * r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + if (rsq < cut_coulsq) { + ecoul = qqrd2e * qtmp*q[j] * (sqrt(r2inv) - coulsw5); + if (rsq > cut_coul_innersq) { + ecoulswitch = tc*tc*tc * (coulsw3 + coulsw4*tc); + ecoul += qqrd2e*qtmp*q[j]*ecoulswitch; + } + ecoul *= factor_coul; + } else ecoul = 0.0; + if (rsq < cut_ljsq) { + evdwl = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + evdwl += ljsw5[itype][jtype]; + if (rsq > cut_lj_innersq) { + eswitch = tlj*tlj*tlj * + (ljsw3[itype][jtype] + ljsw4[itype][jtype]*tlj); + evdwl += eswitch; + } + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJGromacsCoulGromacsOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJGromacsCoulGromacs::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.h b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.h new file mode 100644 index 0000000000..353e4d673d --- /dev/null +++ b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/gromacs/coul/gromacs/omp,PairLJGromacsCoulGromacsOMP) + +#else + +#ifndef LMP_PAIR_LJ_GROMACS_COUL_GROMACS_OMP_H +#define LMP_PAIR_LJ_GROMACS_COUL_GROMACS_OMP_H + +#include "pair_lj_gromacs_coul_gromacs.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJGromacsCoulGromacsOMP : public PairLJGromacsCoulGromacs, public ThrOMP { + + public: + PairLJGromacsCoulGromacsOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_gromacs_omp.cpp b/src/USER-OMP/pair_lj_gromacs_omp.cpp new file mode 100644 index 0000000000..ce97ad2d1d --- /dev/null +++ b/src/USER-OMP/pair_lj_gromacs_omp.cpp @@ -0,0 +1,170 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_gromacs_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairLJGromacsOMP::PairLJGromacsOMP(LAMMPS *lmp) : + PairLJGromacs(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJGromacsOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairLJGromacsOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r2inv,r6inv,forcelj,factor_lj; + double r,t,fswitch,eswitch; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + if (rsq > cut_inner_sq[itype][jtype]) { + r = sqrt(rsq); + t = r - cut_inner[itype][jtype]; + fswitch = r*t*t*(ljsw1[itype][jtype] + ljsw2[itype][jtype]*t); + forcelj += fswitch; + } + + fpair = factor_lj*forcelj*r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + evdwl = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + evdwl += ljsw5[itype][jtype]; + if (rsq > cut_inner_sq[itype][jtype]) { + eswitch = t*t*t*(ljsw3[itype][jtype] + ljsw4[itype][jtype]*t); + evdwl += eswitch; + } + evdwl *= factor_lj; + } + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJGromacsOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJGromacs::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_gromacs_omp.h b/src/USER-OMP/pair_lj_gromacs_omp.h new file mode 100644 index 0000000000..3669d1b219 --- /dev/null +++ b/src/USER-OMP/pair_lj_gromacs_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/gromacs/omp,PairLJGromacsOMP) + +#else + +#ifndef LMP_PAIR_LJ_GROMACS_OMP_H +#define LMP_PAIR_LJ_GROMACS_OMP_H + +#include "pair_lj_gromacs.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJGromacsOMP : public PairLJGromacs, public ThrOMP { + + public: + PairLJGromacsOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp b/src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp new file mode 100644 index 0000000000..aa4e432a78 --- /dev/null +++ b/src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp @@ -0,0 +1,235 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_sdk_coul_long_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "lj_sdk_common.h" + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace LJSDKParms; +/* ---------------------------------------------------------------------- */ + +PairLJSDKCoulLongOMP::PairLJSDKCoulLongOMP(LAMMPS *lmp) : + PairLJSDKCoulLong(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJSDKCoulLongOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval_thr<1,1,1>(ifrom, ito, thr); + else eval_thr<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval_thr<1,0,1>(ifrom, ito, thr); + else eval_thr<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval_thr<0,0,1>(ifrom, ito, thr); + else eval_thr<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJSDKCoulLongOMP::eval_thr(int iifrom, int iito, ThrData * const thr) +{ + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + + const int * const ilist = list->ilist; + const int * const numneigh = list->numneigh; + const int * const * const firstneigh = list->firstneigh; + const int nlocal = atom->nlocal; + + // loop over neighbors of my atoms + + for (int ii = iifrom; ii < iito; ++ii) { + + const int i = ilist[ii]; + const int itype = type[i]; + const double qtmp = q[i]; + const double xtmp = x[i][0]; + const double ytmp = x[i][1]; + const double ztmp = x[i][2]; + double fxtmp,fytmp,fztmp; + fxtmp=fytmp=fztmp=0.0; + + const int * const jlist = firstneigh[i]; + const int jnum = numneigh[i]; + + for (int jj = 0; jj < jnum; jj++) { + double forcecoul, forcelj, evdwl, ecoul; + forcecoul = forcelj = evdwl = ecoul = 0.0; + + const int sbindex = sbmask(jlist[jj]); + const int j = jlist[jj] & NEIGHMASK; + + const double delx = xtmp - x[j][0]; + const double dely = ytmp - x[j][1]; + const double delz = ztmp - x[j][2]; + const double rsq = delx*delx + dely*dely + delz*delz; + const int jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + const double r2inv = 1.0/rsq; + const int ljt = lj_type[itype][jtype]; + + if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) { + const double A1 = 0.254829592; + const double A2 = -0.284496736; + const double A3 = 1.421413741; + const double A4 = -1.453152027; + const double A5 = 1.061405429; + const double EWALD_F = 1.12837917; + const double INV_EWALD_P = 1.0/0.3275911; + + const double r = sqrt(rsq); + const double grij = g_ewald * r; + const double expm2 = exp(-grij*grij); + const double t = INV_EWALD_P / (INV_EWALD_P + grij); + const double erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + const double prefactor = qqrd2e * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (EFLAG) ecoul = prefactor*erfc; + if (sbindex) { + const double adjust = (1.0-special_coul[sbindex])*prefactor; + forcecoul -= adjust; + if (EFLAG) ecoul -= adjust; + } + } else { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits; + const double fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; + const double table = ftable[itable] + fraction*dftable[itable]; + forcecoul = qtmp*q[j] * table; + if (EFLAG) ecoul = qtmp*q[j] * (etable[itable] + fraction*detable[itable]); + if (sbindex) { + const double table2 = ctable[itable] + fraction*dctable[itable]; + const double prefactor = qtmp*q[j] * table2; + const double adjust = (1.0-special_coul[sbindex])*prefactor; + forcecoul -= adjust; + if (EFLAG) ecoul -= adjust; + } + } + } + + if (rsq < cut_ljsq[itype][jtype]) { + + if (ljt == LJ12_4) { + const double r4inv=r2inv*r2inv; + forcelj = r4inv*(lj1[itype][jtype]*r4inv*r4inv + - lj2[itype][jtype]); + + if (EFLAG) + evdwl = r4inv*(lj3[itype][jtype]*r4inv*r4inv + - lj4[itype][jtype]) - offset[itype][jtype]; + + } else if (ljt == LJ9_6) { + const double r3inv = r2inv*sqrt(r2inv); + const double r6inv = r3inv*r3inv; + forcelj = r6inv*(lj1[itype][jtype]*r3inv + - lj2[itype][jtype]); + if (EFLAG) + evdwl = r6inv*(lj3[itype][jtype]*r3inv + - lj4[itype][jtype]) - offset[itype][jtype]; + + } else if (ljt == LJ12_6) { + const double r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv*(lj1[itype][jtype]*r6inv + - lj2[itype][jtype]); + if (EFLAG) + evdwl = r6inv*(lj3[itype][jtype]*r6inv + - lj4[itype][jtype]) - offset[itype][jtype]; + } + + if (sbindex) { + const double factor_lj = special_lj[sbindex]; + forcelj *= factor_lj; + if (EFLAG) evdwl *= factor_lj; + } + + } + + const double fpair = (forcecoul + forcelj) * r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJSDKCoulLongOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJSDKCoulLong::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_sdk_coul_long_omp.h b/src/USER-OMP/pair_lj_sdk_coul_long_omp.h new file mode 100644 index 0000000000..a615efb507 --- /dev/null +++ b/src/USER-OMP/pair_lj_sdk_coul_long_omp.h @@ -0,0 +1,49 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/sdk/coul/long/omp,PairLJSDKCoulLongOMP) +PairStyle(cg/cmm/coul/long/omp,PairLJSDKCoulLongOMP) + +#else + +#ifndef LMP_PAIR_LJ_SDK_COUL_LONG_OMP_H +#define LMP_PAIR_LJ_SDK_COUL_LONG_OMP_H + +#include "pair_lj_sdk_coul_long.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJSDKCoulLongOMP : public PairLJSDKCoulLong, public ThrOMP { + + public: + PairLJSDKCoulLongOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval_thr(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_sdk_omp.cpp b/src/USER-OMP/pair_lj_sdk_omp.cpp new file mode 100644 index 0000000000..63b1a8c171 --- /dev/null +++ b/src/USER-OMP/pair_lj_sdk_omp.cpp @@ -0,0 +1,186 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) + This style is a simplified re-implementation of the CG/CMM pair style +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_sdk_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "lj_sdk_common.h" + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace LJSDKParms; + +/* ---------------------------------------------------------------------- */ + +PairLJSDKOMP::PairLJSDKOMP(LAMMPS *lmp) : + PairLJSDK(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJSDKOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval_thr<1,1,1>(ifrom, ito, thr); + else eval_thr<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval_thr<1,0,1>(ifrom, ito, thr); + else eval_thr<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval_thr<0,0,1>(ifrom, ito, thr); + else eval_thr<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJSDKOMP::eval_thr(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r2inv,forcelj,factor_lj; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; + double fxtmp,fytmp,fztmp; + + const int * const ilist = list->ilist; + const int * const numneigh = list->numneigh; + const int * const * const firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + fxtmp=fytmp=fztmp=0.0; + + const int itype = type[i]; + const int * const jlist = firstneigh[i]; + const int jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + const int ljt = lj_type[itype][jtype]; + + if (ljt == LJ12_4) { + const double r4inv=r2inv*r2inv; + forcelj = r4inv*(lj1[itype][jtype]*r4inv*r4inv + - lj2[itype][jtype]); + + if (EFLAG) + evdwl = r4inv*(lj3[itype][jtype]*r4inv*r4inv + - lj4[itype][jtype]) - offset[itype][jtype]; + + } else if (ljt == LJ9_6) { + const double r3inv = r2inv*sqrt(r2inv); + const double r6inv = r3inv*r3inv; + forcelj = r6inv*(lj1[itype][jtype]*r3inv + - lj2[itype][jtype]); + if (EFLAG) + evdwl = r6inv*(lj3[itype][jtype]*r3inv + - lj4[itype][jtype]) - offset[itype][jtype]; + + } else if (ljt == LJ12_6) { + const double r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv*(lj1[itype][jtype]*r6inv + - lj2[itype][jtype]); + if (EFLAG) + evdwl = r6inv*(lj3[itype][jtype]*r6inv + - lj4[itype][jtype]) - offset[itype][jtype]; + } else continue; + + fpair = factor_lj*forcelj*r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) evdwl *= factor_lj; + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJSDKOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJSDK::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_sdk_omp.h b/src/USER-OMP/pair_lj_sdk_omp.h new file mode 100644 index 0000000000..c3837fb683 --- /dev/null +++ b/src/USER-OMP/pair_lj_sdk_omp.h @@ -0,0 +1,49 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/sdk/omp,PairLJSDKOMP) +PairStyle(cg/cmm/omp,PairLJSDKOMP) + +#else + +#ifndef LMP_PAIR_LJ_SDK_OMP_H +#define LMP_PAIR_LJ_SDK_OMP_H + +#include "pair_lj_sdk.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJSDKOMP : public PairLJSDK, public ThrOMP { + + public: + PairLJSDKOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval_thr(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_sf_omp.cpp b/src/USER-OMP/pair_lj_sf_omp.cpp new file mode 100644 index 0000000000..5c14e29131 --- /dev/null +++ b/src/USER-OMP/pair_lj_sf_omp.cpp @@ -0,0 +1,161 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_sf_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairLJShiftedForceOMP::PairLJShiftedForceOMP(LAMMPS *lmp) : + PairLJShiftedForce(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJShiftedForceOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairLJShiftedForceOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double t,rsq,r2inv,r6inv,forcelj,factor_lj; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + t = sqrt(r2inv*cutsq[itype][jtype]); + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]) - + t*foffset[itype][jtype]; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + fpair = factor_lj*forcelj*r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) + + (t-1.0)*foffset[itype][jtype] - offset[itype][jtype]; + evdwl *= factor_lj; + } + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJShiftedForceOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJShiftedForce::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_sf_omp.h b/src/USER-OMP/pair_lj_sf_omp.h new file mode 100644 index 0000000000..92db973b3d --- /dev/null +++ b/src/USER-OMP/pair_lj_sf_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/sf/omp,PairLJShiftedForceOMP) + +#else + +#ifndef LMP_PAIR_LJ_SF_OMP_H +#define LMP_PAIR_LJ_SF_OMP_H + +#include "pair_lj_sf.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJShiftedForceOMP : public PairLJShiftedForce, public ThrOMP { + + public: + PairLJShiftedForceOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_smooth_linear_omp.cpp b/src/USER-OMP/pair_lj_smooth_linear_omp.cpp new file mode 100644 index 0000000000..a8f3ad8daa --- /dev/null +++ b/src/USER-OMP/pair_lj_smooth_linear_omp.cpp @@ -0,0 +1,163 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_smooth_linear_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairLJSmoothLinearOMP::PairLJSmoothLinearOMP(LAMMPS *lmp) : + PairLJSmoothLinear(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJSmoothLinearOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairLJSmoothLinearOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r2inv,r6inv,forcelj,factor_lj; + double r,rinv; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + rinv = sqrt(r2inv); + forcelj = r6inv*(lj1[itype][jtype]*r6inv-lj2[itype][jtype]); + forcelj = rinv*forcelj - dljcut[itype][jtype]; + + fpair = factor_lj*forcelj*rinv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + r = sqrt(rsq); + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); + evdwl = evdwl - ljcut[itype][jtype] + + (r-cut[itype][jtype])*dljcut[itype][jtype]; + } + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJSmoothLinearOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJSmoothLinear::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_smooth_linear_omp.h b/src/USER-OMP/pair_lj_smooth_linear_omp.h new file mode 100644 index 0000000000..940c0ea707 --- /dev/null +++ b/src/USER-OMP/pair_lj_smooth_linear_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/smooth/linear/omp,PairLJSmoothLinearOMP) + +#else + +#ifndef LMP_PAIR_LJ_SMOOTH_LINEAR_OMP_H +#define LMP_PAIR_LJ_SMOOTH_LINEAR_OMP_H + +#include "pair_lj_smooth_linear.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJSmoothLinearOMP : public PairLJSmoothLinear, public ThrOMP { + + public: + PairLJSmoothLinearOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_smooth_omp.cpp b/src/USER-OMP/pair_lj_smooth_omp.cpp new file mode 100644 index 0000000000..b1918c3fcd --- /dev/null +++ b/src/USER-OMP/pair_lj_smooth_omp.cpp @@ -0,0 +1,174 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_smooth_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairLJSmoothOMP::PairLJSmoothOMP(LAMMPS *lmp) : + PairLJSmooth(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJSmoothOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairLJSmoothOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r2inv,r6inv,forcelj,factor_lj; + double r,t,tsq,fskin; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + if (rsq < cut_inner_sq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv-lj2[itype][jtype]); + } else { + r = sqrt(rsq); + t = r - cut_inner[itype][jtype]; + tsq = t*t; + fskin = ljsw1[itype][jtype] + ljsw2[itype][jtype]*t + + ljsw3[itype][jtype]*tsq + ljsw4[itype][jtype]*tsq*t; + forcelj = fskin*r; + } + + fpair = factor_lj*forcelj*r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + if (rsq < cut_inner_sq[itype][jtype]) + evdwl = r6inv * (lj3[itype][jtype]*r6inv - + lj4[itype][jtype]) - offset[itype][jtype]; + else + evdwl = ljsw0[itype][jtype] - ljsw1[itype][jtype]*t - + ljsw2[itype][jtype]*tsq/2.0 - ljsw3[itype][jtype]*tsq*t/3.0 - + ljsw4[itype][jtype]*tsq*tsq/4.0 - offset[itype][jtype]; + evdwl *= factor_lj; + } + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJSmoothOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJSmooth::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_smooth_omp.h b/src/USER-OMP/pair_lj_smooth_omp.h new file mode 100644 index 0000000000..0b47a792a4 --- /dev/null +++ b/src/USER-OMP/pair_lj_smooth_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/smooth/omp,PairLJSmoothOMP) + +#else + +#ifndef LMP_PAIR_LJ_SMOOTH_OMP_H +#define LMP_PAIR_LJ_SMOOTH_OMP_H + +#include "pair_lj_smooth.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJSmoothOMP : public PairLJSmooth, public ThrOMP { + + public: + PairLJSmoothOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lubricate_omp.cpp b/src/USER-OMP/pair_lubricate_omp.cpp new file mode 100644 index 0000000000..22090b2978 --- /dev/null +++ b/src/USER-OMP/pair_lubricate_omp.cpp @@ -0,0 +1,484 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lubricate_omp.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "force.h" +#include "input.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "update.h" +#include "variable.h" +#include "random_mars.h" +#include "fix_wall.h" +#include "fix_deform.h" +#include "math_const.h" + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +// same as fix_deform.cpp + +enum{NO_REMAP,X_REMAP,V_REMAP}; + +// same as fix_wall.cpp + +enum{EDGE,CONSTANT,VARIABLE}; + +/* ---------------------------------------------------------------------- */ + +PairLubricateOMP::PairLubricateOMP(LAMMPS *lmp) : + PairLubricate(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +PairLubricateOMP::~PairLubricateOMP() +{} + +/* ---------------------------------------------------------------------- */ + +void PairLubricateOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + + // This section of code adjusts R0/RT0/RS0 if necessary due to changes + // in the volume fraction as a result of fix deform or moving walls + + double dims[3], wallcoord; + if (flagVF) // Flag for volume fraction corrections + if (flagdeform || flagwall == 2){ // Possible changes in volume fraction + if (flagdeform && !flagwall) + for (int j = 0; j < 3; j++) + dims[j] = domain->prd[j]; + else if (flagwall == 2 || (flagdeform && flagwall == 1)){ + double wallhi[3], walllo[3]; + for (int j = 0; j < 3; j++){ + wallhi[j] = domain->prd[j]; + walllo[j] = 0; + } + for (int m = 0; m < wallfix->nwall; m++){ + int dim = wallfix->wallwhich[m] / 2; + int side = wallfix->wallwhich[m] % 2; + if (wallfix->wallstyle[m] == VARIABLE){ + wallcoord = input->variable->compute_equal(wallfix->varindex[m]); + } + else wallcoord = wallfix->coord0[m]; + if (side == 0) walllo[dim] = wallcoord; + else wallhi[dim] = wallcoord; + } + for (int j = 0; j < 3; j++) + dims[j] = wallhi[j] - walllo[j]; + } + double vol_T = dims[0]*dims[1]*dims[2]; + double vol_f = vol_P/vol_T; + if (flaglog == 0) { + R0 = 6*MY_PI*mu*rad*(1.0 + 2.16*vol_f); + RT0 = 8*MY_PI*mu*pow(rad,3.0); + RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3.0)* + (1.0 + 3.33*vol_f + 2.80*vol_f*vol_f); + } else { + R0 = 6*MY_PI*mu*rad*(1.0 + 2.725*vol_f - 6.583*vol_f*vol_f); + RT0 = 8*MY_PI*mu*pow(rad,3.0)*(1.0 + 0.749*vol_f - 2.469*vol_f*vol_f); + RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3.0)* + (1.0 + 3.64*vol_f - 6.95*vol_f*vol_f); + } + } + + + // end of R0 adjustment code + + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (flaglog) { + if (evflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (evflag) { + if (force->newton_pair) eval<0,1,1>(ifrom, ito, thr); + else eval<0,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairLubricateOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz,tx,ty,tz; + double rsq,r,h_sep,radi; + double vr1,vr2,vr3,vnnr,vn1,vn2,vn3; + double vt1,vt2,vt3,wt1,wt2,wt3,wdotn; + double vRS0; + double vi[3],vj[3],wi[3],wj[3],xl[3]; + double a_sq,a_sh,a_pu; + int *ilist,*jlist,*numneigh,**firstneigh; + double lamda[3],vstream[3]; + + double vxmu2f = force->vxmu2f; + + double * const * const x = atom->x; + double * const * const v = atom->v; + double * const * const f = thr->get_f(); + double * const * const omega = atom->omega; + double * const * const torque = thr->get_torque(); + const double * const radius = atom->radius; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + + int overlaps = 0; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // subtract streaming component of velocity, omega, angmom + // assume fluid streaming velocity = box deformation rate + // vstream = (ux,uy,uz) + // ux = h_rate[0]*x + h_rate[5]*y + h_rate[4]*z + // uy = h_rate[1]*y + h_rate[3]*z + // uz = h_rate[2]*z + // omega_new = omega - curl(vstream)/2 + // angmom_new = angmom - I*curl(vstream)/2 + // Ef = (grad(vstream) + (grad(vstream))^T) / 2 + + if (shearing) { + double *h_rate = domain->h_rate; + double *h_ratelo = domain->h_ratelo; + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + itype = type[i]; + radi = radius[i]; + + domain->x2lamda(x[i],lamda); + vstream[0] = h_rate[0]*lamda[0] + h_rate[5]*lamda[1] + + h_rate[4]*lamda[2] + h_ratelo[0]; + vstream[1] = h_rate[1]*lamda[1] + h_rate[3]*lamda[2] + h_ratelo[1]; + vstream[2] = h_rate[2]*lamda[2] + h_ratelo[2]; + v[i][0] -= vstream[0]; + v[i][1] -= vstream[1]; + v[i][2] -= vstream[2]; + + omega[i][0] += 0.5*h_rate[3]; + omega[i][1] -= 0.5*h_rate[4]; + omega[i][2] += 0.5*h_rate[5]; + } + + // set Ef from h_rate in strain units + + Ef[0][0] = h_rate[0]/domain->xprd; + Ef[1][1] = h_rate[1]/domain->yprd; + Ef[2][2] = h_rate[2]/domain->zprd; + Ef[0][1] = Ef[1][0] = 0.5 * h_rate[5]/domain->yprd; + Ef[0][2] = Ef[2][0] = 0.5 * h_rate[4]/domain->zprd; + Ef[1][2] = Ef[2][1] = 0.5 * h_rate[3]/domain->zprd; + + // copy updated velocity/omega/angmom to the ghost particles + // no need to do this if not shearing since comm->ghost_velocity is set + + sync_threads(); + + // MPI communication only on master thread +#if defined(_OPENMP) +#pragma omp master +#endif + { comm->forward_comm_pair(this); } + + sync_threads(); + } + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + radi = radius[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + // angular velocity + + wi[0] = omega[i][0]; + wi[1] = omega[i][1]; + wi[2] = omega[i][2]; + + // FLD contribution to force and torque due to isotropic terms + // FLD contribution to stress from isotropic RS0 + + if (flagfld) { + f[i][0] -= vxmu2f*R0*v[i][0]; + f[i][1] -= vxmu2f*R0*v[i][1]; + f[i][2] -= vxmu2f*R0*v[i][2]; + torque[i][0] -= vxmu2f*RT0*wi[0]; + torque[i][1] -= vxmu2f*RT0*wi[1]; + torque[i][2] -= vxmu2f*RT0*wi[2]; + + if (shearing && vflag_either) { + vRS0 = -vxmu2f * RS0; + v_tally_tensor(i,i,nlocal,NEWTON_PAIR, + vRS0*Ef[0][0],vRS0*Ef[1][1],vRS0*Ef[2][2], + vRS0*Ef[0][1],vRS0*Ef[0][2],vRS0*Ef[1][2]); + } + } + + if (!flagHI) continue; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r = sqrt(rsq); + + // angular momentum = I*omega = 2/5 * M*R^2 * omega + + wj[0] = omega[j][0]; + wj[1] = omega[j][1]; + wj[2] = omega[j][2]; + + // xl = point of closest approach on particle i from its center + + xl[0] = -delx/r*radi; + xl[1] = -dely/r*radi; + xl[2] = -delz/r*radi; + + // velocity at the point of closest approach on both particles + // v = v + omega_cross_xl - Ef.xl + + // particle i + + vi[0] = v[i][0] + (wi[1]*xl[2] - wi[2]*xl[1]) + - (Ef[0][0]*xl[0] + Ef[0][1]*xl[1] + Ef[0][2]*xl[2]); + + vi[1] = v[i][1] + (wi[2]*xl[0] - wi[0]*xl[2]) + - (Ef[1][0]*xl[0] + Ef[1][1]*xl[1] + Ef[1][2]*xl[2]); + + vi[2] = v[i][2] + (wi[0]*xl[1] - wi[1]*xl[0]) + - (Ef[2][0]*xl[0] + Ef[2][1]*xl[1] + Ef[2][2]*xl[2]); + + // particle j + + vj[0] = v[j][0] - (wj[1]*xl[2] - wj[2]*xl[1]) + + (Ef[0][0]*xl[0] + Ef[0][1]*xl[1] + Ef[0][2]*xl[2]); + + vj[1] = v[j][1] - (wj[2]*xl[0] - wj[0]*xl[2]) + + (Ef[1][0]*xl[0] + Ef[1][1]*xl[1] + Ef[1][2]*xl[2]); + + vj[2] = v[j][2] - (wj[0]*xl[1] - wj[1]*xl[0]) + + (Ef[2][0]*xl[0] + Ef[2][1]*xl[1] + Ef[2][2]*xl[2]); + + // scalar resistances XA and YA + + h_sep = r - 2.0*radi; + + // check for overlaps + + if (h_sep < 0.0) overlaps++; + + // if less than the minimum gap use the minimum gap instead + + if (r < cut_inner[itype][jtype]) + h_sep = cut_inner[itype][jtype] - 2.0*radi; + + // scale h_sep by radi + + h_sep = h_sep/radi; + + // scalar resistances + + if (FLAGLOG) { + a_sq = 6.0*MY_PI*mu*radi*(1.0/4.0/h_sep + 9.0/40.0*log(1.0/h_sep)); + a_sh = 6.0*MY_PI*mu*radi*(1.0/6.0*log(1.0/h_sep)); + a_pu = 8.0*MY_PI*mu*pow(radi,3.0)*(3.0/160.0*log(1.0/h_sep)); + } else + a_sq = 6.0*MY_PI*mu*radi*(1.0/4.0/h_sep); + + // relative velocity at the point of closest approach + // includes fluid velocity + + vr1 = vi[0] - vj[0]; + vr2 = vi[1] - vj[1]; + vr3 = vi[2] - vj[2]; + + // normal component (vr.n)n + + vnnr = (vr1*delx + vr2*dely + vr3*delz)/r; + vn1 = vnnr*delx/r; + vn2 = vnnr*dely/r; + vn3 = vnnr*delz/r; + + // tangential component vr - (vr.n)n + + vt1 = vr1 - vn1; + vt2 = vr2 - vn2; + vt3 = vr3 - vn3; + + // force due to squeeze type motion + + fx = a_sq*vn1; + fy = a_sq*vn2; + fz = a_sq*vn3; + + // force due to all shear kind of motions + + if (FLAGLOG) { + fx = fx + a_sh*vt1; + fy = fy + a_sh*vt2; + fz = fz + a_sh*vt3; + } + + // scale forces for appropriate units + + fx *= vxmu2f; + fy *= vxmu2f; + fz *= vxmu2f; + + // add to total force + + f[i][0] -= fx; + f[i][1] -= fy; + f[i][2] -= fz; + + if (NEWTON_PAIR || j < nlocal) { + f[j][0] += fx; + f[j][1] += fy; + f[j][2] += fz; + } + + // torque due to this force + + if (FLAGLOG) { + tx = xl[1]*fz - xl[2]*fy; + ty = xl[2]*fx - xl[0]*fz; + tz = xl[0]*fy - xl[1]*fx; + + torque[i][0] -= vxmu2f*tx; + torque[i][1] -= vxmu2f*ty; + torque[i][2] -= vxmu2f*tz; + + if (NEWTON_PAIR || j < nlocal) { + torque[j][0] -= vxmu2f*tx; + torque[j][1] -= vxmu2f*ty; + torque[j][2] -= vxmu2f*tz; + } + + // torque due to a_pu + + wdotn = ((wi[0]-wj[0])*delx + (wi[1]-wj[1])*dely + + (wi[2]-wj[2])*delz)/r; + wt1 = (wi[0]-wj[0]) - wdotn*delx/r; + wt2 = (wi[1]-wj[1]) - wdotn*dely/r; + wt3 = (wi[2]-wj[2]) - wdotn*delz/r; + + tx = a_pu*wt1; + ty = a_pu*wt2; + tz = a_pu*wt3; + + torque[i][0] -= vxmu2f*tx; + torque[i][1] -= vxmu2f*ty; + torque[i][2] -= vxmu2f*tz; + + if (NEWTON_PAIR || j < nlocal) { + torque[j][0] += vxmu2f*tx; + torque[j][1] += vxmu2f*ty; + torque[j][2] += vxmu2f*tz; + } + } + + if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,0.0,0.0, + -fx,-fy,-fz,delx,dely,delz,thr); + } + } + } + + // restore streaming component of velocity, omega, angmom + + if (shearing) { + double *h_rate = domain->h_rate; + double *h_ratelo = domain->h_ratelo; + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + itype = type[i]; + radi = radius[i]; + + domain->x2lamda(x[i],lamda); + vstream[0] = h_rate[0]*lamda[0] + h_rate[5]*lamda[1] + + h_rate[4]*lamda[2] + h_ratelo[0]; + vstream[1] = h_rate[1]*lamda[1] + h_rate[3]*lamda[2] + h_ratelo[1]; + vstream[2] = h_rate[2]*lamda[2] + h_ratelo[2]; + v[i][0] += vstream[0]; + v[i][1] += vstream[1]; + v[i][2] += vstream[2]; + + omega[i][0] -= 0.5*h_rate[3]; + omega[i][1] += 0.5*h_rate[4]; + omega[i][2] -= 0.5*h_rate[5]; + } + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLubricateOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLubricate::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lubricate_omp.h b/src/USER-OMP/pair_lubricate_omp.h new file mode 100644 index 0000000000..b35dc5d563 --- /dev/null +++ b/src/USER-OMP/pair_lubricate_omp.h @@ -0,0 +1,49 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lubricate/omp,PairLubricateOMP) + +#else + +#ifndef LMP_PAIR_LUBRICATE_OMP_H +#define LMP_PAIR_LUBRICATE_OMP_H + +#include "pair_lubricate.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLubricateOMP : public PairLubricate, public ThrOMP { + + public: + PairLubricateOMP(class LAMMPS *); + virtual ~PairLubricateOMP(); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lubricate_poly_omp.cpp b/src/USER-OMP/pair_lubricate_poly_omp.cpp new file mode 100644 index 0000000000..26339b7749 --- /dev/null +++ b/src/USER-OMP/pair_lubricate_poly_omp.cpp @@ -0,0 +1,487 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lubricate_poly_omp.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "force.h" +#include "input.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "update.h" +#include "variable.h" +#include "random_mars.h" +#include "fix_wall.h" +#include "fix_deform.h" +#include "math_const.h" + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +// same as fix_deform.cpp + +enum{NO_REMAP,X_REMAP,V_REMAP}; + + +// same as fix_wall.cpp + +enum{EDGE,CONSTANT,VARIABLE}; + +/* ---------------------------------------------------------------------- */ + +PairLubricatePolyOMP::PairLubricatePolyOMP(LAMMPS *lmp) : + PairLubricatePoly(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +PairLubricatePolyOMP::~PairLubricatePolyOMP() +{} + +/* ---------------------------------------------------------------------- */ + +void PairLubricatePolyOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + + // This section of code adjusts R0/RT0/RS0 if necessary due to changes + // in the volume fraction as a result of fix deform or moving walls + + double dims[3], wallcoord; + if (flagVF) // Flag for volume fraction corrections + if (flagdeform || flagwall == 2){ // Possible changes in volume fraction + if (flagdeform && !flagwall) + for (int j = 0; j < 3; j++) + dims[j] = domain->prd[j]; + else if (flagwall == 2 || (flagdeform && flagwall == 1)){ + double wallhi[3], walllo[3]; + for (int j = 0; j < 3; j++){ + wallhi[j] = domain->prd[j]; + walllo[j] = 0; + } + for (int m = 0; m < wallfix->nwall; m++){ + int dim = wallfix->wallwhich[m] / 2; + int side = wallfix->wallwhich[m] % 2; + if (wallfix->wallstyle[m] == VARIABLE){ + wallcoord = input->variable->compute_equal(wallfix->varindex[m]); + } + else wallcoord = wallfix->coord0[m]; + if (side == 0) walllo[dim] = wallcoord; + else wallhi[dim] = wallcoord; + } + for (int j = 0; j < 3; j++) + dims[j] = wallhi[j] - walllo[j]; + } + double vol_T = dims[0]*dims[1]*dims[2]; + double vol_f = vol_P/vol_T; + if (flaglog == 0) { + R0 = 6*MY_PI*mu*(1.0 + 2.16*vol_f); + RT0 = 8*MY_PI*mu; + RS0 = 20.0/3.0*MY_PI*mu*(1.0 + 3.33*vol_f + 2.80*vol_f*vol_f); + } else { + R0 = 6*MY_PI*mu*(1.0 + 2.725*vol_f - 6.583*vol_f*vol_f); + RT0 = 8*MY_PI*mu*(1.0 + 0.749*vol_f - 2.469*vol_f*vol_f); + RS0 = 20.0/3.0*MY_PI*mu*(1.0 + 3.64*vol_f - 6.95*vol_f*vol_f); + } + } + + // end of R0 adjustment code + + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (flaglog) { + if (shearing) { + if (evflag) + eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (evflag) + eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (shearing) { + if (evflag) + eval<0,1,1>(ifrom, ito, thr); + else eval<0,1,0>(ifrom, ito, thr); + } else { + if (evflag) + eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairLubricatePolyOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz,tx,ty,tz; + double rsq,r,h_sep,beta0,beta1,radi,radj; + double vr1,vr2,vr3,vnnr,vn1,vn2,vn3; + double vt1,vt2,vt3,wt1,wt2,wt3,wdotn; + double vRS0; + double vi[3],vj[3],wi[3],wj[3],xl[3],jl[3]; + double a_sq,a_sh,a_pu; + int *ilist,*jlist,*numneigh,**firstneigh; + double lamda[3],vstream[3]; + + double vxmu2f = force->vxmu2f; + + double * const * const x = atom->x; + double * const * const v = atom->v; + double * const * const f = thr->get_f(); + double * const * const omega = atom->omega; + double * const * const torque = thr->get_torque(); + const double * const radius = atom->radius; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + + int overlaps = 0; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // subtract streaming component of velocity, omega, angmom + // assume fluid streaming velocity = box deformation rate + // vstream = (ux,uy,uz) + // ux = h_rate[0]*x + h_rate[5]*y + h_rate[4]*z + // uy = h_rate[1]*y + h_rate[3]*z + // uz = h_rate[2]*z + // omega_new = omega - curl(vstream)/2 + // angmom_new = angmom - I*curl(vstream)/2 + // Ef = (grad(vstream) + (grad(vstream))^T) / 2 + + if (shearing) { + double *h_rate = domain->h_rate; + double *h_ratelo = domain->h_ratelo; + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + itype = type[i]; + radi = radius[i]; + domain->x2lamda(x[i],lamda); + vstream[0] = h_rate[0]*lamda[0] + h_rate[5]*lamda[1] + + h_rate[4]*lamda[2] + h_ratelo[0]; + vstream[1] = h_rate[1]*lamda[1] + h_rate[3]*lamda[2] + h_ratelo[1]; + vstream[2] = h_rate[2]*lamda[2] + h_ratelo[2]; + v[i][0] -= vstream[0]; + v[i][1] -= vstream[1]; + v[i][2] -= vstream[2]; + + omega[i][0] += 0.5*h_rate[3]; + omega[i][1] -= 0.5*h_rate[4]; + omega[i][2] += 0.5*h_rate[5]; + } + + // set Ef from h_rate in strain units + + Ef[0][0] = h_rate[0]/domain->xprd; + Ef[1][1] = h_rate[1]/domain->yprd; + Ef[2][2] = h_rate[2]/domain->zprd; + Ef[0][1] = Ef[1][0] = 0.5 * h_rate[5]/domain->yprd; + Ef[0][2] = Ef[2][0] = 0.5 * h_rate[4]/domain->zprd; + Ef[1][2] = Ef[2][1] = 0.5 * h_rate[3]/domain->zprd; + + // copy updated omega to the ghost particles + // no need to do this if not shearing since comm->ghost_velocity is set + + sync_threads(); + + // MPI communication only on master thread +#if defined(_OPENMP) +#pragma omp master +#endif + { comm->forward_comm_pair(this); } + + sync_threads(); + } + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + radi = radius[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + // angular velocity + + wi[0] = omega[i][0]; + wi[1] = omega[i][1]; + wi[2] = omega[i][2]; + + // FLD contribution to force and torque due to isotropic terms + // FLD contribution to stress from isotropic RS0 + + if (flagfld) { + f[i][0] -= vxmu2f*R0*radi*v[i][0]; + f[i][1] -= vxmu2f*R0*radi*v[i][1]; + f[i][2] -= vxmu2f*R0*radi*v[i][2]; + const double rad3 = radi*radi*radi; + torque[i][0] -= vxmu2f*RT0*rad3*wi[0]; + torque[i][1] -= vxmu2f*RT0*rad3*wi[1]; + torque[i][2] -= vxmu2f*RT0*rad3*wi[2]; + + if (SHEARING && vflag_either) { + vRS0 = -vxmu2f * RS0*rad3; + v_tally_tensor(i,i,nlocal,/* newton_pair */ 0, + vRS0*Ef[0][0],vRS0*Ef[1][1],vRS0*Ef[2][2], + vRS0*Ef[0][1],vRS0*Ef[0][2],vRS0*Ef[1][2]); + } + } + + if (!flagHI) continue; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + radj = atom->radius[j]; + + if (rsq < cutsq[itype][jtype]) { + r = sqrt(rsq); + + // angular momentum = I*omega = 2/5 * M*R^2 * omega + + wj[0] = omega[j][0]; + wj[1] = omega[j][1]; + wj[2] = omega[j][2]; + + // xl = point of closest approach on particle i from its center + + xl[0] = -delx/r*radi; + xl[1] = -dely/r*radi; + xl[2] = -delz/r*radi; + jl[0] = -delx/r*radj; + jl[1] = -dely/r*radj; + jl[2] = -delz/r*radj; + + // velocity at the point of closest approach on both particles + // v = v + omega_cross_xl - Ef.xl + + // particle i + + vi[0] = v[i][0] + (wi[1]*xl[2] - wi[2]*xl[1]) + - (Ef[0][0]*xl[0] + Ef[0][1]*xl[1] + Ef[0][2]*xl[2]); + + vi[1] = v[i][1] + (wi[2]*xl[0] - wi[0]*xl[2]) + - (Ef[1][0]*xl[0] + Ef[1][1]*xl[1] + Ef[1][2]*xl[2]); + + vi[2] = v[i][2] + (wi[0]*xl[1] - wi[1]*xl[0]) + - (Ef[2][0]*xl[0] + Ef[2][1]*xl[1] + Ef[2][2]*xl[2]); + + // particle j + + vj[0] = v[j][0] - (wj[1]*jl[2] - wj[2]*jl[1]) + + (Ef[0][0]*jl[0] + Ef[0][1]*jl[1] + Ef[0][2]*jl[2]); + + vj[1] = v[j][1] - (wj[2]*jl[0] - wj[0]*jl[2]) + + (Ef[1][0]*jl[0] + Ef[1][1]*jl[1] + Ef[1][2]*jl[2]); + + vj[2] = v[j][2] - (wj[0]*jl[1] - wj[1]*jl[0]) + + (Ef[2][0]*jl[0] + Ef[2][1]*jl[1] + Ef[2][2]*jl[2]); + + // scalar resistances XA and YA + + h_sep = r - radi-radj; + + // check for overlaps + + if (h_sep < 0.0) overlaps++; + + // if less than the minimum gap use the minimum gap instead + + if (r < cut_inner[itype][jtype]) + h_sep = cut_inner[itype][jtype] - radi-radj; + + // scale h_sep by radi + + h_sep = h_sep/radi; + beta0 = radj/radi; + beta1 = 1.0 + beta0; + + // scalar resistances + + if (FLAGLOG) { + a_sq = beta0*beta0/beta1/beta1/h_sep + + (1.0+7.0*beta0+beta0*beta0)/5.0/pow(beta1,3.0)*log(1.0/h_sep); + a_sq += (1.0+18.0*beta0-29.0*beta0*beta0+18.0 * + pow(beta0,3.0)+pow(beta0,4.0))/21.0/pow(beta1,4.0) * + h_sep*log(1.0/h_sep); + a_sq *= 6.0*MY_PI*mu*radi; + a_sh = 4.0*beta0*(2.0+beta0+2.0*beta0*beta0)/15.0/pow(beta1,3.0) * + log(1.0/h_sep); + a_sh += 4.0*(16.0-45.0*beta0+58.0*beta0*beta0-45.0*pow(beta0,3.0) + + 16.0*pow(beta0,4.0))/375.0/pow(beta1,4.0) * + h_sep*log(1.0/h_sep); + a_sh *= 6.0*MY_PI*mu*radi; + a_pu = beta0*(4.0+beta0)/10.0/beta1/beta1*log(1.0/h_sep); + a_pu += (32.0-33.0*beta0+83.0*beta0*beta0+43.0 * + pow(beta0,3.0))/250.0/pow(beta1,3.0)*h_sep*log(1.0/h_sep); + a_pu *= 8.0*MY_PI*mu*pow(radi,3.0); + } else a_sq = 6.0*MY_PI*mu*radi*(beta0*beta0/beta1/beta1/h_sep); + + // relative velocity at the point of closest approach + // includes fluid velocity + + vr1 = vi[0] - vj[0]; + vr2 = vi[1] - vj[1]; + vr3 = vi[2] - vj[2]; + + // normal component (vr.n)n + + vnnr = (vr1*delx + vr2*dely + vr3*delz)/r; + vn1 = vnnr*delx/r; + vn2 = vnnr*dely/r; + vn3 = vnnr*delz/r; + + // tangential component vr - (vr.n)n + + vt1 = vr1 - vn1; + vt2 = vr2 - vn2; + vt3 = vr3 - vn3; + + // force due to squeeze type motion + + fx = a_sq*vn1; + fy = a_sq*vn2; + fz = a_sq*vn3; + + // force due to all shear kind of motions + + if (FLAGLOG) { + fx = fx + a_sh*vt1; + fy = fy + a_sh*vt2; + fz = fz + a_sh*vt3; + } + + // scale forces for appropriate units + + fx *= vxmu2f; + fy *= vxmu2f; + fz *= vxmu2f; + + // add to total force + + f[i][0] -= fx; + f[i][1] -= fy; + f[i][2] -= fz; + + // torque due to this force + + if (FLAGLOG) { + tx = xl[1]*fz - xl[2]*fy; + ty = xl[2]*fx - xl[0]*fz; + tz = xl[0]*fy - xl[1]*fx; + + torque[i][0] -= vxmu2f*tx; + torque[i][1] -= vxmu2f*ty; + torque[i][2] -= vxmu2f*tz; + + // torque due to a_pu + + wdotn = ((wi[0]-wj[0])*delx + (wi[1]-wj[1])*dely + + (wi[2]-wj[2])*delz)/r; + wt1 = (wi[0]-wj[0]) - wdotn*delx/r; + wt2 = (wi[1]-wj[1]) - wdotn*dely/r; + wt3 = (wi[2]-wj[2]) - wdotn*delz/r; + + tx = a_pu*wt1; + ty = a_pu*wt2; + tz = a_pu*wt3; + + torque[i][0] -= vxmu2f*tx; + torque[i][1] -= vxmu2f*ty; + torque[i][2] -= vxmu2f*tz; + + } + + if (EVFLAG) ev_tally_xyz(i,nlocal,nlocal, /* newton_pair */ 0, + 0.0,0.0,-fx,-fy,-fz,delx,dely,delz); + } + } + } + + // restore streaming component of velocity, omega, angmom + + if (SHEARING) { + double *h_rate = domain->h_rate; + double *h_ratelo = domain->h_ratelo; + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + itype = type[i]; + radi = radius[i]; + + domain->x2lamda(x[i],lamda); + vstream[0] = h_rate[0]*lamda[0] + h_rate[5]*lamda[1] + + h_rate[4]*lamda[2] + h_ratelo[0]; + vstream[1] = h_rate[1]*lamda[1] + h_rate[3]*lamda[2] + h_ratelo[1]; + vstream[2] = h_rate[2]*lamda[2] + h_ratelo[2]; + v[i][0] += vstream[0]; + v[i][1] += vstream[1]; + v[i][2] += vstream[2]; + + omega[i][0] -= 0.5*h_rate[3]; + omega[i][1] += 0.5*h_rate[4]; + omega[i][2] -= 0.5*h_rate[5]; + } + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLubricatePolyOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLubricatePoly::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lubricate_poly_omp.h b/src/USER-OMP/pair_lubricate_poly_omp.h new file mode 100644 index 0000000000..e9984ce0d4 --- /dev/null +++ b/src/USER-OMP/pair_lubricate_poly_omp.h @@ -0,0 +1,49 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lubricate/poly/omp,PairLubricateOMP) + +#else + +#ifndef LMP_PAIR_LUBRICATE_POLY_OMP_H +#define LMP_PAIR_LUBRICATE_POLY_OMP_H + +#include "pair_lubricate_poly.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLubricatePolyOMP : public PairLubricatePoly, public ThrOMP { + + public: + PairLubricatePolyOMP(class LAMMPS *); + virtual ~PairLubricatePolyOMP(); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_meam_spline_omp.cpp b/src/USER-OMP/pair_meam_spline_omp.cpp new file mode 100644 index 0000000000..cf9a6c5bc9 --- /dev/null +++ b/src/USER-OMP/pair_meam_spline_omp.cpp @@ -0,0 +1,324 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "string.h" + +#include "pair_meam_spline_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "memory.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairMEAMSplineOMP::PairMEAMSplineOMP(LAMMPS *lmp) : + PairMEAMSpline(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairMEAMSplineOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = eflag_global = vflag_global = eflag_atom = vflag_atom = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = listfull->inum; + + if (listhalf->inum != inum) + error->warning(FLERR,"inconsistent half and full neighborlist"); + + // Grow per-atom array if necessary. + + if (atom->nmax > nmax) { + memory->destroy(Uprime_values); + nmax = atom->nmax; + memory->create(Uprime_values,nmax*nthreads,"pair:Uprime"); + } + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + thr->init_eam(nall,Uprime_values); + + if (evflag) { + if (eflag) { + eval<1,1>(ifrom, ito, thr); + } else { + eval<1,0>(ifrom, ito, thr); + } + } else { + eval<0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairMEAMSplineOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + const int* const ilist_full = listfull->ilist; + const int* const numneigh_full = listfull->numneigh; + const int* const * const firstneigh_full = listfull->firstneigh; + + // Determine the maximum number of neighbors a single atom has. + int myMaxNeighbors = 0; + for(int ii = iifrom; ii < iito; ii++) { + int jnum = numneigh_full[ilist_full[ii]]; + if(jnum > myMaxNeighbors) myMaxNeighbors = jnum; + } + + // Allocate array for temporary bond info. + MEAM2Body *myTwoBodyInfo = new MEAM2Body[myMaxNeighbors]; + + const double * const * const x = atom->x; + double * const * const forces = thr->get_f(); + double * const Uprime_thr = thr->get_rho(); + const int tid = thr->get_tid(); + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + const int nall = nlocal + atom->nghost; + + const double cutforcesq = cutoff*cutoff; + + // Sum three-body contributions to charge density and compute embedding energies. + for(int ii = iifrom; ii < iito; ii++) { + + const int i = ilist_full[ii]; + const double xtmp = x[i][0]; + const double ytmp = x[i][1]; + const double ztmp = x[i][2]; + const int* const jlist = firstneigh_full[i]; + const int jnum = numneigh_full[i]; + double rho_value = 0; + int numBonds = 0; + MEAM2Body* nextTwoBodyInfo = myTwoBodyInfo; + + for(int jj = 0; jj < jnum; jj++) { + const int j = jlist[jj] & NEIGHMASK; + + const double jdelx = x[j][0] - xtmp; + const double jdely = x[j][1] - ytmp; + const double jdelz = x[j][2] - ztmp; + const double rij_sq = jdelx*jdelx + jdely*jdely + jdelz*jdelz; + + if (rij_sq < cutforcesq) { + const double rij = sqrt(rij_sq); + double partial_sum = 0; + + nextTwoBodyInfo->tag = j; + nextTwoBodyInfo->r = rij; + nextTwoBodyInfo->f = f.eval(rij, nextTwoBodyInfo->fprime); + nextTwoBodyInfo->del[0] = jdelx / rij; + nextTwoBodyInfo->del[1] = jdely / rij; + nextTwoBodyInfo->del[2] = jdelz / rij; + + for(int kk = 0; kk < numBonds; kk++) { + const MEAM2Body& bondk = myTwoBodyInfo[kk]; + double cos_theta = (nextTwoBodyInfo->del[0]*bondk.del[0] + nextTwoBodyInfo->del[1]*bondk.del[1] + nextTwoBodyInfo->del[2]*bondk.del[2]); + partial_sum += bondk.f * g.eval(cos_theta); + } + + rho_value += nextTwoBodyInfo->f * partial_sum; + rho_value += rho.eval(rij); + + numBonds++; + nextTwoBodyInfo++; + } + } + + // Compute embedding energy and its derivative. + double Uprime_i; + double embeddingEnergy = U.eval(rho_value, Uprime_i) - zero_atom_energy; + Uprime_thr[i] = Uprime_i; + if (EFLAG) { + if (eflag_global) eng_vdwl += embeddingEnergy; + if (eflag_atom) eatom[i] += embeddingEnergy; + } + + double forces_i[3] = {0.0, 0.0, 0.0}; + + // Compute three-body contributions to force. + for(int jj = 0; jj < numBonds; jj++) { + const MEAM2Body bondj = myTwoBodyInfo[jj]; + const double rij = bondj.r; + const int j = bondj.tag; + + const double f_rij_prime = bondj.fprime; + const double f_rij = bondj.f; + + double forces_j[3] = {0.0, 0.0, 0.0}; + + MEAM2Body const* bondk = myTwoBodyInfo; + for(int kk = 0; kk < jj; kk++, ++bondk) { + const double rik = bondk->r; + + const double cos_theta = (bondj.del[0]*bondk->del[0] + bondj.del[1]*bondk->del[1] + bondj.del[2]*bondk->del[2]); + double g_prime; + double g_value = g.eval(cos_theta, g_prime); + const double f_rik_prime = bondk->fprime; + const double f_rik = bondk->f; + + double fij = -Uprime_i * g_value * f_rik * f_rij_prime; + double fik = -Uprime_i * g_value * f_rij * f_rik_prime; + + const double prefactor = Uprime_i * f_rij * f_rik * g_prime; + const double prefactor_ij = prefactor / rij; + const double prefactor_ik = prefactor / rik; + fij += prefactor_ij * cos_theta; + fik += prefactor_ik * cos_theta; + + double fj[3], fk[3]; + + fj[0] = bondj.del[0] * fij - bondk->del[0] * prefactor_ij; + fj[1] = bondj.del[1] * fij - bondk->del[1] * prefactor_ij; + fj[2] = bondj.del[2] * fij - bondk->del[2] * prefactor_ij; + forces_j[0] += fj[0]; + forces_j[1] += fj[1]; + forces_j[2] += fj[2]; + + fk[0] = bondk->del[0] * fik - bondj.del[0] * prefactor_ik; + fk[1] = bondk->del[1] * fik - bondj.del[1] * prefactor_ik; + fk[2] = bondk->del[2] * fik - bondj.del[2] * prefactor_ik; + forces_i[0] -= fk[0]; + forces_i[1] -= fk[1]; + forces_i[2] -= fk[2]; + + const int k = bondk->tag; + forces[k][0] += fk[0]; + forces[k][1] += fk[1]; + forces[k][2] += fk[2]; + + if(EVFLAG) { + double delta_ij[3]; + double delta_ik[3]; + delta_ij[0] = bondj.del[0] * rij; + delta_ij[1] = bondj.del[1] * rij; + delta_ij[2] = bondj.del[2] * rij; + delta_ik[0] = bondk->del[0] * rik; + delta_ik[1] = bondk->del[1] * rik; + delta_ik[2] = bondk->del[2] * rik; + ev_tally3_thr(this,i,j,k,0.0,0.0,fj,fk,delta_ij,delta_ik,thr); + } + } + + forces[i][0] -= forces_j[0]; + forces[i][1] -= forces_j[1]; + forces[i][2] -= forces_j[2]; + forces[j][0] += forces_j[0]; + forces[j][1] += forces_j[1]; + forces[j][2] += forces_j[2]; + } + + forces[i][0] += forces_i[0]; + forces[i][1] += forces_i[1]; + forces[i][2] += forces_i[2]; + } + + delete[] myTwoBodyInfo; + + sync_threads(); + + // reduce per thread density + data_reduce_thr(Uprime_values, nall, nthreads, 1, tid); + + // wait until reduction is complete so that master thread + // can communicate U'(rho) values. + sync_threads(); + +#if defined(_OPENMP) +#pragma omp master +#endif + { comm->forward_comm_pair(this); } + + // wait until master thread is done with communication + sync_threads(); + + const int* const ilist_half = listhalf->ilist; + const int* const numneigh_half = listhalf->numneigh; + const int* const * const firstneigh_half = listhalf->firstneigh; + + // Compute two-body pair interactions. + for(int ii = iifrom; ii < iito; ii++) { + const int i = ilist_half[ii]; + const double xtmp = x[i][0]; + const double ytmp = x[i][1]; + const double ztmp = x[i][2]; + const int* const jlist = firstneigh_half[i]; + const int jnum = numneigh_half[i]; + + for(int jj = 0; jj < jnum; jj++) { + const int j = jlist[jj] & NEIGHMASK; + + double jdel[3]; + jdel[0] = x[j][0] - xtmp; + jdel[1] = x[j][1] - ytmp; + jdel[2] = x[j][2] - ztmp; + double rij_sq = jdel[0]*jdel[0] + jdel[1]*jdel[1] + jdel[2]*jdel[2]; + + if(rij_sq < cutforcesq) { + double rij = sqrt(rij_sq); + + double rho_prime; + rho.eval(rij, rho_prime); + double fpair = rho_prime * (Uprime_values[i] + Uprime_values[j]); + + double pair_pot_deriv; + double pair_pot = phi.eval(rij, pair_pot_deriv); + fpair += pair_pot_deriv; + + // Divide by r_ij to get forces from gradient. + fpair /= rij; + + forces[i][0] += jdel[0]*fpair; + forces[i][1] += jdel[1]*fpair; + forces[i][2] += jdel[2]*fpair; + forces[j][0] -= jdel[0]*fpair; + forces[j][1] -= jdel[1]*fpair; + forces[j][2] -= jdel[2]*fpair; + if (EVFLAG) ev_tally_thr(this,i,j,nlocal, 1 /* newton_pair */, + pair_pot,0.0,-fpair,jdel[0],jdel[1],jdel[2],thr); + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +double PairMEAMSplineOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairMEAMSpline::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_meam_spline_omp.h b/src/USER-OMP/pair_meam_spline_omp.h new file mode 100644 index 0000000000..2fd169609f --- /dev/null +++ b/src/USER-OMP/pair_meam_spline_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(meam/spline/omp,PairMEAMSplineOMP) + +#else + +#ifndef LMP_PAIR_MEAM_SPLINE_OMP_H +#define LMP_PAIR_MEAM_SPLINE_OMP_H + +#include "pair_meam_spline.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairMEAMSplineOMP : public PairMEAMSpline, public ThrOMP { + + public: + PairMEAMSplineOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int iifrom, int iito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_morse_omp.cpp b/src/USER-OMP/pair_morse_omp.cpp new file mode 100644 index 0000000000..d1b0d56cfd --- /dev/null +++ b/src/USER-OMP/pair_morse_omp.cpp @@ -0,0 +1,158 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_morse_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairMorseOMP::PairMorseOMP(LAMMPS *lmp) : + PairMorse(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairMorseOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairMorseOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r,dr,dexp,factor_lj; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r = sqrt(rsq); + dr = r - r0[itype][jtype]; + dexp = exp(-alpha[itype][jtype] * dr); + fpair = factor_lj * morse1[itype][jtype] * (dexp*dexp - dexp) / r; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + evdwl = d0[itype][jtype] * (dexp*dexp - 2.0*dexp) - + offset[itype][jtype]; + evdwl *= factor_lj; + } + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairMorseOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairMorse::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_morse_omp.h b/src/USER-OMP/pair_morse_omp.h new file mode 100644 index 0000000000..40797e2502 --- /dev/null +++ b/src/USER-OMP/pair_morse_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(morse/omp,PairMorseOMP) + +#else + +#ifndef LMP_PAIR_MORSE_OMP_H +#define LMP_PAIR_MORSE_OMP_H + +#include "pair_morse.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairMorseOMP : public PairMorse, public ThrOMP { + + public: + PairMorseOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_peri_lps_omp.cpp b/src/USER-OMP/pair_peri_lps_omp.cpp new file mode 100644 index 0000000000..d61f71aa06 --- /dev/null +++ b/src/USER-OMP/pair_peri_lps_omp.cpp @@ -0,0 +1,453 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "float.h" +#include "pair_peri_lps_omp.h" +#include "fix.h" +#include "fix_peri_neigh.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "force.h" +#include "memory.h" +#include "lattice.h" +#include "modify.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "math_const.h" + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +/* ---------------------------------------------------------------------- */ + +PairPeriLPSOMP::PairPeriLPSOMP(LAMMPS *lmp) : + PairPeriLPS(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairPeriLPSOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + // grow bond forces array if necessary + + if (atom->nmax > nmax) { + memory->destroy(s0_new); + memory->destroy(theta); + nmax = atom->nmax; + memory->create(s0_new,nmax,"pair:s0_new"); + memory->create(theta,nmax,"pair:theta"); + } + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairPeriLPSOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz; + double xtmp0,ytmp0,ztmp0,delx0,dely0,delz0,rsq0; + double rsq,r,dr,rk,evdwl,fpair,fbond; + int *ilist,*jlist,*numneigh,**firstneigh; + double d_ij,delta,stretch; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + double fxtmp,fytmp,fztmp; + + double *vfrac = atom->vfrac; + double *s0 = atom->s0; + double **x0 = atom->x0; + double **r0 = ((FixPeriNeigh *) modify->fix[ifix_peri])->r0; + int **partner = ((FixPeriNeigh *) modify->fix[ifix_peri])->partner; + int *npartner = ((FixPeriNeigh *) modify->fix[ifix_peri])->npartner; + double *wvolume = ((FixPeriNeigh *) modify->fix[ifix_peri])->wvolume; + + // lc = lattice constant + // init_style guarantees it's the same in x, y, and z + + double lc = domain->lattice->xlattice; + double half_lc = 0.5*lc; + double vfrac_scale = 1.0; + + // short-range forces + + int periodic = (domain->xperiodic || domain->yperiodic || domain->zperiodic); + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + // need minimg() for x0 difference since not ghosted + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + xtmp0 = x0[i][0]; + ytmp0 = x0[i][1]; + ztmp0 = x0[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + + rsq = delx*delx + dely*dely + delz*delz; + delx0 = xtmp0 - x0[j][0]; + dely0 = ytmp0 - x0[j][1]; + delz0 = ztmp0 - x0[j][2]; + if (periodic) domain->minimum_image(delx0,dely0,delz0); + rsq0 = delx0*delx0 + dely0*dely0 + delz0*delz0; + jtype = type[j]; + + r = sqrt(rsq); + + // short-range interaction distance based on initial particle position + // 0.9 and 1.35 are constants + + d_ij = MIN(0.9*sqrt(rsq0),1.35*lc); + + // short-range contact forces + // 15 is constant taken from the EMU Theory Manual + // Silling, 12 May 2005, p 18 + + if (r < d_ij) { + dr = r - d_ij; + + // kshort based upon short-range force constant + // of the bond-based theory used in PMB model + + double kshort = (15.0 * 18.0 * bulkmodulus[itype][itype]) / + (MY_PI * cutsq[itype][jtype] * cutsq[itype][jtype]); + rk = (kshort * vfrac[j]) * (dr / cut[itype][jtype]); + + if (r > 0.0) fpair = -(rk/r); + else fpair = 0.0; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) evdwl = 0.5*rk*dr; + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0, + fpair*vfrac[i],delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } + + // wait until all threads are done since we + // need to distribute the work differently. + sync_threads(); + +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int idelta = 1 + nlocal/comm->nthreads; + iifrom = thr->get_tid()*idelta; + iito = ((iifrom + idelta) > nlocal) ? nlocal : (iifrom + idelta); +#else + iifrom = 0; + iito = nlocal; +#endif + + // Compute the dilatation on each particle + if (iifrom < nlocal) + compute_dilatation_thr(iifrom, iito); + + // wait until all threads are done before communication + sync_threads(); + +#if defined(_OPENMP) +#pragma omp master +#endif + { // communicate dilatation (theta) of each particle + comm->forward_comm_pair(this); + // communicate weighted volume (wvolume) upon every reneighbor + if (neighbor->ago == 0) + comm->forward_comm_fix(modify->fix[ifix_peri]); + } + + sync_threads(); + + // Volume-dependent part of the energy + if (EFLAG) { + for (i = iifrom; i < iito; i++) { + itype = type[i]; + e_tally_thr(this, i, i, nlocal, NEWTON_PAIR, + 0.5 * bulkmodulus[itype][itype] * (theta[i] * theta[i]), 0.0, thr); + } + } + + // loop over my particles and their partners + // partner list contains all bond partners, so I-J appears twice + // if bond already broken, skip this partner + // first = true if this is first neighbor of particle i + + bool first; + double omega_minus, omega_plus; + + for (i = iifrom; i < iito; ++i) { + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + xtmp0 = x0[i][0]; + ytmp0 = x0[i][1]; + ztmp0 = x0[i][2]; + itype = type[i]; + jnum = npartner[i]; + first = true; + + for (jj = 0; jj < jnum; jj++) { + if (partner[i][jj] == 0) continue; + j = atom->map(partner[i][jj]); + + // check if lost a partner without first breaking bond + + if (j < 0) { + partner[i][jj] = 0; + continue; + } + + // compute force density, add to PD equation of motion + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + if (periodic) domain->minimum_image(delx,dely,delz); + rsq = delx*delx + dely*dely + delz*delz; + delx0 = xtmp0 - x0[j][0]; + dely0 = ytmp0 - x0[j][1]; + delz0 = ztmp0 - x0[j][2]; + if (periodic) domain->minimum_image(delx0,dely0,delz0); + jtype = type[j]; + delta = cut[itype][jtype]; + r = sqrt(rsq); + dr = r - r0[i][jj]; + + // avoid roundoff errors + + if (fabs(dr) < 2.2204e-016) dr = 0.0; + + // scale vfrac[j] if particle j near the horizon + + if ((fabs(r0[i][jj] - delta)) <= half_lc) + vfrac_scale = (-1.0/(2*half_lc))*(r0[i][jj]) + + (1.0 + ((delta - half_lc)/(2*half_lc) ) ); + else vfrac_scale = 1.0; + + omega_plus = influence_function(-1.0*delx0,-1.0*dely0,-1.0*delz0); + omega_minus = influence_function(delx0,dely0,delz0); + rk = ( (3.0 * bulkmodulus[itype][itype]) - + (5.0 * shearmodulus[itype][itype]) ) * vfrac[j] * vfrac_scale * + ( (omega_plus * theta[i] / wvolume[i]) + + ( omega_minus * theta[j] / wvolume[j] ) ) * r0[i][jj]; + rk += 15.0 * ( shearmodulus[itype][itype] * vfrac[j] * vfrac_scale ) * + ( (omega_plus / wvolume[i]) + (omega_minus / wvolume[j]) ) * dr; + + if (r > 0.0) fbond = -(rk/r); + else fbond = 0.0; + + f[i][0] += delx*fbond; + f[i][1] += dely*fbond; + f[i][2] += delz*fbond; + + // since I-J is double counted, set newton off & use 1/2 factor and I,I + + double deviatoric_extension = dr - (theta[i]* r0[i][jj] / 3.0); + if (EFLAG) evdwl = 0.5 * 15 * (shearmodulus[itype][itype]/wvolume[i]) * + omega_plus*(deviatoric_extension * deviatoric_extension) * + vfrac[j] * vfrac_scale; + if (EVFLAG) ev_tally_thr(this,i,i,nlocal,0,0.5*evdwl,0.0, + 0.5*fbond*vfrac[i],delx,dely,delz,thr); + + // find stretch in bond I-J and break if necessary + // use s0 from previous timestep + + stretch = dr / r0[i][jj]; + if (stretch > MIN(s0[i],s0[j])) partner[i][jj] = 0; + + // update s0 for next timestep + + if (first) + s0_new[i] = s00[itype][jtype] - (alpha[itype][jtype] * stretch); + else + s0_new[i] = MAX(s0_new[i],s00[itype][jtype] - (alpha[itype][jtype] * stretch)); + + first = false; + } + } + + sync_threads(); + + // store new s0 (in parallel) + if (iifrom < nlocal) + for (i = iifrom; i < iito; i++) s0[i] = s0_new[i]; +} + +/* ---------------------------------------------------------------------- */ + +void PairPeriLPSOMP::compute_dilatation_thr(int ifrom, int ito) +{ + int i,j,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz; + double xtmp0,ytmp0,ztmp0,delx0,dely0,delz0; + double rsq,r,dr; + double delta; + + double **x = atom->x; + int *type = atom->type; + double **x0 = atom->x0; + double *vfrac = atom->vfrac; + double vfrac_scale = 1.0; + + double lc = domain->lattice->xlattice; + double half_lc = 0.5*lc; + + double **r0 = ((FixPeriNeigh *) modify->fix[ifix_peri])->r0; + int **partner = ((FixPeriNeigh *) modify->fix[ifix_peri])->partner; + int *npartner = ((FixPeriNeigh *) modify->fix[ifix_peri])->npartner; + double *wvolume = ((FixPeriNeigh *) modify->fix[ifix_peri])->wvolume; + + int periodic = domain->xperiodic || domain->yperiodic || domain->zperiodic; + + // compute the dilatation theta + + for (i = ifrom; i < ito; i++) { + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + xtmp0 = x0[i][0]; + ytmp0 = x0[i][1]; + ztmp0 = x0[i][2]; + jnum = npartner[i]; + theta[i] = 0.0; + itype = type[i]; + + for (jj = 0; jj < jnum; jj++) { + + // if bond already broken, skip this partner + if (partner[i][jj] == 0) continue; + + // Look up local index of this partner particle + j = atom->map(partner[i][jj]); + + // Skip if particle is "lost" + if (j < 0) continue; + + // Compute force density and add to PD equation of motion + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + if (periodic) domain->minimum_image(delx,dely,delz); + rsq = delx*delx + dely*dely + delz*delz; + delx0 = xtmp0 - x0[j][0]; + dely0 = ytmp0 - x0[j][1]; + delz0 = ztmp0 - x0[j][2]; + if (periodic) domain->minimum_image(delx0,dely0,delz0); + + r = sqrt(rsq); + dr = r - r0[i][jj]; + if (fabs(dr) < 2.2204e-016) dr = 0.0; + + jtype = type[j]; + delta = cut[itype][jtype]; + + // scale vfrac[j] if particle j near the horizon + + if ((fabs(r0[i][jj] - delta)) <= half_lc) + vfrac_scale = (-1.0/(2*half_lc))*(r0[i][jj]) + + (1.0 + ((delta - half_lc)/(2*half_lc) ) ); + else vfrac_scale = 1.0; + + theta[i] += influence_function(delx0, dely0, delz0) * r0[i][jj] * dr * + vfrac[j] * vfrac_scale; + } + + // if wvolume[i] is zero, then particle i has no bonds + // therefore, the dilatation is set to + + if (wvolume[i] != 0.0) theta[i] = (3.0/wvolume[i]) * theta[i]; + else theta[i] = 0; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairPeriLPSOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairPeriLPS::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_peri_lps_omp.h b/src/USER-OMP/pair_peri_lps_omp.h new file mode 100644 index 0000000000..aedb51472e --- /dev/null +++ b/src/USER-OMP/pair_peri_lps_omp.h @@ -0,0 +1,52 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(peri/lps/omp,PairPeriLPSOMP) + +#else + +#ifndef LMP_PAIR_PERI_LPS_OMP_H +#define LMP_PAIR_PERI_LPS_OMP_H + +#include "pair_peri_lps.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairPeriLPSOMP : public PairPeriLPS, public ThrOMP { + + public: + PairPeriLPSOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + protected: + void compute_dilatation_thr(int ifrom, int ito); + + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_peri_pmb_omp.cpp b/src/USER-OMP/pair_peri_pmb_omp.cpp new file mode 100644 index 0000000000..404f1d5695 --- /dev/null +++ b/src/USER-OMP/pair_peri_pmb_omp.cpp @@ -0,0 +1,311 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "float.h" +#include "pair_peri_pmb_omp.h" +#include "fix.h" +#include "fix_peri_neigh.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "force.h" +#include "memory.h" +#include "lattice.h" +#include "modify.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairPeriPMBOMP::PairPeriPMBOMP(LAMMPS *lmp) : + PairPeriPMB(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairPeriPMBOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + // grow bond forces array if necessary + + if (atom->nmax > nmax) { + memory->destroy(s0_new); + nmax = atom->nmax; + memory->create(s0_new,nmax,"pair:s0_new"); + } + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairPeriPMBOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz; + double xtmp0,ytmp0,ztmp0,delx0,dely0,delz0,rsq0; + double rsq,r,dr,rk,evdwl,fpair,fbond; + int *ilist,*jlist,*numneigh,**firstneigh; + double d_ij,delta,stretch; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + double fxtmp,fytmp,fztmp; + + double *vfrac = atom->vfrac; + double *s0 = atom->s0; + double **x0 = atom->x0; + double **r0 = ((FixPeriNeigh *) modify->fix[ifix_peri])->r0; + int **partner = ((FixPeriNeigh *) modify->fix[ifix_peri])->partner; + int *npartner = ((FixPeriNeigh *) modify->fix[ifix_peri])->npartner; + + // lc = lattice constant + // init_style guarantees it's the same in x, y, and z + + double lc = domain->lattice->xlattice; + double half_lc = 0.5*lc; + double vfrac_scale = 1.0; + + // short-range forces + + int periodic = (domain->xperiodic || domain->yperiodic || domain->zperiodic); + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + // need minimg() for x0 difference since not ghosted + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + xtmp0 = x0[i][0]; + ytmp0 = x0[i][1]; + ztmp0 = x0[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + + rsq = delx*delx + dely*dely + delz*delz; + delx0 = xtmp0 - x0[j][0]; + dely0 = ytmp0 - x0[j][1]; + delz0 = ztmp0 - x0[j][2]; + if (periodic) domain->minimum_image(delx0,dely0,delz0); + rsq0 = delx0*delx0 + dely0*dely0 + delz0*delz0; + jtype = type[j]; + + r = sqrt(rsq); + + // short-range interaction distance based on initial particle position + // 0.9 and 1.35 are constants + + d_ij = MIN(0.9*sqrt(rsq0),1.35*lc); + + // short-range contact forces + // 15 is constant taken from the EMU Theory Manual + // Silling, 12 May 2005, p 18 + + if (r < d_ij) { + dr = r - d_ij; + + rk = (15.0 * kspring[itype][jtype] * vfrac[j]) * + (dr / cut[itype][jtype]); + if (r > 0.0) fpair = -(rk/r); + else fpair = 0.0; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) evdwl = 0.5*rk*dr; + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0, + fpair*vfrac[i],delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } + + // wait until all threads are done since we + // need to distribute the work differently. + sync_threads(); + +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int idelta = 1 + nlocal/comm->nthreads; + iifrom = thr->get_tid()*idelta; + iito = ((iifrom + idelta) > nlocal) ? nlocal : (iifrom + idelta); +#else + iifrom = 0; + iito = nlocal; +#endif + + // loop over my particles and their partners + // partner list contains all bond partners, so I-J appears twice + // if bond already broken, skip this partner + // first = true if this is first neighbor of particle i + + bool first; + + for (i = iifrom; i < iito; ++i) { + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jnum = npartner[i]; + s0_new[i] = DBL_MAX; + first = true; + + for (jj = 0; jj < jnum; jj++) { + if (partner[i][jj] == 0) continue; + j = atom->map(partner[i][jj]); + + // check if lost a partner without first breaking bond + + if (j < 0) { + partner[i][jj] = 0; + continue; + } + + // compute force density, add to PD equation of motion + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + if (periodic) domain->minimum_image(delx,dely,delz); + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + delta = cut[itype][jtype]; + r = sqrt(rsq); + dr = r - r0[i][jj]; + + // avoid roundoff errors + + if (fabs(dr) < 2.2204e-016) dr = 0.0; + + // scale vfrac[j] if particle j near the horizon + + if ((fabs(r0[i][jj] - delta)) <= half_lc) + vfrac_scale = (-1.0/(2*half_lc))*(r0[i][jj]) + + (1.0 + ((delta - half_lc)/(2*half_lc) ) ); + else vfrac_scale = 1.0; + + stretch = dr / r0[i][jj]; + rk = (kspring[itype][jtype] * vfrac[j]) * vfrac_scale * stretch; + if (r > 0.0) fbond = -(rk/r); + else fbond = 0.0; + + f[i][0] += delx*fbond; + f[i][1] += dely*fbond; + f[i][2] += delz*fbond; + + // since I-J is double counted, set newton off & use 1/2 factor and I,I + + if (EFLAG) evdwl = 0.5*rk*dr; + if (EVFLAG) + ev_tally_thr(this,i,i,nlocal,0,0.5*evdwl,0.0, + 0.5*fbond*vfrac[i],delx,dely,delz,thr); + + // find stretch in bond I-J and break if necessary + // use s0 from previous timestep + + if (stretch > MIN(s0[i],s0[j])) partner[i][jj] = 0; + + // update s0 for next timestep + + if (first) + s0_new[i] = s00[itype][jtype] - (alpha[itype][jtype] * stretch); + else + s0_new[i] = MAX(s0_new[i],s00[itype][jtype] - (alpha[itype][jtype] * stretch)); + + first = false; + } + } + + sync_threads(); + + // store new s0 (in parallel) + if (iifrom < nlocal) + for (i = iifrom; i < iito; i++) s0[i] = s0_new[i]; +} + +/* ---------------------------------------------------------------------- */ + +double PairPeriPMBOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairPeriPMB::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_peri_pmb_omp.h b/src/USER-OMP/pair_peri_pmb_omp.h new file mode 100644 index 0000000000..cb2d83250e --- /dev/null +++ b/src/USER-OMP/pair_peri_pmb_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(peri/pmb/omp,PairPeriPMBOMP) + +#else + +#ifndef LMP_PAIR_PERI_PMB_OMP_H +#define LMP_PAIR_PERI_PMB_OMP_H + +#include "pair_peri_pmb.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairPeriPMBOMP : public PairPeriPMB, public ThrOMP { + + public: + PairPeriPMBOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_rebo_omp.cpp b/src/USER-OMP/pair_rebo_omp.cpp new file mode 100644 index 0000000000..9a1ce8e11b --- /dev/null +++ b/src/USER-OMP/pair_rebo_omp.cpp @@ -0,0 +1,33 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "pair_rebo_omp.h" +#include "error.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairREBOOMP::PairREBOOMP(LAMMPS *lmp) : PairAIREBOOMP(lmp) {} + +/* ---------------------------------------------------------------------- + global settings +------------------------------------------------------------------------- */ + +void PairREBOOMP::settings(int narg, char **arg) +{ + if (narg != 0) error->all(FLERR,"Illegal pair_style command"); + + cutlj = 0.0; + ljflag = torflag = 0; +} diff --git a/src/USER-OMP/pair_rebo_omp.h b/src/USER-OMP/pair_rebo_omp.h new file mode 100644 index 0000000000..685c5d26a0 --- /dev/null +++ b/src/USER-OMP/pair_rebo_omp.h @@ -0,0 +1,36 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(rebo/omp,PairREBOOMP) + +#else + +#ifndef LMP_PAIR_REBO_OMP_H +#define LMP_PAIR_REBO_OMP_H + +#include "pair_airebo_omp.h" + +namespace LAMMPS_NS { + +class PairREBOOMP : public PairAIREBOOMP { + public: + PairREBOOMP(class LAMMPS *); + virtual void settings(int, char **); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_resquared_omp.cpp b/src/USER-OMP/pair_resquared_omp.cpp new file mode 100644 index 0000000000..de05202300 --- /dev/null +++ b/src/USER-OMP/pair_resquared_omp.cpp @@ -0,0 +1,215 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_resquared_omp.h" +#include "math_extra.h" +#include "atom.h" +#include "comm.h" +#include "atom_vec_ellipsoid.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairRESquaredOMP::PairRESquaredOMP(LAMMPS *lmp) : + PairRESquared(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairRESquaredOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairRESquaredOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double evdwl,one_eng,rsq,r2inv,r6inv,forcelj,factor_lj; + double fforce[3],ttor[3],rtor[3],r12[3]; + int *ilist,*jlist,*numneigh,**firstneigh; + RE2Vars wi,wj; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const * const tor = thr->get_torque(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; + + double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + itype = type[i]; + fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0; + + // not a LJ sphere + + if (lshape[itype] != 0.0) precompute_i(i,wi); + + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + // r12 = center to center vector + + r12[0] = x[j][0]-x[i][0]; + r12[1] = x[j][1]-x[i][1]; + r12[2] = x[j][2]-x[i][2]; + rsq = MathExtra::dot3(r12,r12); + jtype = type[j]; + + // compute if less than cutoff + + if (rsq < cutsq[itype][jtype]) { + fforce[0] = fforce[1] = fforce[2] = 0.0; + + switch (form[itype][jtype]) { + + case SPHERE_SPHERE: + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + forcelj *= -r2inv; + if (EFLAG) one_eng = + r6inv*(r6inv*lj3[itype][jtype]-lj4[itype][jtype]) - + offset[itype][jtype]; + fforce[0] = r12[0]*forcelj; + fforce[1] = r12[1]*forcelj; + fforce[2] = r12[2]*forcelj; + break; + + case SPHERE_ELLIPSE: + precompute_i(j,wj); + if (NEWTON_PAIR || j < nlocal) { + one_eng = resquared_lj(j,i,wj,r12,rsq,fforce,rtor,true); + tor[j][0] += rtor[0]*factor_lj; + tor[j][1] += rtor[1]*factor_lj; + tor[j][2] += rtor[2]*factor_lj; + } else + one_eng = resquared_lj(j,i,wj,r12,rsq,fforce,rtor,false); + break; + + case ELLIPSE_SPHERE: + one_eng = resquared_lj(i,j,wi,r12,rsq,fforce,ttor,true); + t1tmp += ttor[0]*factor_lj; + t2tmp += ttor[1]*factor_lj; + t3tmp += ttor[2]*factor_lj; + break; + + default: + precompute_i(j,wj); + one_eng = resquared_analytic(i,j,wi,wj,r12,rsq,fforce,ttor,rtor); + t1tmp += ttor[0]*factor_lj; + t2tmp += ttor[1]*factor_lj; + t3tmp += ttor[2]*factor_lj; + if (NEWTON_PAIR || j < nlocal) { + tor[j][0] += rtor[0]*factor_lj; + tor[j][1] += rtor[1]*factor_lj; + tor[j][2] += rtor[2]*factor_lj; + } + break; + } + + fforce[0] *= factor_lj; + fforce[1] *= factor_lj; + fforce[2] *= factor_lj; + fxtmp += fforce[0]; + fytmp += fforce[1]; + fztmp += fforce[2]; + + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= fforce[0]; + f[j][1] -= fforce[1]; + f[j][2] -= fforce[2]; + } + + if (EFLAG) evdwl = factor_lj*one_eng; + + if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fforce[0],fforce[1],fforce[2], + -r12[0],-r12[1],-r12[2],thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + tor[i][0] += t1tmp; + tor[i][1] += t2tmp; + tor[i][2] += t3tmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairRESquaredOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairRESquared::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_resquared_omp.h b/src/USER-OMP/pair_resquared_omp.h new file mode 100644 index 0000000000..cc2a59d74a --- /dev/null +++ b/src/USER-OMP/pair_resquared_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(resquared/omp,PairRESquaredOMP) + +#else + +#ifndef LMP_PAIR_RESQUARED_OMP_H +#define LMP_PAIR_RESQUARED_OMP_H + +#include "pair_resquared.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairRESquaredOMP : public PairRESquared, public ThrOMP { + + public: + PairRESquaredOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_soft_omp.cpp b/src/USER-OMP/pair_soft_omp.cpp new file mode 100644 index 0000000000..51a43be28c --- /dev/null +++ b/src/USER-OMP/pair_soft_omp.cpp @@ -0,0 +1,160 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_soft_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "math_const.h" + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 1.0e-4 + +/* ---------------------------------------------------------------------- */ + +PairSoftOMP::PairSoftOMP(LAMMPS *lmp) : + PairSoft(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairSoftOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairSoftOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double r,rsq,arg,factor_lj; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r = sqrt(rsq); + arg = MY_PI/cut[itype][jtype]; + if (r > SMALL) fpair = factor_lj * prefactor[itype][jtype] * + sin(arg*r) * arg/r; + else fpair = 0.0; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) + evdwl = factor_lj * prefactor[itype][jtype] * (1.0+cos(arg*r)); + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairSoftOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairSoft::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_soft_omp.h b/src/USER-OMP/pair_soft_omp.h new file mode 100644 index 0000000000..74958208ed --- /dev/null +++ b/src/USER-OMP/pair_soft_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(soft/omp,PairSoftOMP) + +#else + +#ifndef LMP_PAIR_SOFT_OMP_H +#define LMP_PAIR_SOFT_OMP_H + +#include "pair_soft.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairSoftOMP : public PairSoft, public ThrOMP { + + public: + PairSoftOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_sw_omp.cpp b/src/USER-OMP/pair_sw_omp.cpp new file mode 100644 index 0000000000..6bac2420e1 --- /dev/null +++ b/src/USER-OMP/pair_sw_omp.cpp @@ -0,0 +1,210 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_sw_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairSWOMP::PairSWOMP(LAMMPS *lmp) : + PairSW(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairSWOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + eval<1,1>(ifrom, ito, thr); + } else { + eval<1,0>(ifrom, ito, thr); + } + } else eval<0,0>(ifrom, ito, thr); + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairSWOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,k,ii,jj,kk,jnum,jnumm1,itag,jtag; + int itype,jtype,ktype,ijparam,ikparam,ijkparam; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,rsq1,rsq2; + double delr1[3],delr2[3],fj[3],fk[3]; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const tag = atom->tag; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + double fxtmp,fytmp,fztmp; + + // loop over full neighbor list of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + itag = tag[i]; + itype = map[type[i]]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + fxtmp = fytmp = fztmp = 0.0; + + // two-body interactions, skip half of them + + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtag = tag[j]; + + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (x[j][2] < ztmp) continue; + if (x[j][2] == ztmp && x[j][1] < ytmp) continue; + if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue; + } + + jtype = map[type[j]]; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + ijparam = elem2param[itype][jtype][jtype]; + if (rsq > params[ijparam].cutsq) continue; + + twobody(¶ms[ijparam],rsq,fpair,EFLAG,evdwl); + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + + jnumm1 = jnum - 1; + + for (jj = 0; jj < jnumm1; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtype = map[type[j]]; + ijparam = elem2param[itype][jtype][jtype]; + delr1[0] = x[j][0] - xtmp; + delr1[1] = x[j][1] - ytmp; + delr1[2] = x[j][2] - ztmp; + rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2]; + if (rsq1 > params[ijparam].cutsq) continue; + + double fjxtmp,fjytmp,fjztmp; + fjxtmp = fjytmp = fjztmp = 0.0; + + for (kk = jj+1; kk < jnum; kk++) { + k = jlist[kk]; + k &= NEIGHMASK; + ktype = map[type[k]]; + ikparam = elem2param[itype][ktype][ktype]; + ijkparam = elem2param[itype][jtype][ktype]; + + delr2[0] = x[k][0] - xtmp; + delr2[1] = x[k][1] - ytmp; + delr2[2] = x[k][2] - ztmp; + rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2]; + if (rsq2 > params[ikparam].cutsq) continue; + + threebody(¶ms[ijparam],¶ms[ikparam],¶ms[ijkparam], + rsq1,rsq2,delr1,delr2,fj,fk,EFLAG,evdwl); + + fxtmp -= fj[0] + fk[0]; + fytmp -= fj[1] + fk[1]; + fztmp -= fj[2] + fk[2]; + fjxtmp += fj[0]; + fjytmp += fj[1]; + fjztmp += fj[2]; + f[k][0] += fk[0]; + f[k][1] += fk[1]; + f[k][2] += fk[2]; + + if (EVFLAG) ev_tally3_thr(this,i,j,k,evdwl,0.0,fj,fk,delr1,delr2,thr); + } + f[j][0] += fjxtmp; + f[j][1] += fjytmp; + f[j][2] += fjztmp; + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairSWOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairSW::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_sw_omp.h b/src/USER-OMP/pair_sw_omp.h new file mode 100644 index 0000000000..a99edba46e --- /dev/null +++ b/src/USER-OMP/pair_sw_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(sw/omp,PairSWOMP) + +#else + +#ifndef LMP_PAIR_SW_OMP_H +#define LMP_PAIR_SW_OMP_H + +#include "pair_sw.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairSWOMP : public PairSW, public ThrOMP { + + public: + PairSWOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_table_omp.cpp b/src/USER-OMP/pair_table_omp.cpp new file mode 100644 index 0000000000..ee2e447a56 --- /dev/null +++ b/src/USER-OMP/pair_table_omp.cpp @@ -0,0 +1,212 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_table_omp.h" +#include "atom.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairTableOMP::PairTableOMP(LAMMPS *lmp) : + PairTable(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairTableOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairTableOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype,itable; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,factor_lj,fraction,value,a,b; + int *ilist,*jlist,*numneigh,**firstneigh; + Table *tb; + + union_int_float_t rsq_lookup; + int tlm1 = tablength - 1; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const int tid = thr->get_tid(); + const double * const special_lj = force->special_lj; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + tb = &tables[tabindex[itype][jtype]]; + + if (check_error_thr((rsq < tb->innersq),tid, + FLERR,"Pair distance < table inner cutoff")) + return; + + if (tabstyle == LOOKUP) { + itable = static_cast ((rsq - tb->innersq) * tb->invdelta); + + if (check_error_thr((itable >= tlm1),tid, + FLERR,"Pair distance > table outer cutoff")) + return; + + fpair = factor_lj * tb->f[itable]; + } else if (tabstyle == LINEAR) { + itable = static_cast ((rsq - tb->innersq) * tb->invdelta); + + if (check_error_thr((itable >= tlm1),tid, + FLERR,"Pair distance > table outer cutoff")) + return; + + fraction = (rsq - tb->rsq[itable]) * tb->invdelta; + value = tb->f[itable] + fraction*tb->df[itable]; + fpair = factor_lj * value; + } else if (tabstyle == SPLINE) { + itable = static_cast ((rsq - tb->innersq) * tb->invdelta); + + if (check_error_thr((itable >= tlm1),tid, + FLERR,"Pair distance > table outer cutoff")) + return; + + b = (rsq - tb->rsq[itable]) * tb->invdelta; + a = 1.0 - b; + value = a * tb->f[itable] + b * tb->f[itable+1] + + ((a*a*a-a)*tb->f2[itable] + (b*b*b-b)*tb->f2[itable+1]) * + tb->deltasq6; + fpair = factor_lj * value; + } else { + rsq_lookup.f = rsq; + itable = rsq_lookup.i & tb->nmask; + itable >>= tb->nshiftbits; + fraction = (rsq_lookup.f - tb->rsq[itable]) * tb->drsq[itable]; + value = tb->f[itable] + fraction*tb->df[itable]; + fpair = factor_lj * value; + } + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + if (tabstyle == LOOKUP) + evdwl = tb->e[itable]; + else if (tabstyle == LINEAR || tabstyle == BITMAP) + evdwl = tb->e[itable] + fraction*tb->de[itable]; + else + evdwl = a * tb->e[itable] + b * tb->e[itable+1] + + ((a*a*a-a)*tb->e2[itable] + (b*b*b-b)*tb->e2[itable+1]) * + tb->deltasq6; + evdwl *= factor_lj; + } + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } + + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairTableOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairTable::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_table_omp.h b/src/USER-OMP/pair_table_omp.h new file mode 100644 index 0000000000..dbc0b8e5b9 --- /dev/null +++ b/src/USER-OMP/pair_table_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(table/omp,PairTableOMP) + +#else + +#ifndef LMP_PAIR_TABLE_OMP_H +#define LMP_PAIR_TABLE_OMP_H + +#include "pair_table.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairTableOMP : public PairTable, public ThrOMP { + + public: + PairTableOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_tersoff_omp.cpp b/src/USER-OMP/pair_tersoff_omp.cpp new file mode 100644 index 0000000000..573ed92ba3 --- /dev/null +++ b/src/USER-OMP/pair_tersoff_omp.cpp @@ -0,0 +1,250 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_tersoff_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairTersoffOMP::PairTersoffOMP(LAMMPS *lmp) : + PairTersoff(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairTersoffOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = vflag_atom = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (vflag_atom) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (vflag_atom) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else eval<0,0,0>(ifrom, ito, thr); + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairTersoffOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,k,ii,jj,kk,jnum; + int itag,jtag,itype,jtype,ktype,iparam_ij,iparam_ijk; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,rsq1,rsq2; + double delr1[3],delr2[3],fi[3],fj[3],fk[3]; + double zeta_ij,prefactor; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const tag = atom->tag; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + double fxtmp,fytmp,fztmp; + + // loop over full neighbor list of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + itag = tag[i]; + itype = map[type[i]]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + fxtmp = fytmp = fztmp = 0.0; + + // two-body interactions, skip half of them + + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtag = tag[j]; + + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (x[j][2] < ztmp) continue; + if (x[j][2] == ztmp && x[j][1] < ytmp) continue; + if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue; + } + + jtype = map[type[j]]; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + iparam_ij = elem2param[itype][jtype][jtype]; + if (rsq > params[iparam_ij].cutsq) continue; + + repulsive(¶ms[iparam_ij],rsq,fpair,EFLAG,evdwl); + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + + // three-body interactions + // skip immediately if I-J is not within cutoff + double fjxtmp,fjytmp,fjztmp; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtype = map[type[j]]; + iparam_ij = elem2param[itype][jtype][jtype]; + + delr1[0] = x[j][0] - xtmp; + delr1[1] = x[j][1] - ytmp; + delr1[2] = x[j][2] - ztmp; + rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2]; + if (rsq1 > params[iparam_ij].cutsq) continue; + + // accumulate bondorder zeta for each i-j interaction via loop over k + + fjxtmp = fjytmp = fjztmp = 0.0; + zeta_ij = 0.0; + + for (kk = 0; kk < jnum; kk++) { + if (jj == kk) continue; + k = jlist[kk]; + k &= NEIGHMASK; + ktype = map[type[k]]; + iparam_ijk = elem2param[itype][jtype][ktype]; + + delr2[0] = x[k][0] - xtmp; + delr2[1] = x[k][1] - ytmp; + delr2[2] = x[k][2] - ztmp; + rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2]; + if (rsq2 > params[iparam_ijk].cutsq) continue; + + zeta_ij += zeta(¶ms[iparam_ijk],rsq1,rsq2,delr1,delr2); + } + + // pairwise force due to zeta + + force_zeta(¶ms[iparam_ij],rsq1,zeta_ij,fpair,prefactor,EFLAG,evdwl); + + fxtmp += delr1[0]*fpair; + fytmp += delr1[1]*fpair; + fztmp += delr1[2]*fpair; + fjxtmp -= delr1[0]*fpair; + fjytmp -= delr1[1]*fpair; + fjztmp -= delr1[2]*fpair; + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,evdwl,0.0, + -fpair,-delr1[0],-delr1[1],-delr1[2],thr); + + // attractive term via loop over k + + for (kk = 0; kk < jnum; kk++) { + if (jj == kk) continue; + k = jlist[kk]; + k &= NEIGHMASK; + ktype = map[type[k]]; + iparam_ijk = elem2param[itype][jtype][ktype]; + + delr2[0] = x[k][0] - xtmp; + delr2[1] = x[k][1] - ytmp; + delr2[2] = x[k][2] - ztmp; + rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2]; + if (rsq2 > params[iparam_ijk].cutsq) continue; + + attractive(¶ms[iparam_ijk],prefactor, + rsq1,rsq2,delr1,delr2,fi,fj,fk); + + fxtmp += fi[0]; + fytmp += fi[1]; + fztmp += fi[2]; + fjxtmp += fj[0]; + fjytmp += fj[1]; + fjztmp += fj[2]; + f[k][0] += fk[0]; + f[k][1] += fk[1]; + f[k][2] += fk[2]; + + if (VFLAG_ATOM) v_tally3_thr(i,j,k,fj,fk,delr1,delr2,thr); + } + f[j][0] += fjxtmp; + f[j][1] += fjytmp; + f[j][2] += fjztmp; + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairTersoffOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairTersoff::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_tersoff_omp.h b/src/USER-OMP/pair_tersoff_omp.h new file mode 100644 index 0000000000..97c20548af --- /dev/null +++ b/src/USER-OMP/pair_tersoff_omp.h @@ -0,0 +1,43 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(tersoff/omp,PairTersoffOMP) + +#else + +#ifndef LMP_PAIR_TERSOFF_OMP_H +#define LMP_PAIR_TERSOFF_OMP_H + +#include "pair_tersoff.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairTersoffOMP : public PairTersoff, public ThrOMP { + + public: + PairTersoffOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_tersoff_table_omp.cpp b/src/USER-OMP/pair_tersoff_table_omp.cpp new file mode 100644 index 0000000000..4560975b11 --- /dev/null +++ b/src/USER-OMP/pair_tersoff_table_omp.cpp @@ -0,0 +1,516 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_tersoff_table_omp.h" +#include "atom.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define GRIDSTART 0.1 +#define GRIDDENSITY_FCUTOFF 5000 +#define GRIDDENSITY_EXP 12000 +#define GRIDDENSITY_GTETA 12000 +#define GRIDDENSITY_BIJ 7500 + +// max number of interaction per atom for environment potential + +#define leadingDimensionInteractionList 64 + +/* ---------------------------------------------------------------------- */ + +PairTersoffTableOMP::PairTersoffTableOMP(LAMMPS *lmp) : + PairTersoffTable(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairTersoffTableOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = vflag_atom = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) + if (vflag_atom) eval<1,1>(ifrom, ito, thr); + else eval<1,0>(ifrom, ito, thr); + else eval<0,0>(ifrom, ito, thr); + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairTersoffTableOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,k,ii,inum,jnum; + int itype,jtype,ktype,ijparam,ikparam,ijkparam; + double xtmp,ytmp,ztmp; + double fxtmp,fytmp,fztmp; + int *ilist,*jlist,*numneigh,**firstneigh; + + int interpolIDX; + double r_ik_x, r_ik_y, r_ik_z; + double directorCos_ij_x, directorCos_ij_y, directorCos_ij_z, directorCos_ik_x, directorCos_ik_y, directorCos_ik_z; + double invR_ij, invR_ik, cosTeta; + double repulsivePotential, attractivePotential; + double exponentRepulsivePotential, exponentAttractivePotential,interpolTMP,interpolDeltaX,interpolY1; + double interpolY2, cutoffFunctionIJ, attractiveExponential, repulsiveExponential, cutoffFunctionDerivedIJ,zeta; + double gtetaFunctionIJK,gtetaFunctionDerivedIJK,cutoffFunctionIK; + double cutoffFunctionDerivedIK,factor_force3_ij,factor_1_force3_ik; + double factor_2_force3_ik,betaZetaPowerIJK,betaZetaPowerDerivedIJK,factor_force_tot; + double factor_force_ij; + double gtetaFunctionDerived_temp,gtetaFunction_temp; + + double evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const int tid = thr->get_tid(); + + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over full neighbor list of my atoms + for (ii = iifrom; ii < iito; ii++) { + + i = ilist[ii]; + itype = map[type[i]]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + fxtmp = fytmp = fztmp = 0.0; + + jlist = firstneigh[i]; + jnum = numneigh[i]; + + if (check_error_thr((jnum > leadingDimensionInteractionList), tid, + FLERR,"Too many neighbors for interaction list.\n" + "Check your system or increase 'leadingDimension" + "InteractionList'")) + return; + + // Pre-calculate gteta and cutoff function + for (int neighbor_j = 0; neighbor_j < jnum; neighbor_j++) { + + double dr_ij[3], r_ij; + + j = jlist[neighbor_j]; + j &= NEIGHMASK; + + dr_ij[0] = xtmp - x[j][0]; + dr_ij[1] = ytmp - x[j][1]; + dr_ij[2] = ztmp - x[j][2]; + r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2]; + + jtype = map[type[j]]; + ijparam = elem2param[itype][jtype][jtype]; + + if (r_ij > params[ijparam].cutsq) continue; + + r_ij = sqrt(r_ij); + + invR_ij = 1.0 / r_ij; + + directorCos_ij_x = invR_ij * dr_ij[0]; + directorCos_ij_y = invR_ij * dr_ij[1]; + directorCos_ij_z = invR_ij * dr_ij[2]; + + // preCutoffFunction + interpolDeltaX = r_ij - GRIDSTART; + interpolTMP = (interpolDeltaX * GRIDDENSITY_FCUTOFF); + interpolIDX = (int) interpolTMP; + interpolY1 = cutoffFunction[itype][jtype][interpolIDX]; + interpolY2 = cutoffFunction[itype][jtype][interpolIDX+1]; + preCutoffFunction[neighbor_j] = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX); + // preCutoffFunctionDerived + interpolY1 = cutoffFunctionDerived[itype][jtype][interpolIDX]; + interpolY2 = cutoffFunctionDerived[itype][jtype][interpolIDX+1]; + preCutoffFunctionDerived[neighbor_j] = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX); + + + for (int neighbor_k = neighbor_j + 1; neighbor_k < jnum; neighbor_k++) { + double dr_ik[3], r_ik; + + k = jlist[neighbor_k]; + k &= NEIGHMASK; + ktype = map[type[k]]; + ikparam = elem2param[itype][ktype][ktype]; + ijkparam = elem2param[itype][jtype][ktype]; + + dr_ik[0] = xtmp -x[k][0]; + dr_ik[1] = ytmp -x[k][1]; + dr_ik[2] = ztmp -x[k][2]; + r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2]; + + if (r_ik > params[ikparam].cutsq) continue; + + r_ik = sqrt(r_ik); + + invR_ik = 1.0 / r_ik; + + directorCos_ik_x = invR_ik * dr_ik[0]; + directorCos_ik_y = invR_ik * dr_ik[1]; + directorCos_ik_z = invR_ik * dr_ik[2]; + + cosTeta = directorCos_ij_x * directorCos_ik_x + directorCos_ij_y * directorCos_ik_y + directorCos_ij_z * directorCos_ik_z; + + // preGtetaFunction + interpolDeltaX=cosTeta+1.0; + interpolTMP = (interpolDeltaX * GRIDDENSITY_GTETA); + interpolIDX = (int) interpolTMP; + interpolY1 = gtetaFunction[itype][interpolIDX]; + interpolY2 = gtetaFunction[itype][interpolIDX+1]; + gtetaFunction_temp = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX); + // preGtetaFunctionDerived + interpolY1 = gtetaFunctionDerived[itype][interpolIDX]; + interpolY2 = gtetaFunctionDerived[itype][interpolIDX+1]; + gtetaFunctionDerived_temp = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX); + + preGtetaFunction[neighbor_j][neighbor_k]=params[ijkparam].gamma*gtetaFunction_temp; + preGtetaFunctionDerived[neighbor_j][neighbor_k]=params[ijkparam].gamma*gtetaFunctionDerived_temp; + preGtetaFunction[neighbor_k][neighbor_j]=params[ijkparam].gamma*gtetaFunction_temp; + preGtetaFunctionDerived[neighbor_k][neighbor_j]=params[ijkparam].gamma*gtetaFunctionDerived_temp; + + } // loop on K + + } // loop on J + + + // loop over neighbours of atom i + for (int neighbor_j = 0; neighbor_j < jnum; neighbor_j++) { + + double dr_ij[3], r_ij, f_ij[3]; + + j = jlist[neighbor_j]; + j &= NEIGHMASK; + + dr_ij[0] = xtmp - x[j][0]; + dr_ij[1] = ytmp - x[j][1]; + dr_ij[2] = ztmp - x[j][2]; + r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2]; + + jtype = map[type[j]]; + ijparam = elem2param[itype][jtype][jtype]; + + if (r_ij > params[ijparam].cutsq) continue; + + r_ij = sqrt(r_ij); + invR_ij = 1.0 / r_ij; + + directorCos_ij_x = invR_ij * dr_ij[0]; + directorCos_ij_y = invR_ij * dr_ij[1]; + directorCos_ij_z = invR_ij * dr_ij[2]; + + exponentRepulsivePotential = params[ijparam].lam1 * r_ij; + exponentAttractivePotential = params[ijparam].lam2 * r_ij; + + // repulsiveExponential + interpolDeltaX = exponentRepulsivePotential - minArgumentExponential; + interpolTMP = (interpolDeltaX * GRIDDENSITY_EXP); + interpolIDX = (int) interpolTMP; + interpolY1 = exponential[interpolIDX]; + interpolY2 = exponential[interpolIDX+1]; + repulsiveExponential = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX); + // attractiveExponential + interpolDeltaX = exponentAttractivePotential - minArgumentExponential; + interpolTMP = (interpolDeltaX * GRIDDENSITY_EXP); + interpolIDX = (int) interpolTMP; + interpolY1 = exponential[interpolIDX]; + interpolY2 = exponential[interpolIDX+1]; + attractiveExponential = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX); + + repulsivePotential = params[ijparam].biga * repulsiveExponential; + attractivePotential = -params[ijparam].bigb * attractiveExponential; + + cutoffFunctionIJ = preCutoffFunction[neighbor_j]; + cutoffFunctionDerivedIJ = preCutoffFunctionDerived[neighbor_j]; + + zeta = 0.0; + + // first loop over neighbours of atom i except j - part 1/2 + for (int neighbor_k = 0; neighbor_k < neighbor_j; neighbor_k++) { + double dr_ik[3], r_ik; + + k = jlist[neighbor_k]; + k &= NEIGHMASK; + ktype = map[type[k]]; + ikparam = elem2param[itype][ktype][ktype]; + ijkparam = elem2param[itype][jtype][ktype]; + + dr_ik[0] = xtmp -x[k][0]; + dr_ik[1] = ytmp -x[k][1]; + dr_ik[2] = ztmp -x[k][2]; + r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2]; + + if (r_ik > params[ikparam].cutsq) continue; + + r_ik = sqrt(r_ik); + + invR_ik = 1.0 / r_ik; + + directorCos_ik_x = invR_ik * r_ik_x; + directorCos_ik_y = invR_ik * r_ik_y; + directorCos_ik_z = invR_ik * r_ik_z; + + gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k]; + + cutoffFunctionIK = preCutoffFunction[neighbor_k]; + + zeta += cutoffFunctionIK * gtetaFunctionIJK; + + } + + // first loop over neighbours of atom i except j - part 2/2 + for (int neighbor_k = neighbor_j+1; neighbor_k < jnum; neighbor_k++) { + double dr_ik[3], r_ik; + + k = jlist[neighbor_k]; + k &= NEIGHMASK; + ktype = map[type[k]]; + ikparam = elem2param[itype][ktype][ktype]; + ijkparam = elem2param[itype][jtype][ktype]; + + dr_ik[0] = xtmp -x[k][0]; + dr_ik[1] = ytmp -x[k][1]; + dr_ik[2] = ztmp -x[k][2]; + r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2]; + + if (r_ik > params[ikparam].cutsq) continue; + + r_ik = sqrt(r_ik); + invR_ik = 1.0 / r_ik; + + directorCos_ik_x = invR_ik * dr_ik[0]; + directorCos_ik_y = invR_ik * dr_ik[1]; + directorCos_ik_z = invR_ik * dr_ik[2]; + + gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k]; + + cutoffFunctionIK = preCutoffFunction[neighbor_k]; + + zeta += cutoffFunctionIK * gtetaFunctionIJK; + } + + // betaZetaPowerIJK + interpolDeltaX= params[ijparam].beta * zeta; + interpolTMP = (interpolDeltaX * GRIDDENSITY_BIJ); + interpolIDX = (int) interpolTMP; + interpolY1 = betaZetaPower[itype][interpolIDX]; + interpolY2 = betaZetaPower[itype][interpolIDX+1]; + betaZetaPowerIJK = (interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX)); + // betaZetaPowerDerivedIJK + interpolY1 = betaZetaPowerDerived[itype][interpolIDX]; + interpolY2 = betaZetaPowerDerived[itype][interpolIDX+1]; + betaZetaPowerDerivedIJK = params[ijparam].beta*(interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX)); + + // Forces and virial + factor_force_ij = 0.5*cutoffFunctionDerivedIJ*(repulsivePotential + attractivePotential * betaZetaPowerIJK)+0.5*cutoffFunctionIJ*(-repulsivePotential*params[ijparam].lam1-betaZetaPowerIJK*attractivePotential*params[ijparam].lam2); + + f_ij[0] = factor_force_ij * directorCos_ij_x; + f_ij[1] = factor_force_ij * directorCos_ij_y; + f_ij[2] = factor_force_ij * directorCos_ij_z; + + f[j][0] += f_ij[0]; + f[j][1] += f_ij[1]; + f[j][2] += f_ij[2]; + + fxtmp -= f_ij[0]; + fytmp -= f_ij[1]; + fztmp -= f_ij[2]; + + // potential energy + evdwl = cutoffFunctionIJ * repulsivePotential + + cutoffFunctionIJ * attractivePotential * betaZetaPowerIJK; + + if (EVFLAG) ev_tally_thr(this,i, j, nlocal, /* newton_pair */ 1, 0.5 * evdwl, 0.0, + -factor_force_ij*invR_ij, dr_ij[0], dr_ij[1], dr_ij[2],thr); + + factor_force_tot= 0.5*cutoffFunctionIJ*attractivePotential*betaZetaPowerDerivedIJK; + + // second loop over neighbours of atom i except j, forces and virial only - part 1/2 + for (int neighbor_k = 0; neighbor_k < neighbor_j; neighbor_k++) { + double dr_ik[3], r_ik, f_ik[3]; + + k = jlist[neighbor_k]; + k &= NEIGHMASK; + ktype = map[type[k]]; + ikparam = elem2param[itype][ktype][ktype]; + ijkparam = elem2param[itype][jtype][ktype]; + + dr_ik[0] = xtmp -x[k][0]; + dr_ik[1] = ytmp -x[k][1]; + dr_ik[2] = ztmp -x[k][2]; + r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2]; + + if (r_ik > params[ikparam].cutsq) continue; + + r_ik = sqrt(r_ik); + invR_ik = 1.0 / r_ik; + + directorCos_ik_x = invR_ik * dr_ik[0]; + directorCos_ik_y = invR_ik * dr_ik[1]; + directorCos_ik_z = invR_ik * dr_ik[2]; + + cosTeta = directorCos_ij_x * directorCos_ik_x + directorCos_ij_y * directorCos_ik_y + + directorCos_ij_z * directorCos_ik_z; + + gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k]; + + gtetaFunctionDerivedIJK = preGtetaFunctionDerived[neighbor_j][neighbor_k]; + + cutoffFunctionIK = preCutoffFunction[neighbor_k]; + + cutoffFunctionDerivedIK = preCutoffFunctionDerived[neighbor_k]; + + factor_force3_ij= cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ij *factor_force_tot; + + f_ij[0] = factor_force3_ij * (directorCos_ij_x*cosTeta - directorCos_ik_x); + f_ij[1] = factor_force3_ij * (directorCos_ij_y*cosTeta - directorCos_ik_y); + f_ij[2] = factor_force3_ij * (directorCos_ij_z*cosTeta - directorCos_ik_z); + + factor_1_force3_ik = (cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ik)*factor_force_tot; + factor_2_force3_ik = -(cutoffFunctionDerivedIK * gtetaFunctionIJK)*factor_force_tot; + + f_ik[0] = factor_1_force3_ik * (directorCos_ik_x*cosTeta - directorCos_ij_x) + + factor_2_force3_ik * directorCos_ik_x; + f_ik[1] = factor_1_force3_ik * (directorCos_ik_y*cosTeta - directorCos_ij_y) + + factor_2_force3_ik * directorCos_ik_y; + f_ik[2] = factor_1_force3_ik * (directorCos_ik_z*cosTeta - directorCos_ij_z) + + factor_2_force3_ik * directorCos_ik_z; + + f[j][0] -= f_ij[0]; + f[j][1] -= f_ij[1]; + f[j][2] -= f_ij[2]; + + f[k][0] -= f_ik[0]; + f[k][1] -= f_ik[1]; + f[k][2] -= f_ik[2]; + + fxtmp += f_ij[0] + f_ik[0]; + fytmp += f_ij[1] + f_ik[1]; + fztmp += f_ij[2] + f_ik[2]; + + if (VFLAG_ATOM) v_tally3_thr(i,j,k,f_ij,f_ik,dr_ij,dr_ik,thr); + } + + // second loop over neighbours of atom i except j, forces and virial only - part 2/2 + for (int neighbor_k = neighbor_j+1; neighbor_k < jnum; neighbor_k++) { + double dr_ik[3], r_ik, f_ik[3]; + + k = jlist[neighbor_k]; + k &= NEIGHMASK; + ktype = map[type[k]]; + ikparam = elem2param[itype][ktype][ktype]; + ijkparam = elem2param[itype][jtype][ktype]; + + dr_ik[0] = xtmp -x[k][0]; + dr_ik[1] = ytmp -x[k][1]; + dr_ik[2] = ztmp -x[k][2]; + r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2]; + + if (r_ik > params[ikparam].cutsq) continue; + + r_ik = sqrt(r_ik); + invR_ik = 1.0 / r_ik; + + directorCos_ik_x = invR_ik * dr_ik[0]; + directorCos_ik_y = invR_ik * dr_ik[1]; + directorCos_ik_z = invR_ik * dr_ik[2]; + + cosTeta = directorCos_ij_x * directorCos_ik_x + directorCos_ij_y * directorCos_ik_y + + directorCos_ij_z * directorCos_ik_z; + + gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k]; + + gtetaFunctionDerivedIJK = preGtetaFunctionDerived[neighbor_j][neighbor_k]; + + cutoffFunctionIK = preCutoffFunction[neighbor_k]; + + cutoffFunctionDerivedIK = preCutoffFunctionDerived[neighbor_k]; + + factor_force3_ij= cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ij *factor_force_tot; + + f_ij[0] = factor_force3_ij * (directorCos_ij_x*cosTeta - directorCos_ik_x); + f_ij[1] = factor_force3_ij * (directorCos_ij_y*cosTeta - directorCos_ik_y); + f_ij[2] = factor_force3_ij * (directorCos_ij_z*cosTeta - directorCos_ik_z); + + factor_1_force3_ik = (cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ik)*factor_force_tot; + factor_2_force3_ik = -(cutoffFunctionDerivedIK * gtetaFunctionIJK)*factor_force_tot; + + f_ik[0] = factor_1_force3_ik * (directorCos_ik_x*cosTeta - directorCos_ij_x) + + factor_2_force3_ik * directorCos_ik_x; + f_ik[1] = factor_1_force3_ik * (directorCos_ik_y*cosTeta - directorCos_ij_y) + + factor_2_force3_ik * directorCos_ik_y; + f_ik[2] = factor_1_force3_ik * (directorCos_ik_z*cosTeta - directorCos_ij_z) + + factor_2_force3_ik * directorCos_ik_z; + + f[j][0] -= f_ij[0]; + f[j][1] -= f_ij[1]; + f[j][2] -= f_ij[2]; + + f[k][0] -= f_ik[0]; + f[k][1] -= f_ik[1]; + f[k][2] -= f_ik[2]; + + fxtmp += f_ij[0] + f_ik[0]; + fytmp += f_ij[1] + f_ik[1]; + fztmp += f_ij[2] + f_ik[2]; + + if (VFLAG_ATOM) v_tally3_thr(i,j,k,f_ij,f_ik,dr_ij,dr_ik,thr); + + } + } // loop on J + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } // loop on I +} + +/* ---------------------------------------------------------------------- */ + +double PairTersoffTableOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairTersoffTable::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_tersoff_table_omp.h b/src/USER-OMP/pair_tersoff_table_omp.h new file mode 100644 index 0000000000..ffde58215d --- /dev/null +++ b/src/USER-OMP/pair_tersoff_table_omp.h @@ -0,0 +1,43 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(tersoff/table/omp,PairTersoffTableOMP) + +#else + +#ifndef LMP_PAIR_TERSOFF_TABLE_OMP_H +#define LMP_PAIR_TERSOFF_TABLE_OMP_H + +#include "pair_tersoff_table.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairTersoffTableOMP : public PairTersoffTable, public ThrOMP { + + public: + PairTersoffTableOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_tersoff_zbl_omp.cpp b/src/USER-OMP/pair_tersoff_zbl_omp.cpp new file mode 100644 index 0000000000..f143312e0a --- /dev/null +++ b/src/USER-OMP/pair_tersoff_zbl_omp.cpp @@ -0,0 +1,295 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Aidan Thompson (SNL) - original Tersoff implementation + David Farrell (NWU) - ZBL addition +------------------------------------------------------------------------- */ + +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "pair_tersoff_zbl_omp.h" +#include "atom.h" +#include "update.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "force.h" +#include "comm.h" +#include "memory.h" +#include "error.h" + +#include "math_const.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define MAXLINE 1024 +#define DELTA 4 + +/* ---------------------------------------------------------------------- + Fermi-like smoothing function +------------------------------------------------------------------------- */ + +static double F_fermi(const double r, const double expsc, const double cut) +{ + return 1.0 / (1.0 + exp(-expsc*(r-cut))); +} + +/* ---------------------------------------------------------------------- + Fermi-like smoothing function derivative with respect to r +------------------------------------------------------------------------- */ + +static double F_fermi_d(const double r, const double expsc, const double cut) +{ + return expsc*exp(-expsc*(r-cut)) / pow(1.0 + exp(-expsc*(r-cut)),2.0); +} + +/* ---------------------------------------------------------------------- */ + +PairTersoffZBLOMP::PairTersoffZBLOMP(LAMMPS *lmp) : PairTersoffOMP(lmp) +{ + // hard-wired constants in metal or real units + // a0 = Bohr radius + // epsilon0 = permittivity of vacuum = q / energy-distance units + // e = unit charge + // 1 Kcal/mole = 0.043365121 eV + + if (strcmp(update->unit_style,"metal") == 0) { + global_a_0 = 0.529; + global_epsilon_0 = 0.00552635; + global_e = 1.0; + } else if (strcmp(update->unit_style,"real") == 0) { + global_a_0 = 0.529; + global_epsilon_0 = 0.00552635 * 0.043365121; + global_e = 1.0; + } else error->all(FLERR,"Pair tersoff/zbl requires metal or real units"); +} + +/* ---------------------------------------------------------------------- */ + +void PairTersoffZBLOMP::read_file(char *file) +{ + int params_per_line = 21; + char **words = new char*[params_per_line+1]; + + memory->sfree(params); + params = NULL; + nparams = 0; + + // open file on proc 0 + + FILE *fp; + if (comm->me == 0) { + fp = fopen(file,"r"); + if (fp == NULL) { + char str[128]; + sprintf(str,"Cannot open Tersoff potential file %s",file); + error->one(FLERR,str); + } + } + + // read each line out of file, skipping blank lines or leading '#' + // store line of params if all 3 element tags are in element list + + int n,nwords,ielement,jelement,kelement; + char line[MAXLINE],*ptr; + int eof = 0; + + while (1) { + if (comm->me == 0) { + ptr = fgets(line,MAXLINE,fp); + if (ptr == NULL) { + eof = 1; + fclose(fp); + } else n = strlen(line) + 1; + } + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) break; + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + + // strip comment, skip line if blank + + if (ptr = strchr(line,'#')) *ptr = '\0'; + nwords = atom->count_words(line); + if (nwords == 0) continue; + + // concatenate additional lines until have params_per_line words + + while (nwords < params_per_line) { + n = strlen(line); + if (comm->me == 0) { + ptr = fgets(&line[n],MAXLINE-n,fp); + if (ptr == NULL) { + eof = 1; + fclose(fp); + } else n = strlen(line) + 1; + } + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) break; + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + if (ptr = strchr(line,'#')) *ptr = '\0'; + nwords = atom->count_words(line); + } + + if (nwords != params_per_line) + error->all(FLERR,"Incorrect format in Tersoff potential file"); + + // words = ptrs to all words in line + + nwords = 0; + words[nwords++] = strtok(line," \t\n\r\f"); + while (words[nwords++] = strtok(NULL," \t\n\r\f")) continue; + + // ielement,jelement,kelement = 1st args + // if all 3 args are in element list, then parse this line + // else skip to next line + + for (ielement = 0; ielement < nelements; ielement++) + if (strcmp(words[0],elements[ielement]) == 0) break; + if (ielement == nelements) continue; + for (jelement = 0; jelement < nelements; jelement++) + if (strcmp(words[1],elements[jelement]) == 0) break; + if (jelement == nelements) continue; + for (kelement = 0; kelement < nelements; kelement++) + if (strcmp(words[2],elements[kelement]) == 0) break; + if (kelement == nelements) continue; + + // load up parameter settings and error check their values + + if (nparams == maxparam) { + maxparam += DELTA; + params = (Param *) memory->srealloc(params,maxparam*sizeof(Param), + "pair:params"); + } + + params[nparams].ielement = ielement; + params[nparams].jelement = jelement; + params[nparams].kelement = kelement; + params[nparams].powerm = atof(words[3]); + params[nparams].gamma = atof(words[4]); + params[nparams].lam3 = atof(words[5]); + params[nparams].c = atof(words[6]); + params[nparams].d = atof(words[7]); + params[nparams].h = atof(words[8]); + params[nparams].powern = atof(words[9]); + params[nparams].beta = atof(words[10]); + params[nparams].lam2 = atof(words[11]); + params[nparams].bigb = atof(words[12]); + params[nparams].bigr = atof(words[13]); + params[nparams].bigd = atof(words[14]); + params[nparams].lam1 = atof(words[15]); + params[nparams].biga = atof(words[16]); + params[nparams].Z_i = atof(words[17]); + params[nparams].Z_j = atof(words[18]); + params[nparams].ZBLcut = atof(words[19]); + params[nparams].ZBLexpscale = atof(words[20]); + + // currently only allow m exponent of 1 or 3 + + params[nparams].powermint = int(params[nparams].powerm); + + if ( + params[nparams].lam3 < 0.0 || params[nparams].c < 0.0 || + params[nparams].d < 0.0 || params[nparams].powern < 0.0 || + params[nparams].beta < 0.0 || params[nparams].lam2 < 0.0 || + params[nparams].bigb < 0.0 || params[nparams].bigr < 0.0 || + params[nparams].bigd < 0.0 || + params[nparams].bigd > params[nparams].bigr || + params[nparams].lam3 < 0.0 || params[nparams].biga < 0.0 || + params[nparams].powerm - params[nparams].powermint != 0.0 || + (params[nparams].powermint != 3 && params[nparams].powermint != 1) || + params[nparams].gamma < 0.0 || + params[nparams].Z_i < 1.0 || params[nparams].Z_j < 1.0 || + params[nparams].ZBLcut < 0.0 || params[nparams].ZBLexpscale < 0.0) + error->all(FLERR,"Illegal Tersoff parameter"); + + nparams++; + } + + delete [] words; +} + +/* ---------------------------------------------------------------------- */ + +void PairTersoffZBLOMP::force_zeta(Param *param, double rsq, double zeta_ij, + double &fforce, double &prefactor, + int eflag, double &eng) +{ + double r,fa,fa_d,bij; + + r = sqrt(rsq); + + fa = (r > param->bigr + param->bigd) ? 0.0 : + -param->bigb * exp(-param->lam2 * r) * ters_fc(r,param) * + F_fermi(r,param->ZBLexpscale,param->ZBLcut); + + fa_d = (r > param->bigr + param->bigd) ? 0.0 : + param->bigb * exp(-param->lam2 * r) * + (param->lam2 * ters_fc(r,param) * + F_fermi(r,param->ZBLexpscale,param->ZBLcut) - + ters_fc_d(r,param) * F_fermi(r,param->ZBLexpscale,param->ZBLcut) + - ters_fc(r,param) * F_fermi_d(r,param->ZBLexpscale,param->ZBLcut)); + + bij = ters_bij(zeta_ij,param); + fforce = 0.5*bij*fa_d / r; + prefactor = -0.5*fa * ters_bij_d(zeta_ij,param); + if (eflag) eng = 0.5*bij*fa; +} + +/* ---------------------------------------------------------------------- */ + +void PairTersoffZBLOMP::repulsive(Param *param, double rsq, double &fforce, + int eflag, double &eng) +{ + double r,tmp_fc,tmp_fc_d,tmp_exp; + + // Tersoff repulsive portion + + r = sqrt(rsq); + tmp_fc = ters_fc(r,param); + tmp_fc_d = ters_fc_d(r,param); + tmp_exp = exp(-param->lam1 * r); + double fforce_ters = param->biga * tmp_exp * (tmp_fc_d - tmp_fc*param->lam1); + double eng_ters = tmp_fc * param->biga * tmp_exp; + + // ZBL repulsive portion + + double esq = pow(global_e,2.0); + double a_ij = (0.8854*global_a_0) / + (pow(param->Z_i,0.23) + pow(param->Z_j,0.23)); + double premult = (param->Z_i * param->Z_j * esq)/(4.0*MY_PI*global_epsilon_0); + double r_ov_a = r/a_ij; + double phi = 0.1818*exp(-3.2*r_ov_a) + 0.5099*exp(-0.9423*r_ov_a) + + 0.2802*exp(-0.4029*r_ov_a) + 0.02817*exp(-0.2016*r_ov_a); + double dphi = (1.0/a_ij) * (-3.2*0.1818*exp(-3.2*r_ov_a) - + 0.9423*0.5099*exp(-0.9423*r_ov_a) - + 0.4029*0.2802*exp(-0.4029*r_ov_a) - + 0.2016*0.02817*exp(-0.2016*r_ov_a)); + double fforce_ZBL = premult*-pow(r,-2.0)* phi + premult*pow(r,-1.0)*dphi; + double eng_ZBL = premult*(1.0/r)*phi; + + // combine two parts with smoothing by Fermi-like function + + fforce = -(-F_fermi_d(r,param->ZBLexpscale,param->ZBLcut) * eng_ZBL + + (1.0 - F_fermi(r,param->ZBLexpscale,param->ZBLcut))*fforce_ZBL + + F_fermi_d(r,param->ZBLexpscale,param->ZBLcut)*eng_ters + + F_fermi(r,param->ZBLexpscale,param->ZBLcut)*fforce_ters) / r; + + if (eflag) + eng = (1.0 - F_fermi(r,param->ZBLexpscale,param->ZBLcut))*eng_ZBL + + F_fermi(r,param->ZBLexpscale,param->ZBLcut)*eng_ters; +} diff --git a/src/USER-OMP/pair_tersoff_zbl_omp.h b/src/USER-OMP/pair_tersoff_zbl_omp.h new file mode 100644 index 0000000000..32d7b6b4c4 --- /dev/null +++ b/src/USER-OMP/pair_tersoff_zbl_omp.h @@ -0,0 +1,45 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(tersoff/zbl/omp,PairTersoffZBLOMP) + +#else + +#ifndef LMP_PAIR_TERSOFF_ZBL_OMP_H +#define LMP_PAIR_TERSOFF_ZBL_OMP_H + +#include "pair_tersoff_omp.h" + +namespace LAMMPS_NS { + +class PairTersoffZBLOMP : public PairTersoffOMP { + public: + PairTersoffZBLOMP(class LAMMPS *); + virtual ~PairTersoffZBLOMP() {} + + protected: + double global_a_0; // Bohr radius for Coulomb repulsion + double global_epsilon_0; // permittivity of vacuum for Coulomb repulsion + double global_e; // proton charge (negative of electron charge) + + virtual void read_file(char *); + virtual void repulsive(Param *, double, double &, int, double &); + virtual void force_zeta(Param *, double, double, double &, double &, int, double &); + +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_tri_lj_omp.cpp b/src/USER-OMP/pair_tri_lj_omp.cpp new file mode 100644 index 0000000000..16dce231ba --- /dev/null +++ b/src/USER-OMP/pair_tri_lj_omp.cpp @@ -0,0 +1,411 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_tri_lj_omp.h" +#include "math_extra.h" +#include "atom.h" +#include "atom_vec_tri.h" +#include "comm.h" +#include "force.h" +#include "memory.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairTriLJOMP::PairTriLJOMP(LAMMPS *lmp) : + PairTriLJ(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairTriLJOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + const int * const tri = atom->tri; + const int * const type = atom->type; + AtomVecTri::Bonus * const bonus = avec->bonus; + + // grow discrete list if necessary and initialize + + if (nall > nmax) { + nmax = nall; + memory->destroy(dnum); + memory->destroy(dfirst); + memory->create(dnum,nall,"pair:dnum"); + memory->create(dfirst,nall,"pair:dfirst"); + } + memset(dnum,0,nall*sizeof(int)); + ndiscrete = 0; + + // need to discretize the system ahead of time + // until we find a good way to multi-thread it. + for (int i = 0; i < nall; ++i) { + double dc1[3],dc2[3],dc3[3],p[3][3]; + + if (tri[i] >= 0) { + if (dnum[i] == 0) { + MathExtra::quat_to_mat(bonus[tri[i]].quat,p); + MathExtra::matvec(p,bonus[tri[i]].c1,dc1); + MathExtra::matvec(p,bonus[tri[i]].c2,dc2); + MathExtra::matvec(p,bonus[tri[i]].c3,dc3); + dfirst[i] = ndiscrete; + discretize(i,sigma[type[i]][type[i]],dc1,dc2,dc3); + dnum[i] = ndiscrete - dfirst[i]; + } + } + } + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairTriLJOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + int ni,nj,npi,npj,ifirst,jfirst; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r2inv,r6inv,term1,term2,sig,sig3,forcelj; + double dxi,dxj,dyi,dyj,dzi,dzj; + double xi[3],xj[3],fi[3],fj[3],ti[3],tj[3]; + int *ilist,*jlist,*numneigh,**firstneigh; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const * const torque = thr->get_torque(); + const int * const tri = atom->tri; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq >= cutsq[itype][jtype]) continue; + + // tri/tri interactions = NxN particles + // c1,c2,c3 = corner pts of triangle I or J + + evdwl = 0.0; + if (tri[i] >= 0 && tri[j] >= 0) { + npi = dnum[i]; + ifirst = dfirst[i]; + npj = dnum[j]; + jfirst = dfirst[j]; + + fi[0]=fi[1]=fi[2]=fj[0]=fj[1]=fj[2]=0.0; + ti[0]=ti[1]=ti[2]=tj[0]=tj[1]=tj[2]=0.0; + + for (ni = 0; ni < npi; ni++) { + dxi = discrete[ifirst+ni].dx; + dyi = discrete[ifirst+ni].dy; + dzi = discrete[ifirst+ni].dz; + + for (nj = 0; nj < npj; nj++) { + dxj = discrete[jfirst+nj].dx; + dyj = discrete[jfirst+nj].dy; + dzj = discrete[jfirst+nj].dz; + + xi[0] = x[i][0] + dxi; + xi[1] = x[i][1] + dyi; + xi[2] = x[i][2] + dzi; + xj[0] = x[j][0] + dxj; + xj[1] = x[j][1] + dyj; + xj[2] = x[j][2] + dzj; + + delx = xi[0] - xj[0]; + dely = xi[1] - xj[1]; + delz = xi[2] - xj[2]; + rsq = delx*delx + dely*dely + delz*delz; + + sig = 0.5 * (discrete[ifirst+ni].sigma+discrete[jfirst+nj].sigma); + sig3 = sig*sig*sig; + term2 = 24.0*epsilon[itype][jtype] * sig3*sig3; + term1 = 2.0 * term2 * sig3*sig3; + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (term1*r6inv - term2); + fpair = forcelj*r2inv; + + if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0); + + fi[0] += delx*fpair; + fi[1] += dely*fpair; + fi[2] += delz*fpair; + ti[0] += fpair*(dyi*delz - dzi*dely); + ti[1] += fpair*(dzi*delx - dxi*delz); + ti[2] += fpair*(dxi*dely - dyi*delx); + + if (NEWTON_PAIR || j < nlocal) { + fj[0] -= delx*fpair; + fj[1] -= dely*fpair; + fj[2] -= delz*fpair; + tj[0] -= fpair*(dyj*delz - dzj*dely); + tj[1] -= fpair*(dzj*delx - dxj*delz); + tj[2] -= fpair*(dxj*dely - dyj*delx); + } + } + } + + f[i][0] += fi[0]; + f[i][1] += fi[1]; + f[i][2] += fi[2]; + f[j][0] += fj[0]; + f[j][1] += fj[1]; + f[j][2] += fj[2]; + torque[i][0] += ti[0]; + torque[i][1] += ti[1]; + torque[i][2] += ti[2]; + torque[j][0] += tj[0]; + torque[j][1] += tj[1]; + torque[j][2] += tj[2]; + + // tri/particle interaction = Nx1 particles + // c1,c2,c3 = corner pts of triangle I + + } else if (tri[i] >= 0) { + npi = dnum[i]; + ifirst = dfirst[i]; + + fi[0]=fi[1]=fi[2]=fj[0]=fj[1]=fj[2]=0.0; + ti[0]=ti[1]=ti[2]=tj[0]=tj[1]=tj[2]=0.0; + + for (ni = 0; ni < npi; ni++) { + dxi = discrete[ifirst+ni].dx; + dyi = discrete[ifirst+ni].dy; + dzi = discrete[ifirst+ni].dz; + + xi[0] = x[i][0] + dxi; + xi[1] = x[i][1] + dyi; + xi[2] = x[i][2] + dzi; + xj[0] = x[j][0]; + xj[1] = x[j][1]; + xj[2] = x[j][2]; + + delx = xi[0] - xj[0]; + dely = xi[1] - xj[1]; + delz = xi[2] - xj[2]; + rsq = delx*delx + dely*dely + delz*delz; + + sig = 0.5 * (discrete[ifirst+ni].sigma+sigma[jtype][jtype]); + sig3 = sig*sig*sig; + term2 = 24.0*epsilon[itype][jtype] * sig3*sig3; + term1 = 2.0 * term2 * sig3*sig3; + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (term1*r6inv - term2); + fpair = forcelj*r2inv; + + if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0); + + fi[0] += delx*fpair; + fi[1] += dely*fpair; + fi[2] += delz*fpair; + ti[0] += fpair*(dyi*delz - dzi*dely); + ti[1] += fpair*(dzi*delx - dxi*delz); + ti[2] += fpair*(dxi*dely - dyi*delx); + + if (NEWTON_PAIR || j < nlocal) { + fj[0] -= delx*fpair; + fj[1] -= dely*fpair; + fj[2] -= delz*fpair; + tj[0] -= fpair*(dyj*delz - dzj*dely); + tj[1] -= fpair*(dzj*delx - dxj*delz); + tj[2] -= fpair*(dxj*dely - dyj*delx); + } + } + + f[i][0] += fi[0]; + f[i][1] += fi[1]; + f[i][2] += fi[2]; + f[j][0] += fj[0]; + f[j][1] += fj[1]; + f[j][2] += fj[2]; + torque[i][0] += ti[0]; + torque[i][1] += ti[1]; + torque[i][2] += ti[2]; + torque[j][0] += tj[0]; + torque[j][1] += tj[1]; + torque[j][2] += tj[2]; + + // particle/tri interaction = Nx1 particles + // c1,c2,c3 = corner pts of triangle J + + } else if (tri[j] >= 0) { + npj = dnum[j]; + jfirst = dfirst[j]; + + fi[0]=fi[1]=fi[2]=fj[0]=fj[1]=fj[2]=0.0; + ti[0]=ti[1]=ti[2]=tj[0]=tj[1]=tj[2]=0.0; + + for (nj = 0; nj < npj; nj++) { + dxj = discrete[jfirst+nj].dx; + dyj = discrete[jfirst+nj].dy; + dzj = discrete[jfirst+nj].dz; + + xi[0] = x[i][0]; + xi[1] = x[i][1]; + xi[2] = x[i][2]; + xj[0] = x[j][0] + dxj; + xj[1] = x[j][1] + dyj; + xj[2] = x[j][2] + dzj; + + delx = xi[0] - xj[0]; + dely = xi[1] - xj[1]; + delz = xi[2] - xj[2]; + rsq = delx*delx + dely*dely + delz*delz; + + sig = 0.5 * (sigma[itype][itype]+discrete[jfirst+nj].sigma); + sig3 = sig*sig*sig; + term2 = 24.0*epsilon[itype][jtype] * sig3*sig3; + term1 = 2.0 * term2 * sig3*sig3; + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (term1*r6inv - term2); + fpair = forcelj*r2inv; + + if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0); + + if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0); + + fi[0] += delx*fpair; + fi[1] += dely*fpair; + fi[2] += delz*fpair; + ti[0] += fpair*(dyi*delz - dzi*dely); + ti[1] += fpair*(dzi*delx - dxi*delz); + ti[2] += fpair*(dxi*dely - dyi*delx); + + if (NEWTON_PAIR || j < nlocal) { + fj[0] -= delx*fpair; + fj[1] -= dely*fpair; + fj[2] -= delz*fpair; + tj[0] -= fpair*(dyj*delz - dzj*dely); + tj[1] -= fpair*(dzj*delx - dxj*delz); + tj[2] -= fpair*(dxj*dely - dyj*delx); + } + } + + f[i][0] += fi[0]; + f[i][1] += fi[1]; + f[i][2] += fi[2]; + f[j][0] += fj[0]; + f[j][1] += fj[1]; + f[j][2] += fj[2]; + torque[i][0] += ti[0]; + torque[i][1] += ti[1]; + torque[i][2] += ti[2]; + torque[j][0] += tj[0]; + torque[j][1] += tj[1]; + torque[j][2] += tj[2]; + + // particle/particle interaction = 1x1 particles + + } else { + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + fpair = forcelj*r2inv; + + if (EFLAG) + evdwl += r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + } + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } +} + +/* ---------------------------------------------------------------------- */ + +double PairTriLJOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairTriLJ::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_tri_lj_omp.h b/src/USER-OMP/pair_tri_lj_omp.h new file mode 100644 index 0000000000..7c66fdcf44 --- /dev/null +++ b/src/USER-OMP/pair_tri_lj_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(tri/lj/omp,PairTriLJOMP) + +#else + +#ifndef LMP_PAIR_TRI_LJ_OMP_H +#define LMP_PAIR_TRI_LJ_OMP_H + +#include "pair_tri_lj.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairTriLJOMP : public PairTriLJ, public ThrOMP { + + public: + PairTriLJOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_yukawa_colloid_omp.cpp b/src/USER-OMP/pair_yukawa_colloid_omp.cpp new file mode 100644 index 0000000000..e66d1bb2bf --- /dev/null +++ b/src/USER-OMP/pair_yukawa_colloid_omp.cpp @@ -0,0 +1,161 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_yukawa_colloid_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairYukawaColloidOMP::PairYukawaColloidOMP(LAMMPS *lmp) : + PairYukawaColloid(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairYukawaColloidOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairYukawaColloidOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair,radi,radj; + double rsq,r,rinv,screening,forceyukawa,factor; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const radius = atom->radius; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + radi = radius[i]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + radj = radius[j]; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r = sqrt(rsq); + rinv = 1.0/r; + screening = exp(-kappa*(r-(radi+radj))); + forceyukawa = a[itype][jtype] * screening; + + fpair = factor*forceyukawa * rinv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + evdwl = a[itype][jtype]/kappa * screening - offset[itype][jtype]; + evdwl *= factor; + } + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairYukawaColloidOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairYukawaColloid::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_yukawa_colloid_omp.h b/src/USER-OMP/pair_yukawa_colloid_omp.h new file mode 100644 index 0000000000..b0ef483b5b --- /dev/null +++ b/src/USER-OMP/pair_yukawa_colloid_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(yukawa/colloid/omp,PairYukawaColloidOMP) + +#else + +#ifndef LMP_PAIR_YUKAWA_COLLOID_OMP_H +#define LMP_PAIR_YUKAWA_COLLOID_OMP_H + +#include "pair_yukawa_colloid.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairYukawaColloidOMP : public PairYukawaColloid, public ThrOMP { + + public: + PairYukawaColloidOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_yukawa_omp.cpp b/src/USER-OMP/pair_yukawa_omp.cpp new file mode 100644 index 0000000000..3028901a9e --- /dev/null +++ b/src/USER-OMP/pair_yukawa_omp.cpp @@ -0,0 +1,160 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_yukawa_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairYukawaOMP::PairYukawaOMP(LAMMPS *lmp) : + PairYukawa(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairYukawaOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairYukawaOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r2inv,r,rinv,screening,forceyukawa,factor; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + r = sqrt(rsq); + rinv = 1.0/r; + screening = exp(-kappa*r); + forceyukawa = a[itype][jtype] * screening * (kappa + rinv); + + fpair = factor*forceyukawa * r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + evdwl = a[itype][jtype] * screening * rinv - offset[itype][jtype]; + evdwl *= factor; + } + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairYukawaOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairYukawa::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_yukawa_omp.h b/src/USER-OMP/pair_yukawa_omp.h new file mode 100644 index 0000000000..d9db213474 --- /dev/null +++ b/src/USER-OMP/pair_yukawa_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(yukawa/omp,PairYukawaOMP) + +#else + +#ifndef LMP_PAIR_YUKAWA_OMP_H +#define LMP_PAIR_YUKAWA_OMP_H + +#include "pair_yukawa.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairYukawaOMP : public PairYukawa, public ThrOMP { + + public: + PairYukawaOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pppm_cg_omp.cpp b/src/USER-OMP/pppm_cg_omp.cpp new file mode 100644 index 0000000000..51fc048b3b --- /dev/null +++ b/src/USER-OMP/pppm_cg_omp.cpp @@ -0,0 +1,610 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "pppm_cg_omp.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "force.h" +#include "memory.h" +#include "math_const.h" + +#include +#include + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALLQ 0.00001 +#if defined(FFT_SINGLE) +#define ZEROF 0.0f +#else +#define ZEROF 0.0 +#endif + +#define EPS_HOC 1.0e-7 + +/* ---------------------------------------------------------------------- */ + +PPPMCGOMP::PPPMCGOMP(LAMMPS *lmp, int narg, char **arg) : + PPPMCG(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- + allocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMCGOMP::allocate() +{ + PPPMCG::allocate(); + + const int nthreads = comm->nthreads; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + + FFT_SCALAR **rho1d_thr; + memory->create2d_offset(rho1d_thr,3,-order/2,order/2,"pppm:rho1d_thr"); + ThrData *thr = fix->get_thr(tid); + thr->init_pppm(static_cast(rho1d_thr)); + } + + const int nzend = (nzhi_out-nzlo_out+1)*nthreads + nzlo_out -1; + + // reallocate density brick, so it fits our needs + memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); + memory->create3d_offset(density_brick,nzlo_out,nzend,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:density_brick"); +} + +// NOTE: special version of reduce_data for FFT_SCALAR data type. +// reduce per thread data into the first part of the data +// array that is used for the non-threaded parts and reset +// the temporary storage to 0.0. this routine depends on +// multi-dimensional arrays like force stored in this order +// x1,y1,z1,x2,y2,z2,... +// we need to post a barrier to wait until all threads are done +// writing to the array. +static void data_reduce_fft(FFT_SCALAR *dall, int nall, int nthreads, int ndim, int tid) +{ +#if defined(_OPENMP) + // NOOP in non-threaded execution. + if (nthreads == 1) return; +#pragma omp barrier + { + const int nvals = ndim*nall; + const int idelta = nvals/nthreads + 1; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > nvals) ? nvals : (ifrom + idelta); + + // this if protects against having more threads than atoms + if (ifrom < nall) { + for (int m = ifrom; m < ito; ++m) { + for (int n = 1; n < nthreads; ++n) { + dall[m] += dall[n*nvals + m]; + dall[n*nvals + m] = 0.0; + } + } + } + } +#else + // NOOP in non-threaded execution. + return; +#endif +} + +/* ---------------------------------------------------------------------- + free memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMCGOMP::deallocate() +{ + PPPMCG::deallocate(); + for (int i=0; i < comm->nthreads; ++i) { + ThrData * thr = fix->get_thr(i); + FFT_SCALAR ** rho1d_thr = static_cast(thr->get_rho1d()); + memory->destroy2d_offset(rho1d_thr,-order/2); + } +} + +/* ---------------------------------------------------------------------- + adjust PPPMCG coeffs, called initially and whenever volume has changed +------------------------------------------------------------------------- */ + +void PPPMCGOMP::setup() +{ + int i,j,k,n; + double *prd; + + // volume-dependent factors + // adjust z dimension for 2d slab PPPM + // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + const double xprd = prd[0]; + const double yprd = prd[1]; + const double zprd = prd[2]; + const double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + delxinv = nx_pppm/xprd; + delyinv = ny_pppm/yprd; + delzinv = nz_pppm/zprd_slab; + + delvolinv = delxinv*delyinv*delzinv; + + const double unitkx = (2.0*MY_PI/xprd); + const double unitky = (2.0*MY_PI/yprd); + const double unitkz = (2.0*MY_PI/zprd_slab); + + // fkx,fky,fkz for my FFT grid pts + + double per; + + for (i = nxlo_fft; i <= nxhi_fft; i++) { + per = i - nx_pppm*(2*i/nx_pppm); + fkx[i] = unitkx*per; + } + + for (i = nylo_fft; i <= nyhi_fft; i++) { + per = i - ny_pppm*(2*i/ny_pppm); + fky[i] = unitky*per; + } + + for (i = nzlo_fft; i <= nzhi_fft; i++) { + per = i - nz_pppm*(2*i/nz_pppm); + fkz[i] = unitkz*per; + } + + // virial coefficients + + double sqk,vterm; + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) { + for (j = nylo_fft; j <= nyhi_fft; j++) { + for (i = nxlo_fft; i <= nxhi_fft; i++) { + sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k]; + if (sqk == 0.0) { + vg[n][0] = 0.0; + vg[n][1] = 0.0; + vg[n][2] = 0.0; + vg[n][3] = 0.0; + vg[n][4] = 0.0; + vg[n][5] = 0.0; + } else { + vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald)); + vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i]; + vg[n][1] = 1.0 + vterm*fky[j]*fky[j]; + vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k]; + vg[n][3] = vterm*fkx[i]*fky[j]; + vg[n][4] = vterm*fkx[i]*fkz[k]; + vg[n][5] = vterm*fky[j]*fkz[k]; + } + n++; + } + } + } + + // modified (Hockney-Eastwood) Coulomb Green's function + + const int nbx = static_cast ((g_ewald*xprd/(MY_PI*nx_pppm)) * + pow(-log(EPS_HOC),0.25)); + const int nby = static_cast ((g_ewald*yprd/(MY_PI*ny_pppm)) * + pow(-log(EPS_HOC),0.25)); + const int nbz = static_cast ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) * + pow(-log(EPS_HOC),0.25)); + + const double form = 1.0; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + int tid,nn,nnfrom,nnto,nx,ny,nz,k,l,m; + double snx,sny,snz,sqk; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double sum1,dot1,dot2; + double numerator,denominator; + const double gew2 = -4.0*g_ewald*g_ewald; + + const int nnx = nxhi_fft-nxlo_fft+1; + const int nny = nyhi_fft-nylo_fft+1; + + loop_setup_thr(nnfrom, nnto, tid, nfft, comm->nthreads); + + for (m = nzlo_fft; m <= nzhi_fft; m++) { + + const double fkzm = fkz[m]; + snz = sin(0.5*fkzm*zprd_slab/nz_pppm); + snz *= snz; + + for (l = nylo_fft; l <= nyhi_fft; l++) { + const double fkyl = fky[l]; + sny = sin(0.5*fkyl*yprd/ny_pppm); + sny *= sny; + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + + /* only compute the part designated to this thread */ + nn = k-nxlo_fft + nnx*(l-nylo_fft + nny*(m-nzlo_fft)); + if ((nn < nnfrom) || (nn >=nnto)) continue; + + const double fkxk = fkx[k]; + snx = sin(0.5*fkxk*xprd/nx_pppm); + snx *= snx; + + sqk = fkxk*fkxk + fkyl*fkyl + fkzm*fkzm; + + if (sqk != 0.0) { + numerator = form*MY_4PI/sqk; + denominator = gf_denom(snx,sny,snz); + const double dorder = static_cast(order); + sum1 = 0.0; + for (nx = -nbx; nx <= nbx; nx++) { + qx = fkxk + unitkx*nx_pppm*nx; + sx = exp(qx*qx/gew2); + wx = 1.0; + argx = 0.5*qx*xprd/nx_pppm; + if (argx != 0.0) wx = pow(sin(argx)/argx,dorder); + wx *=wx; + + for (ny = -nby; ny <= nby; ny++) { + qy = fkyl + unitky*ny_pppm*ny; + sy = exp(qy*qy/gew2); + wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm; + if (argy != 0.0) wy = pow(sin(argy)/argy,dorder); + wy *= wy; + + for (nz = -nbz; nz <= nbz; nz++) { + qz = fkzm + unitkz*nz_pppm*nz; + sz = exp(qz*qz/gew2); + wz = 1.0; + argz = 0.5*qz*zprd_slab/nz_pppm; + if (argz != 0.0) wz = pow(sin(argz)/argz,dorder); + wz *= wz; + + dot1 = fkxk*qx + fkyl*qy + fkzm*qz; + dot2 = qx*qx+qy*qy+qz*qz; + sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz; + } + } + } + greensfn[nn] = numerator*sum1/denominator; + } else greensfn[nn] = 0.0; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + run the regular toplevel compute method from plain PPPPMCG + which will have individual methods replaced by our threaded + versions and then call the obligatory force reduction. +------------------------------------------------------------------------- */ + +void PPPMCGOMP::compute(int eflag, int vflag) +{ + + PPPMCG::compute(eflag,vflag); + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid +------------------------------------------------------------------------- */ + +void PPPMCGOMP::make_rho() +{ + const double * const q = atom->q; + const double * const * const x = atom->x; + const int nthreads = comm->nthreads; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = num_charged; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; +#else + const int tid = 0; + const int ifrom = 0; + const int ito = num_charged; +#endif + + int i,j,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + + // set up clear 3d density array + const int nzoffs = (nzhi_out-nzlo_out+1)*tid; + FFT_SCALAR * const * const * const db = &(density_brick[nzoffs]); + memset(&(db[nzlo_out][nylo_out][nxlo_out]),0,ngrid*sizeof(FFT_SCALAR)); + + ThrData *thr = fix->get_thr(tid); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d()); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + // this if protects against having more threads than charged local atoms + if (ifrom < num_charged) { + for (j = ifrom; j < ito; j++) { + i = is_charged[j]; + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d_thr(r1d,dx,dy,dz); + + z0 = delvolinv * q[i]; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + y0 = z0*r1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + x0 = y0*r1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + db[mz][my][mx] += x0*r1d[0][l]; + } + } + } + } + } +#if defined(_OPENMP) + // reduce 3d density array + if (nthreads > 1) { + data_reduce_fft(&(density_brick[nzlo_out][nylo_out][nxlo_out]),ngrid,nthreads,1,tid); + } +#endif + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles +------------------------------------------------------------------------- */ + +void PPPMCGOMP::fieldforce() +{ + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + const double * const q = atom->q; + const double * const * const x = atom->x; + const int nthreads = comm->nthreads; + const double qqrd2e = force->qqrd2e; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = num_charged; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; +#else + const int ifrom = 0; + const int ito = num_charged; + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + double * const * const f = thr->get_f(); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d()); + + int i,j,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; + + // this if protects against having more threads than charged local atoms + if (ifrom < num_charged) { + for (j = ifrom; j < ito; j++) { + i = is_charged[j]; + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d_thr(r1d,dx,dy,dz); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = r1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*r1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*r1d[0][l]; + ekx -= x0*vdx_brick[mz][my][mx]; + eky -= x0*vdy_brick[mz][my][mx]; + ekz -= x0*vdz_brick[mz][my][mx]; + } + } + } + + // convert E-field to force + const double qfactor = qqrd2e*scale*q[i]; + f[i][0] += qfactor*ekx; + f[i][1] += qfactor*eky; + f[i][2] += qfactor*ekz; + } + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get per-atom energy/virial + ------------------------------------------------------------------------- */ + +void PPPMCGOMP::fieldforce_peratom() +{ + // loop over my charges, interpolate from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + const double * const q = atom->q; + const double * const * const x = atom->x; + const int nthreads = comm->nthreads; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = num_charged; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; +#else + const int ifrom = 0; + const int ito = num_charged; + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d()); + + int i,j,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u,v0,v1,v2,v3,v4,v5; + + // this if protects against having more threads than charged local atoms + if (ifrom < num_charged) { + for (j = ifrom; j < ito; j++) { + i = is_charged[j]; + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d_thr(r1d,dx,dy,dz); + + u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = r1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*r1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*r1d[0][l]; + if (eflag_atom) u += x0*u_brick[mz][my][mx]; + if (vflag_atom) { + v0 += x0*v0_brick[mz][my][mx]; + v1 += x0*v1_brick[mz][my][mx]; + v2 += x0*v2_brick[mz][my][mx]; + v3 += x0*v3_brick[mz][my][mx]; + v4 += x0*v4_brick[mz][my][mx]; + v5 += x0*v5_brick[mz][my][mx]; + } + } + } + } + + if (eflag_atom) eatom[i] += q[i]*u; + if (vflag_atom) { + vatom[i][0] += v0; + vatom[i][1] += v1; + vatom[i][2] += v2; + vatom[i][3] += v3; + vatom[i][4] += v4; + vatom[i][5] += v5; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + charge assignment into rho1d + dx,dy,dz = distance of particle from "lower left" grid point +------------------------------------------------------------------------- */ +void PPPMCGOMP::compute_rho1d_thr(FFT_SCALAR * const * const r1d, const FFT_SCALAR &dx, + const FFT_SCALAR &dy, const FFT_SCALAR &dz) +{ + int k,l; + FFT_SCALAR r1,r2,r3; + + for (k = (1-order)/2; k <= order/2; k++) { + r1 = r2 = r3 = ZEROF; + + for (l = order-1; l >= 0; l--) { + r1 = rho_coeff[l][k] + r1*dx; + r2 = rho_coeff[l][k] + r2*dy; + r3 = rho_coeff[l][k] + r3*dz; + } + r1d[0][k] = r1; + r1d[1][k] = r2; + r1d[2][k] = r3; + } +} diff --git a/src/USER-OMP/pppm_cg_omp.h b/src/USER-OMP/pppm_cg_omp.h new file mode 100644 index 0000000000..10a9d35f29 --- /dev/null +++ b/src/USER-OMP/pppm_cg_omp.h @@ -0,0 +1,52 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef KSPACE_CLASS + +KSpaceStyle(pppm/cg/omp,PPPMCGOMP) + +#else + +#ifndef LMP_PPPM_CG_OMP_H +#define LMP_PPPM_CG_OMP_H + +#include "pppm_cg.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + + class PPPMCGOMP : public PPPMCG, public ThrOMP { + public: + PPPMCGOMP(class LAMMPS *, int, char **); + virtual void compute(int, int); + virtual void setup(); + virtual ~PPPMCGOMP () {}; + + protected: + virtual void allocate(); + virtual void deallocate(); + virtual void fieldforce(); + virtual void fieldforce_peratom(); + virtual void make_rho(); + + void compute_rho1d_thr(FFT_SCALAR * const * const, const FFT_SCALAR &, + const FFT_SCALAR &, const FFT_SCALAR &); +// void compute_rho_coeff(); +// void slabcorr(int); + +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pppm_omp.cpp b/src/USER-OMP/pppm_omp.cpp new file mode 100644 index 0000000000..5c400b6f48 --- /dev/null +++ b/src/USER-OMP/pppm_omp.cpp @@ -0,0 +1,611 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "pppm_omp.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "force.h" +#include "memory.h" +#include "math_const.h" + +#include +#include + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#ifdef FFT_SINGLE +#define ZEROF 0.0f +#define ONEF 1.0f +#else +#define ZEROF 0.0 +#define ONEF 1.0 +#endif + +#define EPS_HOC 1.0e-7 + +/* ---------------------------------------------------------------------- */ + +PPPMOMP::PPPMOMP(LAMMPS *lmp, int narg, char **arg) : + PPPM(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- + allocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMOMP::allocate() +{ + PPPM::allocate(); + + const int nthreads = comm->nthreads; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + + FFT_SCALAR **rho1d_thr; + memory->create2d_offset(rho1d_thr,3,-order/2,order/2,"pppm:rho1d_thr"); + ThrData *thr = fix->get_thr(tid); + thr->init_pppm(static_cast(rho1d_thr)); + } + + const int nzend = (nzhi_out-nzlo_out+1)*nthreads + nzlo_out -1; + + // reallocate density brick, so it fits our needs + memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); + memory->create3d_offset(density_brick,nzlo_out,nzend,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:density_brick"); +} + +// NOTE: special version of reduce_data for FFT_SCALAR data type. +// reduce per thread data into the first part of the data +// array that is used for the non-threaded parts and reset +// the temporary storage to 0.0. this routine depends on +// multi-dimensional arrays like force stored in this order +// x1,y1,z1,x2,y2,z2,... +// we need to post a barrier to wait until all threads are done +// writing to the array. +static void data_reduce_fft(FFT_SCALAR *dall, int nall, int nthreads, int ndim, int tid) +{ +#if defined(_OPENMP) + // NOOP in non-threaded execution. + if (nthreads == 1) return; +#pragma omp barrier + { + const int nvals = ndim*nall; + const int idelta = nvals/nthreads + 1; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > nvals) ? nvals : (ifrom + idelta); + + // this if protects against having more threads than atoms + if (ifrom < nall) { + for (int m = ifrom; m < ito; ++m) { + for (int n = 1; n < nthreads; ++n) { + dall[m] += dall[n*nvals + m]; + dall[n*nvals + m] = 0.0; + } + } + } + } +#else + // NOOP in non-threaded execution. + return; +#endif +} + +/* ---------------------------------------------------------------------- + free memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMOMP::deallocate() +{ + PPPM::deallocate(); + for (int i=0; i < comm->nthreads; ++i) { + ThrData * thr = fix->get_thr(i); + FFT_SCALAR ** rho1d_thr = static_cast(thr->get_rho1d()); + memory->destroy2d_offset(rho1d_thr,-order/2); + } +} + +/* ---------------------------------------------------------------------- + adjust PPPM coeffs, called initially and whenever volume has changed +------------------------------------------------------------------------- */ + +void PPPMOMP::setup() +{ + int i,j,k,n; + double *prd; + + // volume-dependent factors + // adjust z dimension for 2d slab PPPM + // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + const double xprd = prd[0]; + const double yprd = prd[1]; + const double zprd = prd[2]; + const double zprd_slab = zprd*slab_volfactor; + volume = xprd * yprd * zprd_slab; + + delxinv = nx_pppm/xprd; + delyinv = ny_pppm/yprd; + delzinv = nz_pppm/zprd_slab; + + delvolinv = delxinv*delyinv*delzinv; + + const double unitkx = (2.0*MY_PI/xprd); + const double unitky = (2.0*MY_PI/yprd); + const double unitkz = (2.0*MY_PI/zprd_slab); + + // fkx,fky,fkz for my FFT grid pts + + double per; + + for (i = nxlo_fft; i <= nxhi_fft; i++) { + per = i - nx_pppm*(2*i/nx_pppm); + fkx[i] = unitkx*per; + } + + for (i = nylo_fft; i <= nyhi_fft; i++) { + per = i - ny_pppm*(2*i/ny_pppm); + fky[i] = unitky*per; + } + + for (i = nzlo_fft; i <= nzhi_fft; i++) { + per = i - nz_pppm*(2*i/nz_pppm); + fkz[i] = unitkz*per; + } + + // virial coefficients + + double sqk,vterm; + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) { + for (j = nylo_fft; j <= nyhi_fft; j++) { + for (i = nxlo_fft; i <= nxhi_fft; i++) { + sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k]; + if (sqk == 0.0) { + vg[n][0] = 0.0; + vg[n][1] = 0.0; + vg[n][2] = 0.0; + vg[n][3] = 0.0; + vg[n][4] = 0.0; + vg[n][5] = 0.0; + } else { + vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald)); + vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i]; + vg[n][1] = 1.0 + vterm*fky[j]*fky[j]; + vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k]; + vg[n][3] = vterm*fkx[i]*fky[j]; + vg[n][4] = vterm*fkx[i]*fkz[k]; + vg[n][5] = vterm*fky[j]*fkz[k]; + } + n++; + } + } + } + + // modified (Hockney-Eastwood) Coulomb Green's function + + const int nbx = static_cast ((g_ewald*xprd/(MY_PI*nx_pppm)) * + pow(-log(EPS_HOC),0.25)); + const int nby = static_cast ((g_ewald*yprd/(MY_PI*ny_pppm)) * + pow(-log(EPS_HOC),0.25)); + const int nbz = static_cast ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) * + pow(-log(EPS_HOC),0.25)); + + const double form = 1.0; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + int tid,nn,nnfrom,nnto,nx,ny,nz,k,l,m; + double snx,sny,snz,sqk; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double sum1,dot1,dot2; + double numerator,denominator; + const double gew2 = -4.0*g_ewald*g_ewald; + + const int nnx = nxhi_fft-nxlo_fft+1; + const int nny = nyhi_fft-nylo_fft+1; + + loop_setup_thr(nnfrom, nnto, tid, nfft, comm->nthreads); + + for (m = nzlo_fft; m <= nzhi_fft; m++) { + + const double fkzm = fkz[m]; + snz = sin(0.5*fkzm*zprd_slab/nz_pppm); + snz *= snz; + + for (l = nylo_fft; l <= nyhi_fft; l++) { + const double fkyl = fky[l]; + sny = sin(0.5*fkyl*yprd/ny_pppm); + sny *= sny; + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + + /* only compute the part designated to this thread */ + nn = k-nxlo_fft + nnx*(l-nylo_fft + nny*(m-nzlo_fft)); + if ((nn < nnfrom) || (nn >=nnto)) continue; + + const double fkxk = fkx[k]; + snx = sin(0.5*fkxk*xprd/nx_pppm); + snx *= snx; + + sqk = fkxk*fkxk + fkyl*fkyl + fkzm*fkzm; + + if (sqk != 0.0) { + numerator = form*MY_4PI/sqk; + denominator = gf_denom(snx,sny,snz); + sum1 = 0.0; + const double dorder = static_cast(order); + for (nx = -nbx; nx <= nbx; nx++) { + qx = fkxk + unitkx*nx_pppm*nx; + sx = exp(qx*qx/gew2); + wx = 1.0; + argx = 0.5*qx*xprd/nx_pppm; + if (argx != 0.0) wx = pow(sin(argx)/argx,dorder); + wx *=wx; + + for (ny = -nby; ny <= nby; ny++) { + qy = fkyl + unitky*ny_pppm*ny; + sy = exp(qy*qy/gew2); + wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm; + if (argy != 0.0) wy = pow(sin(argy)/argy,dorder); + wy *= wy; + + for (nz = -nbz; nz <= nbz; nz++) { + qz = fkzm + unitkz*nz_pppm*nz; + sz = exp(qz*qz/gew2); + wz = 1.0; + argz = 0.5*qz*zprd_slab/nz_pppm; + if (argz != 0.0) wz = pow(sin(argz)/argz,dorder); + wz *= wz; + + dot1 = fkxk*qx + fkyl*qy + fkzm*qz; + dot2 = qx*qx+qy*qy+qz*qz; + sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz; + } + } + } + greensfn[nn] = numerator*sum1/denominator; + } else greensfn[nn] = 0.0; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + run the regular toplevel compute method from plain PPPPM + which will have individual methods replaced by our threaded + versions and then call the obligatory force reduction. +------------------------------------------------------------------------- */ + +void PPPMOMP::compute(int eflag, int vflag) +{ + + PPPM::compute(eflag,vflag); + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid +------------------------------------------------------------------------- */ + +void PPPMOMP::make_rho() +{ + const double * const q = atom->q; + const double * const * const x = atom->x; + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = nlocal; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; +#else + const int tid = 0; + const int ifrom = 0; + const int ito = nlocal; +#endif + + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + + // set up clear 3d density array + const int nzoffs = (nzhi_out-nzlo_out+1)*tid; + FFT_SCALAR * const * const * const db = &(density_brick[nzoffs]); + memset(&(db[nzlo_out][nylo_out][nxlo_out]),0,ngrid*sizeof(FFT_SCALAR)); + + ThrData *thr = fix->get_thr(tid); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d()); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + // this if protects against having more threads than local atoms + if (ifrom < nlocal) { + for (int i = ifrom; i < ito; i++) { + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d_thr(r1d,dx,dy,dz); + + z0 = delvolinv * q[i]; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + y0 = z0*r1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + x0 = y0*r1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + db[mz][my][mx] += x0*r1d[0][l]; + } + } + } + } + } +#if defined(_OPENMP) + // reduce 3d density array + if (nthreads > 1) { + data_reduce_fft(&(density_brick[nzlo_out][nylo_out][nxlo_out]),ngrid,nthreads,1,tid); + } +#endif + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles +------------------------------------------------------------------------- */ + +void PPPMOMP::fieldforce() +{ + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + const double * const q = atom->q; + const double * const * const x = atom->x; + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + const double qqrd2e = force->qqrd2e; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = nlocal; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; +#else + const int ifrom = 0; + const int ito = nlocal; + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + double * const * const f = thr->get_f(); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d()); + + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; + + // this if protects against having more threads than local atoms + if (ifrom < nlocal) { + for (int i = ifrom; i < ito; i++) { + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d_thr(r1d,dx,dy,dz); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = r1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*r1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*r1d[0][l]; + ekx -= x0*vdx_brick[mz][my][mx]; + eky -= x0*vdy_brick[mz][my][mx]; + ekz -= x0*vdz_brick[mz][my][mx]; + } + } + } + + // convert E-field to force + const double qfactor = qqrd2e*scale*q[i]; + f[i][0] += qfactor*ekx; + f[i][1] += qfactor*eky; + f[i][2] += qfactor*ekz; + } + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get per-atom energy/virial + ------------------------------------------------------------------------- */ + +void PPPMOMP::fieldforce_peratom() +{ + // loop over my charges, interpolate from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + const double * const q = atom->q; + const double * const * const x = atom->x; + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = nlocal; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; +#else + const int ifrom = 0; + const int ito = nlocal; + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d()); + + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u,v0,v1,v2,v3,v4,v5; + + // this if protects against having more threads than local atoms + if (ifrom < nlocal) { + for (int i = ifrom; i < ito; i++) { + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d_thr(r1d,dx,dy,dz); + + u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = r1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*r1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*r1d[0][l]; + if (eflag_atom) u += x0*u_brick[mz][my][mx]; + if (vflag_atom) { + v0 += x0*v0_brick[mz][my][mx]; + v1 += x0*v1_brick[mz][my][mx]; + v2 += x0*v2_brick[mz][my][mx]; + v3 += x0*v3_brick[mz][my][mx]; + v4 += x0*v4_brick[mz][my][mx]; + v5 += x0*v5_brick[mz][my][mx]; + } + } + } + } + + if (eflag_atom) eatom[i] += q[i]*u; + if (vflag_atom) { + vatom[i][0] += v0; + vatom[i][1] += v1; + vatom[i][2] += v2; + vatom[i][3] += v3; + vatom[i][4] += v4; + vatom[i][5] += v5; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + charge assignment into rho1d + dx,dy,dz = distance of particle from "lower left" grid point +------------------------------------------------------------------------- */ +void PPPMOMP::compute_rho1d_thr(FFT_SCALAR * const * const r1d, const FFT_SCALAR &dx, + const FFT_SCALAR &dy, const FFT_SCALAR &dz) +{ + int k,l; + FFT_SCALAR r1,r2,r3; + + for (k = (1-order)/2; k <= order/2; k++) { + r1 = r2 = r3 = ZEROF; + + for (l = order-1; l >= 0; l--) { + r1 = rho_coeff[l][k] + r1*dx; + r2 = rho_coeff[l][k] + r2*dy; + r3 = rho_coeff[l][k] + r3*dz; + } + r1d[0][k] = r1; + r1d[1][k] = r2; + r1d[2][k] = r3; + } +} diff --git a/src/USER-OMP/pppm_omp.h b/src/USER-OMP/pppm_omp.h new file mode 100644 index 0000000000..b3c069844c --- /dev/null +++ b/src/USER-OMP/pppm_omp.h @@ -0,0 +1,52 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef KSPACE_CLASS + +KSpaceStyle(pppm/omp,PPPMOMP) + +#else + +#ifndef LMP_PPPM_OMP_H +#define LMP_PPPM_OMP_H + +#include "pppm.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + + class PPPMOMP : public PPPM, public ThrOMP { + public: + PPPMOMP(class LAMMPS *, int, char **); + virtual ~PPPMOMP () {}; + virtual void setup(); + virtual void compute(int, int); + + protected: + virtual void allocate(); + virtual void deallocate(); + virtual void fieldforce(); + virtual void fieldforce_peratom(); + virtual void make_rho(); + + void compute_rho1d_thr(FFT_SCALAR * const * const, const FFT_SCALAR &, + const FFT_SCALAR &, const FFT_SCALAR &); +// void compute_rho_coeff(); +// void slabcorr(int); + +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pppm_proxy.cpp b/src/USER-OMP/pppm_proxy.cpp new file mode 100644 index 0000000000..25c2158a08 --- /dev/null +++ b/src/USER-OMP/pppm_proxy.cpp @@ -0,0 +1,64 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "pppm_proxy.h" + +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PPPMProxy::PPPMProxy(LAMMPS *lmp, int narg, char **arg) : + PPPM(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE|THR_PROXY) { need_setup=1; } + +/* ---------------------------------------------------------------------- */ + +void PPPMProxy::setup_proxy() +{ + if (need_setup) + PPPM::setup(); + + need_setup = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PPPMProxy::compute(int eflag, int vflag) +{ +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + + ThrData *thr = fix->get_thr(tid); + reduce_thr(this, eflag,vflag,thr); + } +} + +/* ---------------------------------------------------------------------- */ + +void PPPMProxy::compute_proxy(int eflag, int vflag) +{ + setup_proxy(); + PPPM::compute(eflag,vflag); +} diff --git a/src/USER-OMP/pppm_proxy.h b/src/USER-OMP/pppm_proxy.h new file mode 100644 index 0000000000..110abfc9da --- /dev/null +++ b/src/USER-OMP/pppm_proxy.h @@ -0,0 +1,47 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef KSPACE_CLASS + +KSpaceStyle(pppm/proxy,PPPMProxy) + +#else + +#ifndef LMP_PPPM_PROXY_H +#define LMP_PPPM_PROXY_H + +#include "pppm.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + + class PPPMProxy : public PPPM, public ThrOMP { + public: + PPPMProxy(class LAMMPS *, int, char **); + virtual ~PPPMProxy () {}; + + virtual void compute(int, int); + virtual void compute_proxy(int eflag, int vflag); + + // setup is delayed until the compute proxy is called + virtual void setup() { need_setup=1; }; + virtual void setup_proxy(); + + protected: + int need_setup; +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pppm_tip4p_omp.cpp b/src/USER-OMP/pppm_tip4p_omp.cpp new file mode 100644 index 0000000000..e59275239b --- /dev/null +++ b/src/USER-OMP/pppm_tip4p_omp.cpp @@ -0,0 +1,402 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "pppm_tip4p_omp.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "force.h" +#include "memory.h" +#include "math_const.h" + +#include +#include + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#define OFFSET 16384 + +#ifdef FFT_SINGLE +#define ZEROF 0.0f +#define ONEF 1.0f +#else +#define ZEROF 0.0 +#define ONEF 1.0 +#endif + +/* ---------------------------------------------------------------------- */ + +PPPMTIP4POMP::PPPMTIP4POMP(LAMMPS *lmp, int narg, char **arg) : + PPPMOMP(lmp, narg, arg) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- */ + +// NOTE: special version of reduce_data for FFT_SCALAR data type. +// reduce per thread data into the first part of the data +// array that is used for the non-threaded parts and reset +// the temporary storage to 0.0. this routine depends on +// multi-dimensional arrays like force stored in this order +// x1,y1,z1,x2,y2,z2,... +// we need to post a barrier to wait until all threads are done +// writing to the array. +static void data_reduce_fft(FFT_SCALAR *dall, int nall, int nthreads, int ndim, int tid) +{ +#if defined(_OPENMP) + // NOOP in non-threaded execution. + if (nthreads == 1) return; +#pragma omp barrier + { + const int nvals = ndim*nall; + const int idelta = nvals/nthreads + 1; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > nvals) ? nvals : (ifrom + idelta); + + // this if protects against having more threads than atoms + if (ifrom < nall) { + for (int m = ifrom; m < ito; ++m) { + for (int n = 1; n < nthreads; ++n) { + dall[m] += dall[n*nvals + m]; + dall[n*nvals + m] = 0.0; + } + } + } + } +#else + // NOOP in non-threaded execution. + return; +#endif +} + +/* ---------------------------------------------------------------------- */ + +void PPPMTIP4POMP::init() +{ + // TIP4P PPPM requires newton on, b/c it computes forces on ghost atoms + + if (force->newton == 0) + error->all(FLERR,"Kspace style pppm/tip4p/omp requires newton on"); + + PPPMOMP::init(); +} + +/* ---------------------------------------------------------------------- + find center grid pt for each of my particles + check that full stencil for the particle will fit in my 3d brick + store central grid pt indices in part2grid array +------------------------------------------------------------------------- */ + +void PPPMTIP4POMP::particle_map() +{ + const int * const type = atom->type; + const double * const * const x = atom->x; + const int nlocal = atom->nlocal; + + int i, flag = 0; +#if defined(_OPENMP) +#pragma omp parallel for private(i) default(none) reduction(+:flag) schedule(static) +#endif + for (i = 0; i < nlocal; i++) { + int nx,ny,nz,iH1,iH2; + double xM[3]; + + if (type[i] == typeO) { + find_M(i,iH1,iH2,xM); + } else { + xM[0] = x[i][0]; + xM[1] = x[i][1]; + xM[2] = x[i][2]; + } + + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // current particle coord can be outside global and local box + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + nx = static_cast ((xM[0]-boxlo[0])*delxinv+shift) - OFFSET; + ny = static_cast ((xM[1]-boxlo[1])*delyinv+shift) - OFFSET; + nz = static_cast ((xM[2]-boxlo[2])*delzinv+shift) - OFFSET; + + part2grid[i][0] = nx; + part2grid[i][1] = ny; + part2grid[i][2] = nz; + + // check that entire stencil around nx,ny,nz will fit in my 3d brick + + if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || + ny+nlower < nylo_out || ny+nupper > nyhi_out || + nz+nlower < nzlo_out || nz+nupper > nzhi_out) flag++; + } + + int flag_all; + MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); + if (flag_all) error->all(FLERR,"Out of range atoms - cannot compute PPPM"); +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid +------------------------------------------------------------------------- */ + +void PPPMTIP4POMP::make_rho() +{ + const double * const q = atom->q; + const double * const * const x = atom->x; + const int * const type = atom->type; + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = nlocal; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; +#else + const int tid = 0; + const int ifrom = 0; + const int ito = nlocal; +#endif + + int l,m,n,nx,ny,nz,mx,my,mz,iH1,iH2; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + double xM[3]; + + // set up clear 3d density array + const int nzoffs = (nzhi_out-nzlo_out+1)*tid; + FFT_SCALAR * const * const * const db = &(density_brick[nzoffs]); + memset(&(db[nzlo_out][nylo_out][nxlo_out]),0,ngrid*sizeof(FFT_SCALAR)); + + ThrData *thr = fix->get_thr(tid); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d()); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + // this if protects against having more threads than local atoms + if (ifrom < nlocal) { + for (int i = ifrom; i < ito; i++) { + + if (type[i] == typeO) { + find_M(i,iH1,iH2,xM); + } else { + xM[0] = x[i][0]; + xM[1] = x[i][1]; + xM[2] = x[i][2]; + } + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (xM[0]-boxlo[0])*delxinv; + dy = ny+shiftone - (xM[1]-boxlo[1])*delyinv; + dz = nz+shiftone - (xM[2]-boxlo[2])*delzinv; + + compute_rho1d_thr(r1d,dx,dy,dz); + + z0 = delvolinv * q[i]; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + y0 = z0*r1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + x0 = y0*r1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + db[mz][my][mx] += x0*r1d[0][l]; + } + } + } + } + } +#if defined(_OPENMP) + // reduce 3d density array + if (nthreads > 1) { + data_reduce_fft(&(density_brick[nzlo_out][nylo_out][nxlo_out]),ngrid,nthreads,1,tid); + } +#endif + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles +------------------------------------------------------------------------- */ + +void PPPMTIP4POMP::fieldforce() +{ + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + const double * const q = atom->q; + const double * const * const x = atom->x; + const int * const type = atom->type; + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + const double qqrd2e = force->qqrd2e; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = nlocal; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; +#else + const int ifrom = 0; + const int ito = nlocal; + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + double * const * const f = thr->get_f(); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d()); + + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; + int iH1,iH2; + double xM[3], fx,fy,fz; + double ddotf, rOMx, rOMy, rOMz, f1x, f1y, f1z; + + // this if protects against having more threads than local atoms + if (ifrom < nlocal) { + for (int i = ifrom; i < ito; i++) { + + if (type[i] == typeO) { + find_M(i,iH1,iH2,xM); + } else { + xM[0] = x[i][0]; + xM[1] = x[i][1]; + xM[2] = x[i][2]; + } + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (xM[0]-boxlo[0])*delxinv; + dy = ny+shiftone - (xM[1]-boxlo[1])*delyinv; + dz = nz+shiftone - (xM[2]-boxlo[2])*delzinv; + + compute_rho1d_thr(r1d,dx,dy,dz); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = r1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*r1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*r1d[0][l]; + ekx -= x0*vdx_brick[mz][my][mx]; + eky -= x0*vdy_brick[mz][my][mx]; + ekz -= x0*vdz_brick[mz][my][mx]; + } + } + } + + // convert E-field to force + const double qfactor = qqrd2e*scale*q[i]; + + if (type[i] != typeO) { + f[i][0] += qfactor*ekx; + f[i][1] += qfactor*eky; + f[i][2] += qfactor*ekz; + + } else { + fx = qfactor * ekx; + fy = qfactor * eky; + fz = qfactor * ekz; + find_M(i,iH1,iH2,xM); + + rOMx = xM[0] - x[i][0]; + rOMy = xM[1] - x[i][1]; + rOMz = xM[2] - x[i][2]; + + ddotf = (rOMx * fx + rOMy * fy + rOMz * fz) / (qdist * qdist); + + f1x = ddotf * rOMx; + f1y = ddotf * rOMy; + f1z = ddotf * rOMz; + + f[i][0] += fx - alpha * (fx - f1x); + f[i][1] += fy - alpha * (fy - f1y); + f[i][2] += fz - alpha * (fz - f1z); + + f[iH1][0] += 0.5*alpha*(fx - f1x); + f[iH1][1] += 0.5*alpha*(fy - f1y); + f[iH1][2] += 0.5*alpha*(fz - f1z); + + f[iH2][0] += 0.5*alpha*(fx - f1x); + f[iH2][1] += 0.5*alpha*(fy - f1y); + f[iH2][2] += 0.5*alpha*(fz - f1z); + } + } + } + } +} + +/* ---------------------------------------------------------------------- + find 2 H atoms bonded to O atom i + compute position xM of fictitious charge site for O atom + also return local indices iH1,iH2 of H atoms +------------------------------------------------------------------------- */ + +void PPPMTIP4POMP::find_M(int i, int &iH1, int &iH2, double *xM) +{ + iH1 = atom->map(atom->tag[i] + 1); + iH2 = atom->map(atom->tag[i] + 2); + + if (iH1 == -1 || iH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing"); + if (atom->type[iH1] != typeH || atom->type[iH2] != typeH) + error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); + + const double * const * const x = atom->x; + + double delx1 = x[iH1][0] - x[i][0]; + double dely1 = x[iH1][1] - x[i][1]; + double delz1 = x[iH1][2] - x[i][2]; + domain->minimum_image(delx1,dely1,delz1); + + double delx2 = x[iH2][0] - x[i][0]; + double dely2 = x[iH2][1] - x[i][1]; + double delz2 = x[iH2][2] - x[i][2]; + domain->minimum_image(delx2,dely2,delz2); + + xM[0] = x[i][0] + alpha * 0.5 * (delx1 + delx2); + xM[1] = x[i][1] + alpha * 0.5 * (dely1 + dely2); + xM[2] = x[i][2] + alpha * 0.5 * (delz1 + delz2); +} diff --git a/src/USER-OMP/pppm_tip4p_omp.h b/src/USER-OMP/pppm_tip4p_omp.h new file mode 100644 index 0000000000..11801ccb35 --- /dev/null +++ b/src/USER-OMP/pppm_tip4p_omp.h @@ -0,0 +1,56 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef KSPACE_CLASS + +KSpaceStyle(pppm/tip4p/omp,PPPMTIP4POMP) + +#else + +#ifndef LMP_PPPM_TIP4P_OMP_H +#define LMP_PPPM_TIP4P_OMP_H + +#include "pppm_omp.h" + +namespace LAMMPS_NS { + +class PPPMTIP4POMP : public PPPMOMP { + public: + PPPMTIP4POMP(class LAMMPS *, int, char **); + virtual ~PPPMTIP4POMP () {}; + virtual void init(); + + protected: + virtual void particle_map(); + virtual void fieldforce(); + virtual void make_rho(); + + private: + void find_M(int, int &, int &, double *); + +// void slabcorr(int); + +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Kspace style pppm/tip4p/omp requires newton on + +Self-explanatory. + +*/ diff --git a/src/USER-OMP/pppm_tip4p_proxy.cpp b/src/USER-OMP/pppm_tip4p_proxy.cpp new file mode 100644 index 0000000000..db851dec5b --- /dev/null +++ b/src/USER-OMP/pppm_tip4p_proxy.cpp @@ -0,0 +1,64 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "pppm_tip4p_proxy.h" + +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PPPMTIP4PProxy::PPPMTIP4PProxy(LAMMPS *lmp, int narg, char **arg) : + PPPMTIP4P(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE|THR_PROXY) { need_setup=1; } + +/* ---------------------------------------------------------------------- */ + +void PPPMTIP4PProxy::setup_proxy() +{ + if (need_setup) + PPPMTIP4P::setup(); + + need_setup=0; +} + +/* ---------------------------------------------------------------------- */ + +void PPPMTIP4PProxy::compute(int eflag, int vflag) +{ +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + + ThrData *thr = fix->get_thr(tid); + reduce_thr(this, eflag,vflag,thr); + } +} + +/* ---------------------------------------------------------------------- */ + +void PPPMTIP4PProxy::compute_proxy(int eflag, int vflag) +{ + setup_proxy(); + PPPMTIP4P::compute(eflag,vflag); +} diff --git a/src/USER-OMP/pppm_tip4p_proxy.h b/src/USER-OMP/pppm_tip4p_proxy.h new file mode 100644 index 0000000000..0a670d8b63 --- /dev/null +++ b/src/USER-OMP/pppm_tip4p_proxy.h @@ -0,0 +1,47 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef KSPACE_CLASS + +KSpaceStyle(pppm/tip4p/proxy,PPPMTIP4PProxy) + +#else + +#ifndef LMP_PPPM_TIP4P_PROXY_H +#define LMP_PPPM_TIP4P_PROXY_H + +#include "pppm_tip4p.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + + class PPPMTIP4PProxy : public PPPMTIP4P, public ThrOMP { + public: + PPPMTIP4PProxy(class LAMMPS *, int, char **); + virtual ~PPPMTIP4PProxy () {}; + + virtual void compute(int, int); + virtual void compute_proxy(int eflag, int vflag); + + // setup is delayed until the compute_proxy is called + virtual void setup() { need_setup=1; }; + virtual void setup_proxy(); + + protected: + int need_setup; +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/thr_data.cpp b/src/USER-OMP/thr_data.cpp new file mode 100644 index 0000000000..cebed27721 --- /dev/null +++ b/src/USER-OMP/thr_data.cpp @@ -0,0 +1,231 @@ +/* ------------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + per-thread data management for LAMMPS + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "thr_data.h" + +#include +#include + +using namespace LAMMPS_NS; + + +ThrData::ThrData(int tid) + : _f(NULL), _torque(NULL), _erforce(NULL), _de(NULL), _drho(NULL), _mu(NULL), + _lambda(NULL), _rhoB(NULL), _D_values(NULL), _rho(NULL), _fp(NULL), + _rho1d(NULL), _tid(tid) +{ + // nothing else to do here. +} + + +/* ---------------------------------------------------------------------- */ + +void ThrData::check_tid(int tid) +{ + if (tid != _tid) + fprintf(stderr,"WARNING: external and internal tid mismatch %d != %d\n",tid,_tid); +} + +/* ---------------------------------------------------------------------- */ + +void ThrData::init_force(int nall, double **f, double **torque, + double *erforce, double *de, double *drho) +{ + eng_vdwl=eng_coul=eng_bond=eng_angle=eng_dihed=eng_imprp=eng_kspce=0.0; + memset(virial_pair,0,6*sizeof(double)); + memset(virial_bond,0,6*sizeof(double)); + memset(virial_angle,0,6*sizeof(double)); + memset(virial_dihed,0,6*sizeof(double)); + memset(virial_imprp,0,6*sizeof(double)); + memset(virial_kspce,0,6*sizeof(double)); + + eatom_pair=eatom_bond=eatom_angle=eatom_dihed=eatom_imprp=eatom_kspce=NULL; + vatom_pair=vatom_bond=vatom_angle=vatom_dihed=vatom_imprp=vatom_kspce=NULL; + + _f = f + _tid*nall; + if (nall > 0) + memset(&(_f[0][0]),0,nall*3*sizeof(double)); + + if (torque) { + _torque = torque + _tid*nall; + if (nall > 0) + memset(&(_torque[0][0]),0,nall*3*sizeof(double)); + } else _torque = NULL; + + if (erforce) { + _erforce = erforce + _tid*nall; + if (nall > 0) + memset(&(_erforce[0]),0,nall*sizeof(double)); + } else _erforce = NULL; + + if (de) { + _de = de + _tid*nall; + if (nall > 0) + memset(&(_de[0]),0,nall*sizeof(double)); + } else _de = NULL; + + if (drho) { + _drho = drho + _tid*nall; + if (nall > 0) + memset(&(_drho[0]),0,nall*sizeof(double)); + } else _drho = NULL; +} + +/* ---------------------------------------------------------------------- + set up and clear out locally managed per atom arrays +------------------------------------------------------------------------- */ + +void ThrData::init_eam(int nall, double *rho) +{ + _rho = rho + _tid*nall; + if (nall > 0) + memset(_rho, 0, nall*sizeof(double)); +} + +/* ---------------------------------------------------------------------- */ + +void ThrData::init_adp(int nall, double *rho, double **mu, double **lambda) +{ + init_eam(nall, rho); + + _mu = mu + _tid*nall; + _lambda = lambda + _tid*nall; + if (nall > 0) { + memset(&(_mu[0][0]), 0, nall*3*sizeof(double)); + memset(&(_lambda[0][0]), 0, nall*6*sizeof(double)); + } +} + +/* ---------------------------------------------------------------------- */ + +void ThrData::init_cdeam(int nall, double *rho, double *rhoB, double *D_values) +{ + init_eam(nall, rho); + + _rhoB = rhoB + _tid*nall; + _D_values = D_values + _tid*nall; + if (nall > 0) { + memset(_rhoB, 0, nall*sizeof(double)); + memset(_D_values, 0, nall*sizeof(double)); + } +} + +/* ---------------------------------------------------------------------- */ + +void ThrData::init_eim(int nall, double *rho, double *fp) +{ + init_eam(nall, rho); + + _fp = fp + _tid*nall; + if (nall > 0) + memset(_fp,0,nall*sizeof(double)); +} + +/* ---------------------------------------------------------------------- + compute global pair virial via summing F dot r over own & ghost atoms + at this point, only pairwise forces have been accumulated in atom->f +------------------------------------------------------------------------- */ + +void ThrData::virial_fdotr_compute(double **x, int nlocal, int nghost, int nfirst) +{ + + // sum over force on all particles including ghosts + + if (nfirst < 0) { + int nall = nlocal + nghost; + for (int i = 0; i < nall; i++) { + virial_pair[0] += _f[i][0]*x[i][0]; + virial_pair[1] += _f[i][1]*x[i][1]; + virial_pair[2] += _f[i][2]*x[i][2]; + virial_pair[3] += _f[i][1]*x[i][0]; + virial_pair[4] += _f[i][2]*x[i][0]; + virial_pair[5] += _f[i][2]*x[i][1]; + } + + // neighbor includegroup flag is set + // sum over force on initial nfirst particles and ghosts + + } else { + int nall = nfirst; + for (int i = 0; i < nall; i++) { + virial_pair[0] += _f[i][0]*x[i][0]; + virial_pair[1] += _f[i][1]*x[i][1]; + virial_pair[2] += _f[i][2]*x[i][2]; + virial_pair[3] += _f[i][1]*x[i][0]; + virial_pair[4] += _f[i][2]*x[i][0]; + virial_pair[5] += _f[i][2]*x[i][1]; + } + nall = nlocal + nghost; + for (int i = nlocal; i < nall; i++) { + virial_pair[0] += _f[i][0]*x[i][0]; + virial_pair[1] += _f[i][1]*x[i][1]; + virial_pair[2] += _f[i][2]*x[i][2]; + virial_pair[3] += _f[i][1]*x[i][0]; + virial_pair[4] += _f[i][2]*x[i][0]; + virial_pair[5] += _f[i][2]*x[i][1]; + } + } +} + +/* ---------------------------------------------------------------------- */ + +double ThrData::memory_usage() +{ + double bytes = (7 + 6*6) * sizeof(double); + bytes += 2 * sizeof(double*); + bytes += 4 * sizeof(int); + + return bytes; +} + +/* additional helper functions */ + +// reduce per thread data into the first part of the data +// array that is used for the non-threaded parts and reset +// the temporary storage to 0.0. this routine depends on +// multi-dimensional arrays like force stored in this order +// x1,y1,z1,x2,y2,z2,... +// we need to post a barrier to wait until all threads are done +// with writing to the array . +void LAMMPS_NS::data_reduce_thr(double *dall, int nall, int nthreads, int ndim, int tid) +{ +#if defined(_OPENMP) + // NOOP in non-threaded execution. + if (nthreads == 1) return; +#pragma omp barrier + { + const int nvals = ndim*nall; + const int idelta = nvals/nthreads + 1; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > nvals) ? nvals : (ifrom + idelta); + + // this if protects against having more threads than atoms + if (ifrom < nvals) { + for (int m = ifrom; m < ito; ++m) { + for (int n = 1; n < nthreads; ++n) { + dall[m] += dall[n*nvals + m]; + dall[n*nvals + m] = 0.0; + } + } + } + } +#else + // NOOP in non-threaded execution. + return; +#endif +} diff --git a/src/USER-OMP/thr_data.h b/src/USER-OMP/thr_data.h new file mode 100644 index 0000000000..6345a203c7 --- /dev/null +++ b/src/USER-OMP/thr_data.h @@ -0,0 +1,132 @@ +/* -*- c++ -*- ------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifndef LMP_THR_DATA_H +#define LMP_THR_DATA_H + +#if defined(_OPENMP) +#include +#endif + +namespace LAMMPS_NS { + +// per thread data accumulators +// there should be one instance +// of this class for each thread. +class ThrData { + friend class FixOMP; + friend class ThrOMP; + + public: + ThrData(int tid); + ~ThrData() {}; + + void check_tid(int); // thread id consistency check + int get_tid() const { return _tid; }; // our thread id. + + // erase accumulator contents and hook up force arrays + void init_force(int, double **, double **, double *, double *, double *); + + // give access to per-thread offset arrays + double **get_f() const { return _f; }; + double **get_torque() const { return _torque; }; + double *get_de() const { return _de; }; + double *get_drho() const { return _drho; }; + + // setup and erase per atom arrays + void init_adp(int, double *, double **, double **); // ADP (+ EAM) + void init_cdeam(int, double *, double *, double *); // CDEAM (+ EAM) + void init_eam(int, double *); // EAM + void init_eim(int, double *, double *); // EIM (+ EAM) + + void init_pppm(void *r1d) { _rho1d = r1d; }; + + // access methods for arrays that we handle in this class + double **get_lambda() const { return _lambda; }; + double **get_mu() const { return _mu; }; + double *get_D_values() const { return _D_values; }; + double *get_fp() const { return _fp; }; + double *get_rho() const { return _rho; }; + double *get_rhoB() const { return _rhoB; }; + void *get_rho1d() const { return _rho1d; }; + + private: + double eng_vdwl; // non-bonded non-coulomb energy + double eng_coul; // non-bonded coulomb energy + double eng_bond; // bond energy + double eng_angle; // angle energy + double eng_dihed; // dihedral energy + double eng_imprp; // improper energy + double eng_kspce; // kspace energy + double virial_pair[6]; // virial contribution from non-bonded + double virial_bond[6]; // virial contribution from bonds + double virial_angle[6]; // virial contribution from angles + double virial_dihed[6]; // virial contribution from dihedrals + double virial_imprp[6]; // virial contribution from impropers + double virial_kspce[6]; // virial contribution from kspace + double *eatom_pair; + double *eatom_bond; + double *eatom_angle; + double *eatom_dihed; + double *eatom_imprp; + double *eatom_kspce; + double **vatom_pair; + double **vatom_bond; + double **vatom_angle; + double **vatom_dihed; + double **vatom_imprp; + double **vatom_kspce; + + // per thread segments of various force or similar arrays + + // these are maintained by atom styles + double **_f; + double **_torque; + double *_erforce; + double *_de; + double *_drho; + + // these are maintained by individual pair styles + double **_mu, **_lambda; // ADP (+ EAM) + double *_rhoB, *_D_values; // CDEAM (+ EAM) + double *_rho; // EAM + double *_fp; // EIM (+ EAM) + + // this is for pppm/omp + void *_rho1d; + + // my thread id + const int _tid; + + public: + // compute global per thread virial contribution from global forces and positions + void virial_fdotr_compute(double **, int, int, int); + + double memory_usage(); + + // disabled default methods + private: + ThrData() : _tid(-1) {}; +}; + +//////////////////////////////////////////////////////////////////////// +// helper functions operating on data replicated for thread support // +//////////////////////////////////////////////////////////////////////// +// generic per thread data reduction for continous arrays of nthreads*nmax size +void data_reduce_thr(double *, int, int, int, int); +} +#endif diff --git a/src/USER-OMP/thr_omp.cpp b/src/USER-OMP/thr_omp.cpp new file mode 100644 index 0000000000..f42b4d327f --- /dev/null +++ b/src/USER-OMP/thr_omp.cpp @@ -0,0 +1,1195 @@ +/* ------------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + OpenMP based threading support for LAMMPS + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "atom.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "memory.h" +#include "modify.h" +#include "neighbor.h" + +#include "thr_omp.h" + +#include "pair.h" +#include "bond.h" +#include "angle.h" +#include "dihedral.h" +#include "improper.h" +#include "kspace.h" + +#include "math_const.h" + +#include + +using namespace LAMMPS_NS; +using namespace MathConst; + +/* ---------------------------------------------------------------------- */ + +ThrOMP::ThrOMP(LAMMPS *ptr, int style) + : lmp(ptr), fix(NULL), thr_style(style), thr_error(0) +{ + // register fix omp with this class + int ifix = lmp->modify->find_fix("package_omp"); + if (ifix < 0) + lmp->error->all(FLERR,"The 'package omp' command is required for /omp styles"); + fix = static_cast(lmp->modify->fix[ifix]); +} + +/* ---------------------------------------------------------------------- */ + +ThrOMP::~ThrOMP() +{ + // nothing to do? +} + +/* ---------------------------------------------------------------------- + Hook up per thread per atom arrays into the tally infrastructure + ---------------------------------------------------------------------- */ + +void ThrOMP::ev_setup_thr(int eflag, int vflag, int nall, double *eatom, + double **vatom, ThrData *thr) +{ + const int tid = thr->get_tid(); + if (tid == 0) thr_error = 0; + + if (thr_style & THR_PAIR) { + if (eflag & 2) { + thr->eatom_pair = eatom + tid*nall; + if (nall > 0) + memset(&(thr->eatom_pair[0]),0,nall*sizeof(double)); + } + if (vflag & 4) { + thr->vatom_pair = vatom + tid*nall; + if (nall > 0) + memset(&(thr->vatom_pair[0][0]),0,nall*6*sizeof(double)); + } + } + + if (thr_style & THR_BOND) { + if (eflag & 2) { + thr->eatom_bond = eatom + tid*nall; + if (nall > 0) + memset(&(thr->eatom_bond[0]),0,nall*sizeof(double)); + } + if (vflag & 4) { + thr->vatom_bond = vatom + tid*nall; + if (nall > 0) + memset(&(thr->vatom_bond[0][0]),0,nall*6*sizeof(double)); + } + } + + if (thr_style & THR_ANGLE) { + if (eflag & 2) { + thr->eatom_angle = eatom + tid*nall; + if (nall > 0) + memset(&(thr->eatom_angle[0]),0,nall*sizeof(double)); + } + if (vflag & 4) { + thr->vatom_angle = vatom + tid*nall; + if (nall > 0) + memset(&(thr->vatom_angle[0][0]),0,nall*6*sizeof(double)); + } + } + + if (thr_style & THR_DIHEDRAL) { + if (eflag & 2) { + thr->eatom_dihed = eatom + tid*nall; + if (nall > 0) + memset(&(thr->eatom_dihed[0]),0,nall*sizeof(double)); + } + if (vflag & 4) { + thr->vatom_dihed = vatom + tid*nall; + if (nall > 0) + memset(&(thr->vatom_dihed[0][0]),0,nall*6*sizeof(double)); + } + } + + if (thr_style & THR_IMPROPER) { + if (eflag & 2) { + thr->eatom_imprp = eatom + tid*nall; + if (nall > 0) + memset(&(thr->eatom_imprp[0]),0,nall*sizeof(double)); + } + if (vflag & 4) { + thr->vatom_imprp = vatom + tid*nall; + if (nall > 0) + memset(&(thr->vatom_imprp[0][0]),0,nall*6*sizeof(double)); + } + } + + // nothing to do for THR_KSPACE +} + +/* ---------------------------------------------------------------------- + Reduce per thread data into the regular structures + Reduction of global properties is serialized with a "critical" + directive, so that only one thread at a time will access the + global variables. Since we are not synchronized, this should + come with little overhead. The reduction of per-atom properties + in contrast is parallelized over threads in the same way as forces. + ---------------------------------------------------------------------- */ + +void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag, + ThrData *const thr, const int nproxy) +{ + const int nlocal = lmp->atom->nlocal; + const int nghost = lmp->atom->nghost; + const int nall = nlocal + nghost; + const int nfirst = lmp->atom->nfirst; + const int nthreads = lmp->comm->nthreads; + const int evflag = eflag | vflag; + + const int tid = thr->get_tid(); + double **f = lmp->atom->f; + double **x = lmp->atom->x; + + int need_force_reduce = 1; + + if (evflag) + sync_threads(); + + switch (thr_style) { + + case THR_PAIR: { + Pair * const pair = lmp->force->pair; + + if (pair->vflag_fdotr) { + + if (style == fix->last_pair_hybrid) { + // pair_style hybrid will compute fdotr for us + // but we first need to reduce the forces + data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid); + need_force_reduce = 0; + } + + // this is a non-hybrid pair style. compute per thread fdotr + if (fix->last_pair_hybrid == NULL) { + if (lmp->neighbor->includegroup == 0) + thr->virial_fdotr_compute(x, nlocal, nghost, -1); + else + thr->virial_fdotr_compute(x, nlocal, nghost, nfirst); + } + } + + if (evflag) { +#if defined(_OPENMP) +#pragma omp critical +#endif + { + if (eflag & 1) { + pair->eng_vdwl += thr->eng_vdwl; + pair->eng_coul += thr->eng_coul; + thr->eng_vdwl = 0.0; + thr->eng_coul = 0.0; + } + if (vflag & 3) + for (int i=0; i < 6; ++i) { + pair->virial[i] += thr->virial_pair[i]; + thr->virial_pair[i] = 0.0; + } + } + if (eflag & 2) { + data_reduce_thr(&(pair->eatom[0]), nall, nthreads, 1, tid); + } + if (vflag & 4) { + data_reduce_thr(&(pair->vatom[0][0]), nall, nthreads, 6, tid); + } + } + } + break; + + case THR_PAIR|THR_PROXY: { + Pair * const pair = lmp->force->pair; + + if (tid >= nproxy && pair->vflag_fdotr) { + + if (fix->last_pair_hybrid) { + if (tid == nproxy) + lmp->error->all(FLERR, + "Cannot use hybrid pair style with kspace proxy"); + else return; + } + + // this is a non-hybrid pair style. compute per thread fdotr + if (fix->last_pair_hybrid == NULL) { + if (lmp->neighbor->includegroup == 0) + thr->virial_fdotr_compute(x, nlocal, nghost, -1); + else + thr->virial_fdotr_compute(x, nlocal, nghost, nfirst); + } + } + + if (evflag) { +#if defined(_OPENMP) +#pragma omp critical +#endif + { + if (tid < nproxy) { + // nothing to collect for kspace proxy threads + // just reset pair accumulators to 0.0. + if (eflag & 1) { + thr->eng_vdwl = 0.0; + thr->eng_coul = 0.0; + } + if (vflag & 3) + for (int i=0; i < 6; ++i) { + thr->virial_pair[i] = 0.0; + } + } else { + if (eflag & 1) { + pair->eng_vdwl += thr->eng_vdwl; + pair->eng_coul += thr->eng_coul; + thr->eng_vdwl = 0.0; + thr->eng_coul = 0.0; + } + if (vflag & 3) + for (int i=0; i < 6; ++i) { + pair->virial[i] += thr->virial_pair[i]; + thr->virial_pair[i] = 0.0; + } + } + } + if (eflag & 2) { + data_reduce_thr(&(pair->eatom[0]), nall, nthreads, 1, tid); + } + if (vflag & 4) { + data_reduce_thr(&(pair->vatom[0][0]), nall, nthreads, 6, tid); + } + } + } + break; + + case THR_BOND: + + if (evflag) { + Bond * const bond = lmp->force->bond; +#if defined(_OPENMP) +#pragma omp critical +#endif + { + if (eflag & 1) { + bond->energy += thr->eng_bond; + thr->eng_bond = 0.0; + } + + if (vflag & 3) { + for (int i=0; i < 6; ++i) { + bond->virial[i] += thr->virial_bond[i]; + thr->virial_bond[i] = 0.0; + } + } + } + + if (eflag & 2) { + data_reduce_thr(&(bond->eatom[0]), nall, nthreads, 1, tid); + } + if (vflag & 4) { + data_reduce_thr(&(bond->vatom[0][0]), nall, nthreads, 6, tid); + } + + } + break; + + case THR_ANGLE: + + if (evflag) { + Angle * const angle = lmp->force->angle; +#if defined(_OPENMP) +#pragma omp critical +#endif + { + if (eflag & 1) { + angle->energy += thr->eng_angle; + thr->eng_angle = 0.0; + } + + if (vflag & 3) { + for (int i=0; i < 6; ++i) { + angle->virial[i] += thr->virial_angle[i]; + thr->virial_angle[i] = 0.0; + } + } + } + + if (eflag & 2) { + data_reduce_thr(&(angle->eatom[0]), nall, nthreads, 1, tid); + } + if (vflag & 4) { + data_reduce_thr(&(angle->vatom[0][0]), nall, nthreads, 6, tid); + } + + } + break; + + case THR_DIHEDRAL: + + if (evflag) { + Dihedral * const dihedral = lmp->force->dihedral; +#if defined(_OPENMP) +#pragma omp critical +#endif + { + if (eflag & 1) { + dihedral->energy += thr->eng_dihed; + thr->eng_dihed = 0.0; + } + + if (vflag & 3) { + for (int i=0; i < 6; ++i) { + dihedral->virial[i] += thr->virial_dihed[i]; + thr->virial_dihed[i] = 0.0; + } + } + } + + if (eflag & 2) { + data_reduce_thr(&(dihedral->eatom[0]), nall, nthreads, 1, tid); + } + if (vflag & 4) { + data_reduce_thr(&(dihedral->vatom[0][0]), nall, nthreads, 6, tid); + } + + } + break; + + case THR_DIHEDRAL|THR_CHARMM: // special case for CHARMM dihedrals + + if (evflag) { + Dihedral * const dihedral = lmp->force->dihedral; + Pair * const pair = lmp->force->pair; +#if defined(_OPENMP) +#pragma omp critical +#endif + { + if (eflag & 1) { + dihedral->energy += thr->eng_dihed; + pair->eng_vdwl += thr->eng_vdwl; + pair->eng_coul += thr->eng_coul; + thr->eng_dihed = 0.0; + thr->eng_vdwl = 0.0; + thr->eng_coul = 0.0; + } + + if (vflag & 3) { + for (int i=0; i < 6; ++i) { + dihedral->virial[i] += thr->virial_dihed[i]; + pair->virial[i] += thr->virial_pair[i]; + thr->virial_dihed[i] = 0.0; + thr->virial_pair[i] = 0.0; + } + } + } + + if (eflag & 2) { + data_reduce_thr(&(dihedral->eatom[0]), nall, nthreads, 1, tid); + data_reduce_thr(&(pair->eatom[0]), nall, nthreads, 1, tid); + } + if (vflag & 4) { + data_reduce_thr(&(dihedral->vatom[0][0]), nall, nthreads, 6, tid); + data_reduce_thr(&(pair->vatom[0][0]), nall, nthreads, 6, tid); + } + } + break; + + case THR_IMPROPER: + + if (evflag) { + Improper *improper = lmp->force->improper; +#if defined(_OPENMP) +#pragma omp critical +#endif + { + if (eflag & 1) { + improper->energy += thr->eng_imprp; + thr->eng_imprp = 0.0; + } + + if (vflag & 3) { + for (int i=0; i < 6; ++i) { + improper->virial[i] += thr->virial_imprp[i]; + thr->virial_imprp[i] = 0.0; + } + } + } + + if (eflag & 2) { + data_reduce_thr(&(improper->eatom[0]), nall, nthreads, 1, tid); + } + if (vflag & 4) { + data_reduce_thr(&(improper->vatom[0][0]), nall, nthreads, 6, tid); + } + + } + break; + + case THR_KSPACE|THR_PROXY: // fallthrough + case THR_KSPACE: + // nothing to do + break; + + default: + printf("tid:%d unhandled thr_style case %d\n", tid, thr_style); + break; + } + + if (style == fix->last_omp_style) { + if (need_force_reduce) + data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid); + + if (lmp->atom->torque) + data_reduce_thr(&(lmp->atom->torque[0][0]), nall, nthreads, 3, tid); + } +} + +/* ---------------------------------------------------------------------- + tally eng_vdwl and eng_coul into per thread global and per-atom accumulators +------------------------------------------------------------------------- */ + +void ThrOMP::e_tally_thr(Pair * const pair, const int i, const int j, + const int nlocal, const int newton_pair, + const double evdwl, const double ecoul, ThrData * const thr) +{ + if (pair->eflag_global) { + if (newton_pair) { + thr->eng_vdwl += evdwl; + thr->eng_coul += ecoul; + } else { + const double evdwlhalf = 0.5*evdwl; + const double ecoulhalf = 0.5*ecoul; + if (i < nlocal) { + thr->eng_vdwl += evdwlhalf; + thr->eng_coul += ecoulhalf; + } + if (j < nlocal) { + thr->eng_vdwl += evdwlhalf; + thr->eng_coul += ecoulhalf; + } + } + } + if (pair->eflag_atom) { + const double epairhalf = 0.5 * (evdwl + ecoul); + if (newton_pair || i < nlocal) thr->eatom_pair[i] += epairhalf; + if (newton_pair || j < nlocal) thr->eatom_pair[j] += epairhalf; + } +} + +/* helper functions */ +static void v_tally(double * const vout, const double * const vin) +{ + vout[0] += vin[0]; + vout[1] += vin[1]; + vout[2] += vin[2]; + vout[3] += vin[3]; + vout[4] += vin[4]; + vout[5] += vin[5]; +} + +static void v_tally(double * const vout, const double scale, const double * const vin) +{ + vout[0] += scale*vin[0]; + vout[1] += scale*vin[1]; + vout[2] += scale*vin[2]; + vout[3] += scale*vin[3]; + vout[4] += scale*vin[4]; + vout[5] += scale*vin[5]; +} + +/* ---------------------------------------------------------------------- + tally virial into per thread global and per-atom accumulators +------------------------------------------------------------------------- */ +void ThrOMP::v_tally_thr(Pair * const pair, const int i, const int j, + const int nlocal, const int newton_pair, + const double * const v, ThrData * const thr) +{ + if (pair->vflag_global) { + double * const va = thr->virial_pair; + if (newton_pair) { + v_tally(va,v); + } else { + if (i < nlocal) v_tally(va,0.5,v); + if (j < nlocal) v_tally(va,0.5,v); + } + } + + if (pair->vflag_atom) { + if (newton_pair || i < nlocal) { + double * const va = thr->vatom_pair[i]; + v_tally(va,0.5,v); + } + if (newton_pair || j < nlocal) { + double * const va = thr->vatom_pair[j]; + v_tally(va,0.5,v); + } + } +} + +/* ---------------------------------------------------------------------- + tally eng_vdwl and virial into per thread global and per-atom accumulators + need i < nlocal test since called by bond_quartic and dihedral_charmm +------------------------------------------------------------------------- */ + +void ThrOMP::ev_tally_thr(Pair * const pair, const int i, const int j, const int nlocal, + const int newton_pair, const double evdwl, const double ecoul, + const double fpair, const double delx, const double dely, + const double delz, ThrData * const thr) +{ + + if (pair->eflag_either) + e_tally_thr(pair, i, j, nlocal, newton_pair, evdwl, ecoul, thr); + + if (pair->vflag_either) { + double v[6]; + v[0] = delx*delx*fpair; + v[1] = dely*dely*fpair; + v[2] = delz*delz*fpair; + v[3] = delx*dely*fpair; + v[4] = delx*delz*fpair; + v[5] = dely*delz*fpair; + + v_tally_thr(pair, i, j, nlocal, newton_pair, v, thr); + } +} + +/* ---------------------------------------------------------------------- + tally eng_vdwl and virial into global and per-atom accumulators + for virial, have delx,dely,delz and fx,fy,fz +------------------------------------------------------------------------- */ + +void ThrOMP::ev_tally_xyz_thr(Pair * const pair, const int i, const int j, + const int nlocal, const int newton_pair, + const double evdwl, const double ecoul, + const double fx, const double fy, const double fz, + const double delx, const double dely, const double delz, + ThrData * const thr) +{ + + if (pair->eflag_either) + e_tally_thr(pair, i, j, nlocal, newton_pair, evdwl, ecoul, thr); + + if (pair->vflag_either) { + double v[6]; + v[0] = delx*fx; + v[1] = dely*fy; + v[2] = delz*fz; + v[3] = delx*fy; + v[4] = delx*fz; + v[5] = dely*fz; + + v_tally_thr(pair, i, j, nlocal, newton_pair, v, thr); + } +} + +/* ---------------------------------------------------------------------- + tally eng_vdwl and virial into global and per-atom accumulators + called by SW and hbond potentials, newton_pair is always on + virial = riFi + rjFj + rkFk = (rj-ri) Fj + (rk-ri) Fk = drji*fj + drki*fk + ------------------------------------------------------------------------- */ + +void ThrOMP::ev_tally3_thr(Pair * const pair, const int i, const int j, const int k, + const double evdwl, const double ecoul, + const double * const fj, const double * const fk, + const double * const drji, const double * const drki, + ThrData * const thr) +{ + if (pair->eflag_either) { + if (pair->eflag_global) { + thr->eng_vdwl += evdwl; + thr->eng_coul += ecoul; + } + if (pair->eflag_atom) { + const double epairthird = THIRD * (evdwl + ecoul); + thr->eatom_pair[i] += epairthird; + thr->eatom_pair[j] += epairthird; + thr->eatom_pair[k] += epairthird; + } + } + + if (pair->vflag_either) { + double v[6]; + + v[0] = drji[0]*fj[0] + drki[0]*fk[0]; + v[1] = drji[1]*fj[1] + drki[1]*fk[1]; + v[2] = drji[2]*fj[2] + drki[2]*fk[2]; + v[3] = drji[0]*fj[1] + drki[0]*fk[1]; + v[4] = drji[0]*fj[2] + drki[0]*fk[2]; + v[5] = drji[1]*fj[2] + drki[1]*fk[2]; + + if (pair->vflag_global) v_tally(thr->virial_pair,v); + + if (pair->vflag_atom) { + v_tally(thr->vatom_pair[i],THIRD,v); + v_tally(thr->vatom_pair[j],THIRD,v); + v_tally(thr->vatom_pair[k],THIRD,v); + } + } +} + +/* ---------------------------------------------------------------------- + tally eng_vdwl and virial into global and per-atom accumulators + called by AIREBO potential, newton_pair is always on + ------------------------------------------------------------------------- */ + +void ThrOMP::ev_tally4_thr(Pair * const pair, const int i, const int j, + const int k, const int m, const double evdwl, + const double * const fi, const double * const fj, + const double * const fk, const double * const drim, + const double * const drjm, const double * const drkm, + ThrData * const thr) +{ + double v[6]; + + if (pair->eflag_either) { + if (pair->eflag_global) thr->eng_vdwl += evdwl; + if (pair->eflag_atom) { + const double epairfourth = 0.25 * evdwl; + thr->eatom_pair[i] += epairfourth; + thr->eatom_pair[j] += epairfourth; + thr->eatom_pair[k] += epairfourth; + thr->eatom_pair[m] += epairfourth; + } + } + + if (pair->vflag_atom) { + v[0] = 0.25 * (drim[0]*fi[0] + drjm[0]*fj[0] + drkm[0]*fk[0]); + v[1] = 0.25 * (drim[1]*fi[1] + drjm[1]*fj[1] + drkm[1]*fk[1]); + v[2] = 0.25 * (drim[2]*fi[2] + drjm[2]*fj[2] + drkm[2]*fk[2]); + v[3] = 0.25 * (drim[0]*fi[1] + drjm[0]*fj[1] + drkm[0]*fk[1]); + v[4] = 0.25 * (drim[0]*fi[2] + drjm[0]*fj[2] + drkm[0]*fk[2]); + v[5] = 0.25 * (drim[1]*fi[2] + drjm[1]*fj[2] + drkm[1]*fk[2]); + + v_tally(thr->vatom_pair[i],v); + v_tally(thr->vatom_pair[j],v); + v_tally(thr->vatom_pair[k],v); + v_tally(thr->vatom_pair[m],v); + } +} + +/* ---------------------------------------------------------------------- + tally ecoul and virial into each of n atoms in list + called by TIP4P potential, newton_pair is always on + changes v values by dividing by n + ------------------------------------------------------------------------- */ + +void ThrOMP::ev_tally_list_thr(Pair * const pair, const int n, + const int * const list, const double ecoul, + const double * const v, ThrData * const thr) +{ + if (pair->eflag_either) { + if (pair->eflag_global) thr->eng_coul += ecoul; + if (pair->eflag_atom) { + double epairatom = ecoul/static_cast(n); + for (int i = 0; i < n; i++) thr->eatom_pair[list[i]] += epairatom; + } + } + + if (pair->vflag_either) { + if (pair->vflag_global) + v_tally(thr->virial_pair,v); + + if (pair->vflag_atom) { + const double s = 1.0/static_cast(n); + double vtmp[6]; + + vtmp[0] = s * v[0]; + vtmp[1] = s * v[1]; + vtmp[2] = s * v[2]; + vtmp[3] = s * v[3]; + vtmp[4] = s * v[4]; + vtmp[5] = s * v[5]; + + for (int i = 0; i < n; i++) { + const int j = list[i]; + v_tally(thr->vatom_pair[j],vtmp); + } + } + } +} + +/* ---------------------------------------------------------------------- + tally energy and virial into global and per-atom accumulators +------------------------------------------------------------------------- */ + +void ThrOMP::ev_tally_thr(Bond * const bond, const int i, const int j, const int nlocal, + const int newton_bond, const double ebond, const double fbond, + const double delx, const double dely, const double delz, + ThrData * const thr) +{ + if (bond->eflag_either) { + const double ebondhalf = 0.5*ebond; + if (newton_bond) { + if (bond->eflag_global) + thr->eng_bond += ebond; + if (bond->eflag_atom) { + thr->eatom_bond[i] += ebondhalf; + thr->eatom_bond[j] += ebondhalf; + } + } else { + if (bond->eflag_global) { + if (i < nlocal) thr->eng_bond += ebondhalf; + if (j < nlocal) thr->eng_bond += ebondhalf; + } + if (bond->eflag_atom) { + if (i < nlocal) thr->eatom_bond[i] += ebondhalf; + if (j < nlocal) thr->eatom_bond[j] += ebondhalf; + } + } + } + + if (bond->vflag_either) { + double v[6]; + + v[0] = delx*delx*fbond; + v[1] = dely*dely*fbond; + v[2] = delz*delz*fbond; + v[3] = delx*dely*fbond; + v[4] = delx*delz*fbond; + v[5] = dely*delz*fbond; + + if (bond->vflag_global) { + if (newton_bond) + v_tally(thr->virial_bond,v); + else { + if (i < nlocal) + v_tally(thr->virial_bond,0.5,v); + if (j < nlocal) + v_tally(thr->virial_bond,0.5,v); + } + } + + if (bond->vflag_atom) { + v[0] *= 0.5; + v[1] *= 0.5; + v[2] *= 0.5; + v[3] *= 0.5; + v[4] *= 0.5; + v[5] *= 0.5; + + if (newton_bond) { + v_tally(thr->vatom_bond[i],v); + v_tally(thr->vatom_bond[j],v); + } else { + if (j < nlocal) + v_tally(thr->vatom_bond[i],v); + if (j < nlocal) + v_tally(thr->vatom_bond[j],v); + } + } + } +} + +/* ---------------------------------------------------------------------- + tally energy and virial into global and per-atom accumulators + virial = r1F1 + r2F2 + r3F3 = (r1-r2) F1 + (r3-r2) F3 = del1*f1 + del2*f3 +------------------------------------------------------------------------- */ + +void ThrOMP::ev_tally_thr(Angle * const angle, const int i, const int j, const int k, + const int nlocal, const int newton_bond, const double eangle, + const double * const f1, const double * const f3, + const double delx1, const double dely1, const double delz1, + const double delx2, const double dely2, const double delz2, + ThrData * const thr) +{ + if (angle->eflag_either) { + const double eanglethird = THIRD*eangle; + if (newton_bond) { + if (angle->eflag_global) + thr->eng_angle += eangle; + if (angle->eflag_atom) { + thr->eatom_angle[i] += eanglethird; + thr->eatom_angle[j] += eanglethird; + thr->eatom_angle[k] += eanglethird; + } + } else { + if (angle->eflag_global) { + if (i < nlocal) thr->eng_angle += eanglethird; + if (j < nlocal) thr->eng_angle += eanglethird; + if (k < nlocal) thr->eng_angle += eanglethird; + } + if (angle->eflag_atom) { + if (i < nlocal) thr->eatom_angle[i] += eanglethird; + if (j < nlocal) thr->eatom_angle[j] += eanglethird; + if (k < nlocal) thr->eatom_angle[k] += eanglethird; + } + } + } + + if (angle->vflag_either) { + double v[6]; + + v[0] = delx1*f1[0] + delx2*f3[0]; + v[1] = dely1*f1[1] + dely2*f3[1]; + v[2] = delz1*f1[2] + delz2*f3[2]; + v[3] = delx1*f1[1] + delx2*f3[1]; + v[4] = delx1*f1[2] + delx2*f3[2]; + v[5] = dely1*f1[2] + dely2*f3[2]; + + if (angle->vflag_global) { + if (newton_bond) { + v_tally(thr->virial_angle,v); + } else { + int cnt = 0; + if (i < nlocal) ++cnt; + if (j < nlocal) ++cnt; + if (k < nlocal) ++cnt; + v_tally(thr->virial_angle,cnt*THIRD,v); + } + } + + if (angle->vflag_atom) { + v[0] *= THIRD; + v[1] *= THIRD; + v[2] *= THIRD; + v[3] *= THIRD; + v[4] *= THIRD; + v[5] *= THIRD; + + if (newton_bond) { + v_tally(thr->vatom_angle[i],v); + v_tally(thr->vatom_angle[j],v); + v_tally(thr->vatom_angle[k],v); + } else { + if (j < nlocal) v_tally(thr->vatom_angle[i],v); + if (j < nlocal) v_tally(thr->vatom_angle[j],v); + if (k < nlocal) v_tally(thr->vatom_angle[k],v); + } + } + } +} + +/* ---------------------------------------------------------------------- + tally energy and virial from 1-3 repulsion of SDK angle into accumulators +------------------------------------------------------------------------- */ + +void ThrOMP::ev_tally13_thr(Angle * const angle, const int i1, const int i3, + const int nlocal, const int newton_bond, + const double epair, const double fpair, + const double delx, const double dely, + const double delz, ThrData * const thr) +{ + + if (angle->eflag_either) { + const double epairhalf = 0.5 * epair; + + if (angle->eflag_global) { + if (newton_bond || i1 < nlocal) + thr->eng_angle += epairhalf; + if (newton_bond || i3 < nlocal) + thr->eng_angle += epairhalf; + } + + if (angle->eflag_atom) { + if (newton_bond || i1 < nlocal) thr->eatom_angle[i1] += epairhalf; + if (newton_bond || i3 < nlocal) thr->eatom_angle[i3] += epairhalf; + } + } + + if (angle->vflag_either) { + double v[6]; + v[0] = delx*delx*fpair; + v[1] = dely*dely*fpair; + v[2] = delz*delz*fpair; + v[3] = delx*dely*fpair; + v[4] = delx*delz*fpair; + v[5] = dely*delz*fpair; + + if (angle->vflag_global) { + double * const va = thr->virial_angle; + if (newton_bond || i1 < nlocal) v_tally(va,0.5,v); + if (newton_bond || i3 < nlocal) v_tally(va,0.5,v); + } + + if (angle->vflag_atom) { + if (newton_bond || i1 < nlocal) { + double * const va = thr->vatom_angle[i1]; + v_tally(va,0.5,v); + } + if (newton_bond || i3 < nlocal) { + double * const va = thr->vatom_angle[i3]; + v_tally(va,0.5,v); + } + } + } +} + + +/* ---------------------------------------------------------------------- + tally energy and virial into global and per-atom accumulators + virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4 + = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4 + = vb1*f1 + vb2*f3 + (vb3+vb2)*f4 +------------------------------------------------------------------------- */ + +void ThrOMP::ev_tally_thr(Dihedral * const dihed, const int i1, const int i2, + const int i3, const int i4, const int nlocal, + const int newton_bond, const double edihedral, + const double * const f1, const double * const f3, + const double * const f4, const double vb1x, + const double vb1y, const double vb1z, const double vb2x, + const double vb2y, const double vb2z, const double vb3x, + const double vb3y, const double vb3z, ThrData * const thr) +{ + + if (dihed->eflag_either) { + if (dihed->eflag_global) { + if (newton_bond) { + thr->eng_dihed += edihedral; + } else { + const double edihedralquarter = 0.25*edihedral; + int cnt = 0; + if (i1 < nlocal) ++cnt; + if (i2 < nlocal) ++cnt; + if (i3 < nlocal) ++cnt; + if (i4 < nlocal) ++cnt; + thr->eng_dihed += static_cast(cnt)*edihedralquarter; + } + } + if (dihed->eflag_atom) { + const double edihedralquarter = 0.25*edihedral; + if (newton_bond) { + thr->eatom_dihed[i1] += edihedralquarter; + thr->eatom_dihed[i2] += edihedralquarter; + thr->eatom_dihed[i3] += edihedralquarter; + thr->eatom_dihed[i4] += edihedralquarter; + } else { + if (i1 < nlocal) thr->eatom_dihed[i1] += edihedralquarter; + if (i2 < nlocal) thr->eatom_dihed[i2] += edihedralquarter; + if (i3 < nlocal) thr->eatom_dihed[i3] += edihedralquarter; + if (i4 < nlocal) thr->eatom_dihed[i4] += edihedralquarter; + } + } + } + + if (dihed->vflag_either) { + double v[6]; + v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0]; + v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1]; + v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2]; + v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1]; + v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2]; + v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2]; + + if (dihed->vflag_global) { + if (newton_bond) { + v_tally(thr->virial_dihed,v); + } else { + int cnt = 0; + if (i1 < nlocal) ++cnt; + if (i2 < nlocal) ++cnt; + if (i3 < nlocal) ++cnt; + if (i4 < nlocal) ++cnt; + v_tally(thr->virial_dihed,0.25*static_cast(cnt),v); + } + } + + v[0] *= 0.25; + v[1] *= 0.25; + v[2] *= 0.25; + v[3] *= 0.25; + v[4] *= 0.25; + v[5] *= 0.25; + + if (dihed->vflag_atom) { + if (newton_bond) { + v_tally(thr->vatom_dihed[i1],v); + v_tally(thr->vatom_dihed[i2],v); + v_tally(thr->vatom_dihed[i3],v); + v_tally(thr->vatom_dihed[i4],v); + } else { + if (i1 < nlocal) v_tally(thr->vatom_dihed[i1],v); + if (i2 < nlocal) v_tally(thr->vatom_dihed[i2],v); + if (i3 < nlocal) v_tally(thr->vatom_dihed[i3],v); + if (i4 < nlocal) v_tally(thr->vatom_dihed[i4],v); + } + } + } +} + +/* ---------------------------------------------------------------------- + tally energy and virial into global and per-atom accumulators + virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4 + = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4 + = vb1*f1 + vb2*f3 + (vb3+vb2)*f4 +------------------------------------------------------------------------- */ + +void ThrOMP::ev_tally_thr(Improper * const imprp, const int i1, const int i2, + const int i3, const int i4, const int nlocal, + const int newton_bond, const double eimproper, + const double * const f1, const double * const f3, + const double * const f4, const double vb1x, + const double vb1y, const double vb1z, const double vb2x, + const double vb2y, const double vb2z, const double vb3x, + const double vb3y, const double vb3z, ThrData * const thr) +{ + + if (imprp->eflag_either) { + if (imprp->eflag_global) { + if (newton_bond) { + thr->eng_imprp += eimproper; + } else { + const double eimproperquarter = 0.25*eimproper; + int cnt = 0; + if (i1 < nlocal) ++cnt; + if (i2 < nlocal) ++cnt; + if (i3 < nlocal) ++cnt; + if (i4 < nlocal) ++cnt; + thr->eng_imprp += static_cast(cnt)*eimproperquarter; + } + } + if (imprp->eflag_atom) { + const double eimproperquarter = 0.25*eimproper; + if (newton_bond) { + thr->eatom_imprp[i1] += eimproperquarter; + thr->eatom_imprp[i2] += eimproperquarter; + thr->eatom_imprp[i3] += eimproperquarter; + thr->eatom_imprp[i4] += eimproperquarter; + } else { + if (i1 < nlocal) thr->eatom_imprp[i1] += eimproperquarter; + if (i2 < nlocal) thr->eatom_imprp[i2] += eimproperquarter; + if (i3 < nlocal) thr->eatom_imprp[i3] += eimproperquarter; + if (i4 < nlocal) thr->eatom_imprp[i4] += eimproperquarter; + } + } + } + + if (imprp->vflag_either) { + double v[6]; + v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0]; + v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1]; + v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2]; + v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1]; + v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2]; + v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2]; + + if (imprp->vflag_global) { + if (newton_bond) { + v_tally(thr->virial_imprp,v); + } else { + int cnt = 0; + if (i1 < nlocal) ++cnt; + if (i2 < nlocal) ++cnt; + if (i3 < nlocal) ++cnt; + if (i4 < nlocal) ++cnt; + v_tally(thr->virial_imprp,0.25*static_cast(cnt),v); + } + } + + v[0] *= 0.25; + v[1] *= 0.25; + v[2] *= 0.25; + v[3] *= 0.25; + v[4] *= 0.25; + v[5] *= 0.25; + + if (imprp->vflag_atom) { + if (newton_bond) { + v_tally(thr->vatom_imprp[i1],v); + v_tally(thr->vatom_imprp[i2],v); + v_tally(thr->vatom_imprp[i3],v); + v_tally(thr->vatom_imprp[i4],v); + } else { + if (i1 < nlocal) v_tally(thr->vatom_imprp[i1],v); + if (i2 < nlocal) v_tally(thr->vatom_imprp[i2],v); + if (i3 < nlocal) v_tally(thr->vatom_imprp[i3],v); + if (i4 < nlocal) v_tally(thr->vatom_imprp[i4],v); + } + } + } +} + +/* ---------------------------------------------------------------------- + tally virial into per-atom accumulators + called by AIREBO potential, newton_pair is always on + fpair is magnitude of force on atom I +------------------------------------------------------------------------- */ + +void ThrOMP::v_tally2_thr(const int i, const int j, const double fpair, + const double * const drij, ThrData * const thr) +{ + double v[6]; + + v[0] = 0.5 * drij[0]*drij[0]*fpair; + v[1] = 0.5 * drij[1]*drij[1]*fpair; + v[2] = 0.5 * drij[2]*drij[2]*fpair; + v[3] = 0.5 * drij[0]*drij[1]*fpair; + v[4] = 0.5 * drij[0]*drij[2]*fpair; + v[5] = 0.5 * drij[1]*drij[2]*fpair; + + v_tally(thr->vatom_pair[i],v); + v_tally(thr->vatom_pair[j],v); +} + +/* ---------------------------------------------------------------------- + tally virial into per-atom accumulators + called by AIREBO and Tersoff potential, newton_pair is always on +------------------------------------------------------------------------- */ + +void ThrOMP::v_tally3_thr(const int i, const int j, const int k, + const double * const fi, const double * const fj, + const double * const drik, const double * const drjk, + ThrData * const thr) +{ + double v[6]; + + v[0] = THIRD * (drik[0]*fi[0] + drjk[0]*fj[0]); + v[1] = THIRD * (drik[1]*fi[1] + drjk[1]*fj[1]); + v[2] = THIRD * (drik[2]*fi[2] + drjk[2]*fj[2]); + v[3] = THIRD * (drik[0]*fi[1] + drjk[0]*fj[1]); + v[4] = THIRD * (drik[0]*fi[2] + drjk[0]*fj[2]); + v[5] = THIRD * (drik[1]*fi[2] + drjk[1]*fj[2]); + + v_tally(thr->vatom_pair[i],v); + v_tally(thr->vatom_pair[j],v); + v_tally(thr->vatom_pair[k],v); +} + +/* ---------------------------------------------------------------------- + tally virial into per-atom accumulators + called by AIREBO potential, newton_pair is always on +------------------------------------------------------------------------- */ + +void ThrOMP::v_tally4_thr(const int i, const int j, const int k, const int m, + const double * const fi, const double * const fj, + const double * const fk, const double * const drim, + const double * const drjm, const double * const drkm, + ThrData * const thr) +{ + double v[6]; + + v[0] = 0.25 * (drim[0]*fi[0] + drjm[0]*fj[0] + drkm[0]*fk[0]); + v[1] = 0.25 * (drim[1]*fi[1] + drjm[1]*fj[1] + drkm[1]*fk[1]); + v[2] = 0.25 * (drim[2]*fi[2] + drjm[2]*fj[2] + drkm[2]*fk[2]); + v[3] = 0.25 * (drim[0]*fi[1] + drjm[0]*fj[1] + drkm[0]*fk[1]); + v[4] = 0.25 * (drim[0]*fi[2] + drjm[0]*fj[2] + drkm[0]*fk[2]); + v[5] = 0.25 * (drim[1]*fi[2] + drjm[1]*fj[2] + drkm[1]*fk[2]); + + v_tally(thr->vatom_pair[i],v); + v_tally(thr->vatom_pair[j],v); + v_tally(thr->vatom_pair[k],v); + v_tally(thr->vatom_pair[m],v); +} + +/* ---------------------------------------------------------------------- */ + +double ThrOMP::memory_usage_thr() +{ + double bytes=0.0; + + return bytes; +} diff --git a/src/USER-OMP/thr_omp.h b/src/USER-OMP/thr_omp.h new file mode 100644 index 0000000000..19e50031f8 --- /dev/null +++ b/src/USER-OMP/thr_omp.h @@ -0,0 +1,187 @@ +/* -*- c++ -*- ------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifndef LMP_THR_OMP_H +#define LMP_THR_OMP_H + +#include "pointers.h" +#include "error.h" +#include "fix_omp.h" +#include "thr_data.h" + +namespace LAMMPS_NS { + +// forward declarations +class Pair; +class Bond; +class Angle; +class Dihedral; +class Improper; +class KSpace; +class Fix; + +class ThrOMP { + + protected: + LAMMPS *lmp; // reference to base lammps object. + FixOMP *fix; // pointer to fix_omp; + + const int thr_style; + int thr_error; + + public: + ThrOMP(LAMMPS *, int); + virtual ~ThrOMP(); + + double memory_usage_thr(); + + inline void sync_threads() { +#if defined(_OPENMP) +#pragma omp barrier +#endif + { ; } + }; + + enum {THR_NONE=0,THR_PAIR=1,THR_BOND=1<<1,THR_ANGLE=1<<2, + THR_DIHEDRAL=1<<3,THR_IMPROPER=1<<4,THR_KSPACE=1<<5, + THR_CHARMM=1<<6,THR_PROXY=1<<7,THR_HYBRID=1<<8,THR_FIX=1<<9}; + + protected: + // extra ev_tally setup work for threaded styles + void ev_setup_thr(int, int, int, double *, double **, ThrData *); + + // compute global per thread virial contribution from per-thread force + void virial_fdotr_compute_thr(double * const, const double * const * const, + const double * const * const, + const int, const int, const int); + + // reduce per thread data as needed + void reduce_thr(void * const style, const int eflag, const int vflag, + ThrData * const thr, const int nproxy=0); + + // thread safe variant error abort support. + // signals an error condition in any thread by making + // thr_error > 0, if condition "cond" is true. + // will abort from thread 0 if thr_error is > 0 + // otherwise return true. + // returns false if no error found on any thread. + // use return value to jump/return to end of threaded region. + + bool check_error_thr(const bool cond, const int tid, const char *fname, + const int line, const char *errmsg) { + if (cond) { +#if defined(_OPENMP) +#pragma omp atomic + ++thr_error; +#endif + if (tid > 0) return true; + else lmp->error->one(fname,line,errmsg); + } else { + if (thr_error > 0) { + if (tid == 0) lmp->error->one(fname,line,errmsg); + else return true; + } else return false; + } + return false; + }; + + protected: + + // threading adapted versions of the ev_tally infrastructure + // style specific versions (need access to style class flags) + + // Pair + void e_tally_thr(Pair * const, const int, const int, const int, + const int, const double, const double, ThrData * const); + void v_tally_thr(Pair * const, const int, const int, const int, + const int, const double * const, ThrData * const); + + void ev_tally_thr(Pair * const, const int, const int, const int, const int, + const double, const double, const double, const double, + const double, const double, ThrData * const); + void ev_tally_xyz_thr(Pair * const, const int, const int, const int, + const int, const double, const double, const double, + const double, const double, const double, + const double, const double, ThrData * const); + void ev_tally3_thr(Pair * const, const int, const int, const int, const double, + const double, const double * const, const double * const, + const double * const, const double * const, ThrData * const); + void ev_tally4_thr(Pair * const, const int, const int, const int, const int, + const double, const double * const, const double * const, + const double * const, const double * const, const double * const, + const double * const, ThrData * const); + + // Bond + void ev_tally_thr(Bond * const, const int, const int, const int, const int, + const double, const double, const double, const double, + const double, ThrData * const); + + // Angle + void ev_tally_thr(Angle * const, const int, const int, const int, const int, const int, + const double, const double * const, const double * const, + const double, const double, const double, const double, const double, + const double, ThrData * const thr); + void ev_tally13_thr(Angle * const, const int, const int, const int, const int, + const double, const double, const double, const double, + const double, ThrData * const thr); + + // Dihedral + void ev_tally_thr(Dihedral * const, const int, const int, const int, const int, const int, + const int, const double, const double * const, const double * const, + const double * const, const double, const double, const double, + const double, const double, const double, const double, const double, + const double, ThrData * const); + + // Improper + void ev_tally_thr(Improper * const, const int, const int, const int, const int, const int, + const int, const double, const double * const, const double * const, + const double * const, const double, const double, const double, + const double, const double, const double, const double, const double, + const double, ThrData * const); + + // style independent versions + void v_tally2_thr(const int, const int, const double, const double * const, ThrData * const); + void v_tally3_thr(const int, const int, const int, const double * const, const double * const, + const double * const, const double * const, ThrData * const); + void v_tally4_thr(const int, const int, const int, const int, const double * const, + const double * const, const double * const, const double * const, + const double * const, const double * const, ThrData * const); + void ev_tally_list_thr(Pair * const, const int, const int * const, + const double , const double * const , ThrData * const); + +}; + +// set loop range thread id, and force array offset for threaded runs. +static inline void loop_setup_thr(int &ifrom, int &ito, int &tid, + int inum, int nthreads, int nproxy=0) +{ +#if defined(_OPENMP) + tid = omp_get_thread_num(); + + // each thread works on a fixed chunk of atoms. + const int idelta = 1 + inum/(nthreads-nproxy); + ifrom = (tid-nproxy)*idelta; + ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; +#else + tid = 0; + ifrom = 0; + ito = inum; +#endif +} + +} +#endif