From 57a20c2706ced91705943500486597e81b7c53fa Mon Sep 17 00:00:00 2001 From: sjplimp Date: Thu, 1 Dec 2011 17:05:34 +0000 Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7265 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/USER-OMP/angle_charmm_omp.cpp | 191 ++ src/USER-OMP/angle_charmm_omp.h | 48 + src/USER-OMP/angle_class2_omp.cpp | 234 ++ src/USER-OMP/angle_class2_omp.h | 48 + src/USER-OMP/angle_cosine_delta_omp.cpp | 183 ++ src/USER-OMP/angle_cosine_delta_omp.h | 48 + src/USER-OMP/angle_cosine_omp.cpp | 160 + src/USER-OMP/angle_cosine_omp.h | 48 + src/USER-OMP/angle_cosine_periodic_omp.cpp | 198 ++ src/USER-OMP/angle_cosine_periodic_omp.h | 48 + src/USER-OMP/angle_cosine_shift_exp_omp.cpp | 181 ++ src/USER-OMP/angle_cosine_shift_exp_omp.h | 48 + src/USER-OMP/angle_cosine_shift_omp.cpp | 165 + src/USER-OMP/angle_cosine_shift_omp.h | 48 + src/USER-OMP/angle_cosine_squared_omp.cpp | 165 + src/USER-OMP/angle_cosine_squared_omp.h | 48 + src/USER-OMP/angle_harmonic_omp.cpp | 169 + src/USER-OMP/angle_harmonic_omp.h | 48 + src/USER-OMP/angle_sdk_omp.cpp | 225 ++ src/USER-OMP/angle_sdk_omp.h | 49 + src/USER-OMP/angle_table_omp.cpp | 169 + src/USER-OMP/angle_table_omp.h | 48 + src/USER-OMP/bond_class2_omp.cpp | 124 + src/USER-OMP/bond_class2_omp.h | 48 + src/USER-OMP/bond_fene_expand_omp.cpp | 150 + src/USER-OMP/bond_fene_expand_omp.h | 48 + src/USER-OMP/bond_fene_omp.cpp | 146 + src/USER-OMP/bond_fene_omp.h | 48 + src/USER-OMP/bond_harmonic_omp.cpp | 121 + src/USER-OMP/bond_harmonic_omp.h | 48 + src/USER-OMP/bond_harmonic_shift_cut_omp.cpp | 124 + src/USER-OMP/bond_harmonic_shift_cut_omp.h | 48 + src/USER-OMP/bond_harmonic_shift_omp.cpp | 121 + src/USER-OMP/bond_harmonic_shift_omp.h | 48 + src/USER-OMP/bond_morse_omp.cpp | 122 + src/USER-OMP/bond_morse_omp.h | 48 + src/USER-OMP/bond_nonlinear_omp.cpp | 122 + src/USER-OMP/bond_nonlinear_omp.h | 48 + src/USER-OMP/bond_quartic_omp.cpp | 190 ++ src/USER-OMP/bond_quartic_omp.h | 48 + src/USER-OMP/bond_table_omp.cpp | 119 + src/USER-OMP/bond_table_omp.h | 48 + src/USER-OMP/ewald_omp.cpp | 386 +++ src/USER-OMP/ewald_omp.h | 42 + src/USER-OMP/fix_omp.cpp | 279 ++ src/USER-OMP/fix_omp.h | 71 + src/USER-OMP/fix_peri_neigh_omp.cpp | 50 + src/USER-OMP/fix_peri_neigh_omp.h | 37 + src/USER-OMP/fix_qeq_comb_omp.cpp | 160 + src/USER-OMP/fix_qeq_comb_omp.h | 32 + src/USER-OMP/fix_wall_gran_omp.cpp | 152 + src/USER-OMP/fix_wall_gran_omp.h | 38 + src/USER-OMP/improper_class2_omp.cpp | 697 +++++ src/USER-OMP/improper_class2_omp.h | 52 + src/USER-OMP/improper_cvff_omp.cpp | 295 ++ src/USER-OMP/improper_cvff_omp.h | 48 + src/USER-OMP/improper_harmonic_omp.cpp | 236 ++ src/USER-OMP/improper_harmonic_omp.h | 48 + src/USER-OMP/improper_umbrella_omp.cpp | 252 ++ src/USER-OMP/improper_umbrella_omp.h | 48 + src/USER-OMP/pair_airebo_omp.cpp | 2768 +++++++++++++++++ src/USER-OMP/pair_airebo_omp.h | 53 + src/USER-OMP/pair_comb_omp.cpp | 647 ++++ src/USER-OMP/pair_comb_omp.h | 47 + src/USER-OMP/pair_coul_diel_omp.cpp | 165 + src/USER-OMP/pair_coul_diel_omp.h | 48 + src/USER-OMP/pair_gauss_cut_omp.cpp | 156 + src/USER-OMP/pair_gauss_cut_omp.h | 48 + src/USER-OMP/pair_line_lj_omp.cpp | 339 ++ src/USER-OMP/pair_line_lj_omp.h | 48 + src/USER-OMP/pair_lj_charmm_coul_pppm_omp.cpp | 260 ++ src/USER-OMP/pair_lj_charmm_coul_pppm_omp.h | 54 + src/USER-OMP/pair_lj_cut_coul_pppm_omp.cpp | 245 ++ src/USER-OMP/pair_lj_cut_coul_pppm_omp.h | 54 + .../pair_lj_cut_coul_pppm_tip4p_omp.cpp | 487 +++ .../pair_lj_cut_coul_pppm_tip4p_omp.h | 61 + src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp | 248 ++ src/USER-OMP/pair_lj_sdk_coul_long_omp.h | 49 + src/USER-OMP/pair_lj_sdk_omp.cpp | 184 ++ src/USER-OMP/pair_lj_sdk_omp.h | 49 + src/USER-OMP/pair_rebo_omp.cpp | 33 + src/USER-OMP/pair_rebo_omp.h | 36 + src/USER-OMP/pair_tersoff_table_omp.cpp | 496 +++ src/USER-OMP/pair_tersoff_table_omp.h | 43 + src/USER-OMP/pair_tri_lj_omp.cpp | 409 +++ src/USER-OMP/pair_tri_lj_omp.h | 48 + src/USER-OMP/pppm_omp.cpp | 323 ++ src/USER-OMP/pppm_omp.h | 49 + src/USER-OMP/pppm_proxy.cpp | 61 + src/USER-OMP/pppm_proxy.h | 44 + src/USER-OMP/pppm_tip4p_proxy.cpp | 61 + src/USER-OMP/pppm_tip4p_proxy.h | 44 + src/USER-OMP/thr_data.cpp | 219 ++ src/USER-OMP/thr_data.h | 132 + 94 files changed, 15319 insertions(+) create mode 100644 src/USER-OMP/angle_charmm_omp.cpp create mode 100644 src/USER-OMP/angle_charmm_omp.h create mode 100644 src/USER-OMP/angle_class2_omp.cpp create mode 100644 src/USER-OMP/angle_class2_omp.h create mode 100644 src/USER-OMP/angle_cosine_delta_omp.cpp create mode 100644 src/USER-OMP/angle_cosine_delta_omp.h create mode 100644 src/USER-OMP/angle_cosine_omp.cpp create mode 100644 src/USER-OMP/angle_cosine_omp.h create mode 100644 src/USER-OMP/angle_cosine_periodic_omp.cpp create mode 100644 src/USER-OMP/angle_cosine_periodic_omp.h create mode 100644 src/USER-OMP/angle_cosine_shift_exp_omp.cpp create mode 100644 src/USER-OMP/angle_cosine_shift_exp_omp.h create mode 100644 src/USER-OMP/angle_cosine_shift_omp.cpp create mode 100644 src/USER-OMP/angle_cosine_shift_omp.h create mode 100644 src/USER-OMP/angle_cosine_squared_omp.cpp create mode 100644 src/USER-OMP/angle_cosine_squared_omp.h create mode 100644 src/USER-OMP/angle_harmonic_omp.cpp create mode 100644 src/USER-OMP/angle_harmonic_omp.h create mode 100644 src/USER-OMP/angle_sdk_omp.cpp create mode 100644 src/USER-OMP/angle_sdk_omp.h create mode 100644 src/USER-OMP/angle_table_omp.cpp create mode 100644 src/USER-OMP/angle_table_omp.h create mode 100644 src/USER-OMP/bond_class2_omp.cpp create mode 100644 src/USER-OMP/bond_class2_omp.h create mode 100644 src/USER-OMP/bond_fene_expand_omp.cpp create mode 100644 src/USER-OMP/bond_fene_expand_omp.h create mode 100644 src/USER-OMP/bond_fene_omp.cpp create mode 100644 src/USER-OMP/bond_fene_omp.h create mode 100644 src/USER-OMP/bond_harmonic_omp.cpp create mode 100644 src/USER-OMP/bond_harmonic_omp.h create mode 100644 src/USER-OMP/bond_harmonic_shift_cut_omp.cpp create mode 100644 src/USER-OMP/bond_harmonic_shift_cut_omp.h create mode 100644 src/USER-OMP/bond_harmonic_shift_omp.cpp create mode 100644 src/USER-OMP/bond_harmonic_shift_omp.h create mode 100644 src/USER-OMP/bond_morse_omp.cpp create mode 100644 src/USER-OMP/bond_morse_omp.h create mode 100644 src/USER-OMP/bond_nonlinear_omp.cpp create mode 100644 src/USER-OMP/bond_nonlinear_omp.h create mode 100644 src/USER-OMP/bond_quartic_omp.cpp create mode 100644 src/USER-OMP/bond_quartic_omp.h create mode 100644 src/USER-OMP/bond_table_omp.cpp create mode 100644 src/USER-OMP/bond_table_omp.h create mode 100644 src/USER-OMP/ewald_omp.cpp create mode 100644 src/USER-OMP/ewald_omp.h create mode 100644 src/USER-OMP/fix_omp.cpp create mode 100644 src/USER-OMP/fix_omp.h create mode 100644 src/USER-OMP/fix_peri_neigh_omp.cpp create mode 100644 src/USER-OMP/fix_peri_neigh_omp.h create mode 100644 src/USER-OMP/fix_qeq_comb_omp.cpp create mode 100644 src/USER-OMP/fix_qeq_comb_omp.h create mode 100644 src/USER-OMP/fix_wall_gran_omp.cpp create mode 100644 src/USER-OMP/fix_wall_gran_omp.h create mode 100644 src/USER-OMP/improper_class2_omp.cpp create mode 100644 src/USER-OMP/improper_class2_omp.h create mode 100644 src/USER-OMP/improper_cvff_omp.cpp create mode 100644 src/USER-OMP/improper_cvff_omp.h create mode 100644 src/USER-OMP/improper_harmonic_omp.cpp create mode 100644 src/USER-OMP/improper_harmonic_omp.h create mode 100644 src/USER-OMP/improper_umbrella_omp.cpp create mode 100644 src/USER-OMP/improper_umbrella_omp.h create mode 100644 src/USER-OMP/pair_airebo_omp.cpp create mode 100644 src/USER-OMP/pair_airebo_omp.h create mode 100644 src/USER-OMP/pair_comb_omp.cpp create mode 100644 src/USER-OMP/pair_comb_omp.h create mode 100644 src/USER-OMP/pair_coul_diel_omp.cpp create mode 100644 src/USER-OMP/pair_coul_diel_omp.h create mode 100644 src/USER-OMP/pair_gauss_cut_omp.cpp create mode 100644 src/USER-OMP/pair_gauss_cut_omp.h create mode 100644 src/USER-OMP/pair_line_lj_omp.cpp create mode 100644 src/USER-OMP/pair_line_lj_omp.h create mode 100644 src/USER-OMP/pair_lj_charmm_coul_pppm_omp.cpp create mode 100644 src/USER-OMP/pair_lj_charmm_coul_pppm_omp.h create mode 100644 src/USER-OMP/pair_lj_cut_coul_pppm_omp.cpp create mode 100644 src/USER-OMP/pair_lj_cut_coul_pppm_omp.h create mode 100644 src/USER-OMP/pair_lj_cut_coul_pppm_tip4p_omp.cpp create mode 100644 src/USER-OMP/pair_lj_cut_coul_pppm_tip4p_omp.h create mode 100644 src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp create mode 100644 src/USER-OMP/pair_lj_sdk_coul_long_omp.h create mode 100644 src/USER-OMP/pair_lj_sdk_omp.cpp create mode 100644 src/USER-OMP/pair_lj_sdk_omp.h create mode 100644 src/USER-OMP/pair_rebo_omp.cpp create mode 100644 src/USER-OMP/pair_rebo_omp.h create mode 100644 src/USER-OMP/pair_tersoff_table_omp.cpp create mode 100644 src/USER-OMP/pair_tersoff_table_omp.h create mode 100644 src/USER-OMP/pair_tri_lj_omp.cpp create mode 100644 src/USER-OMP/pair_tri_lj_omp.h create mode 100644 src/USER-OMP/pppm_omp.cpp create mode 100644 src/USER-OMP/pppm_omp.h create mode 100644 src/USER-OMP/pppm_proxy.cpp create mode 100644 src/USER-OMP/pppm_proxy.h create mode 100644 src/USER-OMP/pppm_tip4p_proxy.cpp create mode 100644 src/USER-OMP/pppm_tip4p_proxy.h create mode 100644 src/USER-OMP/thr_data.cpp create mode 100644 src/USER-OMP/thr_data.h diff --git a/src/USER-OMP/angle_charmm_omp.cpp b/src/USER-OMP/angle_charmm_omp.cpp new file mode 100644 index 0000000000..989bb08ef9 --- /dev/null +++ b/src/USER-OMP/angle_charmm_omp.cpp @@ -0,0 +1,191 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "angle_charmm_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include "math_const.h" + +#include + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +void AngleCharmmOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nanglelist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void AngleCharmmOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,n,type; + double delx1,dely1,delz1,delx2,dely2,delz2; + double eangle,f1[3],f3[3]; + double dtheta,tk; + double rsq1,rsq2,r1,r2,c,s,a,a11,a12,a22; + double delxUB,delyUB,delzUB,rsqUB,rUB,dr,rk,forceUB; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const anglelist = neighbor->anglelist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = anglelist[n][0]; + i2 = anglelist[n][1]; + i3 = anglelist[n][2]; + type = anglelist[n][3]; + + // 1st bond + + delx1 = x[i1][0] - x[i2][0]; + dely1 = x[i1][1] - x[i2][1]; + delz1 = x[i1][2] - x[i2][2]; + domain->minimum_image(delx1,dely1,delz1); + + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + r1 = sqrt(rsq1); + + // 2nd bond + + delx2 = x[i3][0] - x[i2][0]; + dely2 = x[i3][1] - x[i2][1]; + delz2 = x[i3][2] - x[i2][2]; + domain->minimum_image(delx2,dely2,delz2); + + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + r2 = sqrt(rsq2); + + // Urey-Bradley bond + + delxUB = x[i3][0] - x[i1][0]; + delyUB = x[i3][1] - x[i1][1]; + delzUB = x[i3][2] - x[i1][2]; + domain->minimum_image(delxUB,delyUB,delzUB); + + rsqUB = delxUB*delxUB + delyUB*delyUB + delzUB*delzUB; + rUB = sqrt(rsqUB); + + // Urey-Bradley force & energy + + dr = rUB - r_ub[type]; + rk = k_ub[type] * dr; + + if (rUB > 0.0) forceUB = -2.0*rk/rUB; + else forceUB = 0.0; + + if (EFLAG) eangle = rk*dr; + + // angle (cos and sin) + + c = delx1*delx2 + dely1*dely2 + delz1*delz2; + c /= r1*r2; + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + s = 1.0/s; + + // harmonic force & energy + + dtheta = acos(c) - theta0[type]; + tk = k[type] * dtheta; + + if (EFLAG) eangle += tk*dtheta; + + a = -2.0 * tk * s; + a11 = a*c / rsq1; + a12 = -a / (r1*r2); + a22 = a*c / rsq2; + + f1[0] = a11*delx1 + a12*delx2 - delxUB*forceUB; + f1[1] = a11*dely1 + a12*dely2 - delyUB*forceUB; + f1[2] = a11*delz1 + a12*delz2 - delzUB*forceUB; + + f3[0] = a22*delx2 + a12*delx1 + delxUB*forceUB; + f3[1] = a22*dely2 + a12*dely1 + delyUB*forceUB; + f3[2] = a22*delz2 + a12*delz1 + delzUB*forceUB; + + // apply force to each of 3 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= f1[0] + f3[0]; + f[i2][1] -= f1[1] + f3[1]; + f[i2][2] -= f1[2] + f3[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3, + delx1,dely1,delz1,delx2,dely2,delz2,thr); + } +} diff --git a/src/USER-OMP/angle_charmm_omp.h b/src/USER-OMP/angle_charmm_omp.h new file mode 100644 index 0000000000..77ed6c74d7 --- /dev/null +++ b/src/USER-OMP/angle_charmm_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef ANGLE_CLASS + +AngleStyle(charmm/omp,AngleCharmmOMP) + +#else + +#ifndef LMP_ANGLE_CHARMM_OMP_H +#define LMP_ANGLE_CHARMM_OMP_H + +#include "angle_charmm.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class AngleCharmmOMP : public AngleCharmm, public ThrOMP { + + public: + AngleCharmmOMP(class LAMMPS *lmp) : + AngleCharmm(lmp), ThrOMP(lmp,THR_ANGLE) {}; + + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/angle_class2_omp.cpp b/src/USER-OMP/angle_class2_omp.cpp new file mode 100644 index 0000000000..eb2cde294a --- /dev/null +++ b/src/USER-OMP/angle_class2_omp.cpp @@ -0,0 +1,234 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "angle_class2_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include "math_const.h" + +#include + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +void AngleClass2OMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nanglelist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void AngleClass2OMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,n,type; + double delx1,dely1,delz1,delx2,dely2,delz2; + double eangle,f1[3],f3[3]; + double dtheta,dtheta2,dtheta3,dtheta4,de_angle; + double dr1,dr2,tk1,tk2,aa1,aa2,aa11,aa12,aa21,aa22; + double rsq1,rsq2,r1,r2,c,s,a,a11,a12,a22,b1,b2; + double vx11,vx12,vy11,vy12,vz11,vz12,vx21,vx22,vy21,vy22,vz21,vz22; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const anglelist = neighbor->anglelist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = anglelist[n][0]; + i2 = anglelist[n][1]; + i3 = anglelist[n][2]; + type = anglelist[n][3]; + + // 1st bond + + delx1 = x[i1][0] - x[i2][0]; + dely1 = x[i1][1] - x[i2][1]; + delz1 = x[i1][2] - x[i2][2]; + domain->minimum_image(delx1,dely1,delz1); + + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + r1 = sqrt(rsq1); + + // 2nd bond + + delx2 = x[i3][0] - x[i2][0]; + dely2 = x[i3][1] - x[i2][1]; + delz2 = x[i3][2] - x[i2][2]; + domain->minimum_image(delx2,dely2,delz2); + + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + r2 = sqrt(rsq2); + + // angle (cos and sin) + + c = delx1*delx2 + dely1*dely2 + delz1*delz2; + c /= r1*r2; + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + s = 1.0/s; + + // force & energy for angle term + + dtheta = acos(c) - theta0[type]; + dtheta2 = dtheta*dtheta; + dtheta3 = dtheta2*dtheta; + dtheta4 = dtheta3*dtheta; + + de_angle = 2.0*k2[type]*dtheta + 3.0*k3[type]*dtheta2 + + 4.0*k4[type]*dtheta3; + + a = -de_angle*s; + a11 = a*c / rsq1; + a12 = -a / (r1*r2); + a22 = a*c / rsq2; + + f1[0] = a11*delx1 + a12*delx2; + f1[1] = a11*dely1 + a12*dely2; + f1[2] = a11*delz1 + a12*delz2; + + f3[0] = a22*delx2 + a12*delx1; + f3[1] = a22*dely2 + a12*dely1; + f3[2] = a22*delz2 + a12*delz1; + + if (EFLAG) eangle = k2[type]*dtheta2 + k3[type]*dtheta3 + k4[type]*dtheta4; + + // force & energy for bond-bond term + + dr1 = r1 - bb_r1[type]; + dr2 = r2 - bb_r2[type]; + tk1 = bb_k[type] * dr1; + tk2 = bb_k[type] * dr2; + + f1[0] -= delx1*tk2/r1; + f1[1] -= dely1*tk2/r1; + f1[2] -= delz1*tk2/r1; + + f3[0] -= delx2*tk1/r2; + f3[1] -= dely2*tk1/r2; + f3[2] -= delz2*tk1/r2; + + if (EFLAG) eangle += bb_k[type]*dr1*dr2; + + // force & energy for bond-angle term + + aa1 = s * dr1 * ba_k1[type]; + aa2 = s * dr2 * ba_k2[type]; + + aa11 = aa1 * c / rsq1; + aa12 = -aa1 / (r1 * r2); + aa21 = aa2 * c / rsq1; + aa22 = -aa2 / (r1 * r2); + + vx11 = (aa11 * delx1) + (aa12 * delx2); + vx12 = (aa21 * delx1) + (aa22 * delx2); + vy11 = (aa11 * dely1) + (aa12 * dely2); + vy12 = (aa21 * dely1) + (aa22 * dely2); + vz11 = (aa11 * delz1) + (aa12 * delz2); + vz12 = (aa21 * delz1) + (aa22 * delz2); + + aa11 = aa1 * c / rsq2; + aa21 = aa2 * c / rsq2; + + vx21 = (aa11 * delx2) + (aa12 * delx1); + vx22 = (aa21 * delx2) + (aa22 * delx1); + vy21 = (aa11 * dely2) + (aa12 * dely1); + vy22 = (aa21 * dely2) + (aa22 * dely1); + vz21 = (aa11 * delz2) + (aa12 * delz1); + vz22 = (aa21 * delz2) + (aa22 * delz1); + + b1 = ba_k1[type] * dtheta / r1; + b2 = ba_k2[type] * dtheta / r2; + + f1[0] -= vx11 + b1*delx1 + vx12; + f1[1] -= vy11 + b1*dely1 + vy12; + f1[2] -= vz11 + b1*delz1 + vz12; + + f3[0] -= vx21 + b2*delx2 + vx22; + f3[1] -= vy21 + b2*dely2 + vy22; + f3[2] -= vz21 + b2*delz2 + vz22; + + if (EFLAG) eangle += ba_k1[type]*dr1*dtheta + ba_k2[type]*dr2*dtheta; + + // apply force to each of 3 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= f1[0] + f3[0]; + f[i2][1] -= f1[1] + f3[1]; + f[i2][2] -= f1[2] + f3[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3, + delx1,dely1,delz1,delx2,dely2,delz2,thr); + } +} diff --git a/src/USER-OMP/angle_class2_omp.h b/src/USER-OMP/angle_class2_omp.h new file mode 100644 index 0000000000..039d86b8af --- /dev/null +++ b/src/USER-OMP/angle_class2_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef ANGLE_CLASS + +AngleStyle(class2/omp,AngleClass2OMP) + +#else + +#ifndef LMP_ANGLE_CLASS2_OMP_H +#define LMP_ANGLE_CLASS2_OMP_H + +#include "angle_class2.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class AngleClass2OMP : public AngleClass2, public ThrOMP { + + public: + AngleClass2OMP(class LAMMPS *lmp) : + AngleClass2(lmp), ThrOMP(lmp,THR_ANGLE) {}; + + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/angle_cosine_delta_omp.cpp b/src/USER-OMP/angle_cosine_delta_omp.cpp new file mode 100644 index 0000000000..bcd4d56bf4 --- /dev/null +++ b/src/USER-OMP/angle_cosine_delta_omp.cpp @@ -0,0 +1,183 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "angle_cosine_delta_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include "math_const.h" + +#include + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +void AngleCosineDeltaOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nanglelist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void AngleCosineDeltaOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,n,type; + double delx1,dely1,delz1,delx2,dely2,delz2,theta,dtheta,dcostheta,tk; + double eangle,f1[3],f3[3]; + double rsq1,rsq2,r1,r2,c,a,cot,a11,a12,a22,b11,b12,b22,c0,s0,s; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const anglelist = neighbor->anglelist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = anglelist[n][0]; + i2 = anglelist[n][1]; + i3 = anglelist[n][2]; + type = anglelist[n][3]; + + // 1st bond + + delx1 = x[i1][0] - x[i2][0]; + dely1 = x[i1][1] - x[i2][1]; + delz1 = x[i1][2] - x[i2][2]; + domain->minimum_image(delx1,dely1,delz1); + + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + r1 = sqrt(rsq1); + + // 2nd bond + + delx2 = x[i3][0] - x[i2][0]; + dely2 = x[i3][1] - x[i2][1]; + delz2 = x[i3][2] - x[i2][2]; + domain->minimum_image(delx2,dely2,delz2); + + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + r2 = sqrt(rsq2); + + // angle (cos and sin) + + c = delx1*delx2 + dely1*dely2 + delz1*delz2; + c /= r1*r2; + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + theta = acos(c); + + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + s = 1.0/s; + + cot = c/s; + + // force & energy + + dtheta = theta - theta0[type]; + dcostheta = cos(dtheta); + tk = k[type] * (1.0-dcostheta); + + if (EFLAG) eangle = tk; + + a = -k[type]; + + // expand dtheta for cos and sin contribution to force + + a11 = a*c / rsq1; + a12 = -a / (r1*r2); + a22 = a*c / rsq2; + + b11 = -a*c*cot / rsq1; + b12 = a*cot / (r1*r2); + b22 = -a*c*cot / rsq2; + + c0 = cos(theta0[type]); + s0 = sin(theta0[type]); + + f1[0] = (a11*delx1 + a12*delx2)*c0 + (b11*delx1 + b12*delx2)*s0; + f1[1] = (a11*dely1 + a12*dely2)*c0 + (b11*dely1 + b12*dely2)*s0; + f1[2] = (a11*delz1 + a12*delz2)*c0 + (b11*delz1 + b12*delz2)*s0; + f3[0] = (a22*delx2 + a12*delx1)*c0 + (b22*delx2 + b12*delx1)*s0; + f3[1] = (a22*dely2 + a12*dely1)*c0 + (b22*dely2 + b12*dely1)*s0; + f3[2] = (a22*delz2 + a12*delz1)*c0 + (b22*delz2 + b12*delz1)*s0; + + // apply force to each of 3 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= f1[0] + f3[0]; + f[i2][1] -= f1[1] + f3[1]; + f[i2][2] -= f1[2] + f3[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3, + delx1,dely1,delz1,delx2,dely2,delz2,thr); + } +} diff --git a/src/USER-OMP/angle_cosine_delta_omp.h b/src/USER-OMP/angle_cosine_delta_omp.h new file mode 100644 index 0000000000..86e96eb900 --- /dev/null +++ b/src/USER-OMP/angle_cosine_delta_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef ANGLE_CLASS + +AngleStyle(cosine/delta/omp,AngleCosineDeltaOMP) + +#else + +#ifndef LMP_ANGLE_COSINE_DELTA_OMP_H +#define LMP_ANGLE_COSINE_DELTA_OMP_H + +#include "angle_cosine_delta.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class AngleCosineDeltaOMP : public AngleCosineDelta, public ThrOMP { + + public: + AngleCosineDeltaOMP(class LAMMPS *lmp) : + AngleCosineDelta(lmp), ThrOMP(lmp,THR_ANGLE) {}; + + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/angle_cosine_omp.cpp b/src/USER-OMP/angle_cosine_omp.cpp new file mode 100644 index 0000000000..e35fa5489a --- /dev/null +++ b/src/USER-OMP/angle_cosine_omp.cpp @@ -0,0 +1,160 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "angle_cosine_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include "math_const.h" + +#include + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +void AngleCosineOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nanglelist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void AngleCosineOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,n,type; + double delx1,dely1,delz1,delx2,dely2,delz2; + double eangle,f1[3],f3[3]; + double rsq1,rsq2,r1,r2,c,a,a11,a12,a22; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const anglelist = neighbor->anglelist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = anglelist[n][0]; + i2 = anglelist[n][1]; + i3 = anglelist[n][2]; + type = anglelist[n][3]; + + // 1st bond + + delx1 = x[i1][0] - x[i2][0]; + dely1 = x[i1][1] - x[i2][1]; + delz1 = x[i1][2] - x[i2][2]; + domain->minimum_image(delx1,dely1,delz1); + + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + r1 = sqrt(rsq1); + + // 2nd bond + + delx2 = x[i3][0] - x[i2][0]; + dely2 = x[i3][1] - x[i2][1]; + delz2 = x[i3][2] - x[i2][2]; + domain->minimum_image(delx2,dely2,delz2); + + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + r2 = sqrt(rsq2); + + // c = cosine of angle + + c = delx1*delx2 + dely1*dely2 + delz1*delz2; + c /= r1*r2; + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + // force & energy + + if (EFLAG) eangle = k[type]*(1.0+c); + + a = k[type]; + a11 = a*c / rsq1; + a12 = -a / (r1*r2); + a22 = a*c / rsq2; + + f1[0] = a11*delx1 + a12*delx2; + f1[1] = a11*dely1 + a12*dely2; + f1[2] = a11*delz1 + a12*delz2; + f3[0] = a22*delx2 + a12*delx1; + f3[1] = a22*dely2 + a12*dely1; + f3[2] = a22*delz2 + a12*delz1; + + // apply force to each of 3 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= f1[0] + f3[0]; + f[i2][1] -= f1[1] + f3[1]; + f[i2][2] -= f1[2] + f3[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3, + delx1,dely1,delz1,delx2,dely2,delz2,thr); + } +} diff --git a/src/USER-OMP/angle_cosine_omp.h b/src/USER-OMP/angle_cosine_omp.h new file mode 100644 index 0000000000..29b3e8b86b --- /dev/null +++ b/src/USER-OMP/angle_cosine_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef ANGLE_CLASS + +AngleStyle(cosine/omp,AngleCosineOMP) + +#else + +#ifndef LMP_ANGLE_COSINE_OMP_H +#define LMP_ANGLE_COSINE_OMP_H + +#include "angle_cosine.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class AngleCosineOMP : public AngleCosine, public ThrOMP { + + public: + AngleCosineOMP(class LAMMPS *lmp) : + AngleCosine(lmp), ThrOMP(lmp,THR_ANGLE) {}; + + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/angle_cosine_periodic_omp.cpp b/src/USER-OMP/angle_cosine_periodic_omp.cpp new file mode 100644 index 0000000000..d7578cac9f --- /dev/null +++ b/src/USER-OMP/angle_cosine_periodic_omp.cpp @@ -0,0 +1,198 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "angle_cosine_periodic_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include "math_const.h" + +#include + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +void AngleCosinePeriodicOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nanglelist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void AngleCosinePeriodicOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i,i1,i2,i3,n,m,type,b_factor; + double delx1,dely1,delz1,delx2,dely2,delz2; + double eangle,f1[3],f3[3]; + double rsq1,rsq2,r1,r2,c,s,a,a11,a12,a22; + double tn,tn_1,tn_2,un,un_1,un_2; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const anglelist = neighbor->anglelist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = anglelist[n][0]; + i2 = anglelist[n][1]; + i3 = anglelist[n][2]; + type = anglelist[n][3]; + + // 1st bond + + delx1 = x[i1][0] - x[i2][0]; + dely1 = x[i1][1] - x[i2][1]; + delz1 = x[i1][2] - x[i2][2]; + domain->minimum_image(delx1,dely1,delz1); + + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + r1 = sqrt(rsq1); + + // 2nd bond + + delx2 = x[i3][0] - x[i2][0]; + dely2 = x[i3][1] - x[i2][1]; + delz2 = x[i3][2] - x[i2][2]; + domain->minimum_image(delx2,dely2,delz2); + + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + r2 = sqrt(rsq2); + + // c = cosine of angle + + c = delx1*delx2 + dely1*dely2 + delz1*delz2; + c /= r1*r2; + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + m = multiplicity[type]; + b_factor = b[type]; + + // cos(n*x) = Tn(cos(x)) + // Tn(x) = Chebyshev polynomials of the first kind: T_0 = 1, T_1 = x, ... + // recurrence relationship: + // Tn(x) = 2*x*T[n-1](x) - T[n-2](x) where T[-1](x) = 0 + // also, dTn(x)/dx = n*U[n-1](x) + // where Un(x) = 2*x*U[n-1](x) - U[n-2](x) and U[-1](x) = 0 + // finally need to handle special case for n = 1 + + tn = 1.0; + tn_1 = 1.0; + tn_2 = 0.0; + un = 1.0; + un_1 = 2.0; + un_2 = 0.0; + + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + s = 1.0/s; + + // force & energy + + tn_2 = c; + for (i = 1; i <= m; i++) { + tn = 2*c*tn_1 - tn_2; + tn_2 = tn_1; + tn_1 = tn; + } + + for (i = 2; i <= m; i++) { + un = 2*c*un_1 - un_2; + un_2 = un_1; + un_1 = un; + } + tn = b_factor*pow(-1.0,m)*tn; + un = b_factor*pow(-1.0,m)*m*un; + + if (EFLAG) eangle = 2*k[type]*(1.0 - tn); + + a = -k[type]*un; + a11 = a*c / rsq1; + a12 = -a / (r1*r2); + a22 = a*c / rsq2; + + f1[0] = a11*delx1 + a12*delx2; + f1[1] = a11*dely1 + a12*dely2; + f1[2] = a11*delz1 + a12*delz2; + f3[0] = a22*delx2 + a12*delx1; + f3[1] = a22*dely2 + a12*dely1; + f3[2] = a22*delz2 + a12*delz1; + + // apply force to each of 3 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= f1[0] + f3[0]; + f[i2][1] -= f1[1] + f3[1]; + f[i2][2] -= f1[2] + f3[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3, + delx1,dely1,delz1,delx2,dely2,delz2,thr); + } +} diff --git a/src/USER-OMP/angle_cosine_periodic_omp.h b/src/USER-OMP/angle_cosine_periodic_omp.h new file mode 100644 index 0000000000..63325ff8f5 --- /dev/null +++ b/src/USER-OMP/angle_cosine_periodic_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef ANGLE_CLASS + +AngleStyle(cosine/periodic/omp,AngleCosinePeriodicOMP) + +#else + +#ifndef LMP_ANGLE_COSINE_PERIODIC_OMP_H +#define LMP_ANGLE_COSINE_PERIODIC_OMP_H + +#include "angle_cosine_periodic.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class AngleCosinePeriodicOMP : public AngleCosinePeriodic, public ThrOMP { + + public: + AngleCosinePeriodicOMP(class LAMMPS *lmp) : + AngleCosinePeriodic(lmp), ThrOMP(lmp,THR_ANGLE) {}; + + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/angle_cosine_shift_exp_omp.cpp b/src/USER-OMP/angle_cosine_shift_exp_omp.cpp new file mode 100644 index 0000000000..3959678c94 --- /dev/null +++ b/src/USER-OMP/angle_cosine_shift_exp_omp.cpp @@ -0,0 +1,181 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "angle_cosine_shift_exp_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include "math_const.h" + +#include + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +void AngleCosineShiftExpOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nanglelist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void AngleCosineShiftExpOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,n,type; + double delx1,dely1,delz1,delx2,dely2,delz2; + double eangle,f1[3],f3[3],ff; + double rsq1,rsq2,r1,r2,c,s,a11,a12,a22; + double exp2,aa,uumin,cccpsss,cssmscc; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const anglelist = neighbor->anglelist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = anglelist[n][0]; + i2 = anglelist[n][1]; + i3 = anglelist[n][2]; + type = anglelist[n][3]; + + // 1st bond + + delx1 = x[i1][0] - x[i2][0]; + dely1 = x[i1][1] - x[i2][1]; + delz1 = x[i1][2] - x[i2][2]; + domain->minimum_image(delx1,dely1,delz1); + + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + r1 = sqrt(rsq1); + + // 2nd bond + + delx2 = x[i3][0] - x[i2][0]; + dely2 = x[i3][1] - x[i2][1]; + delz2 = x[i3][2] - x[i2][2]; + domain->minimum_image(delx2,dely2,delz2); + + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + r2 = sqrt(rsq2); + + // c = cosine of angle + c = delx1*delx2 + dely1*dely2 + delz1*delz2; + c /= r1*r2; + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + // C= sine of angle + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + + // force & energy + + aa=a[type]; + uumin=umin[type]; + + cccpsss = c*cost[type]+s*sint[type]; + cssmscc = c*sint[type]-s*cost[type]; + + if (doExpansion[type]) + { // |a|<0.01 so use expansions relative precision <1e-5 +// std::cout << "Using expansion\n"; + if (EFLAG) eangle = -0.125*(1+cccpsss)*(4+aa*(cccpsss-1))*uumin; + ff=0.25*uumin*cssmscc*(2+aa*cccpsss)/s; + } + else + { +// std::cout << "Not using expansion\n"; + exp2=exp(0.5*aa*(1+cccpsss)); + if (EFLAG) eangle = opt1[type]*(1-exp2); + ff=0.5*a[type]*opt1[type]*exp2*cssmscc/s; + } + + a11 = ff*c/ rsq1; + a12 = -ff / (r1*r2); + a22 = ff*c/ rsq2; + + f1[0] = a11*delx1 + a12*delx2; + f1[1] = a11*dely1 + a12*dely2; + f1[2] = a11*delz1 + a12*delz2; + f3[0] = a22*delx2 + a12*delx1; + f3[1] = a22*dely2 + a12*dely1; + f3[2] = a22*delz2 + a12*delz1; + + // apply force to each of 3 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= f1[0] + f3[0]; + f[i2][1] -= f1[1] + f3[1]; + f[i2][2] -= f1[2] + f3[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3, + delx1,dely1,delz1,delx2,dely2,delz2,thr); + } +} diff --git a/src/USER-OMP/angle_cosine_shift_exp_omp.h b/src/USER-OMP/angle_cosine_shift_exp_omp.h new file mode 100644 index 0000000000..b9adbf104d --- /dev/null +++ b/src/USER-OMP/angle_cosine_shift_exp_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef ANGLE_CLASS + +AngleStyle(cosine/shift/exp/omp,AngleCosineShiftExpOMP) + +#else + +#ifndef LMP_ANGLE_COSINE_SHIFT_EXP_OMP_H +#define LMP_ANGLE_COSINE_SHIFT_EXP_OMP_H + +#include "angle_cosine_shift_exp.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class AngleCosineShiftExpOMP : public AngleCosineShiftExp, public ThrOMP { + + public: + AngleCosineShiftExpOMP(class LAMMPS *lmp) : + AngleCosineShiftExp(lmp), ThrOMP(lmp,THR_ANGLE) {}; + + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/angle_cosine_shift_omp.cpp b/src/USER-OMP/angle_cosine_shift_omp.cpp new file mode 100644 index 0000000000..5cb3e793a3 --- /dev/null +++ b/src/USER-OMP/angle_cosine_shift_omp.cpp @@ -0,0 +1,165 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "angle_cosine_shift_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include "math_const.h" + +#include + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +void AngleCosineShiftOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nanglelist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void AngleCosineShiftOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,n,type; + double delx1,dely1,delz1,delx2,dely2,delz2; + double eangle,f1[3],f3[3]; + double rsq1,rsq2,r1,r2,c,s,cps,kcos,ksin,a11,a12,a22; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const anglelist = neighbor->anglelist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = anglelist[n][0]; + i2 = anglelist[n][1]; + i3 = anglelist[n][2]; + type = anglelist[n][3]; + + // 1st bond + + delx1 = x[i1][0] - x[i2][0]; + dely1 = x[i1][1] - x[i2][1]; + delz1 = x[i1][2] - x[i2][2]; + domain->minimum_image(delx1,dely1,delz1); + + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + r1 = sqrt(rsq1); + + // 2nd bond + + delx2 = x[i3][0] - x[i2][0]; + dely2 = x[i3][1] - x[i2][1]; + delz2 = x[i3][2] - x[i2][2]; + domain->minimum_image(delx2,dely2,delz2); + + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + r2 = sqrt(rsq2); + + // c = cosine of angle + c = delx1*delx2 + dely1*dely2 + delz1*delz2; + c /= r1*r2; + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + // C= sine of angle + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + + // force & energy + if (EFLAG) eangle = -k[type]-kcos*c-ksin*s; + + kcos=kcost[type]; + ksin=ksint[type]; + cps = c/s; // NOTE absorbed one c + + a11 = (-kcos +ksin*cps )*c/ rsq1; + a12 = ( kcos -ksin*cps ) / (r1*r2); + a22 = (-kcos +ksin*cps )*c/ rsq2; + + f1[0] = a11*delx1 + a12*delx2; + f1[1] = a11*dely1 + a12*dely2; + f1[2] = a11*delz1 + a12*delz2; + f3[0] = a22*delx2 + a12*delx1; + f3[1] = a22*dely2 + a12*dely1; + f3[2] = a22*delz2 + a12*delz1; + + // apply force to each of 3 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= f1[0] + f3[0]; + f[i2][1] -= f1[1] + f3[1]; + f[i2][2] -= f1[2] + f3[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3, + delx1,dely1,delz1,delx2,dely2,delz2,thr); + } +} diff --git a/src/USER-OMP/angle_cosine_shift_omp.h b/src/USER-OMP/angle_cosine_shift_omp.h new file mode 100644 index 0000000000..dadbd1cf53 --- /dev/null +++ b/src/USER-OMP/angle_cosine_shift_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef ANGLE_CLASS + +AngleStyle(cosine/shift/omp,AngleCosineShiftOMP) + +#else + +#ifndef LMP_ANGLE_COSINE_SHIFT_OMP_H +#define LMP_ANGLE_COSINE_SHIFT_OMP_H + +#include "angle_cosine_shift.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class AngleCosineShiftOMP : public AngleCosineShift, public ThrOMP { + + public: + AngleCosineShiftOMP(class LAMMPS *lmp) : + AngleCosineShift(lmp), ThrOMP(lmp,THR_ANGLE) {}; + + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/angle_cosine_squared_omp.cpp b/src/USER-OMP/angle_cosine_squared_omp.cpp new file mode 100644 index 0000000000..8c39d08959 --- /dev/null +++ b/src/USER-OMP/angle_cosine_squared_omp.cpp @@ -0,0 +1,165 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "angle_cosine_squared_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include "math_const.h" + +#include + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +void AngleCosineSquaredOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nanglelist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void AngleCosineSquaredOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,n,type; + double delx1,dely1,delz1,delx2,dely2,delz2; + double eangle,f1[3],f3[3]; + double dcostheta,tk; + double rsq1,rsq2,r1,r2,c,a,a11,a12,a22; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const anglelist = neighbor->anglelist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = anglelist[n][0]; + i2 = anglelist[n][1]; + i3 = anglelist[n][2]; + type = anglelist[n][3]; + + // 1st bond + + delx1 = x[i1][0] - x[i2][0]; + dely1 = x[i1][1] - x[i2][1]; + delz1 = x[i1][2] - x[i2][2]; + domain->minimum_image(delx1,dely1,delz1); + + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + r1 = sqrt(rsq1); + + // 2nd bond + + delx2 = x[i3][0] - x[i2][0]; + dely2 = x[i3][1] - x[i2][1]; + delz2 = x[i3][2] - x[i2][2]; + domain->minimum_image(delx2,dely2,delz2); + + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + r2 = sqrt(rsq2); + + // angle (cos and sin) + + c = delx1*delx2 + dely1*dely2 + delz1*delz2; + c /= r1*r2; + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + // force & energy + + dcostheta = c - cos(theta0[type]); + tk = k[type] * dcostheta; + + if (EFLAG) eangle = tk*dcostheta; + + a = 2.0 * tk; + a11 = a*c / rsq1; + a12 = -a / (r1*r2); + a22 = a*c / rsq2; + + f1[0] = a11*delx1 + a12*delx2; + f1[1] = a11*dely1 + a12*dely2; + f1[2] = a11*delz1 + a12*delz2; + f3[0] = a22*delx2 + a12*delx1; + f3[1] = a22*dely2 + a12*dely1; + f3[2] = a22*delz2 + a12*delz1; + + // apply force to each of 3 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= f1[0] + f3[0]; + f[i2][1] -= f1[1] + f3[1]; + f[i2][2] -= f1[2] + f3[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3, + delx1,dely1,delz1,delx2,dely2,delz2,thr); + } +} diff --git a/src/USER-OMP/angle_cosine_squared_omp.h b/src/USER-OMP/angle_cosine_squared_omp.h new file mode 100644 index 0000000000..82c7c06e89 --- /dev/null +++ b/src/USER-OMP/angle_cosine_squared_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef ANGLE_CLASS + +AngleStyle(cosine/squared/omp,AngleCosineSquaredOMP) + +#else + +#ifndef LMP_ANGLE_COSINE_SQUARED_OMP_H +#define LMP_ANGLE_COSINE_SQUARED_OMP_H + +#include "angle_cosine_squared.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class AngleCosineSquaredOMP : public AngleCosineSquared, public ThrOMP { + + public: + AngleCosineSquaredOMP(class LAMMPS *lmp) : + AngleCosineSquared(lmp), ThrOMP(lmp,THR_ANGLE) {}; + + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/angle_harmonic_omp.cpp b/src/USER-OMP/angle_harmonic_omp.cpp new file mode 100644 index 0000000000..46f4654e33 --- /dev/null +++ b/src/USER-OMP/angle_harmonic_omp.cpp @@ -0,0 +1,169 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "angle_harmonic_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include "math_const.h" + +#include + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +void AngleHarmonicOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nanglelist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void AngleHarmonicOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,n,type; + double delx1,dely1,delz1,delx2,dely2,delz2; + double eangle,f1[3],f3[3]; + double dtheta,tk; + double rsq1,rsq2,r1,r2,c,s,a,a11,a12,a22; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const anglelist = neighbor->anglelist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = anglelist[n][0]; + i2 = anglelist[n][1]; + i3 = anglelist[n][2]; + type = anglelist[n][3]; + + // 1st bond + + delx1 = x[i1][0] - x[i2][0]; + dely1 = x[i1][1] - x[i2][1]; + delz1 = x[i1][2] - x[i2][2]; + domain->minimum_image(delx1,dely1,delz1); + + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + r1 = sqrt(rsq1); + + // 2nd bond + + delx2 = x[i3][0] - x[i2][0]; + dely2 = x[i3][1] - x[i2][1]; + delz2 = x[i3][2] - x[i2][2]; + domain->minimum_image(delx2,dely2,delz2); + + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + r2 = sqrt(rsq2); + + // angle (cos and sin) + + c = delx1*delx2 + dely1*dely2 + delz1*delz2; + c /= r1*r2; + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + s = 1.0/s; + + // force & energy + + dtheta = acos(c) - theta0[type]; + tk = k[type] * dtheta; + + if (EFLAG) eangle = tk*dtheta; + + a = -2.0 * tk * s; + a11 = a*c / rsq1; + a12 = -a / (r1*r2); + a22 = a*c / rsq2; + + f1[0] = a11*delx1 + a12*delx2; + f1[1] = a11*dely1 + a12*dely2; + f1[2] = a11*delz1 + a12*delz2; + f3[0] = a22*delx2 + a12*delx1; + f3[1] = a22*dely2 + a12*dely1; + f3[2] = a22*delz2 + a12*delz1; + + // apply force to each of 3 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= f1[0] + f3[0]; + f[i2][1] -= f1[1] + f3[1]; + f[i2][2] -= f1[2] + f3[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3, + delx1,dely1,delz1,delx2,dely2,delz2,thr); + } +} diff --git a/src/USER-OMP/angle_harmonic_omp.h b/src/USER-OMP/angle_harmonic_omp.h new file mode 100644 index 0000000000..3855726192 --- /dev/null +++ b/src/USER-OMP/angle_harmonic_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef ANGLE_CLASS + +AngleStyle(harmonic/omp,AngleHarmonicOMP) + +#else + +#ifndef LMP_ANGLE_HARMONIC_OMP_H +#define LMP_ANGLE_HARMONIC_OMP_H + +#include "angle_harmonic.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class AngleHarmonicOMP : public AngleHarmonic, public ThrOMP { + + public: + AngleHarmonicOMP(class LAMMPS *lmp) : + AngleHarmonic(lmp), ThrOMP(lmp,THR_ANGLE) {}; + + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/angle_sdk_omp.cpp b/src/USER-OMP/angle_sdk_omp.cpp new file mode 100644 index 0000000000..3b6c1fee7a --- /dev/null +++ b/src/USER-OMP/angle_sdk_omp.cpp @@ -0,0 +1,225 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "angle_sdk_omp.h" +#include "atom.h" +#include "neighbor.h" +#include "domain.h" +#include "comm.h" +#include "force.h" +#include "math_const.h" + +#include + +#include "lj_sdk_common.h" + +using namespace LAMMPS_NS; +using namespace MathConst; +using namespace LJSDKParms; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +void AngleSDKOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nanglelist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void AngleSDKOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,n,type; + double delx1,dely1,delz1,delx2,dely2,delz2,delx3,dely3,delz3; + double eangle,f1[3],f3[3],e13,f13; + double dtheta,tk; + double rsq1,rsq2,rsq3,r1,r2,c,s,a,a11,a12,a22; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const anglelist = neighbor->anglelist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = anglelist[n][0]; + i2 = anglelist[n][1]; + i3 = anglelist[n][2]; + type = anglelist[n][3]; + + // 1st bond + + delx1 = x[i1][0] - x[i2][0]; + dely1 = x[i1][1] - x[i2][1]; + delz1 = x[i1][2] - x[i2][2]; + domain->minimum_image(delx1,dely1,delz1); + + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + r1 = sqrt(rsq1); + + // 2nd bond + + delx2 = x[i3][0] - x[i2][0]; + dely2 = x[i3][1] - x[i2][1]; + delz2 = x[i3][2] - x[i2][2]; + domain->minimum_image(delx2,dely2,delz2); + + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + r2 = sqrt(rsq2); + + // angle (cos and sin) + + c = delx1*delx2 + dely1*dely2 + delz1*delz2; + c /= r1*r2; + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + s = 1.0/s; + + // 1-3 LJ interaction. + // we only want to use the repulsive part, + // and it can be scaled (or off). + // so this has to be done here and not in the + // general non-bonded code. + + if (repflag) { + + delx3 = x[i1][0] - x[i3][0]; + dely3 = x[i1][1] - x[i3][1]; + delz3 = x[i1][2] - x[i3][2]; + domain->minimum_image(delx3,dely3,delz3); + rsq3 = delx3*delx3 + dely3*dely3 + delz3*delz3; + + const int type1 = atom->type[i1]; + const int type3 = atom->type[i3]; + + f13=0.0; + e13=0.0; + + if (rsq3 < rminsq[type1][type3]) { + const int ljt = lj_type[type1][type3]; + const double r2inv = 1.0/rsq3; + + if (ljt == LJ12_4) { + const double r4inv=r2inv*r2inv; + + f13 = r4inv*(lj1[type1][type3]*r4inv*r4inv - lj2[type1][type3]); + if (EFLAG) e13 = r4inv*(lj3[type1][type3]*r4inv*r4inv - lj4[type1][type3]); + + } else if (ljt == LJ9_6) { + const double r3inv = r2inv*sqrt(r2inv); + const double r6inv = r3inv*r3inv; + + f13 = r6inv*(lj1[type1][type3]*r3inv - lj2[type1][type3]); + if (EFLAG) e13 = r6inv*(lj3[type1][type3]*r3inv - lj4[type1][type3]); + + } else if (ljt == LJ12_6) { + const double r6inv = r2inv*r2inv*r2inv; + + f13 = r6inv*(lj1[type1][type3]*r6inv - lj2[type1][type3]); + if (EFLAG) e13 = r6inv*(lj3[type1][type3]*r6inv - lj4[type1][type3]); + } + + // make sure energy is 0.0 at the cutoff. + if (EFLAG) e13 -= emin[type1][type3]; + + f13 *= r2inv; + } + } + + // force & energy + + dtheta = acos(c) - theta0[type]; + tk = k[type] * dtheta; + + if (EFLAG) eangle = tk*dtheta; + + a = -2.0 * tk * s; + a11 = a*c / rsq1; + a12 = -a / (r1*r2); + a22 = a*c / rsq2; + + f1[0] = a11*delx1 + a12*delx2; + f1[1] = a11*dely1 + a12*dely2; + f1[2] = a11*delz1 + a12*delz2; + f3[0] = a22*delx2 + a12*delx1; + f3[1] = a22*dely2 + a12*dely1; + f3[2] = a22*delz2 + a12*delz1; + + // apply force to each of the 3 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0] + f13*delx3; + f[i1][1] += f1[1] + f13*dely3; + f[i1][2] += f1[2] + f13*delz3; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= f1[0] + f3[0]; + f[i2][1] -= f1[1] + f3[1]; + f[i2][2] -= f1[2] + f3[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0] - f13*delx3; + f[i3][1] += f3[1] - f13*dely3; + f[i3][2] += f3[2] - f13*delz3; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3, + delx1,dely1,delz1,delx2,dely2,delz2,thr); + if (EVFLAG) ev_tally13_thr(this,i1,i3,nlocal,NEWTON_BOND, + e13,f13,delx3,dely3,delz3,thr); + + } +} diff --git a/src/USER-OMP/angle_sdk_omp.h b/src/USER-OMP/angle_sdk_omp.h new file mode 100644 index 0000000000..ed9e1bc40c --- /dev/null +++ b/src/USER-OMP/angle_sdk_omp.h @@ -0,0 +1,49 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef ANGLE_CLASS + +AngleStyle(sdk/omp,AngleSDKOMP) +AngleStyle(cg/cmm/omp,AngleSDKOMP) + +#else + +#ifndef LMP_ANGLE_SDK_OMP_H +#define LMP_ANGLE_SDK_OMP_H + +#include "angle_sdk.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class AngleSDKOMP : public AngleSDK, public ThrOMP { + + public: + AngleSDKOMP(class LAMMPS *lmp) : + AngleSDK(lmp), ThrOMP(lmp,THR_ANGLE) {}; + + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/angle_table_omp.cpp b/src/USER-OMP/angle_table_omp.cpp new file mode 100644 index 0000000000..ef37c00433 --- /dev/null +++ b/src/USER-OMP/angle_table_omp.cpp @@ -0,0 +1,169 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "angle_table_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include "math_const.h" + +#include + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +void AngleTableOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nanglelist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void AngleTableOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,n,type; + double eangle,f1[3],f3[3]; + double delx1,dely1,delz1,delx2,dely2,delz2; + double rsq1,rsq2,r1,r2,c,s,a,a11,a12,a22; + double theta,u,mdu; //mdu: minus du, -du/dx=f + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const anglelist = neighbor->anglelist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = anglelist[n][0]; + i2 = anglelist[n][1]; + i3 = anglelist[n][2]; + type = anglelist[n][3]; + + // 1st bond + + delx1 = x[i1][0] - x[i2][0]; + dely1 = x[i1][1] - x[i2][1]; + delz1 = x[i1][2] - x[i2][2]; + domain->minimum_image(delx1,dely1,delz1); + + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + r1 = sqrt(rsq1); + + // 2nd bond + + delx2 = x[i3][0] - x[i2][0]; + dely2 = x[i3][1] - x[i2][1]; + delz2 = x[i3][2] - x[i2][2]; + domain->minimum_image(delx2,dely2,delz2); + + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + r2 = sqrt(rsq2); + + // angle (cos and sin) + + c = delx1*delx2 + dely1*dely2 + delz1*delz2; + c /= r1*r2; + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + s = 1.0/s; + + // tabulated force & energy + + theta = acos(c); + uf_lookup(type,theta,u,mdu); + + if (EFLAG) eangle = u; + + a = mdu * s; + a11 = a*c / rsq1; + a12 = -a / (r1*r2); + a22 = a*c / rsq2; + + f1[0] = a11*delx1 + a12*delx2; + f1[1] = a11*dely1 + a12*dely2; + f1[2] = a11*delz1 + a12*delz2; + f3[0] = a22*delx2 + a12*delx1; + f3[1] = a22*dely2 + a12*dely1; + f3[2] = a22*delz2 + a12*delz1; + + // apply force to each of 3 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= f1[0] + f3[0]; + f[i2][1] -= f1[1] + f3[1]; + f[i2][2] -= f1[2] + f3[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,i3,nlocal,NEWTON_BOND,eangle,f1,f3, + delx1,dely1,delz1,delx2,dely2,delz2,thr); + } +} diff --git a/src/USER-OMP/angle_table_omp.h b/src/USER-OMP/angle_table_omp.h new file mode 100644 index 0000000000..b0bb9c7113 --- /dev/null +++ b/src/USER-OMP/angle_table_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef ANGLE_CLASS + +AngleStyle(table/omp,AngleTableOMP) + +#else + +#ifndef LMP_ANGLE_TABLE_OMP_H +#define LMP_ANGLE_TABLE_OMP_H + +#include "angle_table.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class AngleTableOMP : public AngleTable, public ThrOMP { + + public: + AngleTableOMP(class LAMMPS *lmp) : + AngleTable(lmp), ThrOMP(lmp,THR_ANGLE) {}; + + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/bond_class2_omp.cpp b/src/USER-OMP/bond_class2_omp.cpp new file mode 100644 index 0000000000..93c1dae044 --- /dev/null +++ b/src/USER-OMP/bond_class2_omp.cpp @@ -0,0 +1,124 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "bond_class2_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +void BondClass2OMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nbondlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void BondClass2OMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,n,type; + double delx,dely,delz,ebond,fbond; + double rsq,r,dr,dr2,dr3,dr4,de_bond; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const bondlist = neighbor->bondlist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = bondlist[n][0]; + i2 = bondlist[n][1]; + type = bondlist[n][2]; + + delx = x[i1][0] - x[i2][0]; + dely = x[i1][1] - x[i2][1]; + delz = x[i1][2] - x[i2][2]; + domain->minimum_image(delx,dely,delz); + + rsq = delx*delx + dely*dely + delz*delz; + r = sqrt(rsq); + dr = r - r0[type]; + dr2 = dr*dr; + dr3 = dr2*dr; + dr4 = dr3*dr; + + // force & energy + + de_bond = 2.0*k2[type]*dr + 3.0*k3[type]*dr2 + 4.0*k4[type]*dr3; + if (r > 0.0) fbond = -de_bond/r; + else fbond = 0.0; + + if (EFLAG) ebond = k2[type]*dr2 + k3[type]*dr3 + k4[type]*dr4; + + // apply force to each of 2 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += delx*fbond; + f[i1][1] += dely*fbond; + f[i1][2] += delz*fbond; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= delx*fbond; + f[i2][1] -= dely*fbond; + f[i2][2] -= delz*fbond; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND, + ebond,fbond,delx,dely,delz,thr); + } +} diff --git a/src/USER-OMP/bond_class2_omp.h b/src/USER-OMP/bond_class2_omp.h new file mode 100644 index 0000000000..181ed4a441 --- /dev/null +++ b/src/USER-OMP/bond_class2_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef BOND_CLASS + +BondStyle(class2/omp,BondClass2OMP) + +#else + +#ifndef LMP_BOND_CLASS2_OMP_H +#define LMP_BOND_CLASS2_OMP_H + +#include "bond_class2.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class BondClass2OMP : public BondClass2, public ThrOMP { + + public: + BondClass2OMP(class LAMMPS *lmp) : + BondClass2(lmp), ThrOMP(lmp,THR_BOND) {}; + + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/bond_fene_expand_omp.cpp b/src/USER-OMP/bond_fene_expand_omp.cpp new file mode 100644 index 0000000000..f77e11d1df --- /dev/null +++ b/src/USER-OMP/bond_fene_expand_omp.cpp @@ -0,0 +1,150 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "bond_fene_expand_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" +#include "error.h" +#include "update.h" + +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +void BondFENEExpandOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nbondlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void BondFENEExpandOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,n,type; + double delx,dely,delz,ebond,fbond; + double rsq,r0sq,rlogarg,sr2,sr6; + double r,rshift,rshiftsq; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const bondlist = neighbor->bondlist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = bondlist[n][0]; + i2 = bondlist[n][1]; + type = bondlist[n][2]; + + delx = x[i1][0] - x[i2][0]; + dely = x[i1][1] - x[i2][1]; + delz = x[i1][2] - x[i2][2]; + domain->minimum_image(delx,dely,delz); + + rsq = delx*delx + dely*dely + delz*delz; + r = sqrt(rsq); + rshift = r - shift[type]; + rshiftsq = rshift*rshift; + r0sq = r0[type] * r0[type]; + rlogarg = 1.0 - rshiftsq/r0sq; + + // if r -> r0, then rlogarg < 0.0 which is an error + // issue a warning and reset rlogarg = epsilon + // if r > 2*r0 something serious is wrong, abort + + if (rlogarg < 0.1) { + char str[128]; + sprintf(str,"FENE bond too long: " BIGINT_FORMAT " %d %d %g", + update->ntimestep,atom->tag[i1],atom->tag[i2],sqrt(rsq)); + error->warning(FLERR,str,0); + if (rlogarg <= -3.0) error->one(FLERR,"Bad FENE bond"); + rlogarg = 0.1; + } + + fbond = -k[type]*rshift/rlogarg/r; + + // force from LJ term + + if (rshiftsq < TWO_1_3*sigma[type]*sigma[type]) { + sr2 = sigma[type]*sigma[type]/rshiftsq; + sr6 = sr2*sr2*sr2; + fbond += 48.0*epsilon[type]*sr6*(sr6-0.5)/rshift/r; + } + + // energy + + if (EFLAG) { + ebond = -0.5 * k[type]*r0sq*log(rlogarg); + if (rshiftsq < TWO_1_3*sigma[type]*sigma[type]) + ebond += 4.0*epsilon[type]*sr6*(sr6-1.0) + epsilon[type]; + } + + // apply force to each of 2 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += delx*fbond; + f[i1][1] += dely*fbond; + f[i1][2] += delz*fbond; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= delx*fbond; + f[i2][1] -= dely*fbond; + f[i2][2] -= delz*fbond; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND, + ebond,fbond,delx,dely,delz,thr); + } +} diff --git a/src/USER-OMP/bond_fene_expand_omp.h b/src/USER-OMP/bond_fene_expand_omp.h new file mode 100644 index 0000000000..fd7fe4b04f --- /dev/null +++ b/src/USER-OMP/bond_fene_expand_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef BOND_CLASS + +BondStyle(fene/expand/omp,BondFENEExpandOMP) + +#else + +#ifndef LMP_BOND_FENE_EXPAND_OMP_H +#define LMP_BOND_FENE_EXPAND_OMP_H + +#include "bond_fene_expand.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class BondFENEExpandOMP : public BondFENEExpand, public ThrOMP { + + public: + BondFENEExpandOMP(class LAMMPS *lmp) : + BondFENEExpand(lmp), ThrOMP(lmp,THR_BOND) {}; + + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/bond_fene_omp.cpp b/src/USER-OMP/bond_fene_omp.cpp new file mode 100644 index 0000000000..2758ed9e9e --- /dev/null +++ b/src/USER-OMP/bond_fene_omp.cpp @@ -0,0 +1,146 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "bond_fene_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" +#include "error.h" +#include "update.h" + +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +void BondFENEOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nbondlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void BondFENEOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,n,type; + double delx,dely,delz,ebond,fbond; + double rsq,r0sq,rlogarg,sr2,sr6; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const bondlist = neighbor->bondlist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = bondlist[n][0]; + i2 = bondlist[n][1]; + type = bondlist[n][2]; + + delx = x[i1][0] - x[i2][0]; + dely = x[i1][1] - x[i2][1]; + delz = x[i1][2] - x[i2][2]; + domain->minimum_image(delx,dely,delz); + + rsq = delx*delx + dely*dely + delz*delz; + r0sq = r0[type] * r0[type]; + rlogarg = 1.0 - rsq/r0sq; + + // if r -> r0, then rlogarg < 0.0 which is an error + // issue a warning and reset rlogarg = epsilon + // if r > 2*r0 something serious is wrong, abort + + if (rlogarg < 0.1) { + char str[128]; + sprintf(str,"FENE bond too long: " BIGINT_FORMAT " %d %d %g", + update->ntimestep,atom->tag[i1],atom->tag[i2],sqrt(rsq)); + error->warning(FLERR,str,0); + if (rlogarg <= -3.0) error->one(FLERR,"Bad FENE bond"); + rlogarg = 0.1; + } + + fbond = -k[type]/rlogarg; + + // force from LJ term + + if (rsq < TWO_1_3*sigma[type]*sigma[type]) { + sr2 = sigma[type]*sigma[type]/rsq; + sr6 = sr2*sr2*sr2; + fbond += 48.0*epsilon[type]*sr6*(sr6-0.5)/rsq; + } + + // energy + + if (EFLAG) { + ebond = -0.5 * k[type]*r0sq*log(rlogarg); + if (rsq < TWO_1_3*sigma[type]*sigma[type]) + ebond += 4.0*epsilon[type]*sr6*(sr6-1.0) + epsilon[type]; + } + + // apply force to each of 2 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += delx*fbond; + f[i1][1] += dely*fbond; + f[i1][2] += delz*fbond; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= delx*fbond; + f[i2][1] -= dely*fbond; + f[i2][2] -= delz*fbond; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND, + ebond,fbond,delx,dely,delz,thr); + } +} diff --git a/src/USER-OMP/bond_fene_omp.h b/src/USER-OMP/bond_fene_omp.h new file mode 100644 index 0000000000..b617aa1363 --- /dev/null +++ b/src/USER-OMP/bond_fene_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef BOND_CLASS + +BondStyle(fene/omp,BondFENEOMP) + +#else + +#ifndef LMP_BOND_FENE_OMP_H +#define LMP_BOND_FENE_OMP_H + +#include "bond_fene.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class BondFENEOMP : public BondFENE, public ThrOMP { + + public: + BondFENEOMP(class LAMMPS *lmp) : + BondFENE(lmp), ThrOMP(lmp,THR_BOND) {}; + + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/bond_harmonic_omp.cpp b/src/USER-OMP/bond_harmonic_omp.cpp new file mode 100644 index 0000000000..d29cce234f --- /dev/null +++ b/src/USER-OMP/bond_harmonic_omp.cpp @@ -0,0 +1,121 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "bond_harmonic_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +void BondHarmonicOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nbondlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void BondHarmonicOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,n,type; + double delx,dely,delz,ebond,fbond; + double rsq,r,dr,rk; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const bondlist = neighbor->bondlist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = bondlist[n][0]; + i2 = bondlist[n][1]; + type = bondlist[n][2]; + + delx = x[i1][0] - x[i2][0]; + dely = x[i1][1] - x[i2][1]; + delz = x[i1][2] - x[i2][2]; + domain->minimum_image(delx,dely,delz); + + rsq = delx*delx + dely*dely + delz*delz; + r = sqrt(rsq); + dr = r - r0[type]; + rk = k[type] * dr; + + // force & energy + + if (r > 0.0) fbond = -2.0*rk/r; + else fbond = 0.0; + + if (EFLAG) ebond = rk*dr; + + // apply force to each of 2 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += delx*fbond; + f[i1][1] += dely*fbond; + f[i1][2] += delz*fbond; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= delx*fbond; + f[i2][1] -= dely*fbond; + f[i2][2] -= delz*fbond; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND, + ebond,fbond,delx,dely,delz,thr); + } +} diff --git a/src/USER-OMP/bond_harmonic_omp.h b/src/USER-OMP/bond_harmonic_omp.h new file mode 100644 index 0000000000..79c12aa12a --- /dev/null +++ b/src/USER-OMP/bond_harmonic_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef BOND_CLASS + +BondStyle(harmonic/omp,BondHarmonicOMP) + +#else + +#ifndef LMP_BOND_HARMONIC_OMP_H +#define LMP_BOND_HARMONIC_OMP_H + +#include "bond_harmonic.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class BondHarmonicOMP : public BondHarmonic, public ThrOMP { + + public: + BondHarmonicOMP(class LAMMPS *lmp) : + BondHarmonic(lmp), ThrOMP(lmp,THR_BOND) {}; + + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/bond_harmonic_shift_cut_omp.cpp b/src/USER-OMP/bond_harmonic_shift_cut_omp.cpp new file mode 100644 index 0000000000..05f49ac6f8 --- /dev/null +++ b/src/USER-OMP/bond_harmonic_shift_cut_omp.cpp @@ -0,0 +1,124 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "bond_harmonic_shift_cut_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +void BondHarmonicShiftCutOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nbondlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void BondHarmonicShiftCutOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,n,type; + double delx,dely,delz,ebond,fbond; + double rsq,r,dr,rk; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const bondlist = neighbor->bondlist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = bondlist[n][0]; + i2 = bondlist[n][1]; + type = bondlist[n][2]; + + delx = x[i1][0] - x[i2][0]; + dely = x[i1][1] - x[i2][1]; + delz = x[i1][2] - x[i2][2]; + domain->minimum_image(delx,dely,delz); + + rsq = delx*delx + dely*dely + delz*delz; + r = sqrt(rsq); + + if (r>r1[type]) continue; + + dr = r - r0[type]; + rk = k[type] * dr; + + // force & energy + + if (r > 0.0) fbond = -2.0*rk/r; + else fbond = 0.0; + + if (EFLAG) ebond = k[type]*(dr*dr -(r0[type]-r1[type])*(r0[type]-r1[type]) ); + + // apply force to each of 2 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += delx*fbond; + f[i1][1] += dely*fbond; + f[i1][2] += delz*fbond; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= delx*fbond; + f[i2][1] -= dely*fbond; + f[i2][2] -= delz*fbond; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND, + ebond,fbond,delx,dely,delz,thr); + } +} diff --git a/src/USER-OMP/bond_harmonic_shift_cut_omp.h b/src/USER-OMP/bond_harmonic_shift_cut_omp.h new file mode 100644 index 0000000000..e324da73ac --- /dev/null +++ b/src/USER-OMP/bond_harmonic_shift_cut_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef BOND_CLASS + +BondStyle(harmonic/shift/cut/omp,BondHarmonicShiftCutOMP) + +#else + +#ifndef LMP_BOND_HARMONIC_SHIFT_CUT_OMP_H +#define LMP_BOND_HARMONIC_SHIFT_CUT_OMP_H + +#include "bond_harmonic_shift_cut.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class BondHarmonicShiftCutOMP : public BondHarmonicShiftCut, public ThrOMP { + + public: + BondHarmonicShiftCutOMP(class LAMMPS *lmp) : + BondHarmonicShiftCut(lmp), ThrOMP(lmp,THR_BOND) {}; + + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/bond_harmonic_shift_omp.cpp b/src/USER-OMP/bond_harmonic_shift_omp.cpp new file mode 100644 index 0000000000..917fff30be --- /dev/null +++ b/src/USER-OMP/bond_harmonic_shift_omp.cpp @@ -0,0 +1,121 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "bond_harmonic_shift_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +void BondHarmonicShiftOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nbondlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void BondHarmonicShiftOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,n,type; + double delx,dely,delz,ebond,fbond; + double rsq,r,dr,rk; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const bondlist = neighbor->bondlist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = bondlist[n][0]; + i2 = bondlist[n][1]; + type = bondlist[n][2]; + + delx = x[i1][0] - x[i2][0]; + dely = x[i1][1] - x[i2][1]; + delz = x[i1][2] - x[i2][2]; + domain->minimum_image(delx,dely,delz); + + rsq = delx*delx + dely*dely + delz*delz; + r = sqrt(rsq); + dr = r - r0[type]; + rk = k[type] * dr; + + // force & energy + + if (r > 0.0) fbond = -2.0*rk/r; + else fbond = 0.0; + + if (EFLAG) ebond = k[type]*(dr*dr -(r0[type]-r1[type])*(r0[type]-r1[type]) ); + + // apply force to each of 2 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += delx*fbond; + f[i1][1] += dely*fbond; + f[i1][2] += delz*fbond; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= delx*fbond; + f[i2][1] -= dely*fbond; + f[i2][2] -= delz*fbond; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND, + ebond,fbond,delx,dely,delz,thr); + } +} diff --git a/src/USER-OMP/bond_harmonic_shift_omp.h b/src/USER-OMP/bond_harmonic_shift_omp.h new file mode 100644 index 0000000000..d5a45472a6 --- /dev/null +++ b/src/USER-OMP/bond_harmonic_shift_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef BOND_CLASS + +BondStyle(harmonic/shift/omp,BondHarmonicShiftOMP) + +#else + +#ifndef LMP_BOND_HARMONIC_SHIFT_OMP_H +#define LMP_BOND_HARMONIC_SHIFT_OMP_H + +#include "bond_harmonic_shift.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class BondHarmonicShiftOMP : public BondHarmonicShift, public ThrOMP { + + public: + BondHarmonicShiftOMP(class LAMMPS *lmp) : + BondHarmonicShift(lmp), ThrOMP(lmp,THR_BOND) {}; + + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/bond_morse_omp.cpp b/src/USER-OMP/bond_morse_omp.cpp new file mode 100644 index 0000000000..6b89507a65 --- /dev/null +++ b/src/USER-OMP/bond_morse_omp.cpp @@ -0,0 +1,122 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "bond_morse_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +void BondMorseOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nbondlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void BondMorseOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,n,type; + double delx,dely,delz,ebond,fbond; + double rsq,r,dr,ralpha; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const bondlist = neighbor->bondlist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = bondlist[n][0]; + i2 = bondlist[n][1]; + type = bondlist[n][2]; + + delx = x[i1][0] - x[i2][0]; + dely = x[i1][1] - x[i2][1]; + delz = x[i1][2] - x[i2][2]; + domain->minimum_image(delx,dely,delz); + + rsq = delx*delx + dely*dely + delz*delz; + r = sqrt(rsq); + + dr = r - r0[type]; + ralpha = exp(-alpha[type]*dr); + + // force & energy + + if (r > 0.0) fbond = -2.0*d0[type]*alpha[type]*(1-ralpha)*ralpha/r; + else fbond = 0.0; + + if (EFLAG) ebond = d0[type]*(1-ralpha)*(1-ralpha); + + // apply force to each of 2 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += delx*fbond; + f[i1][1] += dely*fbond; + f[i1][2] += delz*fbond; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= delx*fbond; + f[i2][1] -= dely*fbond; + f[i2][2] -= delz*fbond; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND, + ebond,fbond,delx,dely,delz,thr); + } +} diff --git a/src/USER-OMP/bond_morse_omp.h b/src/USER-OMP/bond_morse_omp.h new file mode 100644 index 0000000000..0deb731e2c --- /dev/null +++ b/src/USER-OMP/bond_morse_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef BOND_CLASS + +BondStyle(morse/omp,BondMorseOMP) + +#else + +#ifndef LMP_BOND_MORSE_OMP_H +#define LMP_BOND_MORSE_OMP_H + +#include "bond_morse.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class BondMorseOMP : public BondMorse, public ThrOMP { + + public: + BondMorseOMP(class LAMMPS *lmp) : + BondMorse(lmp), ThrOMP(lmp,THR_BOND) {}; + + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/bond_nonlinear_omp.cpp b/src/USER-OMP/bond_nonlinear_omp.cpp new file mode 100644 index 0000000000..574022d86d --- /dev/null +++ b/src/USER-OMP/bond_nonlinear_omp.cpp @@ -0,0 +1,122 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "bond_nonlinear_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +void BondNonlinearOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nbondlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void BondNonlinearOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,n,type; + double delx,dely,delz,ebond,fbond; + double rsq,r,dr,drsq,lamdasq,denom,denomsq; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const bondlist = neighbor->bondlist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = bondlist[n][0]; + i2 = bondlist[n][1]; + type = bondlist[n][2]; + + delx = x[i1][0] - x[i2][0]; + dely = x[i1][1] - x[i2][1]; + delz = x[i1][2] - x[i2][2]; + domain->minimum_image(delx,dely,delz); + + rsq = delx*delx + dely*dely + delz*delz; + r = sqrt(rsq); + dr = r - r0[type]; + drsq = dr*dr; + lamdasq = lamda[type]*lamda[type]; + denom = lamdasq - drsq; + denomsq = denom*denom; + + // force & energy + + fbond = -epsilon[type]/r * 2.0*dr*lamdasq/denomsq; + if (EFLAG) ebond = epsilon[type] * drsq / denom; + + // apply force to each of 2 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += delx*fbond; + f[i1][1] += dely*fbond; + f[i1][2] += delz*fbond; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= delx*fbond; + f[i2][1] -= dely*fbond; + f[i2][2] -= delz*fbond; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND, + ebond,fbond,delx,dely,delz,thr); + } +} diff --git a/src/USER-OMP/bond_nonlinear_omp.h b/src/USER-OMP/bond_nonlinear_omp.h new file mode 100644 index 0000000000..a104a9af44 --- /dev/null +++ b/src/USER-OMP/bond_nonlinear_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef BOND_CLASS + +BondStyle(nonlinear/omp,BondNonlinearOMP) + +#else + +#ifndef LMP_BOND_NONLINEAR_OMP_H +#define LMP_BOND_NONLINEAR_OMP_H + +#include "bond_nonlinear.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class BondNonlinearOMP : public BondNonlinear, public ThrOMP { + + public: + BondNonlinearOMP(class LAMMPS *lmp) : + BondNonlinear(lmp), ThrOMP(lmp,THR_BOND) {}; + + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/bond_quartic_omp.cpp b/src/USER-OMP/bond_quartic_omp.cpp new file mode 100644 index 0000000000..23b3bc3fbd --- /dev/null +++ b/src/USER-OMP/bond_quartic_omp.cpp @@ -0,0 +1,190 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "bond_quartic_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" +#include "pair.h" + +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +void BondQuarticOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + // insure pair->ev_tally() will use 1-4 virial contribution + + if (vflag_global == 2) + force->pair->vflag_either = force->pair->vflag_global = 1; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nbondlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void BondQuarticOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,n,m,type,itype,jtype; + double delx,dely,delz,ebond,fbond,evdwl,fpair; + double r,rsq,dr,r2,ra,rb,sr2,sr6; + + ebond = evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + int * const * const bondlist = neighbor->bondlist; + const double * const * const cutsq = force->pair->cutsq; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + + // skip bond if already broken + + if (bondlist[n][2] <= 0) continue; + + i1 = bondlist[n][0]; + i2 = bondlist[n][1]; + type = bondlist[n][2]; + + delx = x[i1][0] - x[i2][0]; + dely = x[i1][1] - x[i2][1]; + delz = x[i1][2] - x[i2][2]; + domain->minimum_image(delx,dely,delz); + + rsq = delx*delx + dely*dely + delz*delz; + + // if bond breaks, set type to 0 + // both in temporary bondlist and permanent bond_type + // if this proc owns both atoms, + // negate bond_type twice if other atom stores it + // if other proc owns 2nd atom, other proc will also break bond + + if (rsq > rc[type]*rc[type]) { + bondlist[n][2] = 0; + for (m = 0; m < atom->num_bond[i1]; m++) + if (atom->bond_atom[i1][m] == atom->tag[i2]) + atom->bond_type[i1][m] = 0; + if (i2 < atom->nlocal) + for (m = 0; m < atom->num_bond[i2]; m++) + if (atom->bond_atom[i2][m] == atom->tag[i1]) + atom->bond_type[i2][m] = 0; + continue; + } + + // quartic bond + // 1st portion is from quartic term + // 2nd portion is from LJ term cut at 2^(1/6) with eps = sigma = 1.0 + + r = sqrt(rsq); + dr = r - rc[type]; + r2 = dr*dr; + ra = dr - b1[type]; + rb = dr - b2[type]; + fbond = -k[type]/r * (r2*(ra+rb) + 2.0*dr*ra*rb); + + if (rsq < TWO_1_3) { + sr2 = 1.0/rsq; + sr6 = sr2*sr2*sr2; + fbond += 48.0*sr6*(sr6-0.5)/rsq; + } + + if (EFLAG) { + ebond = k[type]*r2*ra*rb + u0[type]; + if (rsq < TWO_1_3) ebond += 4.0*sr6*(sr6-1.0) + 1.0; + } + + // apply force to each of 2 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += delx*fbond; + f[i1][1] += dely*fbond; + f[i1][2] += delz*fbond; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= delx*fbond; + f[i2][1] -= dely*fbond; + f[i2][2] -= delz*fbond; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND,ebond,fbond,delx,dely,delz,thr); + + // subtract out pairwise contribution from 2 atoms via pair->single() + // required since special_bond = 1,1,1 + // tally energy/virial in pair, using newton_bond as newton flag + + itype = atom->type[i1]; + jtype = atom->type[i2]; + + if (rsq < cutsq[itype][jtype]) { + evdwl = -force->pair->single(i1,i2,itype,jtype,rsq,1.0,1.0,fpair); + fpair = -fpair; + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += delx*fpair; + f[i1][1] += dely*fpair; + f[i1][2] += delz*fpair; + } + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= delx*fpair; + f[i2][1] -= dely*fpair; + f[i2][2] -= delz*fpair; + } + + if (EVFLAG) ev_tally_thr(force->pair,i1,i2,nlocal,NEWTON_BOND, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } +} diff --git a/src/USER-OMP/bond_quartic_omp.h b/src/USER-OMP/bond_quartic_omp.h new file mode 100644 index 0000000000..f2dcec833b --- /dev/null +++ b/src/USER-OMP/bond_quartic_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef BOND_CLASS + +BondStyle(quartic/omp,BondQuarticOMP) + +#else + +#ifndef LMP_BOND_QUARTIC_OMP_H +#define LMP_BOND_QUARTIC_OMP_H + +#include "bond_quartic.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class BondQuarticOMP : public BondQuartic, public ThrOMP { + + public: + BondQuarticOMP(class LAMMPS *lmp) : + BondQuartic(lmp), ThrOMP(lmp,THR_BOND) {}; + + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/bond_table_omp.cpp b/src/USER-OMP/bond_table_omp.cpp new file mode 100644 index 0000000000..f822f01fa2 --- /dev/null +++ b/src/USER-OMP/bond_table_omp.cpp @@ -0,0 +1,119 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "bond_table_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "domain.h" + +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +void BondTableOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nbondlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void BondTableOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,n,type; + double delx,dely,delz,ebond,fbond; + double rsq,r; + double u,mdu; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const bondlist = neighbor->bondlist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = bondlist[n][0]; + i2 = bondlist[n][1]; + type = bondlist[n][2]; + + delx = x[i1][0] - x[i2][0]; + dely = x[i1][1] - x[i2][1]; + delz = x[i1][2] - x[i2][2]; + domain->minimum_image(delx,dely,delz); + + rsq = delx*delx + dely*dely + delz*delz; + r = sqrt(rsq); + + // force & energy + + uf_lookup(type,r,u,mdu); + fbond = mdu/r; + ebond = u; + + // apply force to each of 2 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += delx*fbond; + f[i1][1] += dely*fbond; + f[i1][2] += delz*fbond; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] -= delx*fbond; + f[i2][1] -= dely*fbond; + f[i2][2] -= delz*fbond; + } + + if (EVFLAG) ev_tally_thr(this,i1,i2,nlocal,NEWTON_BOND, + ebond,fbond,delx,dely,delz,thr); + } +} diff --git a/src/USER-OMP/bond_table_omp.h b/src/USER-OMP/bond_table_omp.h new file mode 100644 index 0000000000..3cf8022226 --- /dev/null +++ b/src/USER-OMP/bond_table_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef BOND_CLASS + +BondStyle(table/omp,BondTableOMP) + +#else + +#ifndef LMP_BOND_TABLE_OMP_H +#define LMP_BOND_TABLE_OMP_H + +#include "bond_table.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class BondTableOMP : public BondTable, public ThrOMP { + + public: + BondTableOMP(class LAMMPS *lmp) : + BondTable(lmp), ThrOMP(lmp,THR_BOND) {}; + + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/ewald_omp.cpp b/src/USER-OMP/ewald_omp.cpp new file mode 100644 index 0000000000..ea2c33d5d9 --- /dev/null +++ b/src/USER-OMP/ewald_omp.cpp @@ -0,0 +1,386 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) +------------------------------------------------------------------------- */ + +#include "mpi.h" +#include "ewald_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "memory.h" + +#include + +#include "math_const.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define SMALL 0.00001 + +/* ---------------------------------------------------------------------- */ + +EwaldOMP::EwaldOMP(LAMMPS *lmp, int narg, char **arg) + : Ewald(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE) +{ } + +/* ---------------------------------------------------------------------- */ +void EwaldOMP::allocate() +{ + Ewald::allocate(); + + // always re-allocate for simplicity. + delete[] sfacrl; + delete[] sfacim; + + sfacrl = new double[kmax3d*comm->nthreads]; + sfacim = new double[kmax3d*comm->nthreads]; +} + +/* ---------------------------------------------------------------------- + compute the Ewald long-range force, energy, virial +------------------------------------------------------------------------- */ + +void EwaldOMP::compute(int eflag, int vflag) +{ + // clear out global energy/virial + + energy = 0.0; + if (vflag) for (int n = 0; n < 6; n++) virial[n] = 0.0; + + // extend size of per-atom arrays if necessary + + if (atom->nlocal > nmax) { + memory->destroy(ek); + memory->destroy3d_offset(cs,-kmax_created); + memory->destroy3d_offset(sn,-kmax_created); + nmax = atom->nmax; + memory->create(ek,nmax,3,"ewald:ek"); + memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs"); + memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn"); + kmax_created = kmax; + } + + // partial structure factors on each processor + // total structure factor by summing over procs + + eik_dot_r(); + MPI_Allreduce(sfacrl,sfacrl_all,kcount,MPI_DOUBLE,MPI_SUM,world); + MPI_Allreduce(sfacim,sfacim_all,kcount,MPI_DOUBLE,MPI_SUM,world); + + // K-space portion of electric field + // double loop over K-vectors and local atoms + + double * const * const f = atom->f; + const double * const q = atom->q; + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + const double qscale = force->qqrd2e * scale; + + double eng_tmp = 0.0; + double v0,v1,v2,v3,v4,v5; + v0=v1=v2=v3=v4=v5=0.0; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) reduction(+:eng_tmp,v0,v1,v2,v3,v4,v5) +#endif + { + + int i,k,ifrom,ito,tid; + int kx,ky,kz; + double cypz,sypz,exprl,expim,partial; + + loop_setup_thr(ifrom, ito, tid, nlocal, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, 0, NULL, NULL, thr); + + for (i = ifrom; i < ito; i++) { + ek[i][0] = 0.0; + ek[i][1] = 0.0; + ek[i][2] = 0.0; + } + + for (k = 0; k < kcount; k++) { + kx = kxvecs[k]; + ky = kyvecs[k]; + kz = kzvecs[k]; + + for (i = ifrom; i < ito; i++) { + cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i]; + sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i]; + exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz; + expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz; + partial = expim*sfacrl_all[k] - exprl*sfacim_all[k]; + ek[i][0] += partial*eg[k][0]; + ek[i][1] += partial*eg[k][1]; + ek[i][2] += partial*eg[k][2]; + } + } + + // convert E-field to force + + for (i = ifrom; i < ito; i++) { + const double fac = qscale*q[i]; + f[i][0] += fac*ek[i][0]; + f[i][1] += fac*ek[i][1]; + f[i][2] += fac*ek[i][2]; + } + + // energy if requested + + if (eflag) { +#if defined(_OPENMP) +#pragma omp for private(k) +#endif + for (k = 0; k < kcount; k++) + eng_tmp += ug[k] * (sfacrl_all[k]*sfacrl_all[k] + + sfacim_all[k]*sfacim_all[k]); + } + + // virial if requested + + if (vflag) { +#if defined(_OPENMP) +#pragma omp for private(k) +#endif + for (k = 0; k < kcount; k++) { + double uk = ug[k] * (sfacrl_all[k]*sfacrl_all[k] + sfacim_all[k]*sfacim_all[k]); + v0 += uk*vg[k][0]; + v1 += uk*vg[k][1]; + v2 += uk*vg[k][2]; + v3 += uk*vg[k][3]; + v4 += uk*vg[k][4]; + v5 += uk*vg[k][5]; + } + } + + reduce_thr(this, eflag,vflag,thr); + } // end of omp parallel region + + if (eflag) { + eng_tmp -= g_ewald*qsqsum/MY_PIS + + MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); + energy = eng_tmp * qscale; + } + + if (vflag) { + virial[0] = v0 * qscale; + virial[1] = v1 * qscale; + virial[2] = v2 * qscale; + virial[3] = v3 * qscale; + virial[4] = v4 * qscale; + virial[5] = v5 * qscale; + } + + if (slabflag) slabcorr(eflag); +} + +/* ---------------------------------------------------------------------- */ + +void EwaldOMP::eik_dot_r() +{ + const double * const * const x = atom->x; + const double * const q = atom->q; + const int nlocal = atom->nlocal; + const int nthreads = comm->nthreads; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + int i,ifrom,ito,k,l,m,n,ic,tid; + double cstr1,sstr1,cstr2,sstr2,cstr3,sstr3,cstr4,sstr4; + double sqk,clpm,slpm; + + loop_setup_thr(ifrom, ito, tid, nlocal, nthreads); + + double * const sfacrl_thr = sfacrl + tid*kmax3d; + double * const sfacim_thr = sfacim + tid*kmax3d; + + n = 0; + + // (k,0,0), (0,l,0), (0,0,m) + + for (ic = 0; ic < 3; ic++) { + sqk = unitk[ic]*unitk[ic]; + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + for (i = ifrom; i < ito; i++) { + cs[0][ic][i] = 1.0; + sn[0][ic][i] = 0.0; + cs[1][ic][i] = cos(unitk[ic]*x[i][ic]); + sn[1][ic][i] = sin(unitk[ic]*x[i][ic]); + cs[-1][ic][i] = cs[1][ic][i]; + sn[-1][ic][i] = -sn[1][ic][i]; + cstr1 += q[i]*cs[1][ic][i]; + sstr1 += q[i]*sn[1][ic][i]; + } + sfacrl_thr[n] = cstr1; + sfacim_thr[n++] = sstr1; + } + } + + for (m = 2; m <= kmax; m++) { + for (ic = 0; ic < 3; ic++) { + sqk = m*unitk[ic] * m*unitk[ic]; + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + for (i = ifrom; i < ito; i++) { + cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] - + sn[m-1][ic][i]*sn[1][ic][i]; + sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] + + cs[m-1][ic][i]*sn[1][ic][i]; + cs[-m][ic][i] = cs[m][ic][i]; + sn[-m][ic][i] = -sn[m][ic][i]; + cstr1 += q[i]*cs[m][ic][i]; + sstr1 += q[i]*sn[m][ic][i]; + } + sfacrl_thr[n] = cstr1; + sfacim_thr[n++] = sstr1; + } + } + } + + // 1 = (k,l,0), 2 = (k,-l,0) + + for (k = 1; k <= kmax; k++) { + for (l = 1; l <= kmax; l++) { + sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + for (i = ifrom; i < ito; i++) { + cstr1 += q[i]*(cs[k][0][i]*cs[l][1][i] - sn[k][0][i]*sn[l][1][i]); + sstr1 += q[i]*(sn[k][0][i]*cs[l][1][i] + cs[k][0][i]*sn[l][1][i]); + cstr2 += q[i]*(cs[k][0][i]*cs[l][1][i] + sn[k][0][i]*sn[l][1][i]); + sstr2 += q[i]*(sn[k][0][i]*cs[l][1][i] - cs[k][0][i]*sn[l][1][i]); + } + sfacrl_thr[n] = cstr1; + sfacim_thr[n++] = sstr1; + sfacrl_thr[n] = cstr2; + sfacim_thr[n++] = sstr2; + } + } + } + + // 1 = (0,l,m), 2 = (0,l,-m) + + for (l = 1; l <= kmax; l++) { + for (m = 1; m <= kmax; m++) { + sqk = (l*unitk[1] * l*unitk[1]) + (m*unitk[2] * m*unitk[2]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + for (i = ifrom; i < ito; i++) { + cstr1 += q[i]*(cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]); + sstr1 += q[i]*(sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]); + cstr2 += q[i]*(cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]); + sstr2 += q[i]*(sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]); + } + sfacrl_thr[n] = cstr1; + sfacim_thr[n++] = sstr1; + sfacrl_thr[n] = cstr2; + sfacim_thr[n++] = sstr2; + } + } + } + + // 1 = (k,0,m), 2 = (k,0,-m) + + for (k = 1; k <= kmax; k++) { + for (m = 1; m <= kmax; m++) { + sqk = (k*unitk[0] * k*unitk[0]) + (m*unitk[2] * m*unitk[2]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + for (i = ifrom; i < ito; i++) { + cstr1 += q[i]*(cs[k][0][i]*cs[m][2][i] - sn[k][0][i]*sn[m][2][i]); + sstr1 += q[i]*(sn[k][0][i]*cs[m][2][i] + cs[k][0][i]*sn[m][2][i]); + cstr2 += q[i]*(cs[k][0][i]*cs[m][2][i] + sn[k][0][i]*sn[m][2][i]); + sstr2 += q[i]*(sn[k][0][i]*cs[m][2][i] - cs[k][0][i]*sn[m][2][i]); + } + sfacrl_thr[n] = cstr1; + sfacim_thr[n++] = sstr1; + sfacrl_thr[n] = cstr2; + sfacim_thr[n++] = sstr2; + } + } + } + + // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m) + + for (k = 1; k <= kmax; k++) { + for (l = 1; l <= kmax; l++) { + for (m = 1; m <= kmax; m++) { + sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]) + + (m*unitk[2] * m*unitk[2]); + if (sqk <= gsqmx) { + cstr1 = 0.0; + sstr1 = 0.0; + cstr2 = 0.0; + sstr2 = 0.0; + cstr3 = 0.0; + sstr3 = 0.0; + cstr4 = 0.0; + sstr4 = 0.0; + for (i = ifrom; i < ito; i++) { + clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; + slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; + cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + + clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]; + slpm = -sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; + cstr2 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr2 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + + clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]; + slpm = sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]; + cstr3 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr3 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + + clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; + slpm = -sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]; + cstr4 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); + sstr4 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); + } + sfacrl_thr[n] = cstr1; + sfacim_thr[n++] = sstr1; + sfacrl_thr[n] = cstr2; + sfacim_thr[n++] = sstr2; + sfacrl_thr[n] = cstr3; + sfacim_thr[n++] = sstr3; + sfacrl_thr[n] = cstr4; + sfacim_thr[n++] = sstr4; + } + } + } + } + + sync_threads(); + data_reduce_thr(sfacrl,kmax3d,nthreads,1,tid); + data_reduce_thr(sfacim,kmax3d,nthreads,1,tid); + + } // end of parallel region +} diff --git a/src/USER-OMP/ewald_omp.h b/src/USER-OMP/ewald_omp.h new file mode 100644 index 0000000000..515a74ed35 --- /dev/null +++ b/src/USER-OMP/ewald_omp.h @@ -0,0 +1,42 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef KSPACE_CLASS + +KSpaceStyle(ewald/omp,EwaldOMP) + +#else + +#ifndef LMP_EWALD_OMP_H +#define LMP_EWALD_OMP_H + +#include "ewald.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + + class EwaldOMP : public Ewald, public ThrOMP { + public: + EwaldOMP(class LAMMPS *, int, char **); + virtual ~EwaldOMP() { }; + virtual void allocate(); + virtual void compute(int, int); + + protected: + virtual void eik_dot_r(); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/fix_omp.cpp b/src/USER-OMP/fix_omp.cpp new file mode 100644 index 0000000000..29c577c489 --- /dev/null +++ b/src/USER-OMP/fix_omp.cpp @@ -0,0 +1,279 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + OpenMP based threading support for LAMMPS + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "atom.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_request.h" +#include "update.h" +#include "integrate.h" +#include "min.h" + +#include "fix_omp.h" +#include "thr_data.h" +#include "thr_omp.h" + +#include "pair_hybrid.h" +#include "bond_hybrid.h" +#include "angle_hybrid.h" +#include "dihedral_hybrid.h" +#include "improper_hybrid.h" + +#include +#include +#include + +using namespace LAMMPS_NS; + +static int get_tid() +{ + int tid = 0; +#if defined(_OPENMP) + tid = omp_get_thread_num(); +#endif + return tid; +} + +/* ---------------------------------------------------------------------- */ + +FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg), + thr(NULL), last_omp_style(NULL), _nthr(-1), _neighbor(true), _newton(false) +{ + if ((narg < 4) || (narg > 6)) error->all(FLERR,"Illegal fix OMP command"); + if (strcmp(arg[1],"all") != 0) error->all(FLERR,"Illegal fix OMP command"); + + int nthreads = 1; + if (narg > 3) { +#if defined(_OPENMP) + if (strcmp(arg[3],"*") == 0) +#pragma omp parallel default(none) shared(nthreads) + nthreads = omp_get_num_threads(); + else + nthreads = atoi(arg[3]); +#endif + } + + if (nthreads < 1) + error->all(FLERR,"Illegal number of threads requested."); + + if (nthreads != comm->nthreads) { +#if defined(_OPENMP) + omp_set_num_threads(nthreads); +#endif + comm->nthreads = nthreads; + if (comm->me == 0) { + if (screen) + fprintf(screen," reset %d OpenMP thread(s) per MPI task\n", nthreads); + if (logfile) + fprintf(logfile," reset %d OpenMP thread(s) per MPI task\n", nthreads); + } + } + + if (narg > 4) { + if (strcmp(arg[4],"force/neigh") == 0) + _neighbor = true; + else if (strcmp(arg[4],"force") == 0) + _neighbor = false; + else + error->all(FLERR,"Illegal fix omp mode requested."); + + if (comm->me == 0) { + const char * const mode = _neighbor ? "OpenMP capable" : "serial"; + + if (screen) + fprintf(screen," using %s neighbor list subroutines\n", mode); + if (logfile) + fprintf(logfile," using %s neighbor list subroutines\n", mode); + } + } + +#if 0 /* to be enabled when we can switch between half and full neighbor lists */ + if (narg > 5) { + if (strcmp(arg[5],"neigh/half") == 0) + _newton = true; + else if (strcmp(arg[5],"neigh/full") == 0) + _newton = false; + else + error->all(FLERR,"Illegal fix OMP command"); + + if (comm->me == 0) { + const char * const mode = _newton ? "half" : "full"; + + if (screen) + fprintf(screen," using /omp styles with %s neighbor list builds\n", mode); + if (logfile) + fprintf(logfile," using /omp styles with %s neighbor list builds\n", mode); + } + } +#endif + + // allocate list for per thread accumulator manager class instances + // and then have each thread create an instance of this class to + // encourage the OS to use storage that is "close" to each thread's CPU. + thr = new ThrData *[nthreads]; + _nthr = nthreads; +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + const int tid = get_tid(); + thr[tid] = new ThrData(tid); + } +} + +/* ---------------------------------------------------------------------- */ + +FixOMP::~FixOMP() +{ +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + const int tid = get_tid(); + delete thr[tid]; + } + delete[] thr; +} + +/* ---------------------------------------------------------------------- */ + +int FixOMP::setmask() +{ + int mask = 0; + mask |= PRE_FORCE; + mask |= PRE_FORCE_RESPA; + mask |= MIN_PRE_FORCE; + return mask; +} + +/* ---------------------------------------------------------------------- */ + +void FixOMP::init() +{ + if (strstr(update->integrate_style,"respa") != NULL) + error->all(FLERR,"Cannot use r-RESPA with /omp styles"); + + int check_hybrid; + last_omp_style = NULL; + char *last_omp_name = NULL; + +// determine which is the last force style with OpenMP +// support as this is the one that has to reduce the forces + +#define CheckStyleForOMP(name) \ + check_hybrid = 0; \ + if (force->name) { \ + if ( (strcmp(force->name ## _style,"hybrid") == 0) || \ + (strcmp(force->name ## _style,"hybrid/overlay") == 0) ) \ + check_hybrid=1; \ + int len = strlen(force->name ## _style); \ + char *suffix = force->name ## _style + len - 4; \ + if (strcmp(suffix,"/omp") == 0) { \ + last_omp_name = force->name ## _style; \ + last_omp_style = (void *) force->name; \ + } \ + } + +#define CheckHybridForOMP(name,Class) \ + if (check_hybrid) { \ + Class ## Hybrid *style = (Class ## Hybrid *) force->name; \ + for (int i=0; i < style->nstyles; i++) { \ + int len = strlen(style->keywords[i]); \ + char *suffix = style->keywords[i] + len - 4; \ + if (strcmp(suffix,"/omp") == 0) { \ + last_omp_name = force->name ## _style; \ + last_omp_style = (void *) force->name; \ + } \ + } \ + } + + CheckStyleForOMP(pair); + CheckHybridForOMP(pair,Pair); + + CheckStyleForOMP(bond); + CheckHybridForOMP(bond,Bond); + + CheckStyleForOMP(angle); + CheckHybridForOMP(angle,Angle); + + CheckStyleForOMP(dihedral); + CheckHybridForOMP(dihedral,Dihedral); + + CheckStyleForOMP(improper); + CheckHybridForOMP(improper,Improper); + + CheckStyleForOMP(kspace); + +#undef CheckStyleForOMP +#undef CheckHybridForOMP + + set_neighbor_omp(); +} + +/* ---------------------------------------------------------------------- */ + +void FixOMP::set_neighbor_omp() +{ + // select or deselect multi-threaded neighbor + // list build depending on setting in package omp. + // NOTE: since we are at the top of the list of + // fixes, we cannot adjust neighbor lists from + // other fixes. those have to be re-implemented + // as /omp fix styles. :-( + + const int neigh_omp = _neighbor ? 1 : 0; + const int nrequest = neighbor->nrequest; + + for (int i = 0; i < nrequest; ++i) + neighbor->requests[i]->omp = neigh_omp; +} + +/* ---------------------------------------------------------------------- */ + +// adjust size and clear out per thread accumulator arrays +void FixOMP::pre_force(int) +{ + const int nall = atom->nlocal + atom->nghost; + + double **f = atom->f; + double **torque = atom->torque; + double *erforce = atom->erforce; + double *de = atom->de; + double *drho = atom->drho; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(f,torque,erforce,de,drho) +#endif + { + const int tid = get_tid(); + thr[tid]->check_tid(tid); + thr[tid]->init_force(nall,f,torque,erforce,de,drho); + } +} + +/* ---------------------------------------------------------------------- */ + +double FixOMP::memory_usage() +{ + double bytes = comm->nthreads * (sizeof(ThrData *) + sizeof(ThrData)); + bytes += comm->nthreads * thr[0]->memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/fix_omp.h b/src/USER-OMP/fix_omp.h new file mode 100644 index 0000000000..2167c281ec --- /dev/null +++ b/src/USER-OMP/fix_omp.h @@ -0,0 +1,71 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(OMP,FixOMP) + +#else + +#ifndef LMP_FIX_OMP_H +#define LMP_FIX_OMP_H + +#include "fix.h" + + +namespace LAMMPS_NS { + +class ThrData; + +class FixOMP : public Fix { + friend class ThrOMP; + + public: + FixOMP(class LAMMPS *, int, char **); + virtual ~FixOMP(); + virtual int setmask(); + virtual void init(); + virtual void pre_force(int); + + virtual void setup_pre_force(int vflag) { pre_force(vflag); }; + virtual void min_setup_pre_force(int vflag) { pre_force(vflag); }; + virtual void min_pre_force(int vflag) { pre_force(vflag); }; + virtual void setup_pre_force_respa(int vflag,int) { pre_force(vflag); }; + virtual void pre_force_respa(int vflag,int,int) { pre_force(vflag); }; + + virtual double memory_usage(); + + ThrData *get_thr(int tid) { return thr[tid]; }; + int get_nthr() const { return _nthr; } + + protected: + ThrData **thr; + void *last_omp_style; // pointer to the style that needs + // to do the force reduction + + public: + bool get_neighbor() const {return _neighbor;}; + bool get_newton() const {return _newton;}; + + private: + int _nthr; // number of currently active ThrData object + bool _neighbor; // en/disable threads for neighbor list construction + bool _newton; // en/disable newton's 3rd law for local atoms. + + void set_neighbor_omp(); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/fix_peri_neigh_omp.cpp b/src/USER-OMP/fix_peri_neigh_omp.cpp new file mode 100644 index 0000000000..b8e07403a9 --- /dev/null +++ b/src/USER-OMP/fix_peri_neigh_omp.cpp @@ -0,0 +1,50 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mike Parks (SNL) +------------------------------------------------------------------------- */ + +#include "fix_peri_neigh_omp.h" +#include "fix_omp.h" +#include "modify.h" +#include "neighbor.h" +#include "neigh_request.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +void FixPeriNeighOMP::init() +{ + if (!first) return; + + // determine status of neighbor flag of the omp package command + int ifix = modify->find_fix("package_omp"); + int use_omp = 0; + if (ifix >=0) { + FixOMP * fix = static_cast(lmp->modify->fix[ifix]); + if (fix->get_neighbor()) use_omp = 1; + } + + // need a full neighbor list once + + int irequest = neighbor->request((void *) this); + neighbor->requests[irequest]->pair = 0; + neighbor->requests[irequest]->fix = 1; + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + neighbor->requests[irequest]->omp = use_omp; + neighbor->requests[irequest]->occasional = 1; +} + diff --git a/src/USER-OMP/fix_peri_neigh_omp.h b/src/USER-OMP/fix_peri_neigh_omp.h new file mode 100644 index 0000000000..605bbae5f4 --- /dev/null +++ b/src/USER-OMP/fix_peri_neigh_omp.h @@ -0,0 +1,37 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(PERI_NEIGH_OMP,FixPeriNeighOMP) + +#else + +#ifndef LMP_FIX_PERI_NEIGH_OMP_H +#define LMP_FIX_PERI_NEIGH_OMP_H + +#include "fix_peri_neigh.h" + +namespace LAMMPS_NS { + +class FixPeriNeighOMP : public FixPeriNeigh { + + public: + FixPeriNeighOMP(class LAMMPS *lmp, int narg, char **argv) : FixPeriNeigh(lmp,narg,argv) {}; + virtual void init(); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/fix_qeq_comb_omp.cpp b/src/USER-OMP/fix_qeq_comb_omp.cpp new file mode 100644 index 0000000000..f0a477fef6 --- /dev/null +++ b/src/USER-OMP/fix_qeq_comb_omp.cpp @@ -0,0 +1,160 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "mpi.h" +#include +#include "fix_qeq_comb_omp.h" +#include "atom.h" +#include "force.h" +#include "group.h" +#include "memory.h" +#include "error.h" +#include "respa.h" +#include "update.h" +#include "pair_comb_omp.h" + +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +FixQEQCombOMP::FixQEQCombOMP(LAMMPS *lmp, int narg, char **arg) + : FixQEQComb(lmp, narg, arg) +{ + if (narg < 5) error->all(FLERR,"Illegal fix qeq/comb/omp command"); +} + +/* ---------------------------------------------------------------------- */ + +void FixQEQCombOMP::init() +{ + if (!atom->q_flag) + error->all(FLERR,"Fix qeq/comb/omp requires atom attribute q"); + + comb = (PairComb *) force->pair_match("comb/omp",1); + if (comb == NULL) + comb = (PairComb *) force->pair_match("comb",1); + if (comb == NULL) error->all(FLERR,"Must use pair_style comb or comb/omp with fix qeq/comb"); + + if (strstr(update->integrate_style,"respa")) + nlevels_respa = ((Respa *) update->integrate)->nlevels; + + ngroup = group->count(igroup); + if (ngroup == 0) error->all(FLERR,"Fix qeq/comb group has no atoms"); +} + +/* ---------------------------------------------------------------------- */ + +void FixQEQCombOMP::post_force(int vflag) +{ + int i,iloop,loopmax; + double heatpq,qmass,dtq,dtq2; + double enegchkall,enegmaxall; + + if (update->ntimestep % nevery) return; + + // reallocate work arrays if necessary + // qf = charge force + // q1 = charge displacement + // q2 = tmp storage of charge force for next iteration + + if (atom->nmax > nmax) { + memory->destroy(qf); + memory->destroy(q1); + memory->destroy(q2); + nmax = atom->nmax; + memory->create(qf,nmax,"qeq:qf"); + memory->create(q1,nmax,"qeq:q1"); + memory->create(q2,nmax,"qeq:q2"); + vector_atom = qf; + } + + // more loops for first-time charge equilibrium + + iloop = 0; + if (firstflag) loopmax = 5000; + else loopmax = 2000; + + // charge-equilibration loop + + if (me == 0 && fp) + fprintf(fp,"Charge equilibration on step " BIGINT_FORMAT "\n", + update->ntimestep); + + heatpq = 0.05; + qmass = 0.000548580; + dtq = 0.0006; + dtq2 = 0.5*dtq*dtq/qmass; + + double enegchk = 0.0; + double enegtot = 0.0; + double enegmax = 0.0; + + double *q = atom->q; + int *mask = atom->mask; + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) + q1[i] = q2[i] = qf[i] = 0.0; + + for (iloop = 0; iloop < loopmax; iloop ++ ) { + for (i = 0; i < nlocal; i++) + if (mask[i] & groupbit) { + q1[i] += qf[i]*dtq2 - heatpq*q1[i]; + q[i] += q1[i]; + } + + enegtot = comb->yasu_char(qf,igroup); + enegtot /= ngroup; + enegchk = enegmax = 0.0; + + for (i = 0; i < nlocal ; i++) + if (mask[i] & groupbit) { + q2[i] = enegtot-qf[i]; + enegmax = MAX(enegmax,fabs(q2[i])); + enegchk += fabs(q2[i]); + qf[i] = q2[i]; + } + + MPI_Allreduce(&enegchk,&enegchkall,1,MPI_DOUBLE,MPI_SUM,world); + enegchk = enegchkall/ngroup; + MPI_Allreduce(&enegmax,&enegmaxall,1,MPI_DOUBLE,MPI_MAX,world); + enegmax = enegmaxall; + + if (enegchk <= precision && enegmax <= 100.0*precision) break; + + if (me == 0 && fp) + fprintf(fp," iteration: %d, enegtot %.6g, " + "enegmax %.6g, fq deviation: %.6g\n", + iloop,enegtot,enegmax,enegchk); + + for (i = 0; i < nlocal; i++) + if (mask[i] & groupbit) + q1[i] += qf[i]*dtq2 - heatpq*q1[i]; + } + + if (me == 0 && fp) { + if (iloop == loopmax) + fprintf(fp,"Charges did not converge in %d iterations\n",iloop); + else + fprintf(fp,"Charges converged in %d iterations to %.10f tolerance\n", + iloop,enegchk); + } +} + diff --git a/src/USER-OMP/fix_qeq_comb_omp.h b/src/USER-OMP/fix_qeq_comb_omp.h new file mode 100644 index 0000000000..0febe6b0aa --- /dev/null +++ b/src/USER-OMP/fix_qeq_comb_omp.h @@ -0,0 +1,32 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(qeq/comb/omp,FixQEQCombOMP) + +#else + +#ifndef LMP_FIX_QEQ_COMB_OMP_H +#define LMP_FIX_QEQ_COMB_OMP_H + +#include "fix_qeq_comb.h" + +namespace LAMMPS_NS { + +class FixQEQCombOMP : public FixQEQComb { + public: + FixQEQCombOMP(class LAMMPS *, int, char **); + virtual void init(); + virtual void post_force(int); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/fix_wall_gran_omp.cpp b/src/USER-OMP/fix_wall_gran_omp.cpp new file mode 100644 index 0000000000..5857c921a6 --- /dev/null +++ b/src/USER-OMP/fix_wall_gran_omp.cpp @@ -0,0 +1,152 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "fix_wall_gran_omp.h" +#include "atom.h" +#include "update.h" + +using namespace LAMMPS_NS; + + +enum{XPLANE,YPLANE,ZPLANE,ZCYLINDER}; // XYZ PLANE need to be 0,1,2 +enum{HOOKE,HOOKE_HISTORY,HERTZ_HISTORY}; + +#define BIG 1.0e20 + +/* ---------------------------------------------------------------------- */ + +FixWallGranOMP::FixWallGranOMP(LAMMPS *lmp, int narg, char **arg) : + FixWallGran(lmp, narg, arg) { } + +/* ---------------------------------------------------------------------- */ + +void FixWallGranOMP::post_force(int vflag) +{ + double vwall[3]; + + // set position of wall to initial settings and velocity to 0.0 + // if wiggle or shear, set wall position and velocity accordingly + + double wlo = lo; + double whi = hi; + vwall[0] = vwall[1] = vwall[2] = 0.0; + if (wiggle) { + double arg = omega * (update->ntimestep - time_origin) * dt; + if (wallstyle == axis) { + wlo = lo + amplitude - amplitude*cos(arg); + whi = hi + amplitude - amplitude*cos(arg); + } + vwall[axis] = amplitude*omega*sin(arg); + } else if (wshear) vwall[axis] = vshear; + + // loop over all my atoms + // rsq = distance from wall + // dx,dy,dz = signed distance from wall + // for rotating cylinder, reset vwall based on particle position + // skip atom if not close enough to wall + // if wall was set to NULL, it's skipped since lo/hi are infinity + // compute force and torque on atom if close enough to wall + // via wall potential matched to pair potential + // set shear if pair potential stores history + + double * const * const x = atom->x; + double * const * const v = atom->v; + double * const * const f = atom->f; + double * const * const omega = atom->omega; + double * const * const torque = atom->torque; + double * const radius = atom->radius; + double * const rmass = atom->rmass; + const int * const mask = atom->mask; + const int nlocal = atom->nlocal; + + if (update->ntimestep > laststep) shearupdate = 1; + else shearupdate = 0; + + int i; +#if defined(_OPENMP) +#pragma omp parallel for private(i) default(none) firstprivate(vwall,wlo,whi) +#endif + for (i = 0; i < nlocal; i++) { + + if (mask[i] & groupbit) { + double dx,dy,dz,del1,del2,delxy,delr,rsq; + + dx = dy = dz = 0.0; + + if (wallstyle == XPLANE) { + del1 = x[i][0] - wlo; + del2 = whi - x[i][0]; + if (del1 < del2) dx = del1; + else dx = -del2; + } else if (wallstyle == YPLANE) { + del1 = x[i][1] - wlo; + del2 = whi - x[i][1]; + if (del1 < del2) dy = del1; + else dy = -del2; + } else if (wallstyle == ZPLANE) { + del1 = x[i][2] - wlo; + del2 = whi - x[i][2]; + if (del1 < del2) dz = del1; + else dz = -del2; + } else if (wallstyle == ZCYLINDER) { + delxy = sqrt(x[i][0]*x[i][0] + x[i][1]*x[i][1]); + delr = cylradius - delxy; + if (delr > radius[i]) dz = cylradius; + else { + dx = -delr/delxy * x[i][0]; + dy = -delr/delxy * x[i][1]; + if (wshear && axis != 2) { + vwall[0] = vshear * x[i][1]/delxy; + vwall[1] = -vshear * x[i][0]/delxy; + vwall[2] = 0.0; + } + } + } + + rsq = dx*dx + dy*dy + dz*dz; + + if (rsq > radius[i]*radius[i]) { + if (pairstyle != HOOKE) { + shear[i][0] = 0.0; + shear[i][1] = 0.0; + shear[i][2] = 0.0; + } + } else { + if (pairstyle == HOOKE) + hooke(rsq,dx,dy,dz,vwall,v[i],f[i],omega[i],torque[i], + radius[i],rmass[i]); + else if (pairstyle == HOOKE_HISTORY) + hooke_history(rsq,dx,dy,dz,vwall,v[i],f[i],omega[i],torque[i], + radius[i],rmass[i],shear[i]); + else if (pairstyle == HERTZ_HISTORY) + hertz_history(rsq,dx,dy,dz,vwall,v[i],f[i],omega[i],torque[i], + radius[i],rmass[i],shear[i]); + } + } + } + + laststep = update->ntimestep; +} + +/* ---------------------------------------------------------------------- */ + +void FixWallGranOMP::post_force_respa(int vflag, int ilevel, int iloop) +{ + if (ilevel == nlevels_respa-1) post_force(vflag); +} + diff --git a/src/USER-OMP/fix_wall_gran_omp.h b/src/USER-OMP/fix_wall_gran_omp.h new file mode 100644 index 0000000000..cfdfb9c75d --- /dev/null +++ b/src/USER-OMP/fix_wall_gran_omp.h @@ -0,0 +1,38 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(wall/gran/omp,FixWallGranOMP) + +#else + +#ifndef LMP_FIX_WALL_GRAN_OMP_H +#define LMP_FIX_WALL_GRAN_OMP_H + +#include "fix_wall_gran.h" + +namespace LAMMPS_NS { + +class FixWallGranOMP : public FixWallGran { + + public: + FixWallGranOMP(class LAMMPS *, int, char **); + virtual void post_force(int); + virtual void post_force_respa(int, int, int); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/improper_class2_omp.cpp b/src/USER-OMP/improper_class2_omp.cpp new file mode 100644 index 0000000000..cfadde435b --- /dev/null +++ b/src/USER-OMP/improper_class2_omp.cpp @@ -0,0 +1,697 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "mpi.h" +#include "math.h" +#include "improper_class2_omp.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "domain.h" +#include "force.h" +#include "update.h" +#include "error.h" + +using namespace LAMMPS_NS; + +#define TOLERANCE 0.05 +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +void ImproperClass2OMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nimproperlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void ImproperClass2OMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,i4,i,j,k,n,type; + double eimproper; + double delr[3][3],rmag[3],rinvmag[3],rmag2[3]; + double theta[3],costheta[3],sintheta[3]; + double cossqtheta[3],sinsqtheta[3],invstheta[3]; + double rABxrCB[3],rDBxrAB[3],rCBxrDB[3]; + double ddelr[3][4],dr[3][4][3],dinvr[3][4][3]; + double dthetadr[3][4][3],dinvsth[3][4][3]; + double dinv3r[4][3],dinvs3r[3][4][3]; + double drCBxrDB[3],rCBxdrDB[3],drDBxrAB[3],rDBxdrAB[3]; + double drABxrCB[3],rABxdrCB[3]; + double dot1,dot2,dd[3]; + double fdot[3][4][3],ftmp,invs3r[3],inv3r; + double t,tt1,tt3,sc1; + double dotCBDBAB,dotDBABCB,dotABCBDB; + double chi,deltachi,d2chi,cossin2; + double drAB[3][4][3],drCB[3][4][3],drDB[3][4][3]; + double dchi[3][4][3],dtotalchi[4][3]; + double schiABCD,chiABCD,schiCBDA,chiCBDA,schiDBAC,chiDBAC; + double fabcd[4][3]; + + eimproper = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const improperlist = neighbor->improperlist; + const int nlocal = atom->nlocal; + + for (i = 0; i < 3; i++) + for (j = 0; j < 4; j++) + for (k = 0; k < 3; k++) { + dthetadr[i][j][k] = 0.0; + drAB[i][j][k] = 0.0; + drCB[i][j][k] = 0.0; + drDB[i][j][k] = 0.0; + } + + for (n = nfrom; n < nto; n++) { + i1 = improperlist[n][0]; + i2 = improperlist[n][1]; + i3 = improperlist[n][2]; + i4 = improperlist[n][3]; + type = improperlist[n][4]; + + if (k0[type] == 0.0) continue; + + // difference vectors + + delr[0][0] = x[i1][0] - x[i2][0]; + delr[0][1] = x[i1][1] - x[i2][1]; + delr[0][2] = x[i1][2] - x[i2][2]; + domain->minimum_image(delr[0]); + + delr[1][0] = x[i3][0] - x[i2][0]; + delr[1][1] = x[i3][1] - x[i2][1]; + delr[1][2] = x[i3][2] - x[i2][2]; + domain->minimum_image(delr[1]); + + delr[2][0] = x[i4][0] - x[i2][0]; + delr[2][1] = x[i4][1] - x[i2][1]; + delr[2][2] = x[i4][2] - x[i2][2]; + domain->minimum_image(delr[2]); + + // bond lengths and associated values + + for (i = 0; i < 3; i++) { + rmag2[i] = delr[i][0]*delr[i][0] + delr[i][1]*delr[i][1] + + delr[i][2]*delr[i][2]; + rmag[i] = sqrt(rmag2[i]); + rinvmag[i] = 1.0/rmag[i]; + } + + // angle ABC, CBD, ABD + + costheta[0] = (delr[0][0]*delr[1][0] + delr[0][1]*delr[1][1] + + delr[0][2]*delr[1][2]) / (rmag[0]*rmag[1]); + costheta[1] = (delr[1][0]*delr[2][0] + delr[1][1]*delr[2][1] + + delr[1][2]*delr[2][2]) / (rmag[1]*rmag[2]); + costheta[2] = (delr[0][0]*delr[2][0] + delr[0][1]*delr[2][1] + + delr[0][2]*delr[2][2]) / (rmag[0]*rmag[2]); + + // angle error check + + for (i = 0; i < 3; i++) { + if (costheta[i] == -1.0) { + int me = comm->me; + if (screen) { + char str[128]; + sprintf(str, + "Improper problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", + me, thr->get_tid(),update->ntimestep, + atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); + error->warning(FLERR,str,0); + fprintf(screen," 1st atom: %d %g %g %g\n", + me,x[i1][0],x[i1][1],x[i1][2]); + fprintf(screen," 2nd atom: %d %g %g %g\n", + me,x[i2][0],x[i2][1],x[i2][2]); + fprintf(screen," 3rd atom: %d %g %g %g\n", + me,x[i3][0],x[i3][1],x[i3][2]); + fprintf(screen," 4th atom: %d %g %g %g\n", + me,x[i4][0],x[i4][1],x[i4][2]); + } + } + } + + for (i = 0; i < 3; i++) { + if (costheta[i] > 1.0) costheta[i] = 1.0; + if (costheta[i] < -1.0) costheta[i] = -1.0; + theta[i] = acos(costheta[i]); + cossqtheta[i] = costheta[i]*costheta[i]; + sintheta[i] = sin(theta[i]); + invstheta[i] = 1.0/sintheta[i]; + sinsqtheta[i] = sintheta[i]*sintheta[i]; + } + + // cross & dot products + + cross(delr[0],delr[1],rABxrCB); + cross(delr[2],delr[0],rDBxrAB); + cross(delr[1],delr[2],rCBxrDB); + + dotCBDBAB = dot(rCBxrDB,delr[0]); + dotDBABCB = dot(rDBxrAB,delr[1]); + dotABCBDB = dot(rABxrCB,delr[2]); + + t = rmag[0] * rmag[1] * rmag[2]; + inv3r = 1.0/t; + invs3r[0] = invstheta[1] * inv3r; + invs3r[1] = invstheta[2] * inv3r; + invs3r[2] = invstheta[0] * inv3r; + + // chi ABCD, CBDA, DBAC + // final chi is average of three + + schiABCD = dotCBDBAB * invs3r[0]; + chiABCD = asin(schiABCD); + schiCBDA = dotDBABCB * invs3r[1]; + chiCBDA = asin(schiCBDA); + schiDBAC = dotABCBDB * invs3r[2]; + chiDBAC = asin(schiDBAC); + + chi = (chiABCD + chiCBDA + chiDBAC) / 3.0; + deltachi = chi - chi0[type]; + d2chi = deltachi * deltachi; + + // energy + + if (EFLAG) eimproper = k0[type]*d2chi; + + // forces + // define d(delr) + // i = bond AB/CB/DB, j = atom A/B/C/D + + ddelr[0][0] = 1.0; + ddelr[0][1] = -1.0; + ddelr[0][2] = 0.0; + ddelr[0][3] = 0.0; + ddelr[1][0] = 0.0; + ddelr[1][1] = -1.0; + ddelr[1][2] = 1.0; + ddelr[1][3] = 0.0; + ddelr[2][0] = 0.0; + ddelr[2][1] = -1.0; + ddelr[2][2] = 0.0; + ddelr[2][3] = 1.0; + + // compute d(|r|)/dr and d(1/|r|)/dr for each direction, bond and atom + // define d(r) for each r + // i = bond AB/CB/DB, j = atom A/B/C/D, k = X/Y/Z + + for (i = 0; i < 3; i++) + for (j = 0; j < 4; j++) + for (k = 0; k < 3; k++) { + dr[i][j][k] = delr[i][k] * ddelr[i][j] / rmag[i]; + dinvr[i][j][k] = -dr[i][j][k] / rmag2[i]; + } + + // compute d(1 / (|r_AB| * |r_CB| * |r_DB|) / dr + // i = atom A/B/C/D, j = X/Y/Z + + for (i = 0; i < 4; i++) + for (j = 0; j < 3; j++) + dinv3r[i][j] = rinvmag[1] * (rinvmag[2] * dinvr[0][i][j] + + rinvmag[0] * dinvr[2][i][j]) + + rinvmag[2] * rinvmag[0] * dinvr[1][i][j]; + + // compute d(theta)/d(r) for 3 angles + // angleABC + + tt1 = costheta[0] / rmag2[0]; + tt3 = costheta[0] / rmag2[1]; + sc1 = 1.0 / sqrt(1.0 - cossqtheta[0]); + + dthetadr[0][0][0] = sc1 * ((tt1 * delr[0][0]) - + (delr[1][0] * rinvmag[0] * rinvmag[1])); + dthetadr[0][0][1] = sc1 * ((tt1 * delr[0][1]) - + (delr[1][1] * rinvmag[0] * rinvmag[1])); + dthetadr[0][0][2] = sc1 * ((tt1 * delr[0][2]) - + (delr[1][2] * rinvmag[0] * rinvmag[1])); + dthetadr[0][1][0] = -sc1 * ((tt1 * delr[0][0]) - + (delr[1][0] * rinvmag[0] * rinvmag[1]) + + (tt3 * delr[1][0]) - + (delr[0][0] * rinvmag[0] * rinvmag[1])); + dthetadr[0][1][1] = -sc1 * ((tt1 * delr[0][1]) - + (delr[1][1] * rinvmag[0] * rinvmag[1]) + + (tt3 * delr[1][1]) - + (delr[0][1] * rinvmag[0] * rinvmag[1])); + dthetadr[0][1][2] = -sc1 * ((tt1 * delr[0][2]) - + (delr[1][2] * rinvmag[0] * rinvmag[1]) + + (tt3 * delr[1][2]) - + (delr[0][2] * rinvmag[0] * rinvmag[1])); + dthetadr[0][2][0] = sc1 * ((tt3 * delr[1][0]) - + (delr[0][0] * rinvmag[0] * rinvmag[1])); + dthetadr[0][2][1] = sc1 * ((tt3 * delr[1][1]) - + (delr[0][1] * rinvmag[0] * rinvmag[1])); + dthetadr[0][2][2] = sc1 * ((tt3 * delr[1][2]) - + (delr[0][2] * rinvmag[0] * rinvmag[1])); + + // angleCBD + + tt1 = costheta[1] / rmag2[1]; + tt3 = costheta[1] / rmag2[2]; + sc1 = 1.0 / sqrt(1.0 - cossqtheta[1]); + + dthetadr[1][2][0] = sc1 * ((tt1 * delr[1][0]) - + (delr[2][0] * rinvmag[1] * rinvmag[2])); + dthetadr[1][2][1] = sc1 * ((tt1 * delr[1][1]) - + (delr[2][1] * rinvmag[1] * rinvmag[2])); + dthetadr[1][2][2] = sc1 * ((tt1 * delr[1][2]) - + (delr[2][2] * rinvmag[1] * rinvmag[2])); + dthetadr[1][1][0] = -sc1 * ((tt1 * delr[1][0]) - + (delr[2][0] * rinvmag[1] * rinvmag[2]) + + (tt3 * delr[2][0]) - + (delr[1][0] * rinvmag[2] * rinvmag[1])); + dthetadr[1][1][1] = -sc1 * ((tt1 * delr[1][1]) - + (delr[2][1] * rinvmag[1] * rinvmag[2]) + + (tt3 * delr[2][1]) - + (delr[1][1] * rinvmag[2] * rinvmag[1])); + dthetadr[1][1][2] = -sc1 * ((tt1 * delr[1][2]) - + (delr[2][2] * rinvmag[1] * rinvmag[2]) + + (tt3 * delr[2][2]) - + (delr[1][2] * rinvmag[2] * rinvmag[1])); + dthetadr[1][3][0] = sc1 * ((tt3 * delr[2][0]) - + (delr[1][0] * rinvmag[2] * rinvmag[1])); + dthetadr[1][3][1] = sc1 * ((tt3 * delr[2][1]) - + (delr[1][1] * rinvmag[2] * rinvmag[1])); + dthetadr[1][3][2] = sc1 * ((tt3 * delr[2][2]) - + (delr[1][2] * rinvmag[2] * rinvmag[1])); + + // angleABD + + tt1 = costheta[2] / rmag2[0]; + tt3 = costheta[2] / rmag2[2]; + sc1 = 1.0 / sqrt(1.0 - cossqtheta[2]); + + dthetadr[2][0][0] = sc1 * ((tt1 * delr[0][0]) - + (delr[2][0] * rinvmag[0] * rinvmag[2])); + dthetadr[2][0][1] = sc1 * ((tt1 * delr[0][1]) - + (delr[2][1] * rinvmag[0] * rinvmag[2])); + dthetadr[2][0][2] = sc1 * ((tt1 * delr[0][2]) - + (delr[2][2] * rinvmag[0] * rinvmag[2])); + dthetadr[2][1][0] = -sc1 * ((tt1 * delr[0][0]) - + (delr[2][0] * rinvmag[0] * rinvmag[2]) + + (tt3 * delr[2][0]) - + (delr[0][0] * rinvmag[2] * rinvmag[0])); + dthetadr[2][1][1] = -sc1 * ((tt1 * delr[0][1]) - + (delr[2][1] * rinvmag[0] * rinvmag[2]) + + (tt3 * delr[2][1]) - + (delr[0][1] * rinvmag[2] * rinvmag[0])); + dthetadr[2][1][2] = -sc1 * ((tt1 * delr[0][2]) - + (delr[2][2] * rinvmag[0] * rinvmag[2]) + + (tt3 * delr[2][2]) - + (delr[0][2] * rinvmag[2] * rinvmag[0])); + dthetadr[2][3][0] = sc1 * ((tt3 * delr[2][0]) - + (delr[0][0] * rinvmag[2] * rinvmag[0])); + dthetadr[2][3][1] = sc1 * ((tt3 * delr[2][1]) - + (delr[0][1] * rinvmag[2] * rinvmag[0])); + dthetadr[2][3][2] = sc1 * ((tt3 * delr[2][2]) - + (delr[0][2] * rinvmag[2] * rinvmag[0])); + + // compute d( 1 / sin(theta))/dr + // i = angle, j = atom, k = direction + + for (i = 0; i < 3; i++) { + cossin2 = -costheta[i] / sinsqtheta[i]; + for (j = 0; j < 4; j++) + for (k = 0; k < 3; k++) + dinvsth[i][j][k] = cossin2 * dthetadr[i][j][k]; + } + + // compute d(1 / sin(theta) * |r_AB| * |r_CB| * |r_DB|)/dr + // i = angle, j = atom + + for (i = 0; i < 4; i++) + for (j = 0; j < 3; j++) { + dinvs3r[0][i][j] = (invstheta[1] * dinv3r[i][j]) + + (inv3r * dinvsth[1][i][j]); + dinvs3r[1][i][j] = (invstheta[2] * dinv3r[i][j]) + + (inv3r * dinvsth[2][i][j]); + dinvs3r[2][i][j] = (invstheta[0] * dinv3r[i][j]) + + (inv3r * dinvsth[0][i][j]); + } + + // drCB(i,j,k), etc + // i = vector X'/Y'/Z', j = atom A/B/C/D, k = direction X/Y/Z + + for (i = 0; i < 3; i++) { + drCB[i][1][i] = -1.0; + drAB[i][1][i] = -1.0; + drDB[i][1][i] = -1.0; + drDB[i][3][i] = 1.0; + drCB[i][2][i] = 1.0; + drAB[i][0][i] = 1.0; + } + + // d((r_CB x r_DB) dot r_AB) + // r_CB x d(r_DB) + // d(r_CB) x r_DB + // (r_CB x d(r_DB)) + (d(r_CB) x r_DB) + // (r_CB x d(r_DB)) + (d(r_CB) x r_DB) dot r_AB + // d(r_AB) dot (r_CB x r_DB) + + for (i = 0; i < 3; i++) + for (j = 0; j < 4; j++) { + cross(delr[1],drDB[i][j],rCBxdrDB); + cross(drCB[i][j],delr[2],drCBxrDB); + for (k = 0; k < 3; k++) dd[k] = rCBxdrDB[k] + drCBxrDB[k]; + dot1 = dot(dd,delr[0]); + dot2 = dot(rCBxrDB,drAB[i][j]); + fdot[0][j][i] = dot1 + dot2; + } + + // d((r_DB x r_AB) dot r_CB) + // r_DB x d(r_AB) + // d(r_DB) x r_AB + // (r_DB x d(r_AB)) + (d(r_DB) x r_AB) + // (r_DB x d(r_AB)) + (d(r_DB) x r_AB) dot r_CB + // d(r_CB) dot (r_DB x r_AB) + + for (i = 0; i < 3; i++) + for (j = 0; j < 4; j++) { + cross(delr[2],drAB[i][j],rDBxdrAB); + cross(drDB[i][j],delr[0],drDBxrAB); + for (k = 0; k < 3; k++) dd[k] = rDBxdrAB[k] + drDBxrAB[k]; + dot1 = dot(dd,delr[1]); + dot2 = dot(rDBxrAB,drCB[i][j]); + fdot[1][j][i] = dot1 + dot2; + } + + // d((r_AB x r_CB) dot r_DB) + // r_AB x d(r_CB) + // d(r_AB) x r_CB + // (r_AB x d(r_CB)) + (d(r_AB) x r_CB) + // (r_AB x d(r_CB)) + (d(r_AB) x r_CB) dot r_DB + // d(r_DB) dot (r_AB x r_CB) + + for (i = 0; i < 3; i++) + for (j = 0; j < 4; j++) { + cross(delr[0],drCB[i][j],rABxdrCB); + cross(drAB[i][j],delr[1],drABxrCB); + for (k = 0; k < 3; k++) dd[k] = rABxdrCB[k] + drABxrCB[k]; + dot1 = dot(dd,delr[2]); + dot2 = dot(rABxrCB,drDB[i][j]); + fdot[2][j][i] = dot1 + dot2; + } + + // force on each atom + + for (i = 0; i < 4; i++) + for (j = 0; j < 3; j++) { + ftmp = (fdot[0][i][j] * invs3r[0]) + + (dinvs3r[0][i][j] * dotCBDBAB); + dchi[0][i][j] = ftmp / cos(chiABCD); + ftmp = (fdot[1][i][j] * invs3r[1]) + + (dinvs3r[1][i][j] * dotDBABCB); + dchi[1][i][j] = ftmp / cos(chiCBDA); + ftmp = (fdot[2][i][j] * invs3r[2]) + + (dinvs3r[2][i][j] * dotABCBDB); + dchi[2][i][j] = ftmp / cos(chiDBAC); + dtotalchi[i][j] = (dchi[0][i][j]+dchi[1][i][j]+dchi[2][i][j]) / 3.0; + } + + for (i = 0; i < 4; i++) + for (j = 0; j < 3; j++) + fabcd[i][j] = -2.0*k0[type] * deltachi*dtotalchi[i][j]; + + // apply force to each of 4 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += fabcd[0][0]; + f[i1][1] += fabcd[0][1]; + f[i1][2] += fabcd[0][2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] += fabcd[1][0]; + f[i2][1] += fabcd[1][1]; + f[i2][2] += fabcd[1][2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += fabcd[2][0]; + f[i3][1] += fabcd[2][1]; + f[i3][2] += fabcd[2][2]; + } + + if (NEWTON_BOND || i4 < nlocal) { + f[i4][0] += fabcd[3][0]; + f[i4][1] += fabcd[3][1]; + f[i4][2] += fabcd[3][2]; + } + + if (EVFLAG) + ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper, + fabcd[0],fabcd[2],fabcd[3], + delr[0][0],delr[0][1],delr[0][2], + delr[1][0],delr[1][1],delr[1][2], + delr[2][0]-delr[1][0],delr[2][1]-delr[1][1], + delr[2][2]-delr[1][2],thr); + } + + // compute angle-angle interactions + angleangle_thr(nfrom,nto,thr); +} + +/* ---------------------------------------------------------------------- + angle-angle interactions within improper +------------------------------------------------------------------------- */ +template +void ImproperClass2OMP::angleangle_thr(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,i4,i,j,k,n,type; + double eimproper; + double delxAB,delyAB,delzAB,rABmag2,rAB; + double delxBC,delyBC,delzBC,rBCmag2,rBC; + double delxBD,delyBD,delzBD,rBDmag2,rBD; + double costhABC,thetaABC,costhABD; + double thetaABD,costhCBD,thetaCBD,dthABC,dthCBD,dthABD; + double sc1,t1,t3,r12; + double dthetadr[3][4][3],fabcd[4][3]; + + eimproper = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const improperlist = neighbor->improperlist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = improperlist[n][0]; + i2 = improperlist[n][1]; + i3 = improperlist[n][2]; + i4 = improperlist[n][3]; + type = improperlist[n][4]; + + // difference vectors + + delxAB = x[i1][0] - x[i2][0]; + delyAB = x[i1][1] - x[i2][1]; + delzAB = x[i1][2] - x[i2][2]; + domain->minimum_image(delxAB,delyAB,delzAB); + + delxBC = x[i3][0] - x[i2][0]; + delyBC = x[i3][1] - x[i2][1]; + delzBC = x[i3][2] - x[i2][2]; + domain->minimum_image(delxBC,delyBC,delzBC); + + delxBD = x[i4][0] - x[i2][0]; + delyBD = x[i4][1] - x[i2][1]; + delzBD = x[i4][2] - x[i2][2]; + domain->minimum_image(delxBD,delyBD,delzBD); + + // bond lengths + + rABmag2 = delxAB*delxAB + delyAB*delyAB + delzAB*delzAB; + rAB = sqrt(rABmag2); + rBCmag2 = delxBC*delxBC + delyBC*delyBC + delzBC*delzBC; + rBC = sqrt(rBCmag2); + rBDmag2 = delxBD*delxBD + delyBD*delyBD + delzBD*delzBD; + rBD = sqrt(rBDmag2); + + // angle ABC, ABD, CBD + + costhABC = (delxAB*delxBC + delyAB*delyBC + delzAB*delzBC) / (rAB * rBC); + if (costhABC > 1.0) costhABC = 1.0; + if (costhABC < -1.0) costhABC = -1.0; + thetaABC = acos(costhABC); + + costhABD = (delxAB*delxBD + delyAB*delyBD + delzAB*delzBD) / (rAB * rBD); + if (costhABD > 1.0) costhABD = 1.0; + if (costhABD < -1.0) costhABD = -1.0; + thetaABD = acos(costhABD); + + costhCBD = (delxBC*delxBD + delyBC*delyBD + delzBC*delzBD) /(rBC * rBD); + if (costhCBD > 1.0) costhCBD = 1.0; + if (costhCBD < -1.0) costhCBD = -1.0; + thetaCBD = acos(costhCBD); + + dthABC = thetaABC - aa_theta0_1[type]; + dthABD = thetaABD - aa_theta0_2[type]; + dthCBD = thetaCBD - aa_theta0_3[type]; + + // energy + + if (EFLAG) eimproper = aa_k2[type] * dthABC * dthABD + + aa_k1[type] * dthABC * dthCBD + + aa_k3[type] * dthABD * dthCBD; + + // d(theta)/d(r) array + // angle i, atom j, coordinate k + + for (i = 0; i < 3; i++) + for (j = 0; j < 4; j++) + for (k = 0; k < 3; k++) + dthetadr[i][j][k] = 0.0; + + // angle ABC + + sc1 = sqrt(1.0/(1.0 - costhABC*costhABC)); + t1 = costhABC / rABmag2; + t3 = costhABC / rBCmag2; + r12 = 1.0 / (rAB * rBC); + + dthetadr[0][0][0] = sc1 * ((t1 * delxAB) - (delxBC * r12)); + dthetadr[0][0][1] = sc1 * ((t1 * delyAB) - (delyBC * r12)); + dthetadr[0][0][2] = sc1 * ((t1 * delzAB) - (delzBC * r12)); + dthetadr[0][1][0] = sc1 * ((-t1 * delxAB) + (delxBC * r12) + + (-t3 * delxBC) + (delxAB * r12)); + dthetadr[0][1][1] = sc1 * ((-t1 * delyAB) + (delyBC * r12) + + (-t3 * delyBC) + (delyAB * r12)); + dthetadr[0][1][2] = sc1 * ((-t1 * delzAB) + (delzBC * r12) + + (-t3 * delzBC) + (delzAB * r12)); + dthetadr[0][2][0] = sc1 * ((t3 * delxBC) - (delxAB * r12)); + dthetadr[0][2][1] = sc1 * ((t3 * delyBC) - (delyAB * r12)); + dthetadr[0][2][2] = sc1 * ((t3 * delzBC) - (delzAB * r12)); + + // angle CBD + + sc1 = sqrt(1.0/(1.0 - costhCBD*costhCBD)); + t1 = costhCBD / rBCmag2; + t3 = costhCBD / rBDmag2; + r12 = 1.0 / (rBC * rBD); + + dthetadr[1][2][0] = sc1 * ((t1 * delxBC) - (delxBD * r12)); + dthetadr[1][2][1] = sc1 * ((t1 * delyBC) - (delyBD * r12)); + dthetadr[1][2][2] = sc1 * ((t1 * delzBC) - (delzBD * r12)); + dthetadr[1][1][0] = sc1 * ((-t1 * delxBC) + (delxBD * r12) + + (-t3 * delxBD) + (delxBC * r12)); + dthetadr[1][1][1] = sc1 * ((-t1 * delyBC) + (delyBD * r12) + + (-t3 * delyBD) + (delyBC * r12)); + dthetadr[1][1][2] = sc1 * ((-t1 * delzBC) + (delzBD * r12) + + (-t3 * delzBD) + (delzBC * r12)); + dthetadr[1][3][0] = sc1 * ((t3 * delxBD) - (delxBC * r12)); + dthetadr[1][3][1] = sc1 * ((t3 * delyBD) - (delyBC * r12)); + dthetadr[1][3][2] = sc1 * ((t3 * delzBD) - (delzBC * r12)); + + // angle ABD + + sc1 = sqrt(1.0/(1.0 - costhABD*costhABD)); + t1 = costhABD / rABmag2; + t3 = costhABD / rBDmag2; + r12 = 1.0 / (rAB * rBD); + + dthetadr[2][0][0] = sc1 * ((t1 * delxAB) - (delxBD * r12)); + dthetadr[2][0][1] = sc1 * ((t1 * delyAB) - (delyBD * r12)); + dthetadr[2][0][2] = sc1 * ((t1 * delzAB) - (delzBD * r12)); + dthetadr[2][1][0] = sc1 * ((-t1 * delxAB) + (delxBD * r12) + + (-t3 * delxBD) + (delxAB * r12)); + dthetadr[2][1][1] = sc1 * ((-t1 * delyAB) + (delyBD * r12) + + (-t3 * delyBD) + (delyAB * r12)); + dthetadr[2][1][2] = sc1 * ((-t1 * delzAB) + (delzBD * r12) + + (-t3 * delzBD) + (delzAB * r12)); + dthetadr[2][3][0] = sc1 * ((t3 * delxBD) - (delxAB * r12)); + dthetadr[2][3][1] = sc1 * ((t3 * delyBD) - (delyAB * r12)); + dthetadr[2][3][2] = sc1 * ((t3 * delzBD) - (delzAB * r12)); + + // angleangle forces + + for (i = 0; i < 4; i++) + for (j = 0; j < 3; j++) + fabcd[i][j] = - + ((aa_k1[type] * + (dthABC*dthetadr[1][i][j] + dthCBD*dthetadr[0][i][j])) + + (aa_k2[type] * + (dthABC*dthetadr[2][i][j] + dthABD*dthetadr[0][i][j])) + + (aa_k3[type] * + (dthABD*dthetadr[1][i][j] + dthCBD*dthetadr[2][i][j]))); + + // apply force to each of 4 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += fabcd[0][0]; + f[i1][1] += fabcd[0][1]; + f[i1][2] += fabcd[0][2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] += fabcd[1][0]; + f[i2][1] += fabcd[1][1]; + f[i2][2] += fabcd[1][2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += fabcd[2][0]; + f[i3][1] += fabcd[2][1]; + f[i3][2] += fabcd[2][2]; + } + + if (NEWTON_BOND || i4 < nlocal) { + f[i4][0] += fabcd[3][0]; + f[i4][1] += fabcd[3][1]; + f[i4][2] += fabcd[3][2]; + } + + if (EVFLAG) + ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper, + fabcd[0],fabcd[2],fabcd[3],delxAB,delyAB,delzAB, + delxBC,delyBC,delzBC,delxBD,delyBD,delzBD,thr); + } +} diff --git a/src/USER-OMP/improper_class2_omp.h b/src/USER-OMP/improper_class2_omp.h new file mode 100644 index 0000000000..130a680d06 --- /dev/null +++ b/src/USER-OMP/improper_class2_omp.h @@ -0,0 +1,52 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef IMPROPER_CLASS + +ImproperStyle(class2/omp,ImproperClass2OMP) + +#else + +#ifndef LMP_IMPROPER_CLASS2_OMP_H +#define LMP_IMPROPER_CLASS2_OMP_H + +#include "improper_class2.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class ImproperClass2OMP : public ImproperClass2, public ThrOMP { + + public: + ImproperClass2OMP(class LAMMPS *lmp) : + ImproperClass2(lmp), ThrOMP(lmp,THR_IMPROPER) {}; + + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); + + template + void angleangle_thr(int, int, ThrData * const thr); + +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/improper_cvff_omp.cpp b/src/USER-OMP/improper_cvff_omp.cpp new file mode 100644 index 0000000000..06f3186e06 --- /dev/null +++ b/src/USER-OMP/improper_cvff_omp.cpp @@ -0,0 +1,295 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "mpi.h" +#include "math.h" +#include "improper_cvff_omp.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "domain.h" +#include "force.h" +#include "update.h" +#include "error.h" + +using namespace LAMMPS_NS; + +#define TOLERANCE 0.05 +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +void ImproperCvffOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nimproperlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void ImproperCvffOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,i4,m,n,type; + double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm; + double eimproper,f1[3],f2[3],f3[3],f4[3]; + double sb1,sb2,sb3,rb1,rb3,c0,b1mag2,b1mag,b2mag2; + double b2mag,b3mag2,b3mag,ctmp,r12c1,c1mag,r12c2; + double c2mag,sc1,sc2,s1,s2,s12,c,p,pd,rc2,a,a11,a22; + double a33,a12,a13,a23,sx2,sy2,sz2; + + eimproper = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const improperlist = neighbor->improperlist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = improperlist[n][0]; + i2 = improperlist[n][1]; + i3 = improperlist[n][2]; + i4 = improperlist[n][3]; + type = improperlist[n][4]; + + // 1st bond + + vb1x = x[i1][0] - x[i2][0]; + vb1y = x[i1][1] - x[i2][1]; + vb1z = x[i1][2] - x[i2][2]; + domain->minimum_image(vb1x,vb1y,vb1z); + + // 2nd bond + + vb2x = x[i3][0] - x[i2][0]; + vb2y = x[i3][1] - x[i2][1]; + vb2z = x[i3][2] - x[i2][2]; + domain->minimum_image(vb2x,vb2y,vb2z); + + vb2xm = -vb2x; + vb2ym = -vb2y; + vb2zm = -vb2z; + domain->minimum_image(vb2xm,vb2ym,vb2zm); + + // 3rd bond + + vb3x = x[i4][0] - x[i3][0]; + vb3y = x[i4][1] - x[i3][1]; + vb3z = x[i4][2] - x[i3][2]; + domain->minimum_image(vb3x,vb3y,vb3z); + + // c0 calculation + + sb1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z); + sb2 = 1.0 / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z); + sb3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z); + + rb1 = sqrt(sb1); + rb3 = sqrt(sb3); + + c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3; + + // 1st and 2nd angle + + b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z; + b1mag = sqrt(b1mag2); + b2mag2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z; + b2mag = sqrt(b2mag2); + b3mag2 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z; + b3mag = sqrt(b3mag2); + + ctmp = vb1x*vb2x + vb1y*vb2y + vb1z*vb2z; + r12c1 = 1.0 / (b1mag*b2mag); + c1mag = ctmp * r12c1; + + ctmp = vb2xm*vb3x + vb2ym*vb3y + vb2zm*vb3z; + r12c2 = 1.0 / (b2mag*b3mag); + c2mag = ctmp * r12c2; + + // cos and sin of 2 angles and final c + + sc1 = sqrt(1.0 - c1mag*c1mag); + if (sc1 < SMALL) sc1 = SMALL; + sc1 = 1.0/sc1; + + sc2 = sqrt(1.0 - c2mag*c2mag); + if (sc2 < SMALL) sc2 = SMALL; + sc2 = 1.0/sc2; + + s1 = sc1 * sc1; + s2 = sc2 * sc2; + s12 = sc1 * sc2; + c = (c0 + c1mag*c2mag) * s12; + + // error check + + if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) { + int me = comm->me; + + if (screen) { + char str[128]; + sprintf(str,"Improper problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", + me,thr->get_tid(),update->ntimestep, + atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); + error->warning(FLERR,str,0); + fprintf(screen," 1st atom: %d %g %g %g\n", + me,x[i1][0],x[i1][1],x[i1][2]); + fprintf(screen," 2nd atom: %d %g %g %g\n", + me,x[i2][0],x[i2][1],x[i2][2]); + fprintf(screen," 3rd atom: %d %g %g %g\n", + me,x[i3][0],x[i3][1],x[i3][2]); + fprintf(screen," 4th atom: %d %g %g %g\n", + me,x[i4][0],x[i4][1],x[i4][2]); + } + } + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + // force & energy + // p = 1 + cos(n*phi) for d = 1 + // p = 1 - cos(n*phi) for d = -1 + // pd = dp/dc / 2 + + m = multiplicity[type]; + + if (m == 2) { + p = 2.0*c*c; + pd = 2.0*c; + } else if (m == 3) { + rc2 = c*c; + p = (4.0*rc2-3.0)*c + 1.0; + pd = 6.0*rc2 - 1.5; + } else if (m == 4) { + rc2 = c*c; + p = 8.0*(rc2-1)*rc2 + 2.0; + pd = (16.0*rc2-8.0)*c; + } else if (m == 6) { + rc2 = c*c; + p = ((32.0*rc2-48.0)*rc2 + 18.0)*rc2; + pd = (96.0*(rc2-1.0)*rc2 + 18.0)*c; + } else if (m == 1) { + p = c + 1.0; + pd = 0.5; + } else if (m == 5) { + rc2 = c*c; + p = ((16.0*rc2-20.0)*rc2 + 5.0)*c + 1.0; + pd = (40.0*rc2-30.0)*rc2 + 2.5; + } else if (m == 0) { + p = 2.0; + pd = 0.0; + } + + if (sign[type] == -1) { + p = 2.0 - p; + pd = -pd; + } + + if (EFLAG) eimproper = k[type]*p; + + a = 2.0 * k[type] * pd; + c = c * a; + s12 = s12 * a; + a11 = c*sb1*s1; + a22 = -sb2*(2.0*c0*s12 - c*(s1+s2)); + a33 = c*sb3*s2; + a12 = -r12c1*(c1mag*c*s1 + c2mag*s12); + a13 = -rb1*rb3*s12; + a23 = r12c2*(c2mag*c*s2 + c1mag*s12); + + sx2 = a12*vb1x + a22*vb2x + a23*vb3x; + sy2 = a12*vb1y + a22*vb2y + a23*vb3y; + sz2 = a12*vb1z + a22*vb2z + a23*vb3z; + + f1[0] = a11*vb1x + a12*vb2x + a13*vb3x; + f1[1] = a11*vb1y + a12*vb2y + a13*vb3y; + f1[2] = a11*vb1z + a12*vb2z + a13*vb3z; + + f2[0] = -sx2 - f1[0]; + f2[1] = -sy2 - f1[1]; + f2[2] = -sz2 - f1[2]; + + f4[0] = a13*vb1x + a23*vb2x + a33*vb3x; + f4[1] = a13*vb1y + a23*vb2y + a33*vb3y; + f4[2] = a13*vb1z + a23*vb2z + a33*vb3z; + + f3[0] = sx2 - f4[0]; + f3[1] = sy2 - f4[1]; + f3[2] = sz2 - f4[2]; + + // apply force to each of 4 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] += f2[0]; + f[i2][1] += f2[1]; + f[i2][2] += f2[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (NEWTON_BOND || i4 < nlocal) { + f[i4][0] += f4[0]; + f[i4][1] += f4[1]; + f[i4][2] += f4[2]; + } + + if (EVFLAG) + ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,f1,f3,f4, + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); + } +} diff --git a/src/USER-OMP/improper_cvff_omp.h b/src/USER-OMP/improper_cvff_omp.h new file mode 100644 index 0000000000..f058b10eed --- /dev/null +++ b/src/USER-OMP/improper_cvff_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef IMPROPER_CLASS + +ImproperStyle(cvff/omp,ImproperCvffOMP) + +#else + +#ifndef LMP_IMPROPER_CVFF_OMP_H +#define LMP_IMPROPER_CVFF_OMP_H + +#include "improper_cvff.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class ImproperCvffOMP : public ImproperCvff, public ThrOMP { + + public: + ImproperCvffOMP(class LAMMPS *lmp) : + ImproperCvff(lmp), ThrOMP(lmp,THR_IMPROPER) {}; + + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/improper_harmonic_omp.cpp b/src/USER-OMP/improper_harmonic_omp.cpp new file mode 100644 index 0000000000..44e45a977d --- /dev/null +++ b/src/USER-OMP/improper_harmonic_omp.cpp @@ -0,0 +1,236 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "mpi.h" +#include "math.h" +#include "improper_harmonic_omp.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "domain.h" +#include "force.h" +#include "update.h" +#include "error.h" + +using namespace LAMMPS_NS; + +#define TOLERANCE 0.05 +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +void ImproperHarmonicOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nimproperlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void ImproperHarmonicOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,i4,n,type; + double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z; + double eimproper,f1[3],f2[3],f3[3],f4[3]; + double ss1,ss2,ss3,r1,r2,r3,c0,c1,c2,s1,s2; + double s12,c,s,domega,a,a11,a22,a33,a12,a13,a23; + double sx2,sy2,sz2; + + eimproper = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const improperlist = neighbor->improperlist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = improperlist[n][0]; + i2 = improperlist[n][1]; + i3 = improperlist[n][2]; + i4 = improperlist[n][3]; + type = improperlist[n][4]; + + // geometry of 4-body + + vb1x = x[i1][0] - x[i2][0]; + vb1y = x[i1][1] - x[i2][1]; + vb1z = x[i1][2] - x[i2][2]; + domain->minimum_image(vb1x,vb1y,vb1z); + + vb2x = x[i3][0] - x[i2][0]; + vb2y = x[i3][1] - x[i2][1]; + vb2z = x[i3][2] - x[i2][2]; + domain->minimum_image(vb2x,vb2y,vb2z); + + vb3x = x[i4][0] - x[i3][0]; + vb3y = x[i4][1] - x[i3][1]; + vb3z = x[i4][2] - x[i3][2]; + domain->minimum_image(vb3x,vb3y,vb3z); + + ss1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z); + ss2 = 1.0 / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z); + ss3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z); + + r1 = sqrt(ss1); + r2 = sqrt(ss2); + r3 = sqrt(ss3); + + // sin and cos of angle + + c0 = (vb1x * vb3x + vb1y * vb3y + vb1z * vb3z) * r1 * r3; + c1 = (vb1x * vb2x + vb1y * vb2y + vb1z * vb2z) * r1 * r2; + c2 = -(vb3x * vb2x + vb3y * vb2y + vb3z * vb2z) * r3 * r2; + + s1 = 1.0 - c1*c1; + if (s1 < SMALL) s1 = SMALL; + s1 = 1.0 / s1; + + s2 = 1.0 - c2*c2; + if (s2 < SMALL) s2 = SMALL; + s2 = 1.0 / s2; + + s12 = sqrt(s1*s2); + c = (c1*c2 + c0) * s12; + + // error check + + if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) { + int me = comm->me; + + if (screen) { + char str[128]; + sprintf(str,"Improper problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", + me,thr->get_tid(),update->ntimestep, + atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); + error->warning(FLERR,str,0); + fprintf(screen," 1st atom: %d %g %g %g\n", + me,x[i1][0],x[i1][1],x[i1][2]); + fprintf(screen," 2nd atom: %d %g %g %g\n", + me,x[i2][0],x[i2][1],x[i2][2]); + fprintf(screen," 3rd atom: %d %g %g %g\n", + me,x[i3][0],x[i3][1],x[i3][2]); + fprintf(screen," 4th atom: %d %g %g %g\n", + me,x[i4][0],x[i4][1],x[i4][2]); + } + } + + if (c > 1.0) c = 1.0; + if (c < -1.0) c = -1.0; + + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + + // force & energy + + domega = acos(c) - chi[type]; + a = k[type] * domega; + + if (EFLAG) eimproper = a*domega; + + a = -a * 2.0/s; + c = c * a; + s12 = s12 * a; + a11 = c*ss1*s1; + a22 = -ss2 * (2.0*c0*s12 - c*(s1+s2)); + a33 = c*ss3*s2; + a12 = -r1*r2*(c1*c*s1 + c2*s12); + a13 = -r1*r3*s12; + a23 = r2*r3*(c2*c*s2 + c1*s12); + + sx2 = a22*vb2x + a23*vb3x + a12*vb1x; + sy2 = a22*vb2y + a23*vb3y + a12*vb1y; + sz2 = a22*vb2z + a23*vb3z + a12*vb1z; + + f1[0] = a12*vb2x + a13*vb3x + a11*vb1x; + f1[1] = a12*vb2y + a13*vb3y + a11*vb1y; + f1[2] = a12*vb2z + a13*vb3z + a11*vb1z; + + f2[0] = -sx2 - f1[0]; + f2[1] = -sy2 - f1[1]; + f2[2] = -sz2 - f1[2]; + + f4[0] = a23*vb2x + a33*vb3x + a13*vb1x; + f4[1] = a23*vb2y + a33*vb3y + a13*vb1y; + f4[2] = a23*vb2z + a33*vb3z + a13*vb1z; + + f3[0] = sx2 - f4[0]; + f3[1] = sy2 - f4[1]; + f3[2] = sz2 - f4[2]; + + // apply force to each of 4 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]; + f[i1][1] += f1[1]; + f[i1][2] += f1[2]; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] += f2[0]; + f[i2][1] += f2[1]; + f[i2][2] += f2[2]; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f3[0]; + f[i3][1] += f3[1]; + f[i3][2] += f3[2]; + } + + if (NEWTON_BOND || i4 < nlocal) { + f[i4][0] += f4[0]; + f[i4][1] += f4[1]; + f[i4][2] += f4[2]; + } + + if (EVFLAG) + ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,f1,f3,f4, + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); + } +} diff --git a/src/USER-OMP/improper_harmonic_omp.h b/src/USER-OMP/improper_harmonic_omp.h new file mode 100644 index 0000000000..a2a6a0a033 --- /dev/null +++ b/src/USER-OMP/improper_harmonic_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef IMPROPER_CLASS + +ImproperStyle(harmonic/omp,ImproperHarmonicOMP) + +#else + +#ifndef LMP_IMPROPER_HARMONIC_OMP_H +#define LMP_IMPROPER_HARMONIC_OMP_H + +#include "improper_harmonic.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class ImproperHarmonicOMP : public ImproperHarmonic, public ThrOMP { + + public: + ImproperHarmonicOMP(class LAMMPS *lmp) : + ImproperHarmonic(lmp), ThrOMP(lmp,THR_IMPROPER) {}; + + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/improper_umbrella_omp.cpp b/src/USER-OMP/improper_umbrella_omp.cpp new file mode 100644 index 0000000000..1fb1d86be9 --- /dev/null +++ b/src/USER-OMP/improper_umbrella_omp.cpp @@ -0,0 +1,252 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "mpi.h" +#include "math.h" +#include "improper_umbrella_omp.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "domain.h" +#include "force.h" +#include "update.h" +#include "error.h" + +using namespace LAMMPS_NS; + +#define TOLERANCE 0.05 +#define SMALL 0.001 + +/* ---------------------------------------------------------------------- */ + +void ImproperUmbrellaOMP::compute(int eflag, int vflag) +{ + + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = neighbor->nimproperlist; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void ImproperUmbrellaOMP::eval(int nfrom, int nto, ThrData * const thr) +{ + int i1,i2,i3,i4,n,type; + double eimproper,f1[3],f2[3],f3[3],f4[3]; + double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z; + double domega,c,a,s,projhfg,dhax,dhay,dhaz,dahx,dahy,dahz,cotphi; + double ax,ay,az,ra2,rh2,ra,rh,rar,rhr,arx,ary,arz,hrx,hry,hrz; + + eimproper = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const * const improperlist = neighbor->improperlist; + const int nlocal = atom->nlocal; + + for (n = nfrom; n < nto; n++) { + i1 = improperlist[n][0]; + i2 = improperlist[n][1]; + i3 = improperlist[n][2]; + i4 = improperlist[n][3]; + type = improperlist[n][4]; + + // 1st bond + + vb1x = x[i2][0] - x[i1][0]; + vb1y = x[i2][1] - x[i1][1]; + vb1z = x[i2][2] - x[i1][2]; + domain->minimum_image(vb1x,vb1y,vb1z); + + // 2nd bond + + vb2x = x[i3][0] - x[i1][0]; + vb2y = x[i3][1] - x[i1][1]; + vb2z = x[i3][2] - x[i1][2]; + domain->minimum_image(vb2x,vb2y,vb2z); + + // 3rd bond + + vb3x = x[i4][0] - x[i1][0]; + vb3y = x[i4][1] - x[i1][1]; + vb3z = x[i4][2] - x[i1][2]; + domain->minimum_image(vb3x,vb3y,vb3z); + + // c0 calculation + // A = vb1 X vb2 is perpendicular to IJK plane + + ax = vb1y*vb2z-vb1z*vb2y; + ay = vb1z*vb2x-vb1x*vb2z; + az = vb1x*vb2y-vb1y*vb2x; + ra2 = ax*ax+ay*ay+az*az; + rh2 = vb3x*vb3x+vb3y*vb3y+vb3z*vb3z; + ra = sqrt(ra2); + rh = sqrt(rh2); + if (ra < SMALL) ra = SMALL; + if (rh < SMALL) rh = SMALL; + + rar = 1/ra; + rhr = 1/rh; + arx = ax*rar; + ary = ay*rar; + arz = az*rar; + hrx = vb3x*rhr; + hry = vb3y*rhr; + hrz = vb3z*rhr; + + c = arx*hrx+ary*hry+arz*hrz; + + // error check + + if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) { + int me = comm->me; + + if (screen) { + char str[128]; + sprintf(str,"Improper problem: %d/%d " BIGINT_FORMAT " %d %d %d %d", + me,thr->get_tid(),update->ntimestep, + atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); + error->warning(FLERR,str,0); + fprintf(screen," 1st atom: %d %g %g %g\n", + me,x[i1][0],x[i1][1],x[i1][2]); + fprintf(screen," 2nd atom: %d %g %g %g\n", + me,x[i2][0],x[i2][1],x[i2][2]); + fprintf(screen," 3rd atom: %d %g %g %g\n", + me,x[i3][0],x[i3][1],x[i3][2]); + fprintf(screen," 4th atom: %d %g %g %g\n", + me,x[i4][0],x[i4][1],x[i4][2]); + } + } + + if (c > 1.0) s = 1.0; + if (c < -1.0) s = -1.0; + + s = sqrt(1.0 - c*c); + if (s < SMALL) s = SMALL; + cotphi = c/s; + + projhfg = (vb3x*vb1x+vb3y*vb1y+vb3z*vb1z) / + sqrt(vb1x*vb1x+vb1y*vb1y+vb1z*vb1z); + projhfg += (vb3x*vb2x+vb3y*vb2y+vb3z*vb2z) / + sqrt(vb2x*vb2x+vb2y*vb2y+vb2z*vb2z); + if (projhfg > 0.0) { + s *= -1.0; + cotphi *= -1.0; + } + + // force and energy + // if w0 = 0: E = k * (1 - cos w) + // if w0 != 0: E = 0.5 * C (cos w - cos w0)^2, C = k/(sin(w0)^2 + + if (w0[type] == 0.0) { + if (EFLAG) eimproper = kw[type] * (1.0-s); + a = -kw[type]; + } else { + domega = s - cos(w0[type]); + a = 0.5 * C[type] * domega; + if (EFLAG) eimproper = a * domega; + a *= 2.0; + } + + // dhax = diffrence between H and A in X direction, etc + + a = a*cotphi; + dhax = hrx-c*arx; + dhay = hry-c*ary; + dhaz = hrz-c*arz; + + dahx = arx-c*hrx; + dahy = ary-c*hry; + dahz = arz-c*hrz; + + f2[0] = (dhay*vb1z - dhaz*vb1y)*rar; + f2[1] = (dhaz*vb1x - dhax*vb1z)*rar; + f2[2] = (dhax*vb1y - dhay*vb1x)*rar; + + f3[0] = (-dhay*vb2z + dhaz*vb2y)*rar; + f3[1] = (-dhaz*vb2x + dhax*vb2z)*rar; + f3[2] = (-dhax*vb2y + dhay*vb2x)*rar; + + f4[0] = dahx*rhr; + f4[1] = dahy*rhr; + f4[2] = dahz*rhr; + + f1[0] = -(f2[0] + f3[0] + f4[0]); + f1[1] = -(f2[1] + f3[1] + f4[1]); + f1[2] = -(f2[2] + f3[2] + f4[2]); + + // apply force to each of 4 atoms + + if (NEWTON_BOND || i1 < nlocal) { + f[i1][0] += f1[0]*a; + f[i1][1] += f1[1]*a; + f[i1][2] += f1[2]*a; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2][0] += f3[0]*a; + f[i2][1] += f3[1]*a; + f[i2][2] += f3[2]*a; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3][0] += f2[0]*a; + f[i3][1] += f2[1]*a; + f[i3][2] += f2[2]*a; + } + + if (NEWTON_BOND || i4 < nlocal) { + f[i4][0] += f4[0]*a; + f[i4][1] += f4[1]*a; + f[i4][2] += f4[2]*a; + } + + if (EVFLAG) + ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,eimproper,f1,f3,f4, + vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,thr); + } +} diff --git a/src/USER-OMP/improper_umbrella_omp.h b/src/USER-OMP/improper_umbrella_omp.h new file mode 100644 index 0000000000..757089212b --- /dev/null +++ b/src/USER-OMP/improper_umbrella_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef IMPROPER_CLASS + +ImproperStyle(umbrella/omp,ImproperUmbrellaOMP) + +#else + +#ifndef LMP_IMPROPER_UMBRELLA_OMP_H +#define LMP_IMPROPER_UMBRELLA_OMP_H + +#include "improper_umbrella.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class ImproperUmbrellaOMP : public ImproperUmbrella, public ThrOMP { + + public: + ImproperUmbrellaOMP(class LAMMPS *lmp) : + ImproperUmbrella(lmp), ThrOMP(lmp,THR_IMPROPER) {}; + + virtual void compute(int, int); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_airebo_omp.cpp b/src/USER-OMP/pair_airebo_omp.cpp new file mode 100644 index 0000000000..bb8b38a5b1 --- /dev/null +++ b/src/USER-OMP/pair_airebo_omp.cpp @@ -0,0 +1,2768 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_airebo_omp.h" +#include "atom.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "memory.h" +#include "neighbor.h" +#include "neigh_list.h" + +#if defined(_OPENMP) +#include +#endif + +using namespace LAMMPS_NS; + +#define TOL 1.0e-9 + +/* ---------------------------------------------------------------------- */ + +PairAIREBOOMP::PairAIREBOOMP(LAMMPS *lmp) : + PairAIREBO(lmp), ThrOMP(lmp, THR_PAIR) +{ + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairAIREBOOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = vflag_atom = 0; + + REBO_neigh_thr(); + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + FREBO_thr(ifrom,ito,evflag,eflag,vflag_atom,thr); + if (ljflag) FLJ_thr(ifrom,ito,evflag,eflag,vflag_atom,thr); + if (torflag) TORSION_thr(ifrom,ito,evflag,eflag,thr); + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- + Bij function +------------------------------------------------------------------------- */ + +double PairAIREBOOMP::bondorder_thr(int i, int j, double rij[3], double rijmag, + double VA, int vflag_atom, ThrData * const thr) +{ + int atomi,atomj,k,n,l,atomk,atoml,atomn,atom1,atom2,atom3,atom4; + int itype,jtype,ktype,ltype,ntype; + double rik[3],rjl[3],rkn[3],rji[3],rki[3],rlj[3],rknmag,dNki,dwjl,bij; + double NijC,NijH,NjiC,NjiH,wik,dwik,dwkn,wjl; + double rikmag,rjlmag,cosjik,cosijl,g,tmp2,tmp3; + double Etmp,pij,tmp,wij,dwij,NconjtmpI,NconjtmpJ,Nki,Nlj,dS; + double lamdajik,lamdaijl,dgdc,dgdN,pji,Nijconj,piRC; + double dcosjikdri[3],dcosijldri[3],dcosjikdrk[3]; + double dN2[2],dN3[3]; + double dcosjikdrj[3],dcosijldrj[3],dcosijldrl[3]; + double Tij; + double r32[3],r32mag,cos321,r43[3],r13[3]; + double dNlj; + double om1234,rln[3]; + double rlnmag,dwln,r23[3],r23mag,r21[3],r21mag; + double w21,dw21,r34[3],r34mag,cos234,w34,dw34; + double cross321[3],cross234[3],prefactor,SpN; + double fcijpc,fcikpc,fcjlpc,fcjkpc,fcilpc; + double dt2dik[3],dt2djl[3],dt2dij[3],aa,aaa1,aaa2,at2,cw,cwnum,cwnom; + double sin321,sin234,rr,rijrik,rijrjl,rjk2,rik2,ril2,rjl2; + double dctik,dctjk,dctjl,dctij,dctji,dctil,rik2i,rjl2i,sink2i,sinl2i; + double rjk[3],ril[3],dt1dik,dt1djk,dt1djl,dt1dil,dt1dij; + double F23[3],F12[3],F34[3],F31[3],F24[3],fi[3],fj[3],fk[3],fl[3]; + double f1[3],f2[3],f3[3],f4[4]; + double dcut321,PijS,PjiS; + double rij2,tspjik,dtsjik,tspijl,dtsijl,costmp; + int *REBO_neighs,*REBO_neighs_i,*REBO_neighs_j,*REBO_neighs_k,*REBO_neighs_l; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + int *type = atom->type; + + atomi = i; + atomj = j; + itype = map[type[i]]; + jtype = map[type[j]]; + wij = Sp(rijmag,rcmin[itype][jtype],rcmax[itype][jtype],dwij); + NijC = nC[i]-(wij*kronecker(jtype,0)); + NijH = nH[i]-(wij*kronecker(jtype,1)); + NjiC = nC[j]-(wij*kronecker(itype,0)); + NjiH = nH[j]-(wij*kronecker(itype,1)); + bij = 0.0; + tmp = 0.0; + tmp2 = 0.0; + tmp3 = 0.0; + dgdc = 0.0; + dgdN = 0.0; + NconjtmpI = 0.0; + NconjtmpJ = 0.0; + Etmp = 0.0; + + REBO_neighs = REBO_firstneigh[i]; + for (k = 0; k < REBO_numneigh[i]; k++) { + atomk = REBO_neighs[k]; + if (atomk != atomj) { + ktype = map[type[atomk]]; + rik[0] = x[atomi][0]-x[atomk][0]; + rik[1] = x[atomi][1]-x[atomk][1]; + rik[2] = x[atomi][2]-x[atomk][2]; + rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2])); + lamdajik = 4.0*kronecker(itype,1) * + ((rho[ktype][1]-rikmag)-(rho[jtype][1]-rijmag)); + wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dS); + Nki = nC[atomk]-(wik*kronecker(itype,0))+nH[atomk] - + (wik*kronecker(itype,1)); + cosjik = ((rij[0]*rik[0])+(rij[1]*rik[1])+(rij[2]*rik[2])) / + (rijmag*rikmag); + cosjik = MIN(cosjik,1.0); + cosjik = MAX(cosjik,-1.0); + + // evaluate splines g and derivatives dg + + g = gSpline(cosjik,(NijC+NijH),itype,&dgdc,&dgdN); + Etmp = Etmp+(wik*g*exp(lamdajik)); + tmp3 = tmp3+(wik*dgdN*exp(lamdajik)); + NconjtmpI = NconjtmpI+(kronecker(ktype,0)*wik*Sp(Nki,Nmin,Nmax,dS)); + } + } + + PijS = 0.0; + dN2[0] = 0.0; + dN2[1] = 0.0; + PijS = PijSpline(NijC,NijH,itype,jtype,dN2); + pij = 1.0/sqrt(1.0+Etmp+PijS); + tmp = -0.5*pij*pij*pij; + + const double invrijm = 1.0/rijmag; + const double invrijm2 = invrijm*invrijm; + + // pij forces + + REBO_neighs = REBO_firstneigh[i]; + for (k = 0; k < REBO_numneigh[i]; k++) { + atomk = REBO_neighs[k]; + if (atomk != atomj) { + ktype = map[type[atomk]]; + rik[0] = x[atomi][0]-x[atomk][0]; + rik[1] = x[atomi][1]-x[atomk][1]; + rik[2] = x[atomi][2]-x[atomk][2]; + rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2])); + lamdajik = 4.0*kronecker(itype,1) * + ((rho[ktype][1]-rikmag)-(rho[jtype][1]-rijmag)); + wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dwik); + + const double invrikm = 1.0/rikmag; + const double invrijkm = invrijm*invrikm; + const double invrikm2 = invrikm*invrikm; + + cosjik = (rij[0]*rik[0] + rij[1]*rik[1] + rij[2]*rik[2]) * invrijkm; + cosjik = MIN(cosjik,1.0); + cosjik = MAX(cosjik,-1.0); + + dcosjikdri[0] = ((rij[0]+rik[0])*invrijkm) - + (cosjik*((rij[0]*invrijm2)+(rik[0]*invrikm2))); + dcosjikdri[1] = ((rij[1]+rik[1])*invrijkm) - + (cosjik*((rij[1]*invrijm2)+(rik[1]*invrikm2))); + dcosjikdri[2] = ((rij[2]+rik[2])*invrijkm) - + (cosjik*((rij[2]*invrijm2)+(rik[2]*invrikm2))); + dcosjikdrk[0] = (-rij[0]*invrijkm) + (cosjik*(rik[0]*invrikm2)); + dcosjikdrk[1] = (-rij[1]*invrijkm) + (cosjik*(rik[1]*invrikm2)); + dcosjikdrk[2] = (-rij[2]*invrijkm) + (cosjik*(rik[2]*invrikm2)); + dcosjikdrj[0] = (-rik[0]*invrijkm) + (cosjik*(rij[0]*invrijm2)); + dcosjikdrj[1] = (-rik[1]*invrijkm) + (cosjik*(rij[1]*invrijm2)); + dcosjikdrj[2] = (-rik[2]*invrijkm) + (cosjik*(rij[2]*invrijm2)); + + g = gSpline(cosjik,(NijC+NijH),itype,&dgdc,&dgdN); + tmp2 = VA*.5*(tmp*wik*dgdc*exp(lamdajik)); + fj[0] = -tmp2*dcosjikdrj[0]; + fj[1] = -tmp2*dcosjikdrj[1]; + fj[2] = -tmp2*dcosjikdrj[2]; + fi[0] = -tmp2*dcosjikdri[0]; + fi[1] = -tmp2*dcosjikdri[1]; + fi[2] = -tmp2*dcosjikdri[2]; + fk[0] = -tmp2*dcosjikdrk[0]; + fk[1] = -tmp2*dcosjikdrk[1]; + fk[2] = -tmp2*dcosjikdrk[2]; + + tmp2 = VA*.5*(tmp*wik*g*exp(lamdajik)*4.0*kronecker(itype,1)); + fj[0] -= tmp2*(-rij[0]*invrijm); + fj[1] -= tmp2*(-rij[1]*invrijm); + fj[2] -= tmp2*(-rij[2]*invrijm); + fi[0] -= tmp2*((-rik[0]*invrikm)+(rij[0]*invrijm)); + fi[1] -= tmp2*((-rik[1]*invrikm)+(rij[1]*invrijm)); + fi[2] -= tmp2*((-rik[2]*invrikm)+(rij[2]*invrijm)); + fk[0] -= tmp2*(rik[0]*invrikm); + fk[1] -= tmp2*(rik[1]*invrikm); + fk[2] -= tmp2*(rik[2]*invrikm); + + // coordination forces + + // dwik forces + + tmp2 = VA*.5*(tmp*dwik*g*exp(lamdajik))*invrikm; + fi[0] -= tmp2*rik[0]; + fi[1] -= tmp2*rik[1]; + fi[2] -= tmp2*rik[2]; + fk[0] += tmp2*rik[0]; + fk[1] += tmp2*rik[1]; + fk[2] += tmp2*rik[2]; + + // PIJ forces + + tmp2 = VA*.5*(tmp*dN2[ktype]*dwik)*invrikm; + fi[0] -= tmp2*rik[0]; + fi[1] -= tmp2*rik[1]; + fi[2] -= tmp2*rik[2]; + fk[0] += tmp2*rik[0]; + fk[1] += tmp2*rik[1]; + fk[2] += tmp2*rik[2]; + + // dgdN forces + + tmp2 = VA*.5*(tmp*tmp3*dwik)*invrikm; + fi[0] -= tmp2*rik[0]; + fi[1] -= tmp2*rik[1]; + fi[2] -= tmp2*rik[2]; + fk[0] += tmp2*rik[0]; + fk[1] += tmp2*rik[1]; + fk[2] += tmp2*rik[2]; + + f[atomi][0] += fi[0]; f[atomi][1] += fi[1]; f[atomi][2] += fi[2]; + f[atomj][0] += fj[0]; f[atomj][1] += fj[1]; f[atomj][2] += fj[2]; + f[atomk][0] += fk[0]; f[atomk][1] += fk[1]; f[atomk][2] += fk[2]; + + if (vflag_atom) { + rji[0] = -rij[0]; rji[1] = -rij[1]; rji[2] = -rij[2]; + rki[0] = -rik[0]; rki[1] = -rik[1]; rki[2] = -rik[2]; + v_tally3_thr(atomi,atomj,atomk,fj,fk,rji,rki,thr); + } + } + } + + tmp = 0.0; + tmp2 = 0.0; + tmp3 = 0.0; + Etmp = 0.0; + + REBO_neighs = REBO_firstneigh[j]; + for (l = 0; l < REBO_numneigh[j]; l++) { + atoml = REBO_neighs[l]; + if (atoml != atomi) { + ltype = map[type[atoml]]; + rjl[0] = x[atomj][0]-x[atoml][0]; + rjl[1] = x[atomj][1]-x[atoml][1]; + rjl[2] = x[atomj][2]-x[atoml][2]; + rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2])); + lamdaijl = 4.0*kronecker(jtype,1) * + ((rho[ltype][1]-rjlmag)-(rho[itype][1]-rijmag)); + wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dS); + Nlj = nC[atoml]-(wjl*kronecker(jtype,0)) + + nH[atoml]-(wjl*kronecker(jtype,1)); + cosijl = -1.0*((rij[0]*rjl[0])+(rij[1]*rjl[1])+(rij[2]*rjl[2])) / + (rijmag*rjlmag); + cosijl = MIN(cosijl,1.0); + cosijl = MAX(cosijl,-1.0); + + // evaluate splines g and derivatives dg + + g = gSpline(cosijl,NjiC+NjiH,jtype,&dgdc,&dgdN); + Etmp = Etmp+(wjl*g*exp(lamdaijl)); + tmp3 = tmp3+(wjl*dgdN*exp(lamdaijl)); + NconjtmpJ = NconjtmpJ+(kronecker(ltype,0)*wjl*Sp(Nlj,Nmin,Nmax,dS)); + } + } + + PjiS = 0.0; + dN2[0] = 0.0; + dN2[1] = 0.0; + PjiS = PijSpline(NjiC,NjiH,jtype,itype,dN2); + pji = 1.0/sqrt(1.0+Etmp+PjiS); + tmp = -0.5*pji*pji*pji; + + REBO_neighs = REBO_firstneigh[j]; + for (l = 0; l < REBO_numneigh[j]; l++) { + atoml = REBO_neighs[l]; + if (atoml != atomi) { + ltype = map[type[atoml]]; + rjl[0] = x[atomj][0]-x[atoml][0]; + rjl[1] = x[atomj][1]-x[atoml][1]; + rjl[2] = x[atomj][2]-x[atoml][2]; + rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2])); + lamdaijl = 4.0*kronecker(jtype,1) * + ((rho[ltype][1]-rjlmag)-(rho[itype][1]-rijmag)); + wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dwjl); + + const double invrjlm = 1.0/rjlmag; + const double invrijlm = invrijm*invrjlm; + const double invrjlm2 = invrjlm*invrjlm; + + cosijl = (-1.0*((rij[0]*rjl[0])+(rij[1]*rjl[1])+(rij[2]*rjl[2]))) + * invrijlm; + + cosijl = MIN(cosijl,1.0); + cosijl = MAX(cosijl,-1.0); + + dcosijldri[0] = (-rjl[0]*invrijlm) - (cosijl*rij[0]*invrijm2); + dcosijldri[1] = (-rjl[1]*invrijlm) - (cosijl*rij[1]*invrijm2); + dcosijldri[2] = (-rjl[2]*invrijlm) - (cosijl*rij[2]*invrijm2); + dcosijldrj[0] = ((-rij[0]+rjl[0])*invrijlm) + + (cosijl*((rij[0]*invrijm2)-(rjl[0]*invrjlm2))); + dcosijldrj[1] = ((-rij[1]+rjl[1])*invrijlm) + + (cosijl*((rij[1]*invrijm2)-(rjl[1]*invrjlm2))); + dcosijldrj[2] = ((-rij[2]+rjl[2])*invrijlm) + + (cosijl*((rij[2]*invrijm2)-(rjl[2]*invrjlm2))); + dcosijldrl[0] = (rij[0]*invrijlm)+(cosijl*rjl[0]*invrjlm2); + dcosijldrl[1] = (rij[1]*invrijlm)+(cosijl*rjl[1]*invrjlm2); + dcosijldrl[2] = (rij[2]*invrijlm)+(cosijl*rjl[2]*invrjlm2); + + // evaluate splines g and derivatives dg + + g = gSpline(cosijl,NjiC+NjiH,jtype,&dgdc,&dgdN); + tmp2 = VA*.5*(tmp*wjl*dgdc*exp(lamdaijl)); + fi[0] = -tmp2*dcosijldri[0]; + fi[1] = -tmp2*dcosijldri[1]; + fi[2] = -tmp2*dcosijldri[2]; + fj[0] = -tmp2*dcosijldrj[0]; + fj[1] = -tmp2*dcosijldrj[1]; + fj[2] = -tmp2*dcosijldrj[2]; + fl[0] = -tmp2*dcosijldrl[0]; + fl[1] = -tmp2*dcosijldrl[1]; + fl[2] = -tmp2*dcosijldrl[2]; + + tmp2 = VA*.5*(tmp*wjl*g*exp(lamdaijl)*4.0*kronecker(jtype,1)); + fi[0] -= tmp2*(rij[0]*invrijm); + fi[1] -= tmp2*(rij[1]*invrijm); + fi[2] -= tmp2*(rij[2]*invrijm); + fj[0] -= tmp2*((-rjl[0]*invrjlm)-(rij[0]*invrijm)); + fj[1] -= tmp2*((-rjl[1]*invrjlm)-(rij[1]*invrijm)); + fj[2] -= tmp2*((-rjl[2]*invrjlm)-(rij[2]*invrijm)); + fl[0] -= tmp2*(rjl[0]*invrjlm); + fl[1] -= tmp2*(rjl[1]*invrjlm); + fl[2] -= tmp2*(rjl[2]*invrjlm); + + // coordination forces + + // dwik forces + + tmp2 = VA*.5*(tmp*dwjl*g*exp(lamdaijl))*invrjlm; + fj[0] -= tmp2*rjl[0]; + fj[1] -= tmp2*rjl[1]; + fj[2] -= tmp2*rjl[2]; + fl[0] += tmp2*rjl[0]; + fl[1] += tmp2*rjl[1]; + fl[2] += tmp2*rjl[2]; + + // PIJ forces + + tmp2 = VA*.5*(tmp*dN2[ltype]*dwjl)*invrjlm; + fj[0] -= tmp2*rjl[0]; + fj[1] -= tmp2*rjl[1]; + fj[2] -= tmp2*rjl[2]; + fl[0] += tmp2*rjl[0]; + fl[1] += tmp2*rjl[1]; + fl[2] += tmp2*rjl[2]; + + // dgdN forces + + tmp2 = VA*.5*(tmp*tmp3*dwjl)*invrjlm; + fj[0] -= tmp2*rjl[0]; + fj[1] -= tmp2*rjl[1]; + fj[2] -= tmp2*rjl[2]; + fl[0] += tmp2*rjl[0]; + fl[1] += tmp2*rjl[1]; + fl[2] += tmp2*rjl[2]; + + f[atomi][0] += fi[0]; f[atomi][1] += fi[1]; f[atomi][2] += fi[2]; + f[atomj][0] += fj[0]; f[atomj][1] += fj[1]; f[atomj][2] += fj[2]; + f[atoml][0] += fl[0]; f[atoml][1] += fl[1]; f[atoml][2] += fl[2]; + + if (vflag_atom) { + rlj[0] = -rjl[0]; rlj[1] = -rjl[1]; rlj[2] = -rjl[2]; + v_tally3_thr(atomi,atomj,atoml,fi,fl,rij,rlj,thr); + } + } + } + + // evaluate Nij conj + + Nijconj = 1.0+(NconjtmpI*NconjtmpI)+(NconjtmpJ*NconjtmpJ); + piRC = piRCSpline(NijC+NijH,NjiC+NjiH,Nijconj,itype,jtype,dN3); + + // piRC forces + + REBO_neighs_i = REBO_firstneigh[i]; + for (k = 0; k < REBO_numneigh[i]; k++) { + atomk = REBO_neighs_i[k]; + if (atomk !=atomj) { + ktype = map[type[atomk]]; + rik[0] = x[atomi][0]-x[atomk][0]; + rik[1] = x[atomi][1]-x[atomk][1]; + rik[2] = x[atomi][2]-x[atomk][2]; + rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2])); + wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dwik); + Nki = nC[atomk]-(wik*kronecker(itype,0))+nH[atomk] - + (wik*kronecker(itype,1)); + SpN = Sp(Nki,Nmin,Nmax,dNki); + + tmp2 = VA*dN3[0]*dwik/rikmag; + f[atomi][0] -= tmp2*rik[0]; + f[atomi][1] -= tmp2*rik[1]; + f[atomi][2] -= tmp2*rik[2]; + f[atomk][0] += tmp2*rik[0]; + f[atomk][1] += tmp2*rik[1]; + f[atomk][2] += tmp2*rik[2]; + + if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpI*dwik*SpN)/rikmag; + f[atomi][0] -= tmp2*rik[0]; + f[atomi][1] -= tmp2*rik[1]; + f[atomi][2] -= tmp2*rik[2]; + f[atomk][0] += tmp2*rik[0]; + f[atomk][1] += tmp2*rik[1]; + f[atomk][2] += tmp2*rik[2]; + + if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr); + + if (fabs(dNki) > TOL) { + REBO_neighs_k = REBO_firstneigh[atomk]; + for (n = 0; n < REBO_numneigh[atomk]; n++) { + atomn = REBO_neighs_k[n]; + if (atomn != atomi) { + ntype = map[type[atomn]]; + rkn[0] = x[atomk][0]-x[atomn][0]; + rkn[1] = x[atomk][1]-x[atomn][1]; + rkn[2] = x[atomk][2]-x[atomn][2]; + rknmag = sqrt((rkn[0]*rkn[0])+(rkn[1]*rkn[1])+(rkn[2]*rkn[2])); + Sp(rknmag,rcmin[ktype][ntype],rcmax[ktype][ntype],dwkn); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpI*wik*dNki*dwkn)/rknmag; + f[atomk][0] -= tmp2*rkn[0]; + f[atomk][1] -= tmp2*rkn[1]; + f[atomk][2] -= tmp2*rkn[2]; + f[atomn][0] += tmp2*rkn[0]; + f[atomn][1] += tmp2*rkn[1]; + f[atomn][2] += tmp2*rkn[2]; + + if (vflag_atom) v_tally2_thr(atomk,atomn,-tmp2,rkn,thr); + } + } + } + } + } + + // piRC forces + + REBO_neighs = REBO_firstneigh[atomj]; + for (l = 0; l < REBO_numneigh[atomj]; l++) { + atoml = REBO_neighs[l]; + if (atoml !=atomi) { + ltype = map[type[atoml]]; + rjl[0] = x[atomj][0]-x[atoml][0]; + rjl[1] = x[atomj][1]-x[atoml][1]; + rjl[2] = x[atomj][2]-x[atoml][2]; + rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2])); + wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dwjl); + Nlj = nC[atoml]-(wjl*kronecker(jtype,0))+nH[atoml] - + (wjl*kronecker(jtype,1)); + SpN = Sp(Nlj,Nmin,Nmax,dNlj); + + tmp2 = VA*dN3[1]*dwjl/rjlmag; + f[atomj][0] -= tmp2*rjl[0]; + f[atomj][1] -= tmp2*rjl[1]; + f[atomj][2] -= tmp2*rjl[2]; + f[atoml][0] += tmp2*rjl[0]; + f[atoml][1] += tmp2*rjl[1]; + f[atoml][2] += tmp2*rjl[2]; + + if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*dwjl*SpN)/rjlmag; + f[atomj][0] -= tmp2*rjl[0]; + f[atomj][1] -= tmp2*rjl[1]; + f[atomj][2] -= tmp2*rjl[2]; + f[atoml][0] += tmp2*rjl[0]; + f[atoml][1] += tmp2*rjl[1]; + f[atoml][2] += tmp2*rjl[2]; + + if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr); + + if (fabs(dNlj) > TOL) { + REBO_neighs_l = REBO_firstneigh[atoml]; + for (n = 0; n < REBO_numneigh[atoml]; n++) { + atomn = REBO_neighs_l[n]; + if (atomn != atomj) { + ntype = map[type[atomn]]; + rln[0] = x[atoml][0]-x[atomn][0]; + rln[1] = x[atoml][1]-x[atomn][1]; + rln[2] = x[atoml][2]-x[atomn][2]; + rlnmag = sqrt((rln[0]*rln[0])+(rln[1]*rln[1])+(rln[2]*rln[2])); + Sp(rlnmag,rcmin[ltype][ntype],rcmax[ltype][ntype],dwln); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*wjl*dNlj*dwln)/rlnmag; + f[atoml][0] -= tmp2*rln[0]; + f[atoml][1] -= tmp2*rln[1]; + f[atoml][2] -= tmp2*rln[2]; + f[atomn][0] += tmp2*rln[0]; + f[atomn][1] += tmp2*rln[1]; + f[atomn][2] += tmp2*rln[2]; + + if (vflag_atom) v_tally2_thr(atoml,atomn,-tmp2,rln,thr); + } + } + } + } + } + + Tij = 0.0; + dN3[0] = 0.0; + dN3[1] = 0.0; + dN3[2] = 0.0; + if (itype == 0 && jtype == 0) + Tij=TijSpline((NijC+NijH),(NjiC+NjiH),Nijconj,dN3); + Etmp = 0.0; + + if (fabs(Tij) > TOL) { + atom2 = atomi; + atom3 = atomj; + r32[0] = x[atom3][0]-x[atom2][0]; + r32[1] = x[atom3][1]-x[atom2][1]; + r32[2] = x[atom3][2]-x[atom2][2]; + r32mag = sqrt((r32[0]*r32[0])+(r32[1]*r32[1])+(r32[2]*r32[2])); + r23[0] = -r32[0]; + r23[1] = -r32[1]; + r23[2] = -r32[2]; + r23mag = r32mag; + REBO_neighs_i = REBO_firstneigh[i]; + for (k = 0; k < REBO_numneigh[i]; k++) { + atomk = REBO_neighs_i[k]; + atom1 = atomk; + ktype = map[type[atomk]]; + if (atomk != atomj) { + r21[0] = x[atom2][0]-x[atom1][0]; + r21[1] = x[atom2][1]-x[atom1][1]; + r21[2] = x[atom2][2]-x[atom1][2]; + r21mag = sqrt(r21[0]*r21[0] + r21[1]*r21[1] + r21[2]*r21[2]); + cos321 = -1.0*((r21[0]*r32[0])+(r21[1]*r32[1])+(r21[2]*r32[2])) / + (r21mag*r32mag); + cos321 = MIN(cos321,1.0); + cos321 = MAX(cos321,-1.0); + Sp2(cos321,thmin,thmax,dcut321); + sin321 = sqrt(1.0 - cos321*cos321); + sink2i = 1.0/(sin321*sin321); + rik2i = 1.0/(r21mag*r21mag); + if (sin321 != 0.0) { + rr = (r23mag*r23mag)-(r21mag*r21mag); + rjk[0] = r21[0]-r23[0]; + rjk[1] = r21[1]-r23[1]; + rjk[2] = r21[2]-r23[2]; + rjk2 = (rjk[0]*rjk[0])+(rjk[1]*rjk[1])+(rjk[2]*rjk[2]); + rijrik = 2.0*r23mag*r21mag; + rik2 = r21mag*r21mag; + dctik = (-rr+rjk2)/(rijrik*rik2); + dctij = (rr+rjk2)/(rijrik*r23mag*r23mag); + dctjk = -2.0/rijrik; + w21 = Sp(r21mag,rcmin[itype][ktype],rcmaxp[itype][ktype],dw21); + rijmag = r32mag; + rikmag = r21mag; + rij2 = r32mag*r32mag; + rik2 = r21mag*r21mag; + costmp = 0.5*(rij2+rik2-rjk2)/rijmag/rikmag; + tspjik = Sp2(costmp,thmin,thmax,dtsjik); + dtsjik = -dtsjik; + + REBO_neighs_j = REBO_firstneigh[j]; + for (l = 0; l < REBO_numneigh[j]; l++) { + atoml = REBO_neighs_j[l]; + atom4 = atoml; + ltype = map[type[atoml]]; + if (!(atoml == atomi || atoml == atomk)) { + r34[0] = x[atom3][0]-x[atom4][0]; + r34[1] = x[atom3][1]-x[atom4][1]; + r34[2] = x[atom3][2]-x[atom4][2]; + r34mag = sqrt((r34[0]*r34[0])+(r34[1]*r34[1])+(r34[2]*r34[2])); + cos234 = (r32[0]*r34[0] + r32[1]*r34[1] + r32[2]*r34[2]) / + (r32mag*r34mag); + cos234 = MIN(cos234,1.0); + cos234 = MAX(cos234,-1.0); + sin234 = sqrt(1.0 - cos234*cos234); + sinl2i = 1.0/(sin234*sin234); + rjl2i = 1.0/(r34mag*r34mag); + + if (sin234 != 0.0) { + w34 = Sp(r34mag,rcmin[jtype][ltype],rcmaxp[jtype][ltype],dw34); + rr = (r23mag*r23mag)-(r34mag*r34mag); + ril[0] = r23[0]+r34[0]; + ril[1] = r23[1]+r34[1]; + ril[2] = r23[2]+r34[2]; + ril2 = (ril[0]*ril[0])+(ril[1]*ril[1])+(ril[2]*ril[2]); + rijrjl = 2.0*r23mag*r34mag; + rjl2 = r34mag*r34mag; + dctjl = (-rr+ril2)/(rijrjl*rjl2); + dctji = (rr+ril2)/(rijrjl*r23mag*r23mag); + dctil = -2.0/rijrjl; + rjlmag = r34mag; + rjl2 = r34mag*r34mag; + costmp = 0.5*(rij2+rjl2-ril2)/rijmag/rjlmag; + tspijl = Sp2(costmp,thmin,thmax,dtsijl); + dtsijl = -dtsijl; + prefactor = VA*Tij; + + cross321[0] = (r32[1]*r21[2])-(r32[2]*r21[1]); + cross321[1] = (r32[2]*r21[0])-(r32[0]*r21[2]); + cross321[2] = (r32[0]*r21[1])-(r32[1]*r21[0]); + cross234[0] = (r23[1]*r34[2])-(r23[2]*r34[1]); + cross234[1] = (r23[2]*r34[0])-(r23[0]*r34[2]); + cross234[2] = (r23[0]*r34[1])-(r23[1]*r34[0]); + + cwnum = (cross321[0]*cross234[0]) + + (cross321[1]*cross234[1]) + (cross321[2]*cross234[2]); + cwnom = r21mag*r34mag*r23mag*r23mag*sin321*sin234; + om1234 = cwnum/cwnom; + cw = om1234; + Etmp += ((1.0-(om1234*om1234))*w21*w34) * + (1.0-tspjik)*(1.0-tspijl); + + dt1dik = (rik2i)-(dctik*sink2i*cos321); + dt1djk = (-dctjk*sink2i*cos321); + dt1djl = (rjl2i)-(dctjl*sinl2i*cos234); + dt1dil = (-dctil*sinl2i*cos234); + dt1dij = (2.0/(r23mag*r23mag))-(dctij*sink2i*cos321) - + (dctji*sinl2i*cos234); + + dt2dik[0] = (-r23[2]*cross234[1])+(r23[1]*cross234[2]); + dt2dik[1] = (-r23[0]*cross234[2])+(r23[2]*cross234[0]); + dt2dik[2] = (-r23[1]*cross234[0])+(r23[0]*cross234[1]); + + dt2djl[0] = (-r23[1]*cross321[2])+(r23[2]*cross321[1]); + dt2djl[1] = (-r23[2]*cross321[0])+(r23[0]*cross321[2]); + dt2djl[2] = (-r23[0]*cross321[1])+(r23[1]*cross321[0]); + + dt2dij[0] = (r21[2]*cross234[1])-(r34[2]*cross321[1]) - + (r21[1]*cross234[2])+(r34[1]*cross321[2]); + dt2dij[1] = (r21[0]*cross234[2])-(r34[0]*cross321[2]) - + (r21[2]*cross234[0])+(r34[2]*cross321[0]); + dt2dij[2] = (r21[1]*cross234[0])-(r34[1]*cross321[0]) - + (r21[0]*cross234[1])+(r34[0]*cross321[1]); + + aa = (prefactor*2.0*cw/cwnom)*w21*w34 * + (1.0-tspjik)*(1.0-tspijl); + aaa1 = -prefactor*(1.0-(om1234*om1234)) * + (1.0-tspjik)*(1.0-tspijl); + aaa2 = aaa1*w21*w34; + at2 = aa*cwnum; + + fcijpc = (-dt1dij*at2)+(aaa2*dtsjik*dctij*(1.0-tspijl)) + + (aaa2*dtsijl*dctji*(1.0-tspjik)); + fcikpc = (-dt1dik*at2)+(aaa2*dtsjik*dctik*(1.0-tspijl)); + fcjlpc = (-dt1djl*at2)+(aaa2*dtsijl*dctjl*(1.0-tspjik)); + fcjkpc = (-dt1djk*at2)+(aaa2*dtsjik*dctjk*(1.0-tspijl)); + fcilpc = (-dt1dil*at2)+(aaa2*dtsijl*dctil*(1.0-tspjik)); + + F23[0] = (fcijpc*r23[0])+(aa*dt2dij[0]); + F23[1] = (fcijpc*r23[1])+(aa*dt2dij[1]); + F23[2] = (fcijpc*r23[2])+(aa*dt2dij[2]); + + F12[0] = (fcikpc*r21[0])+(aa*dt2dik[0]); + F12[1] = (fcikpc*r21[1])+(aa*dt2dik[1]); + F12[2] = (fcikpc*r21[2])+(aa*dt2dik[2]); + + F34[0] = (fcjlpc*r34[0])+(aa*dt2djl[0]); + F34[1] = (fcjlpc*r34[1])+(aa*dt2djl[1]); + F34[2] = (fcjlpc*r34[2])+(aa*dt2djl[2]); + + F31[0] = (fcjkpc*rjk[0]); + F31[1] = (fcjkpc*rjk[1]); + F31[2] = (fcjkpc*rjk[2]); + + F24[0] = (fcilpc*ril[0]); + F24[1] = (fcilpc*ril[1]); + F24[2] = (fcilpc*ril[2]); + + f1[0] = -F12[0]-F31[0]; + f1[1] = -F12[1]-F31[1]; + f1[2] = -F12[2]-F31[2]; + f2[0] = F23[0]+F12[0]+F24[0]; + f2[1] = F23[1]+F12[1]+F24[1]; + f2[2] = F23[2]+F12[2]+F24[2]; + f3[0] = -F23[0]+F34[0]+F31[0]; + f3[1] = -F23[1]+F34[1]+F31[1]; + f3[2] = -F23[2]+F34[2]+F31[2]; + f4[0] = -F34[0]-F24[0]; + f4[1] = -F34[1]-F24[1]; + f4[2] = -F34[2]-F24[2]; + + // coordination forces + + tmp2 = VA*Tij*((1.0-(om1234*om1234))) * + (1.0-tspjik)*(1.0-tspijl)*dw21*w34/r21mag; + f2[0] -= tmp2*r21[0]; + f2[1] -= tmp2*r21[1]; + f2[2] -= tmp2*r21[2]; + f1[0] += tmp2*r21[0]; + f1[1] += tmp2*r21[1]; + f1[2] += tmp2*r21[2]; + + tmp2 = VA*Tij*((1.0-(om1234*om1234))) * + (1.0-tspjik)*(1.0-tspijl)*w21*dw34/r34mag; + f3[0] -= tmp2*r34[0]; + f3[1] -= tmp2*r34[1]; + f3[2] -= tmp2*r34[2]; + f4[0] += tmp2*r34[0]; + f4[1] += tmp2*r34[1]; + f4[2] += tmp2*r34[2]; + + f[atom1][0] += f1[0]; f[atom1][1] += f1[1]; + f[atom1][2] += f1[2]; + f[atom2][0] += f2[0]; f[atom2][1] += f2[1]; + f[atom2][2] += f2[2]; + f[atom3][0] += f3[0]; f[atom3][1] += f3[1]; + f[atom3][2] += f3[2]; + f[atom4][0] += f4[0]; f[atom4][1] += f4[1]; + f[atom4][2] += f4[2]; + + if (vflag_atom) { + r13[0] = -rjk[0]; r13[1] = -rjk[1]; r13[2] = -rjk[2]; + r43[0] = -r34[0]; r43[1] = -r34[1]; r43[2] = -r34[2]; + v_tally4_thr(atom1,atom2,atom3,atom4,f1,f2,f4,r13,r23,r43,thr); + } + } + } + } + } + } + } + + // Tij forces now that we have Etmp + + REBO_neighs = REBO_firstneigh[i]; + for (k = 0; k < REBO_numneigh[i]; k++) { + atomk = REBO_neighs[k]; + if (atomk != atomj) { + ktype = map[type[atomk]]; + rik[0] = x[atomi][0]-x[atomk][0]; + rik[1] = x[atomi][1]-x[atomk][1]; + rik[2] = x[atomi][2]-x[atomk][2]; + rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2])); + wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dwik); + Nki = nC[atomk]-(wik*kronecker(itype,0))+nH[atomk] - + (wik*kronecker(itype,1)); + SpN = Sp(Nki,Nmin,Nmax,dNki); + + tmp2 = VA*dN3[0]*dwik*Etmp/rikmag; + f[atomi][0] -= tmp2*rik[0]; + f[atomi][1] -= tmp2*rik[1]; + f[atomi][2] -= tmp2*rik[2]; + f[atomk][0] += tmp2*rik[0]; + f[atomk][1] += tmp2*rik[1]; + f[atomk][2] += tmp2*rik[2]; + + if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpI*dwik*SpN)*Etmp/rikmag; + f[atomi][0] -= tmp2*rik[0]; + f[atomi][1] -= tmp2*rik[1]; + f[atomi][2] -= tmp2*rik[2]; + f[atomk][0] += tmp2*rik[0]; + f[atomk][1] += tmp2*rik[1]; + f[atomk][2] += tmp2*rik[2]; + + if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr); + + if (fabs(dNki) > TOL) { + REBO_neighs_k = REBO_firstneigh[atomk]; + for (n = 0; n < REBO_numneigh[atomk]; n++) { + atomn = REBO_neighs_k[n]; + ntype = map[type[atomn]]; + if (atomn != atomi) { + rkn[0] = x[atomk][0]-x[atomn][0]; + rkn[1] = x[atomk][1]-x[atomn][1]; + rkn[2] = x[atomk][2]-x[atomn][2]; + rknmag = sqrt((rkn[0]*rkn[0])+(rkn[1]*rkn[1])+(rkn[2]*rkn[2])); + Sp(rknmag,rcmin[ktype][ntype],rcmax[ktype][ntype],dwkn); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpI*wik*dNki*dwkn)*Etmp/rknmag; + f[atomk][0] -= tmp2*rkn[0]; + f[atomk][1] -= tmp2*rkn[1]; + f[atomk][2] -= tmp2*rkn[2]; + f[atomn][0] += tmp2*rkn[0]; + f[atomn][1] += tmp2*rkn[1]; + f[atomn][2] += tmp2*rkn[2]; + + if (vflag_atom) v_tally2_thr(atomk,atomn,-tmp2,rkn,thr); + } + } + } + } + } + + // Tij forces + + REBO_neighs = REBO_firstneigh[j]; + for (l = 0; l < REBO_numneigh[j]; l++) { + atoml = REBO_neighs[l]; + if (atoml != atomi) { + ltype = map[type[atoml]]; + rjl[0] = x[atomj][0]-x[atoml][0]; + rjl[1] = x[atomj][1]-x[atoml][1]; + rjl[2] = x[atomj][2]-x[atoml][2]; + rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2])); + wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dwjl); + Nlj = nC[atoml]-(wjl*kronecker(jtype,0))+nH[atoml] - + (wjl*kronecker(jtype,1)); + SpN = Sp(Nlj,Nmin,Nmax,dNlj); + + tmp2 = VA*dN3[1]*dwjl*Etmp/rjlmag; + f[atomj][0] -= tmp2*rjl[0]; + f[atomj][1] -= tmp2*rjl[1]; + f[atomj][2] -= tmp2*rjl[2]; + f[atoml][0] += tmp2*rjl[0]; + f[atoml][1] += tmp2*rjl[1]; + f[atoml][2] += tmp2*rjl[2]; + + if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*dwjl*SpN)*Etmp/rjlmag; + f[atomj][0] -= tmp2*rjl[0]; + f[atomj][1] -= tmp2*rjl[1]; + f[atomj][2] -= tmp2*rjl[2]; + f[atoml][0] += tmp2*rjl[0]; + f[atoml][1] += tmp2*rjl[1]; + f[atoml][2] += tmp2*rjl[2]; + + if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr); + + if (fabs(dNlj) > TOL) { + REBO_neighs_l = REBO_firstneigh[atoml]; + for (n = 0; n < REBO_numneigh[atoml]; n++) { + atomn = REBO_neighs_l[n]; + ntype = map[type[atomn]]; + if (atomn !=atomj) { + rln[0] = x[atoml][0]-x[atomn][0]; + rln[1] = x[atoml][1]-x[atomn][1]; + rln[2] = x[atoml][2]-x[atomn][2]; + rlnmag = sqrt((rln[0]*rln[0])+(rln[1]*rln[1])+(rln[2]*rln[2])); + Sp(rlnmag,rcmin[ltype][ntype],rcmax[ltype][ntype],dwln); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*wjl*dNlj*dwln)*Etmp/rlnmag; + f[atoml][0] -= tmp2*rln[0]; + f[atoml][1] -= tmp2*rln[1]; + f[atoml][2] -= tmp2*rln[2]; + f[atomn][0] += tmp2*rln[0]; + f[atomn][1] += tmp2*rln[1]; + f[atomn][2] += tmp2*rln[2]; + + if (vflag_atom) v_tally2_thr(atoml,atomn,-tmp2,rln,thr); + } + } + } + } + } + } + + bij = (0.5*(pij+pji))+piRC+(Tij*Etmp); + return bij; +} + +/* ---------------------------------------------------------------------- + Bij* function +------------------------------------------------------------------------- */ + +double PairAIREBOOMP::bondorderLJ_thr(int i, int j, double rij[3], double rijmag, + double VA, double rij0[3], double rij0mag, + int vflag_atom, ThrData * const thr) +{ + int k,n,l,atomk,atoml,atomn,atom1,atom2,atom3,atom4; + int atomi,atomj,itype,jtype,ktype,ltype,ntype; + double rik[3], rjl[3], rkn[3],rknmag,dNki; + double NijC,NijH,NjiC,NjiH,wik,dwik,dwkn,wjl; + double rikmag,rjlmag,cosjik,cosijl,g,tmp2,tmp3; + double Etmp,pij,tmp,wij,dwij,NconjtmpI,NconjtmpJ; + double Nki,Nlj,dS,lamdajik,lamdaijl,dgdc,dgdN,pji,Nijconj,piRC; + double dcosjikdri[3],dcosijldri[3],dcosjikdrk[3]; + double dN2[2],dN3[3]; + double dcosijldrj[3],dcosijldrl[3],dcosjikdrj[3],dwjl; + double Tij,crosskij[3],crosskijmag; + double crossijl[3],crossijlmag,omkijl; + double tmppij,tmppji,dN2PIJ[2],dN2PJI[2],dN3piRC[3],dN3Tij[3]; + double bij,tmp3pij,tmp3pji,Stb,dStb; + double r32[3],r32mag,cos321; + double om1234,rln[3]; + double rlnmag,dwln,r23[3],r23mag,r21[3],r21mag; + double w21,dw21,r34[3],r34mag,cos234,w34,dw34; + double cross321[3],cross234[3],prefactor,SpN; + double fcijpc,fcikpc,fcjlpc,fcjkpc,fcilpc; + double dt2dik[3],dt2djl[3],dt2dij[3],aa,aaa1,aaa2,at2,cw,cwnum,cwnom; + double sin321,sin234,rr,rijrik,rijrjl,rjk2,rik2,ril2,rjl2; + double dctik,dctjk,dctjl,dctij,dctji,dctil,rik2i,rjl2i,sink2i,sinl2i; + double rjk[3],ril[3],dt1dik,dt1djk,dt1djl,dt1dil,dt1dij; + double dNlj; + double PijS,PjiS; + double rij2,tspjik,dtsjik,tspijl,dtsijl,costmp; + int *REBO_neighs,*REBO_neighs_i,*REBO_neighs_j,*REBO_neighs_k,*REBO_neighs_l; + double F12[3],F23[3],F34[3],F31[3],F24[3]; + double fi[3],fj[3],fk[3],fl[3],f1[3],f2[3],f3[3],f4[4]; + double rji[3],rki[3],rlj[3],r13[3],r43[3]; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + + atomi = i; + atomj = j; + itype = map[type[atomi]]; + jtype = map[type[atomj]]; + wij = Sp(rij0mag,rcmin[itype][jtype],rcmax[itype][jtype],dwij); + NijC = nC[atomi]-(wij*kronecker(jtype,0)); + NijH = nH[atomi]-(wij*kronecker(jtype,1)); + NjiC = nC[atomj]-(wij*kronecker(itype,0)); + NjiH = nH[atomj]-(wij*kronecker(itype,1)); + + bij = 0.0; + tmp = 0.0; + tmp2 = 0.0; + tmp3 = 0.0; + dgdc = 0.0; + dgdN = 0.0; + NconjtmpI = 0.0; + NconjtmpJ = 0.0; + Etmp = 0.0; + + REBO_neighs = REBO_firstneigh[i]; + for (k = 0; k < REBO_numneigh[i]; k++) { + atomk = REBO_neighs[k]; + if (atomk != atomj) { + ktype = map[type[atomk]]; + rik[0] = x[atomi][0]-x[atomk][0]; + rik[1] = x[atomi][1]-x[atomk][1]; + rik[2] = x[atomi][2]-x[atomk][2]; + rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2])); + lamdajik = 4.0*kronecker(itype,1) * + ((rho[ktype][1]-rikmag)-(rho[jtype][1]-rijmag)); + wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dS); + Nki = nC[atomk]-(wik*kronecker(itype,0)) + + nH[atomk]-(wik*kronecker(itype,1)); + cosjik = ((rij[0]*rik[0])+(rij[1]*rik[1])+(rij[2]*rik[2])) / + (rijmag*rikmag); + cosjik = MIN(cosjik,1.0); + cosjik = MAX(cosjik,-1.0); + + // evaluate splines g and derivatives dg + + g = gSpline(cosjik,(NijC+NijH),itype,&dgdc,&dgdN); + Etmp = Etmp+(wik*g*exp(lamdajik)); + tmp3 = tmp3+(wik*dgdN*exp(lamdajik)); + NconjtmpI = NconjtmpI+(kronecker(ktype,0)*wik*Sp(Nki,Nmin,Nmax,dS)); + } + } + + PijS = 0.0; + dN2PIJ[0] = 0.0; + dN2PIJ[1] = 0.0; + PijS = PijSpline(NijC,NijH,itype,jtype,dN2PIJ); + pij = 1.0/sqrt(1.0+Etmp+PijS); + tmppij = -.5*pij*pij*pij; + tmp3pij = tmp3; + tmp = 0.0; + tmp2 = 0.0; + tmp3 = 0.0; + Etmp = 0.0; + + REBO_neighs = REBO_firstneigh[j]; + for (l = 0; l < REBO_numneigh[j]; l++) { + atoml = REBO_neighs[l]; + if (atoml != atomi) { + ltype = map[type[atoml]]; + rjl[0] = x[atomj][0]-x[atoml][0]; + rjl[1] = x[atomj][1]-x[atoml][1]; + rjl[2] = x[atomj][2]-x[atoml][2]; + rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2])); + lamdaijl = 4.0*kronecker(jtype,1) * + ((rho[ltype][1]-rjlmag)-(rho[itype][1]-rijmag)); + wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dS); + Nlj = nC[atoml]-(wjl*kronecker(jtype,0))+nH[atoml] - + (wjl*kronecker(jtype,1)); + cosijl = -1.0*((rij[0]*rjl[0])+(rij[1]*rjl[1])+(rij[2]*rjl[2])) / + (rijmag*rjlmag); + cosijl = MIN(cosijl,1.0); + cosijl = MAX(cosijl,-1.0); + + // evaluate splines g and derivatives dg + + g = gSpline(cosijl,NjiC+NjiH,jtype,&dgdc,&dgdN); + Etmp = Etmp+(wjl*g*exp(lamdaijl)); + tmp3 = tmp3+(wjl*dgdN*exp(lamdaijl)); + NconjtmpJ = NconjtmpJ+(kronecker(ltype,0)*wjl*Sp(Nlj,Nmin,Nmax,dS)); + } + } + + PjiS = 0.0; + dN2PJI[0] = 0.0; + dN2PJI[1] = 0.0; + PjiS = PijSpline(NjiC,NjiH,jtype,itype,dN2PJI); + pji = 1.0/sqrt(1.0+Etmp+PjiS); + tmppji = -.5*pji*pji*pji; + tmp3pji = tmp3; + + // evaluate Nij conj + + Nijconj = 1.0+(NconjtmpI*NconjtmpI)+(NconjtmpJ*NconjtmpJ); + piRC = piRCSpline(NijC+NijH,NjiC+NjiH,Nijconj,itype,jtype,dN3piRC); + Tij = 0.0; + dN3Tij[0] = 0.0; + dN3Tij[1] = 0.0; + dN3Tij[2] = 0.0; + if (itype == 0 && jtype == 0) + Tij=TijSpline((NijC+NijH),(NjiC+NjiH),Nijconj,dN3Tij); + + Etmp = 0.0; + if (fabs(Tij) > TOL) { + REBO_neighs_i = REBO_firstneigh[i]; + for (k = 0; k < REBO_numneigh[i]; k++) { + atomk = REBO_neighs_i[k]; + ktype = map[type[atomk]]; + if (atomk != atomj) { + rik[0] = x[atomi][0]-x[atomk][0]; + rik[1] = x[atomi][1]-x[atomk][1]; + rik[2] = x[atomi][2]-x[atomk][2]; + rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2])); + cos321 = ((rij[0]*rik[0])+(rij[1]*rik[1])+(rij[2]*rik[2])) / + (rijmag*rikmag); + cos321 = MIN(cos321,1.0); + cos321 = MAX(cos321,-1.0); + + rjk[0] = rik[0]-rij[0]; + rjk[1] = rik[1]-rij[1]; + rjk[2] = rik[2]-rij[2]; + rjk2 = (rjk[0]*rjk[0])+(rjk[1]*rjk[1])+(rjk[2]*rjk[2]); + rij2 = rijmag*rijmag; + rik2 = rikmag*rikmag; + costmp = 0.5*(rij2+rik2-rjk2)/rijmag/rikmag; + tspjik = Sp2(costmp,thmin,thmax,dtsjik); + + if (sqrt(1.0 - cos321*cos321) != 0.0) { + wik = Sp(rikmag,rcmin[itype][ktype],rcmaxp[itype][ktype],dwik); + REBO_neighs_j = REBO_firstneigh[j]; + for (l = 0; l < REBO_numneigh[j]; l++) { + atoml = REBO_neighs_j[l]; + ltype = map[type[atoml]]; + if (!(atoml == atomi || atoml == atomk)) { + rjl[0] = x[atomj][0]-x[atoml][0]; + rjl[1] = x[atomj][1]-x[atoml][1]; + rjl[2] = x[atomj][2]-x[atoml][2]; + rjlmag = sqrt(rjl[0]*rjl[0] + rjl[1]*rjl[1] + rjl[2]*rjl[2]); + cos234 = -((rij[0]*rjl[0])+(rij[1]*rjl[1])+(rij[2]*rjl[2])) / + (rijmag*rjlmag); + cos234 = MIN(cos234,1.0); + cos234 = MAX(cos234,-1.0); + + ril[0] = rij[0]+rjl[0]; + ril[1] = rij[1]+rjl[1]; + ril[2] = rij[2]+rjl[2]; + ril2 = (ril[0]*ril[0])+(ril[1]*ril[1])+(ril[2]*ril[2]); + rijrjl = 2.0*rijmag*rjlmag; + rjl2 = rjlmag*rjlmag; + costmp = 0.5*(rij2+rjl2-ril2)/rijmag/rjlmag; + tspijl = Sp2(costmp,thmin,thmax,dtsijl); + + if (sqrt(1.0 - cos234*cos234) != 0.0) { + wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmaxp[jtype][ltype],dS); + crosskij[0] = (rij[1]*rik[2]-rij[2]*rik[1]); + crosskij[1] = (rij[2]*rik[0]-rij[0]*rik[2]); + crosskij[2] = (rij[0]*rik[1]-rij[1]*rik[0]); + crosskijmag = sqrt(crosskij[0]*crosskij[0] + + crosskij[1]*crosskij[1] + + crosskij[2]*crosskij[2]); + crossijl[0] = (rij[1]*rjl[2]-rij[2]*rjl[1]); + crossijl[1] = (rij[2]*rjl[0]-rij[0]*rjl[2]); + crossijl[2] = (rij[0]*rjl[1]-rij[1]*rjl[0]); + crossijlmag = sqrt(crossijl[0]*crossijl[0] + + crossijl[1]*crossijl[1] + + crossijl[2]*crossijl[2]); + omkijl = -1.0*(((crosskij[0]*crossijl[0]) + + (crosskij[1]*crossijl[1]) + + (crosskij[2]*crossijl[2])) / + (crosskijmag*crossijlmag)); + Etmp += ((1.0-(omkijl*omkijl))*wik*wjl) * + (1.0-tspjik)*(1.0-tspijl); + } + } + } + } + } + } + } + + bij = (.5*(pij+pji))+piRC+(Tij*Etmp); + Stb = Sp2(bij,bLJmin[itype][jtype],bLJmax[itype][jtype],dStb); + VA = VA*dStb; + + if (dStb != 0.0) { + tmp = tmppij; + dN2[0] = dN2PIJ[0]; + dN2[1] = dN2PIJ[1]; + tmp3 = tmp3pij; + + const double invrijm = 1.0/rijmag; + const double invrijm2 = invrijm*invrijm; + + // pij forces + + REBO_neighs_i = REBO_firstneigh[i]; + for (k = 0; k < REBO_numneigh[i]; k++) { + atomk = REBO_neighs_i[k]; + if (atomk != atomj) { + lamdajik = 0.0; + rik[0] = x[atomi][0]-x[atomk][0]; + rik[1] = x[atomi][1]-x[atomk][1]; + rik[2] = x[atomi][2]-x[atomk][2]; + rikmag = sqrt(rik[0]*rik[0] + rik[1]*rik[1] + rik[2]*rik[2]); + lamdajik = 4.0*kronecker(itype,1) * + ((rho[ktype][1]-rikmag)-(rho[jtype][1]-rijmag)); + wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dwik); + + const double invrikm = 1.0/rikmag; + const double invrijkm = invrijm*invrikm; + const double invrikm2 = invrikm*invrikm; + + cosjik = ((rij[0]*rik[0])+(rij[1]*rik[1])+(rij[2]*rik[2])) + * invrijkm; + cosjik = MIN(cosjik,1.0); + cosjik = MAX(cosjik,-1.0); + + dcosjikdri[0] = ((rij[0]+rik[0])*invrijkm) - + (cosjik*((rij[0]*invrijm2)+(rik[0]*invrikm2))); + dcosjikdri[1] = ((rij[1]+rik[1])*invrijkm) - + (cosjik*((rij[1]*invrijm2)+(rik[1]*invrikm2))); + dcosjikdri[2] = ((rij[2]+rik[2])*invrijkm) - + (cosjik*((rij[2]*invrijm2)+(rik[2]*invrikm2))); + dcosjikdrk[0] = (-rij[0]*invrijkm) + (cosjik*(rik[0]*invrikm2)); + dcosjikdrk[1] = (-rij[1]*invrijkm) + (cosjik*(rik[1]*invrikm2)); + dcosjikdrk[2] = (-rij[2]*invrijkm) + (cosjik*(rik[2]*invrikm2)); + dcosjikdrj[0] = (-rik[0]*invrijkm) + (cosjik*(rij[0]*invrijm2)); + dcosjikdrj[1] = (-rik[1]*invrijkm) + (cosjik*(rij[1]*invrijm2)); + dcosjikdrj[2] = (-rik[2]*invrijkm) + (cosjik*(rij[2]*invrijm2)); + + g = gSpline(cosjik,(NijC+NijH),itype,&dgdc,&dgdN); + + tmp2 = VA*.5*(tmp*wik*dgdc*exp(lamdajik)); + fj[0] = -tmp2*dcosjikdrj[0]; + fj[1] = -tmp2*dcosjikdrj[1]; + fj[2] = -tmp2*dcosjikdrj[2]; + fi[0] = -tmp2*dcosjikdri[0]; + fi[1] = -tmp2*dcosjikdri[1]; + fi[2] = -tmp2*dcosjikdri[2]; + fk[0] = -tmp2*dcosjikdrk[0]; + fk[1] = -tmp2*dcosjikdrk[1]; + fk[2] = -tmp2*dcosjikdrk[2]; + + tmp2 = VA*.5*(tmp*wik*g*exp(lamdajik)*4.0*kronecker(itype,1)); + fj[0] -= tmp2*(-rij[0]*invrijm); + fj[1] -= tmp2*(-rij[1]*invrijm); + fj[2] -= tmp2*(-rij[2]*invrijm); + fi[0] -= tmp2*((-rik[0]/rikmag)+(rij[0]*invrijm)); + fi[1] -= tmp2*((-rik[1]/rikmag)+(rij[1]*invrijm)); + fi[2] -= tmp2*((-rik[2]/rikmag)+(rij[2]*invrijm)); + fk[0] -= tmp2*(rik[0]/rikmag); + fk[1] -= tmp2*(rik[1]/rikmag); + fk[2] -= tmp2*(rik[2]/rikmag); + + // coordination forces + + // dwik forces + + tmp2 = VA*.5*(tmp*dwik*g*exp(lamdajik))/rikmag; + fi[0] -= tmp2*rik[0]; + fi[1] -= tmp2*rik[1]; + fi[2] -= tmp2*rik[2]; + fk[0] += tmp2*rik[0]; + fk[1] += tmp2*rik[1]; + fk[2] += tmp2*rik[2]; + + // PIJ forces + + tmp2 = VA*.5*(tmp*dN2[ktype]*dwik)/rikmag; + fi[0] -= tmp2*rik[0]; + fi[1] -= tmp2*rik[1]; + fi[2] -= tmp2*rik[2]; + fk[0] += tmp2*rik[0]; + fk[1] += tmp2*rik[1]; + fk[2] += tmp2*rik[2]; + + // dgdN forces + + tmp2 = VA*.5*(tmp*tmp3*dwik)/rikmag; + fi[0] -= tmp2*rik[0]; + fi[1] -= tmp2*rik[1]; + fi[2] -= tmp2*rik[2]; + fk[0] += tmp2*rik[0]; + fk[1] += tmp2*rik[1]; + fk[2] += tmp2*rik[2]; + + f[atomi][0] += fi[0]; f[atomi][1] += fi[1]; f[atomi][2] += fi[2]; + f[atomj][0] += fj[0]; f[atomj][1] += fj[1]; f[atomj][2] += fj[2]; + f[atomk][0] += fk[0]; f[atomk][1] += fk[1]; f[atomk][2] += fk[2]; + + if (vflag_atom) { + rji[0] = -rij[0]; rji[1] = -rij[1]; rji[2] = -rij[2]; + rki[0] = -rik[0]; rki[1] = -rik[1]; rki[2] = -rik[2]; + v_tally3_thr(atomi,atomj,atomk,fj,fk,rji,rki,thr); + } + } + } + + tmp = tmppji; + tmp3 = tmp3pji; + dN2[0] = dN2PJI[0]; + dN2[1] = dN2PJI[1]; + REBO_neighs = REBO_firstneigh[j]; + for (l = 0; l < REBO_numneigh[j]; l++) { + atoml = REBO_neighs[l]; + if (atoml !=atomi) { + ltype = map[type[atoml]]; + rjl[0] = x[atomj][0]-x[atoml][0]; + rjl[1] = x[atomj][1]-x[atoml][1]; + rjl[2] = x[atomj][2]-x[atoml][2]; + rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2])); + lamdaijl = 4.0*kronecker(jtype,1) * + ((rho[ltype][1]-rjlmag)-(rho[itype][1]-rijmag)); + wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dwjl); + + const double invrjlm = 1.0/rjlmag; + const double invrijlm = invrijm*invrjlm; + const double invrjlm2 = invrjlm*invrjlm; + + cosijl = (-1.0*((rij[0]*rjl[0])+(rij[1]*rjl[1])+(rij[2]*rjl[2]))) * + invrijlm; + cosijl = MIN(cosijl,1.0); + cosijl = MAX(cosijl,-1.0); + + dcosijldri[0] = (-rjl[0]*invrijlm) - (cosijl*rij[0]*invrijm2); + dcosijldri[1] = (-rjl[1]*invrijlm) - (cosijl*rij[1]*invrijm2); + dcosijldri[2] = (-rjl[2]*invrijlm) - (cosijl*rij[2]*invrijm2); + dcosijldrj[0] = ((-rij[0]+rjl[0])*invrijlm) + + (cosijl*((rij[0]*invrijm2)-(rjl[0]*invrjlm2))); + dcosijldrj[1] = ((-rij[1]+rjl[1])*invrijlm) + + (cosijl*((rij[1]*invrijm2)-(rjl[1]*invrjlm2))); + dcosijldrj[2] = ((-rij[2]+rjl[2])*invrijlm) + + (cosijl*((rij[2]*invrijm2)-(rjl[2]*invrjlm2))); + dcosijldrl[0] = (rij[0]*invrijlm) + (cosijl*rjl[0]*invrjlm2); + dcosijldrl[1] = (rij[1]*invrijlm) + (cosijl*rjl[1]*invrjlm2); + dcosijldrl[2] = (rij[2]*invrijlm) + (cosijl*rjl[2]*invrjlm2); + + // evaluate splines g and derivatives dg + + g = gSpline(cosijl,NjiC+NjiH,jtype,&dgdc,&dgdN); + tmp2 = VA*.5*(tmp*wjl*dgdc*exp(lamdaijl)); + fi[0] = -tmp2*dcosijldri[0]; + fi[1] = -tmp2*dcosijldri[1]; + fi[2] = -tmp2*dcosijldri[2]; + fj[0] = -tmp2*dcosijldrj[0]; + fj[1] = -tmp2*dcosijldrj[1]; + fj[2] = -tmp2*dcosijldrj[2]; + fl[0] = -tmp2*dcosijldrl[0]; + fl[1] = -tmp2*dcosijldrl[1]; + fl[2] = -tmp2*dcosijldrl[2]; + + tmp2 = VA*.5*(tmp*wjl*g*exp(lamdaijl)*4.0*kronecker(jtype,1)); + fi[0] -= tmp2*(rij[0]*invrijm); + fi[1] -= tmp2*(rij[1]*invrijm); + fi[2] -= tmp2*(rij[2]*invrijm); + fj[0] -= tmp2*((-rjl[0]*invrjlm)-(rij[0]*invrijm)); + fj[1] -= tmp2*((-rjl[1]*invrjlm)-(rij[1]*invrijm)); + fj[2] -= tmp2*((-rjl[2]*invrjlm)-(rij[2]*invrijm)); + fl[0] -= tmp2*(rjl[0]*invrjlm); + fl[1] -= tmp2*(rjl[1]*invrjlm); + fl[2] -= tmp2*(rjl[2]*invrjlm); + + // coordination forces + // dwik forces + + tmp2 = VA*.5*(tmp*dwjl*g*exp(lamdaijl))*invrjlm; + fj[0] -= tmp2*rjl[0]; + fj[1] -= tmp2*rjl[1]; + fj[2] -= tmp2*rjl[2]; + fl[0] += tmp2*rjl[0]; + fl[1] += tmp2*rjl[1]; + fl[2] += tmp2*rjl[2]; + + // PIJ forces + + tmp2 = VA*.5*(tmp*dN2[ltype]*dwjl)*invrjlm; + fj[0] -= tmp2*rjl[0]; + fj[1] -= tmp2*rjl[1]; + fj[2] -= tmp2*rjl[2]; + fl[0] += tmp2*rjl[0]; + fl[1] += tmp2*rjl[1]; + fl[2] += tmp2*rjl[2]; + + // dgdN forces + + tmp2=VA*.5*(tmp*tmp3*dwjl)*invrjlm; + fj[0] -= tmp2*rjl[0]; + fj[1] -= tmp2*rjl[1]; + fj[2] -= tmp2*rjl[2]; + fl[0] += tmp2*rjl[0]; + fl[1] += tmp2*rjl[1]; + fl[2] += tmp2*rjl[2]; + + f[atomi][0] += fi[0]; f[atomi][1] += fi[1]; f[atomi][2] += fi[2]; + f[atomj][0] += fj[0]; f[atomj][1] += fj[1]; f[atomj][2] += fj[2]; + f[atoml][0] += fl[0]; f[atoml][1] += fl[1]; f[atoml][2] += fl[2]; + + if (vflag_atom) { + rlj[0] = -rjl[0]; rlj[1] = -rjl[1]; rlj[2] = -rjl[2]; + v_tally3_thr(atomi,atomj,atoml,fi,fl,rij,rlj,thr); + } + } + } + + // piRC forces + + dN3[0] = dN3piRC[0]; + dN3[1] = dN3piRC[1]; + dN3[2] = dN3piRC[2]; + + REBO_neighs_i = REBO_firstneigh[i]; + for (k = 0; k < REBO_numneigh[i]; k++) { + atomk = REBO_neighs_i[k]; + if (atomk != atomj) { + ktype = map[type[atomk]]; + rik[0] = x[atomi][0]-x[atomk][0]; + rik[1] = x[atomi][1]-x[atomk][1]; + rik[2] = x[atomi][2]-x[atomk][2]; + rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2])); + wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dwik); + Nki = nC[atomk]-(wik*kronecker(itype,0))+nH[atomk] - + (wik*kronecker(itype,1)); + SpN = Sp(Nki,Nmin,Nmax,dNki); + + tmp2 = VA*dN3[0]*dwik/rikmag; + f[atomi][0] -= tmp2*rik[0]; + f[atomi][1] -= tmp2*rik[1]; + f[atomi][2] -= tmp2*rik[2]; + f[atomk][0] += tmp2*rik[0]; + f[atomk][1] += tmp2*rik[1]; + f[atomk][2] += tmp2*rik[2]; + + if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpI*dwik*SpN)/rikmag; + f[atomi][0] -= tmp2*rik[0]; + f[atomi][1] -= tmp2*rik[1]; + f[atomi][2] -= tmp2*rik[2]; + f[atomk][0] += tmp2*rik[0]; + f[atomk][1] += tmp2*rik[1]; + f[atomk][2] += tmp2*rik[2]; + + if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr); + + if (fabs(dNki) > TOL) { + REBO_neighs_k = REBO_firstneigh[atomk]; + for (n = 0; n < REBO_numneigh[atomk]; n++) { + atomn = REBO_neighs_k[n]; + if (atomn != atomi) { + ntype = map[type[atomn]]; + rkn[0] = x[atomk][0]-x[atomn][0]; + rkn[1] = x[atomk][1]-x[atomn][1]; + rkn[2] = x[atomk][2]-x[atomn][2]; + rknmag = sqrt((rkn[0]*rkn[0])+(rkn[1]*rkn[1])+(rkn[2]*rkn[2])); + Sp(rknmag,rcmin[ktype][ntype],rcmax[ktype][ntype],dwkn); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpI*wik*dNki*dwkn)/rknmag; + f[atomk][0] -= tmp2*rkn[0]; + f[atomk][1] -= tmp2*rkn[1]; + f[atomk][2] -= tmp2*rkn[2]; + f[atomn][0] += tmp2*rkn[0]; + f[atomn][1] += tmp2*rkn[1]; + f[atomn][2] += tmp2*rkn[2]; + + if (vflag_atom) v_tally2_thr(atomk,atomn,-tmp2,rkn,thr); + } + } + } + } + } + + // piRC forces to J side + + REBO_neighs = REBO_firstneigh[j]; + for (l = 0; l < REBO_numneigh[j]; l++) { + atoml = REBO_neighs[l]; + if (atoml != atomi) { + ltype = map[type[atoml]]; + rjl[0] = x[atomj][0]-x[atoml][0]; + rjl[1] = x[atomj][1]-x[atoml][1]; + rjl[2] = x[atomj][2]-x[atoml][2]; + rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2])); + wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dwjl); + Nlj = nC[atoml]-(wjl*kronecker(jtype,0))+nH[atoml] - + (wjl*kronecker(jtype,1)); + SpN = Sp(Nlj,Nmin,Nmax,dNlj); + + tmp2 = VA*dN3[1]*dwjl/rjlmag; + f[atomj][0] -= tmp2*rjl[0]; + f[atomj][1] -= tmp2*rjl[1]; + f[atomj][2] -= tmp2*rjl[2]; + f[atoml][0] += tmp2*rjl[0]; + f[atoml][1] += tmp2*rjl[1]; + f[atoml][2] += tmp2*rjl[2]; + + if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*dwjl*SpN)/rjlmag; + f[atomj][0] -= tmp2*rjl[0]; + f[atomj][1] -= tmp2*rjl[1]; + f[atomj][2] -= tmp2*rjl[2]; + f[atoml][0] += tmp2*rjl[0]; + f[atoml][1] += tmp2*rjl[1]; + f[atoml][2] += tmp2*rjl[2]; + + if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr); + + if (fabs(dNlj) > TOL) { + REBO_neighs_l = REBO_firstneigh[atoml]; + for (n = 0; n < REBO_numneigh[atoml]; n++) { + atomn = REBO_neighs_l[n]; + if (atomn != atomj) { + ntype = map[type[atomn]]; + rln[0] = x[atoml][0]-x[atomn][0]; + rln[1] = x[atoml][1]-x[atomn][1]; + rln[2] = x[atoml][2]-x[atomn][2]; + rlnmag = sqrt((rln[0]*rln[0])+(rln[1]*rln[1])+(rln[2]*rln[2])); + Sp(rlnmag,rcmin[ltype][ntype],rcmax[ltype][ntype],dwln); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*wjl*dNlj*dwln)/rlnmag; + f[atoml][0] -= tmp2*rln[0]; + f[atoml][1] -= tmp2*rln[1]; + f[atoml][2] -= tmp2*rln[2]; + f[atomn][0] += tmp2*rln[0]; + f[atomn][1] += tmp2*rln[1]; + f[atomn][2] += tmp2*rln[2]; + + if (vflag_atom) v_tally2_thr(atoml,atomn,-tmp2,rln,thr); + } + } + } + } + } + + if (fabs(Tij) > TOL) { + dN3[0] = dN3Tij[0]; + dN3[1] = dN3Tij[1]; + dN3[2] = dN3Tij[2]; + atom2 = atomi; + atom3 = atomj; + r32[0] = x[atom3][0]-x[atom2][0]; + r32[1] = x[atom3][1]-x[atom2][1]; + r32[2] = x[atom3][2]-x[atom2][2]; + r32mag = sqrt((r32[0]*r32[0])+(r32[1]*r32[1])+(r32[2]*r32[2])); + r23[0] = -r32[0]; + r23[1] = -r32[1]; + r23[2] = -r32[2]; + r23mag = r32mag; + + REBO_neighs_i = REBO_firstneigh[i]; + for (k = 0; k < REBO_numneigh[i]; k++) { + atomk = REBO_neighs_i[k]; + atom1 = atomk; + ktype = map[type[atomk]]; + if (atomk != atomj) { + r21[0] = x[atom2][0]-x[atom1][0]; + r21[1] = x[atom2][1]-x[atom1][1]; + r21[2] = x[atom2][2]-x[atom1][2]; + r21mag = sqrt(r21[0]*r21[0] + r21[1]*r21[1] + r21[2]*r21[2]); + cos321 = ((r21[0]*rij[0])+(r21[1]*rij[1])+(r21[2]*rij[2])) / + (r21mag*rijmag); + cos321 = MIN(cos321,1.0); + cos321 = MAX(cos321,-1.0); + sin321 = sqrt(1.0 - cos321*cos321); + sink2i = 1.0/(sin321*sin321); + rik2i = 1.0/(r21mag*r21mag); + + if (sin321 != 0.0) { + rr = (rijmag*rijmag)-(r21mag*r21mag); + rjk[0] = r21[0]-rij[0]; + rjk[1] = r21[1]-rij[1]; + rjk[2] = r21[2]-rij[2]; + rjk2 = (rjk[0]*rjk[0])+(rjk[1]*rjk[1])+(rjk[2]*rjk[2]); + rijrik = 2.0*rijmag*r21mag; + rik2 = r21mag*r21mag; + dctik = (-rr+rjk2)/(rijrik*rik2); + dctij = (rr+rjk2)/(rijrik*rijmag*rijmag); + dctjk = -2.0/rijrik; + w21 = Sp(r21mag,rcmin[itype][ktype],rcmaxp[itype][ktype],dw21); + rikmag = r21mag; + rij2 = r32mag*r32mag; + rik2 = r21mag*r21mag; + costmp = 0.5*(rij2+rik2-rjk2)/rijmag/rikmag; + tspjik = Sp2(costmp,thmin,thmax,dtsjik); + dtsjik = -dtsjik; + + REBO_neighs_j = REBO_firstneigh[j]; + for (l = 0; l < REBO_numneigh[j]; l++) { + atoml = REBO_neighs_j[l]; + atom4 = atoml; + ltype = map[type[atoml]]; + if (!(atoml == atomi || atoml == atomk)) { + r34[0] = x[atom3][0]-x[atom4][0]; + r34[1] = x[atom3][1]-x[atom4][1]; + r34[2] = x[atom3][2]-x[atom4][2]; + r34mag = sqrt(r34[0]*r34[0] + r34[1]*r34[1] + r34[2]*r34[2]); + cos234 = -1.0*((rij[0]*r34[0])+(rij[1]*r34[1]) + + (rij[2]*r34[2]))/(rijmag*r34mag); + cos234 = MIN(cos234,1.0); + cos234 = MAX(cos234,-1.0); + sin234 = sqrt(1.0 - cos234*cos234); + sinl2i = 1.0/(sin234*sin234); + rjl2i = 1.0/(r34mag*r34mag); + + if (sin234 != 0.0) { + w34 = Sp(r34mag,rcmin[jtype][ltype], + rcmaxp[jtype][ltype],dw34); + rr = (r23mag*r23mag)-(r34mag*r34mag); + ril[0] = r23[0]+r34[0]; + ril[1] = r23[1]+r34[1]; + ril[2] = r23[2]+r34[2]; + ril2 = (ril[0]*ril[0])+(ril[1]*ril[1])+(ril[2]*ril[2]); + rijrjl = 2.0*r23mag*r34mag; + rjl2 = r34mag*r34mag; + dctjl = (-rr+ril2)/(rijrjl*rjl2); + dctji = (rr+ril2)/(rijrjl*r23mag*r23mag); + dctil = -2.0/rijrjl; + rjlmag = r34mag; + rjl2 = r34mag*r34mag; + costmp = 0.5*(rij2+rjl2-ril2)/rijmag/rjlmag; + tspijl = Sp2(costmp,thmin,thmax,dtsijl); + dtsijl = -dtsijl; //need minus sign + prefactor = VA*Tij; + + cross321[0] = (r32[1]*r21[2])-(r32[2]*r21[1]); + cross321[1] = (r32[2]*r21[0])-(r32[0]*r21[2]); + cross321[2] = (r32[0]*r21[1])-(r32[1]*r21[0]); + cross234[0] = (r23[1]*r34[2])-(r23[2]*r34[1]); + cross234[1] = (r23[2]*r34[0])-(r23[0]*r34[2]); + cross234[2] = (r23[0]*r34[1])-(r23[1]*r34[0]); + + cwnum = (cross321[0]*cross234[0]) + + (cross321[1]*cross234[1])+(cross321[2]*cross234[2]); + cwnom = r21mag*r34mag*r23mag*r23mag*sin321*sin234; + om1234 = cwnum/cwnom; + cw = om1234; + Etmp += ((1.0-(om1234*om1234))*w21*w34) * + (1.0-tspjik)*(1.0-tspijl); + + dt1dik = (rik2i)-(dctik*sink2i*cos321); + dt1djk = (-dctjk*sink2i*cos321); + dt1djl = (rjl2i)-(dctjl*sinl2i*cos234); + dt1dil = (-dctil*sinl2i*cos234); + dt1dij = (2.0/(r23mag*r23mag)) - + (dctij*sink2i*cos321)-(dctji*sinl2i*cos234); + + dt2dik[0] = (-r23[2]*cross234[1])+(r23[1]*cross234[2]); + dt2dik[1] = (-r23[0]*cross234[2])+(r23[2]*cross234[0]); + dt2dik[2] = (-r23[1]*cross234[0])+(r23[0]*cross234[1]); + + dt2djl[0] = (-r23[1]*cross321[2])+(r23[2]*cross321[1]); + dt2djl[1] = (-r23[2]*cross321[0])+(r23[0]*cross321[2]); + dt2djl[2] = (-r23[0]*cross321[1])+(r23[1]*cross321[0]); + + dt2dij[0] = (r21[2]*cross234[1]) - + (r34[2]*cross321[1])-(r21[1]*cross234[2]) + + (r34[1]*cross321[2]); + dt2dij[1] = (r21[0]*cross234[2]) - + (r34[0]*cross321[2])-(r21[2]*cross234[0]) + + (r34[2]*cross321[0]); + dt2dij[2] = (r21[1]*cross234[0]) - + (r34[1]*cross321[0])-(r21[0]*cross234[1]) + + (r34[0]*cross321[1]); + + aa = (prefactor*2.0*cw/cwnom)*w21*w34 * + (1.0-tspjik)*(1.0-tspijl); + aaa1 = -prefactor*(1.0-(om1234*om1234)) * + (1.0-tspjik)*(1.0-tspijl); + aaa2 = aaa1*w21*w34; + at2 = aa*cwnum; + + fcijpc = (-dt1dij*at2)+(aaa2*dtsjik*dctij*(1.0-tspijl)) + + (aaa2*dtsijl*dctji*(1.0-tspjik)); + fcikpc = (-dt1dik*at2)+(aaa2*dtsjik*dctik*(1.0-tspijl)); + fcjlpc = (-dt1djl*at2)+(aaa2*dtsijl*dctjl*(1.0-tspjik)); + fcjkpc = (-dt1djk*at2)+(aaa2*dtsjik*dctjk*(1.0-tspijl)); + fcilpc = (-dt1dil*at2)+(aaa2*dtsijl*dctil*(1.0-tspjik)); + + F23[0] = (fcijpc*r23[0])+(aa*dt2dij[0]); + F23[1] = (fcijpc*r23[1])+(aa*dt2dij[1]); + F23[2] = (fcijpc*r23[2])+(aa*dt2dij[2]); + + F12[0] = (fcikpc*r21[0])+(aa*dt2dik[0]); + F12[1] = (fcikpc*r21[1])+(aa*dt2dik[1]); + F12[2] = (fcikpc*r21[2])+(aa*dt2dik[2]); + + F34[0] = (fcjlpc*r34[0])+(aa*dt2djl[0]); + F34[1] = (fcjlpc*r34[1])+(aa*dt2djl[1]); + F34[2] = (fcjlpc*r34[2])+(aa*dt2djl[2]); + + F31[0] = (fcjkpc*rjk[0]); + F31[1] = (fcjkpc*rjk[1]); + F31[2] = (fcjkpc*rjk[2]); + + F24[0] = (fcilpc*ril[0]); + F24[1] = (fcilpc*ril[1]); + F24[2] = (fcilpc*ril[2]); + + f1[0] = -F12[0]-F31[0]; + f1[1] = -F12[1]-F31[1]; + f1[2] = -F12[2]-F31[2]; + f2[0] = F23[0]+F12[0]+F24[0]; + f2[1] = F23[1]+F12[1]+F24[1]; + f2[2] = F23[2]+F12[2]+F24[2]; + f3[0] = -F23[0]+F34[0]+F31[0]; + f3[1] = -F23[1]+F34[1]+F31[1]; + f3[2] = -F23[2]+F34[2]+F31[2]; + f4[0] = -F34[0]-F24[0]; + f4[1] = -F34[1]-F24[1]; + f4[2] = -F34[2]-F24[2]; + + // coordination forces + + tmp2 = VA*Tij*((1.0-(om1234*om1234))) * + (1.0-tspjik)*(1.0-tspijl)*dw21*w34/r21mag; + f2[0] -= tmp2*r21[0]; + f2[1] -= tmp2*r21[1]; + f2[2] -= tmp2*r21[2]; + f1[0] += tmp2*r21[0]; + f1[1] += tmp2*r21[1]; + f1[2] += tmp2*r21[2]; + + tmp2 = VA*Tij*((1.0-(om1234*om1234))) * + (1.0-tspjik)*(1.0-tspijl)*w21*dw34/r34mag; + f3[0] -= tmp2*r34[0]; + f3[1] -= tmp2*r34[1]; + f3[2] -= tmp2*r34[2]; + f4[0] += tmp2*r34[0]; + f4[1] += tmp2*r34[1]; + f4[2] += tmp2*r34[2]; + + f[atom1][0] += f1[0]; f[atom1][1] += f1[1]; + f[atom1][2] += f1[2]; + f[atom2][0] += f2[0]; f[atom2][1] += f2[1]; + f[atom2][2] += f2[2]; + f[atom3][0] += f3[0]; f[atom3][1] += f3[1]; + f[atom3][2] += f3[2]; + f[atom4][0] += f4[0]; f[atom4][1] += f4[1]; + f[atom4][2] += f4[2]; + + if (vflag_atom) { + r13[0] = -rjk[0]; r13[1] = -rjk[1]; r13[2] = -rjk[2]; + r43[0] = -r34[0]; r43[1] = -r34[1]; r43[2] = -r34[2]; + v_tally4_thr(atom1,atom2,atom3,atom4,f1,f2,f4,r13,r23,r43,thr); + } + } + } + } + } + } + } + + REBO_neighs = REBO_firstneigh[i]; + for (k = 0; k < REBO_numneigh[i]; k++) { + atomk = REBO_neighs[k]; + if (atomk != atomj) { + ktype = map[type[atomk]]; + rik[0] = x[atomi][0]-x[atomk][0]; + rik[1] = x[atomi][1]-x[atomk][1]; + rik[2] = x[atomi][2]-x[atomk][2]; + rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2])); + wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dwik); + Nki = nC[atomk]-(wik*kronecker(itype,0))+nH[atomk] - + (wik*kronecker(itype,1)); + SpN = Sp(Nki,Nmin,Nmax,dNki); + + tmp2 = VA*dN3[0]*dwik*Etmp/rikmag; + f[atomi][0] -= tmp2*rik[0]; + f[atomi][1] -= tmp2*rik[1]; + f[atomi][2] -= tmp2*rik[2]; + f[atomk][0] += tmp2*rik[0]; + f[atomk][1] += tmp2*rik[1]; + f[atomk][2] += tmp2*rik[2]; + + if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpI*dwik*SpN)*Etmp/rikmag; + f[atomi][0] -= tmp2*rik[0]; + f[atomi][1] -= tmp2*rik[1]; + f[atomi][2] -= tmp2*rik[2]; + f[atomk][0] += tmp2*rik[0]; + f[atomk][1] += tmp2*rik[1]; + f[atomk][2] += tmp2*rik[2]; + + if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr); + + if (fabs(dNki) >TOL) { + REBO_neighs_k = REBO_firstneigh[atomk]; + for (n = 0; n < REBO_numneigh[atomk]; n++) { + atomn = REBO_neighs_k[n]; + ntype = map[type[atomn]]; + if (atomn !=atomi) { + rkn[0] = x[atomk][0]-x[atomn][0]; + rkn[1] = x[atomk][1]-x[atomn][1]; + rkn[2] = x[atomk][2]-x[atomn][2]; + rknmag = sqrt((rkn[0]*rkn[0])+(rkn[1]*rkn[1])+(rkn[2]*rkn[2])); + Sp(rknmag,rcmin[ktype][ntype],rcmax[ktype][ntype],dwkn); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpI*wik*dNki*dwkn)*Etmp/rknmag; + f[atomk][0] -= tmp2*rkn[0]; + f[atomk][1] -= tmp2*rkn[1]; + f[atomk][2] -= tmp2*rkn[2]; + f[atomn][0] += tmp2*rkn[0]; + f[atomn][1] += tmp2*rkn[1]; + f[atomn][2] += tmp2*rkn[2]; + + if (vflag_atom) v_tally2_thr(atomk,atomn,-tmp2,rkn,thr); + } + } + } + } + } + + // Tij forces + + REBO_neighs = REBO_firstneigh[j]; + for (l = 0; l < REBO_numneigh[j]; l++) { + atoml = REBO_neighs[l]; + if (atoml != atomi) { + ltype = map[type[atoml]]; + rjl[0] = x[atomj][0]-x[atoml][0]; + rjl[1] = x[atomj][1]-x[atoml][1]; + rjl[2] = x[atomj][2]-x[atoml][2]; + rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2])); + wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dwjl); + Nlj = nC[atoml]-(wjl*kronecker(jtype,0))+nH[atoml] - + (wjl*kronecker(jtype,1)); + SpN = Sp(Nlj,Nmin,Nmax,dNlj); + + tmp2 = VA*dN3[1]*dwjl*Etmp/rjlmag; + f[atomj][0] -= tmp2*rjl[0]; + f[atomj][1] -= tmp2*rjl[1]; + f[atomj][2] -= tmp2*rjl[2]; + f[atoml][0] += tmp2*rjl[0]; + f[atoml][1] += tmp2*rjl[1]; + f[atoml][2] += tmp2*rjl[2]; + + if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*dwjl*SpN)*Etmp/rjlmag; + f[atomj][0] -= tmp2*rjl[0]; + f[atomj][1] -= tmp2*rjl[1]; + f[atomj][2] -= tmp2*rjl[2]; + f[atoml][0] += tmp2*rjl[0]; + f[atoml][1] += tmp2*rjl[1]; + f[atoml][2] += tmp2*rjl[2]; + + if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr); + + if (fabs(dNlj) > TOL) { + REBO_neighs_l = REBO_firstneigh[atoml]; + for (n = 0; n < REBO_numneigh[atoml]; n++) { + atomn = REBO_neighs_l[n]; + ntype = map[type[atomn]]; + if (atomn != atomj) { + rln[0] = x[atoml][0]-x[atomn][0]; + rln[1] = x[atoml][1]-x[atomn][1]; + rln[2] = x[atoml][2]-x[atomn][2]; + rlnmag = sqrt((rln[0]*rln[0])+(rln[1]*rln[1])+(rln[2]*rln[2])); + Sp(rlnmag,rcmin[ltype][ntype],rcmax[ltype][ntype],dwln); + + tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*wjl*dNlj*dwln)*Etmp/rlnmag; + f[atoml][0] -= tmp2*rln[0]; + f[atoml][1] -= tmp2*rln[1]; + f[atoml][2] -= tmp2*rln[2]; + f[atomn][0] += tmp2*rln[0]; + f[atomn][1] += tmp2*rln[1]; + f[atomn][2] += tmp2*rln[2]; + + if (vflag_atom) v_tally2_thr(atoml,atomn,-tmp2,rln,thr); + } + } + } + } + } + } + } + + return Stb; +} + +/* ---------------------------------------------------------------------- + REBO forces and energy +------------------------------------------------------------------------- */ + +void PairAIREBOOMP::FREBO_thr(int ifrom, int ito, int evflag, int eflag, + int vflag_atom, ThrData * const thr) +{ + int i,j,k,m,ii,itype,jtype,itag,jtag; + double delx,dely,delz,evdwl,fpair,xtmp,ytmp,ztmp; + double rsq,rij,wij; + double Qij,Aij,alphaij,VR,pre,dVRdi,VA,term,bij,dVAdi,dVA; + double dwij,del[3]; + int *ilist,*REBO_neighs; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + int *type = atom->type; + int *tag = atom->tag; + int nlocal = atom->nlocal; + + ilist = list->ilist; + + // two-body interactions from REBO neighbor list, skip half of them + + for (ii = ifrom; ii < ito; ii++) { + i = ilist[ii]; + itag = tag[i]; + itype = map[type[i]]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + REBO_neighs = REBO_firstneigh[i]; + + for (k = 0; k < REBO_numneigh[i]; k++) { + j = REBO_neighs[k]; + jtag = tag[j]; + + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (x[j][2] < ztmp) continue; + if (x[j][2] == ztmp && x[j][1] < ytmp) continue; + if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue; + } + + jtype = map[type[j]]; + + delx = x[i][0] - x[j][0]; + dely = x[i][1] - x[j][1]; + delz = x[i][2] - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + rij = sqrt(rsq); + wij = Sp(rij,rcmin[itype][jtype],rcmax[itype][jtype],dwij); + if (wij <= TOL) continue; + + Qij = Q[itype][jtype]; + Aij = A[itype][jtype]; + alphaij = alpha[itype][jtype]; + + VR = wij*(1.0+(Qij/rij)) * Aij*exp(-alphaij*rij); + pre = wij*Aij * exp(-alphaij*rij); + dVRdi = pre * ((-alphaij)-(Qij/rsq)-(Qij*alphaij/rij)); + dVRdi += VR/wij * dwij; + + VA = dVA = 0.0; + for (m = 0; m < 3; m++) { + term = -wij * BIJc[itype][jtype][m] * exp(-Beta[itype][jtype][m]*rij); + VA += term; + dVA += -Beta[itype][jtype][m] * term; + } + dVA += VA/wij * dwij; + del[0] = delx; + del[1] = dely; + del[2] = delz; + bij = bondorder_thr(i,j,del,rij,VA,vflag_atom,thr); + dVAdi = bij*dVA; + + fpair = -(dVRdi+dVAdi) / rij; + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + + if (eflag) evdwl = VR + bij*VA; + if (evflag) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } +} + +/* ---------------------------------------------------------------------- + compute LJ forces and energy + find 3- and 4-step paths between atoms I,J via REBO neighbor lists +------------------------------------------------------------------------- */ + +void PairAIREBOOMP::FLJ_thr(int ifrom, int ito, int evflag, int eflag, + int vflag_atom, ThrData * const thr) +{ + int i,j,k,m,ii,jj,kk,mm,jnum,itype,jtype,ktype,mtype,itag,jtag; + int atomi,atomj,atomk,atomm; + int testpath,npath,done; + double evdwl,fpair,xtmp,ytmp,ztmp; + double rsq,best,wik,wkm,cij,rij,dwij,dwik,dwkj,dwkm,dwmj; + double delij[3],rijsq,delik[3],rik,deljk[3]; + double rkj,wkj,dC,VLJ,dVLJ,VA,Str,dStr,Stb; + double vdw,slw,dvdw,dslw,drij,swidth,tee,tee2; + double rljmin,rljmax,sigcut,sigmin,sigwid; + double delkm[3],rkm,deljm[3],rmj,wmj,r2inv,r6inv,scale,delscale[3]; + int *ilist,*jlist,*numneigh,**firstneigh; + int *REBO_neighs_i,*REBO_neighs_k; + double delikS[3],deljkS[3],delkmS[3],deljmS[3],delimS[3]; + double rikS,rkjS,rkmS,rmjS,wikS,dwikS; + double wkjS,dwkjS,wkmS,dwkmS,wmjS,dwmjS; + double fpair1,fpair2,fpair3; + double fi[3],fj[3],fk[3],fm[3]; + + // I-J interaction from full neighbor list + // skip 1/2 of interactions since only consider each pair once + + evdwl = 0.0; + rljmin = 0.0; + rljmax = 0.0; + sigcut = 0.0; + sigmin = 0.0; + sigwid = 0.0; + + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + int *tag = atom->tag; + int *type = atom->type; + int nlocal = atom->nlocal; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = ifrom; ii < ito; ii++) { + i = ilist[ii]; + itag = tag[i]; + itype = map[type[i]]; + atomi = i; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtag = tag[j]; + + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (x[j][2] < ztmp) continue; + if (x[j][2] == ztmp && x[j][1] < ytmp) continue; + if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue; + } + + jtype = map[type[j]]; + atomj = j; + + delij[0] = xtmp - x[j][0]; + delij[1] = ytmp - x[j][1]; + delij[2] = ztmp - x[j][2]; + rijsq = delij[0]*delij[0] + delij[1]*delij[1] + delij[2]*delij[2]; + + // if outside of LJ cutoff, skip + // if outside of 4-path cutoff, best = 0.0, no need to test paths + // if outside of 2-path cutoff but inside 4-path cutoff, + // best = 0.0, test 3-,4-paths + // if inside 2-path cutoff, best = wij, only test 3-,4-paths if best < 1 + + if (rijsq >= cutljsq[itype][jtype]) continue; + rij = sqrt(rijsq); + if (rij >= cut3rebo) { + best = 0.0; + testpath = 0; + } else if (rij >= rcmax[itype][jtype]) { + best = 0.0; + testpath = 1; + } else { + best = Sp(rij,rcmin[itype][jtype],rcmax[itype][jtype],dwij); + npath = 2; + if (best < 1.0) testpath = 1; + else testpath = 0; + } + + done = 0; + if (testpath) { + + // test all 3-body paths = I-K-J + // I-K interactions come from atom I's REBO neighbors + // if wik > current best, compute wkj + // if best = 1.0, done + + REBO_neighs_i = REBO_firstneigh[i]; + for (kk = 0; kk < REBO_numneigh[i] && done==0; kk++) { + k = REBO_neighs_i[kk]; + if (k == j) continue; + ktype = map[type[k]]; + + delik[0] = x[i][0] - x[k][0]; + delik[1] = x[i][1] - x[k][1]; + delik[2] = x[i][2] - x[k][2]; + rsq = delik[0]*delik[0] + delik[1]*delik[1] + delik[2]*delik[2]; + if (rsq < rcmaxsq[itype][ktype]) { + rik = sqrt(rsq); + wik = Sp(rik,rcmin[itype][ktype],rcmax[itype][ktype],dwik); + } else wik = 0.0; + + if (wik > best) { + deljk[0] = x[j][0] - x[k][0]; + deljk[1] = x[j][1] - x[k][1]; + deljk[2] = x[j][2] - x[k][2]; + rsq = deljk[0]*deljk[0] + deljk[1]*deljk[1] + deljk[2]*deljk[2]; + if (rsq < rcmaxsq[ktype][jtype]) { + rkj = sqrt(rsq); + wkj = Sp(rkj,rcmin[ktype][jtype],rcmax[ktype][jtype],dwkj); + if (wik*wkj > best) { + best = wik*wkj; + npath = 3; + atomk = k; + delikS[0] = delik[0]; + delikS[1] = delik[1]; + delikS[2] = delik[2]; + rikS = rik; + wikS = wik; + dwikS = dwik; + deljkS[0] = deljk[0]; + deljkS[1] = deljk[1]; + deljkS[2] = deljk[2]; + rkjS = rkj; + wkjS = wkj; + dwkjS = dwkj; + if (best == 1.0) { + done = 1; + break; + } + } + } + + // test all 4-body paths = I-K-M-J + // K-M interactions come from atom K's REBO neighbors + // if wik*wkm > current best, compute wmj + // if best = 1.0, done + + REBO_neighs_k = REBO_firstneigh[k]; + for (mm = 0; mm < REBO_numneigh[k] && done==0; mm++) { + m = REBO_neighs_k[mm]; + if (m == i || m == j) continue; + mtype = map[type[m]]; + delkm[0] = x[k][0] - x[m][0]; + delkm[1] = x[k][1] - x[m][1]; + delkm[2] = x[k][2] - x[m][2]; + rsq = delkm[0]*delkm[0] + delkm[1]*delkm[1] + delkm[2]*delkm[2]; + if (rsq < rcmaxsq[ktype][mtype]) { + rkm = sqrt(rsq); + wkm = Sp(rkm,rcmin[ktype][mtype],rcmax[ktype][mtype],dwkm); + } else wkm = 0.0; + + if (wik*wkm > best) { + deljm[0] = x[j][0] - x[m][0]; + deljm[1] = x[j][1] - x[m][1]; + deljm[2] = x[j][2] - x[m][2]; + rsq = deljm[0]*deljm[0] + deljm[1]*deljm[1] + + deljm[2]*deljm[2]; + if (rsq < rcmaxsq[mtype][jtype]) { + rmj = sqrt(rsq); + wmj = Sp(rmj,rcmin[mtype][jtype],rcmax[mtype][jtype],dwmj); + if (wik*wkm*wmj > best) { + best = wik*wkm*wmj; + npath = 4; + atomk = k; + delikS[0] = delik[0]; + delikS[1] = delik[1]; + delikS[2] = delik[2]; + rikS = rik; + wikS = wik; + dwikS = dwik; + atomm = m; + delkmS[0] = delkm[0]; + delkmS[1] = delkm[1]; + delkmS[2] = delkm[2]; + rkmS = rkm; + wkmS = wkm; + dwkmS = dwkm; + deljmS[0] = deljm[0]; + deljmS[1] = deljm[1]; + deljmS[2] = deljm[2]; + rmjS = rmj; + wmjS = wmj; + dwmjS = dwmj; + if (best == 1.0) { + done = 1; + break; + } + } + } + } + } + } + } + } + + cij = 1.0 - best; + if (cij == 0.0) continue; + + // compute LJ forces and energy + + sigwid = 0.84; + sigcut = 3.0; + sigmin = sigcut - sigwid; + + rljmin = sigma[itype][jtype]; + rljmax = sigcut * rljmin; + rljmin = sigmin * rljmin; + + if (rij > rljmax){ + slw = 0.0; + dslw = 0.0;} + else if (rij > rljmin){ + drij = rij - rljmin; + swidth = rljmax - rljmin; + tee = drij / swidth; + tee2 = tee*tee; + slw = 1.0 - tee2 * (3.0 - 2.0 * tee); + dslw = 6.0 * tee * (1.0 - tee) / rij / swidth; + } + else + slw = 1.0; + dslw = 0.0; + + r2inv = 1.0/rijsq; + r6inv = r2inv*r2inv*r2inv; + + vdw = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); + dvdw = -r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]) / rij; + + // VLJ now becomes vdw * slw, derivaties, etc. + + VLJ = vdw * slw; + dVLJ = dvdw * slw + vdw * dslw; + + Str = Sp2(rij,rcLJmin[itype][jtype],rcLJmax[itype][jtype],dStr); + VA = Str*cij*VLJ; + if (Str > 0.0) { + scale = rcmin[itype][jtype] / rij; + delscale[0] = scale * delij[0]; + delscale[1] = scale * delij[1]; + delscale[2] = scale * delij[2]; + Stb = bondorderLJ_thr(i,j,delscale,rcmin[itype][jtype],VA, + delij,rij,vflag_atom,thr); + } else Stb = 0.0; + + fpair = -(dStr * (Stb*cij*VLJ - cij*VLJ) + + dVLJ * (Str*Stb*cij + cij - Str*cij)) / rij; + + f[i][0] += delij[0]*fpair; + f[i][1] += delij[1]*fpair; + f[i][2] += delij[2]*fpair; + f[j][0] -= delij[0]*fpair; + f[j][1] -= delij[1]*fpair; + f[j][2] -= delij[2]*fpair; + + if (eflag) evdwl = VA*Stb + (1.0-Str)*cij*VLJ; + if (evflag) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1, + evdwl,0.0,fpair,delij[0],delij[1],delij[2],thr); + + if (cij < 1.0) { + dC = Str*Stb*VLJ + (1.0-Str)*VLJ; + if (npath == 2) { + fpair = dC*dwij / rij; + f[atomi][0] += delij[0]*fpair; + f[atomi][1] += delij[1]*fpair; + f[atomi][2] += delij[2]*fpair; + f[atomj][0] -= delij[0]*fpair; + f[atomj][1] -= delij[1]*fpair; + f[atomj][2] -= delij[2]*fpair; + + if (vflag_atom) v_tally2_thr(atomi,atomj,fpair,delij,thr); + + } else if (npath == 3) { + fpair1 = dC*dwikS*wkjS / rikS; + fi[0] = delikS[0]*fpair1; + fi[1] = delikS[1]*fpair1; + fi[2] = delikS[2]*fpair1; + fpair2 = dC*wikS*dwkjS / rkjS; + fj[0] = deljkS[0]*fpair2; + fj[1] = deljkS[1]*fpair2; + fj[2] = deljkS[2]*fpair2; + + f[atomi][0] += fi[0]; + f[atomi][1] += fi[1]; + f[atomi][2] += fi[2]; + f[atomj][0] += fj[0]; + f[atomj][1] += fj[1]; + f[atomj][2] += fj[2]; + f[atomk][0] -= fi[0] + fj[0]; + f[atomk][1] -= fi[1] + fj[1]; + f[atomk][2] -= fi[2] + fj[2]; + + if (vflag_atom) + v_tally3_thr(atomi,atomj,atomk,fi,fj,delikS,deljkS,thr); + + } else { + fpair1 = dC*dwikS*wkmS*wmjS / rikS; + fi[0] = delikS[0]*fpair1; + fi[1] = delikS[1]*fpair1; + fi[2] = delikS[2]*fpair1; + + fpair2 = dC*wikS*dwkmS*wmjS / rkmS; + fk[0] = delkmS[0]*fpair2 - fi[0]; + fk[1] = delkmS[1]*fpair2 - fi[1]; + fk[2] = delkmS[2]*fpair2 - fi[2]; + + fpair3 = dC*wikS*wkmS*dwmjS / rmjS; + fj[0] = deljmS[0]*fpair3; + fj[1] = deljmS[1]*fpair3; + fj[2] = deljmS[2]*fpair3; + + fm[0] = -delkmS[0]*fpair2 - fj[0]; + fm[1] = -delkmS[1]*fpair2 - fj[1]; + fm[2] = -delkmS[2]*fpair2 - fj[2]; + + f[atomi][0] += fi[0]; + f[atomi][1] += fi[1]; + f[atomi][2] += fi[2]; + f[atomj][0] += fj[0]; + f[atomj][1] += fj[1]; + f[atomj][2] += fj[2]; + f[atomk][0] += fk[0]; + f[atomk][1] += fk[1]; + f[atomk][2] += fk[2]; + f[atomm][0] += fm[0]; + f[atomm][1] += fm[1]; + f[atomm][2] += fm[2]; + + if (vflag_atom) { + delimS[0] = delikS[0] + delkmS[0]; + delimS[1] = delikS[1] + delkmS[1]; + delimS[2] = delikS[2] + delkmS[2]; + v_tally4_thr(atomi,atomj,atomk,atomm,fi,fj,fk,delimS,deljmS,delkmS,thr); + } + } + } + } + } +} + +/* ---------------------------------------------------------------------- + torsional forces and energy +------------------------------------------------------------------------- */ + +void PairAIREBOOMP::TORSION_thr(int ifrom, int ito, + int evflag, int eflag, ThrData * const thr) +{ + int i,j,k,l,ii,itag,jtag; + double evdwl,fpair,xtmp,ytmp,ztmp; + double cos321; + double w21,dw21,cos234,w34,dw34; + double cross321[3],cross321mag,cross234[3],cross234mag; + double w23,dw23,cw2,ekijl,Ec; + double cw,cwnum,cwnom; + double rij,rij2,rik,rjl,tspjik,dtsjik,tspijl,dtsijl,costmp,fcpc; + double sin321,sin234,rjk2,rik2,ril2,rjl2; + double rjk,ril; + double Vtors; + double dndij[3],tmpvec[3],dndik[3],dndjl[3]; + double dcidij,dcidik,dcidjk,dcjdji,dcjdjl,dcjdil; + double dsidij,dsidik,dsidjk,dsjdji,dsjdjl,dsjdil; + double dxidij,dxidik,dxidjk,dxjdji,dxjdjl,dxjdil; + double ddndij,ddndik,ddndjk,ddndjl,ddndil,dcwddn,dcwdn,dvpdcw,Ftmp[3]; + double del32[3],rsq,r32,del23[3],del21[3],r21; + double deljk[3],del34[3],delil[3],delkl[3],r23,r34; + double fi[3],fj[3],fk[3],fl[3]; + int itype,jtype,ktype,ltype,kk,ll,jj; + int *ilist,*REBO_neighs_i,*REBO_neighs_j; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + int *type = atom->type; + int *tag = atom->tag; + + ilist = list->ilist; + + for (ii = ifrom; ii < ito; ii++) { + i = ilist[ii]; + itag = tag[i]; + itype = map[type[i]]; + if (itype != 0) continue; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + REBO_neighs_i = REBO_firstneigh[i]; + + for (jj = 0; jj < REBO_numneigh[i]; jj++) { + j = REBO_neighs_i[jj]; + jtag = tag[j]; + + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (x[j][2] < ztmp) continue; + if (x[j][2] == ztmp && x[j][1] < ytmp) continue; + if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue; + } + + jtype = map[type[j]]; + if (jtype != 0) continue; + + del32[0] = x[j][0]-x[i][0]; + del32[1] = x[j][1]-x[i][1]; + del32[2] = x[j][2]-x[i][2]; + rsq = del32[0]*del32[0] + del32[1]*del32[1] + del32[2]*del32[2]; + r32 = sqrt(rsq); + del23[0] = -del32[0]; + del23[1] = -del32[1]; + del23[2] = -del32[2]; + r23 = r32; + w23 = Sp(r23,rcmin[itype][jtype],rcmax[itype][jtype],dw23); + + for (kk = 0; kk < REBO_numneigh[i]; kk++) { + k = REBO_neighs_i[kk]; + ktype = map[type[k]]; + if (k == j) continue; + del21[0] = x[i][0]-x[k][0]; + del21[1] = x[i][1]-x[k][1]; + del21[2] = x[i][2]-x[k][2]; + rsq = del21[0]*del21[0] + del21[1]*del21[1] + del21[2]*del21[2]; + r21 = sqrt(rsq); + cos321 = - ((del21[0]*del32[0]) + (del21[1]*del32[1]) + + (del21[2]*del32[2])) / (r21*r32); + cos321 = MIN(cos321,1.0); + cos321 = MAX(cos321,-1.0); + sin321 = sqrt(1.0 - cos321*cos321); + if (sin321 < TOL) continue; + + deljk[0] = del21[0]-del23[0]; + deljk[1] = del21[1]-del23[1]; + deljk[2] = del21[2]-del23[2]; + rjk2 = deljk[0]*deljk[0] + deljk[1]*deljk[1] + deljk[2]*deljk[2]; + rjk=sqrt(rjk2); + rik2 = r21*r21; + w21 = Sp(r21,rcmin[itype][ktype],rcmax[itype][ktype],dw21); + + rij = r32; + rik = r21; + rij2 = r32*r32; + rik2 = r21*r21; + costmp = 0.5*(rij2+rik2-rjk2)/rij/rik; + tspjik = Sp2(costmp,thmin,thmax,dtsjik); + dtsjik = -dtsjik; + + REBO_neighs_j = REBO_firstneigh[j]; + for (ll = 0; ll < REBO_numneigh[j]; ll++) { + l = REBO_neighs_j[ll]; + ltype = map[type[l]]; + if (l == i || l == k) continue; + del34[0] = x[j][0]-x[l][0]; + del34[1] = x[j][1]-x[l][1]; + del34[2] = x[j][2]-x[l][2]; + rsq = del34[0]*del34[0] + del34[1]*del34[1] + del34[2]*del34[2]; + r34 = sqrt(rsq); + cos234 = (del32[0]*del34[0] + del32[1]*del34[1] + + del32[2]*del34[2]) / (r32*r34); + cos234 = MIN(cos234,1.0); + cos234 = MAX(cos234,-1.0); + sin234 = sqrt(1.0 - cos234*cos234); + if (sin234 < TOL) continue; + w34 = Sp(r34,rcmin[jtype][ltype],rcmax[jtype][ltype],dw34); + delil[0] = del23[0] + del34[0]; + delil[1] = del23[1] + del34[1]; + delil[2] = del23[2] + del34[2]; + ril2 = delil[0]*delil[0] + delil[1]*delil[1] + delil[2]*delil[2]; + ril=sqrt(ril2); + rjl2 = r34*r34; + + rjl = r34; + rjl2 = r34*r34; + costmp = 0.5*(rij2+rjl2-ril2)/rij/rjl; + tspijl = Sp2(costmp,thmin,thmax,dtsijl); + dtsijl = -dtsijl; //need minus sign + cross321[0] = (del32[1]*del21[2])-(del32[2]*del21[1]); + cross321[1] = (del32[2]*del21[0])-(del32[0]*del21[2]); + cross321[2] = (del32[0]*del21[1])-(del32[1]*del21[0]); + cross321mag = sqrt(cross321[0]*cross321[0]+ + cross321[1]*cross321[1]+ + cross321[2]*cross321[2]); + cross234[0] = (del23[1]*del34[2])-(del23[2]*del34[1]); + cross234[1] = (del23[2]*del34[0])-(del23[0]*del34[2]); + cross234[2] = (del23[0]*del34[1])-(del23[1]*del34[0]); + cross234mag = sqrt(cross234[0]*cross234[0]+ + cross234[1]*cross234[1]+ + cross234[2]*cross234[2]); + cwnum = (cross321[0]*cross234[0]) + + (cross321[1]*cross234[1])+(cross321[2]*cross234[2]); + cwnom = r21*r34*r32*r32*sin321*sin234; + cw = cwnum/cwnom; + + cw2 = (.5*(1.0-cw)); + ekijl = epsilonT[ktype][ltype]; + Ec = 256.0*ekijl/405.0; + Vtors = (Ec*(pow(cw2,5.0)))-(ekijl/10.0); + + if (eflag) evdwl = Vtors*w21*w23*w34*(1.0-tspjik)*(1.0-tspijl); + + dndij[0] = (cross234[1]*del21[2])-(cross234[2]*del21[1]); + dndij[1] = (cross234[2]*del21[0])-(cross234[0]*del21[2]); + dndij[2] = (cross234[0]*del21[1])-(cross234[1]*del21[0]); + + tmpvec[0] = (del34[1]*cross321[2])-(del34[2]*cross321[1]); + tmpvec[1] = (del34[2]*cross321[0])-(del34[0]*cross321[2]); + tmpvec[2] = (del34[0]*cross321[1])-(del34[1]*cross321[0]); + + dndij[0] = dndij[0]+tmpvec[0]; + dndij[1] = dndij[1]+tmpvec[1]; + dndij[2] = dndij[2]+tmpvec[2]; + + dndik[0] = (del23[1]*cross234[2])-(del23[2]*cross234[1]); + dndik[1] = (del23[2]*cross234[0])-(del23[0]*cross234[2]); + dndik[2] = (del23[0]*cross234[1])-(del23[1]*cross234[0]); + + dndjl[0] = (cross321[1]*del23[2])-(cross321[2]*del23[1]); + dndjl[1] = (cross321[2]*del23[0])-(cross321[0]*del23[2]); + dndjl[2] = (cross321[0]*del23[1])-(cross321[1]*del23[0]); + + dcidij = ((r23*r23)-(r21*r21)+(rjk*rjk))/(2.0*r23*r23*r21); + dcidik = ((r21*r21)-(r23*r23)+(rjk*rjk))/(2.0*r23*r21*r21); + dcidjk = (-rjk)/(r23*r21); + dcjdji = ((r23*r23)-(r34*r34)+(ril*ril))/(2.0*r23*r23*r34); + dcjdjl = ((r34*r34)-(r23*r23)+(ril*ril))/(2.0*r23*r34*r34); + dcjdil = (-ril)/(r23*r34); + + dsidij = (-cos321/sin321)*dcidij; + dsidik = (-cos321/sin321)*dcidik; + dsidjk = (-cos321/sin321)*dcidjk; + + dsjdji = (-cos234/sin234)*dcjdji; + dsjdjl = (-cos234/sin234)*dcjdjl; + dsjdil = (-cos234/sin234)*dcjdil; + + dxidij = (r21*sin321)+(r23*r21*dsidij); + dxidik = (r23*sin321)+(r23*r21*dsidik); + dxidjk = (r23*r21*dsidjk); + + dxjdji = (r34*sin234)+(r23*r34*dsjdji); + dxjdjl = (r23*sin234)+(r23*r34*dsjdjl); + dxjdil = (r23*r34*dsjdil); + + ddndij = (dxidij*cross234mag)+(cross321mag*dxjdji); + ddndik = dxidik*cross234mag; + ddndjk = dxidjk*cross234mag; + ddndjl = cross321mag*dxjdjl; + ddndil = cross321mag*dxjdil; + dcwddn = -cwnum/(cwnom*cwnom); + dcwdn = 1.0/cwnom; + dvpdcw = (-1.0)*Ec*(-.5)*5.0*pow(cw2,4.0) * + w23*w21*w34*(1.0-tspjik)*(1.0-tspijl); + + Ftmp[0] = dvpdcw*((dcwdn*dndij[0])+(dcwddn*ddndij*del23[0]/r23)); + Ftmp[1] = dvpdcw*((dcwdn*dndij[1])+(dcwddn*ddndij*del23[1]/r23)); + Ftmp[2] = dvpdcw*((dcwdn*dndij[2])+(dcwddn*ddndij*del23[2]/r23)); + fi[0] = Ftmp[0]; + fi[1] = Ftmp[1]; + fi[2] = Ftmp[2]; + fj[0] = -Ftmp[0]; + fj[1] = -Ftmp[1]; + fj[2] = -Ftmp[2]; + + Ftmp[0] = dvpdcw*((dcwdn*dndik[0])+(dcwddn*ddndik*del21[0]/r21)); + Ftmp[1] = dvpdcw*((dcwdn*dndik[1])+(dcwddn*ddndik*del21[1]/r21)); + Ftmp[2] = dvpdcw*((dcwdn*dndik[2])+(dcwddn*ddndik*del21[2]/r21)); + fi[0] += Ftmp[0]; + fi[1] += Ftmp[1]; + fi[2] += Ftmp[2]; + fk[0] = -Ftmp[0]; + fk[1] = -Ftmp[1]; + fk[2] = -Ftmp[2]; + + Ftmp[0] = (dvpdcw*dcwddn*ddndjk*deljk[0])/rjk; + Ftmp[1] = (dvpdcw*dcwddn*ddndjk*deljk[1])/rjk; + Ftmp[2] = (dvpdcw*dcwddn*ddndjk*deljk[2])/rjk; + fj[0] += Ftmp[0]; + fj[1] += Ftmp[1]; + fj[2] += Ftmp[2]; + fk[0] -= Ftmp[0]; + fk[1] -= Ftmp[1]; + fk[2] -= Ftmp[2]; + + Ftmp[0] = dvpdcw*((dcwdn*dndjl[0])+(dcwddn*ddndjl*del34[0]/r34)); + Ftmp[1] = dvpdcw*((dcwdn*dndjl[1])+(dcwddn*ddndjl*del34[1]/r34)); + Ftmp[2] = dvpdcw*((dcwdn*dndjl[2])+(dcwddn*ddndjl*del34[2]/r34)); + fj[0] += Ftmp[0]; + fj[1] += Ftmp[1]; + fj[2] += Ftmp[2]; + fl[0] = -Ftmp[0]; + fl[1] = -Ftmp[1]; + fl[2] = -Ftmp[2]; + + Ftmp[0] = (dvpdcw*dcwddn*ddndil*delil[0])/ril; + Ftmp[1] = (dvpdcw*dcwddn*ddndil*delil[1])/ril; + Ftmp[2] = (dvpdcw*dcwddn*ddndil*delil[2])/ril; + fi[0] += Ftmp[0]; + fi[1] += Ftmp[1]; + fi[2] += Ftmp[2]; + fl[0] -= Ftmp[0]; + fl[1] -= Ftmp[1]; + fl[2] -= Ftmp[2]; + + // coordination forces + + fpair = Vtors*dw21*w23*w34*(1.0-tspjik)*(1.0-tspijl) / r21; + fi[0] -= del21[0]*fpair; + fi[1] -= del21[1]*fpair; + fi[2] -= del21[2]*fpair; + fk[0] += del21[0]*fpair; + fk[1] += del21[1]*fpair; + fk[2] += del21[2]*fpair; + + fpair = Vtors*w21*dw23*w34*(1.0-tspjik)*(1.0-tspijl) / r23; + fi[0] -= del23[0]*fpair; + fi[1] -= del23[1]*fpair; + fi[2] -= del23[2]*fpair; + fj[0] += del23[0]*fpair; + fj[1] += del23[1]*fpair; + fj[2] += del23[2]*fpair; + + fpair = Vtors*w21*w23*dw34*(1.0-tspjik)*(1.0-tspijl) / r34; + fj[0] -= del34[0]*fpair; + fj[1] -= del34[1]*fpair; + fj[2] -= del34[2]*fpair; + fl[0] += del34[0]*fpair; + fl[1] += del34[1]*fpair; + fl[2] += del34[2]*fpair; + + // additional cut off function forces + + fcpc = -Vtors*w21*w23*w34*dtsjik*(1.0-tspijl); + fpair = fcpc*dcidij/rij; + fi[0] += fpair*del23[0]; + fi[1] += fpair*del23[1]; + fi[2] += fpair*del23[2]; + fj[0] -= fpair*del23[0]; + fj[1] -= fpair*del23[1]; + fj[2] -= fpair*del23[2]; + + fpair = fcpc*dcidik/rik; + fi[0] += fpair*del21[0]; + fi[1] += fpair*del21[1]; + fi[2] += fpair*del21[2]; + fk[0] -= fpair*del21[0]; + fk[1] -= fpair*del21[1]; + fk[2] -= fpair*del21[2]; + + fpair = fcpc*dcidjk/rjk; + fj[0] += fpair*deljk[0]; + fj[1] += fpair*deljk[1]; + fj[2] += fpair*deljk[2]; + fk[0] -= fpair*deljk[0]; + fk[1] -= fpair*deljk[1]; + fk[2] -= fpair*deljk[2]; + + fcpc = -Vtors*w21*w23*w34*(1.0-tspjik)*dtsijl; + fpair = fcpc*dcjdji/rij; + fi[0] += fpair*del23[0]; + fi[1] += fpair*del23[1]; + fi[2] += fpair*del23[2]; + fj[0] -= fpair*del23[0]; + fj[1] -= fpair*del23[1]; + fj[2] -= fpair*del23[2]; + + fpair = fcpc*dcjdjl/rjl; + fj[0] += fpair*del34[0]; + fj[1] += fpair*del34[1]; + fj[2] += fpair*del34[2]; + fl[0] -= fpair*del34[0]; + fl[1] -= fpair*del34[1]; + fl[2] -= fpair*del34[2]; + + fpair = fcpc*dcjdil/ril; + fi[0] += fpair*delil[0]; + fi[1] += fpair*delil[1]; + fi[2] += fpair*delil[2]; + fl[0] -= fpair*delil[0]; + fl[1] -= fpair*delil[1]; + fl[2] -= fpair*delil[2]; + + // sum per-atom forces into atom force array + + f[i][0] += fi[0]; f[i][1] += fi[1]; f[i][2] += fi[2]; + f[j][0] += fj[0]; f[j][1] += fj[1]; f[j][2] += fj[2]; + f[k][0] += fk[0]; f[k][1] += fk[1]; f[k][2] += fk[2]; + f[l][0] += fl[0]; f[l][1] += fl[1]; f[l][2] += fl[2]; + + if (evflag) { + delkl[0] = delil[0] - del21[0]; + delkl[1] = delil[1] - del21[1]; + delkl[2] = delil[2] - del21[2]; + ev_tally4_thr(this,i,j,k,l,evdwl,fi,fj,fk,delil,del34,delkl,thr); + } + } + } + } + } +} + +/* ---------------------------------------------------------------------- + create REBO neighbor list from main neighbor list + REBO neighbor list stores neighbors of ghost atoms +------------------------------------------------------------------------- */ + +void PairAIREBOOMP::REBO_neigh_thr() +{ + const int nlocal = atom->nlocal; + const int nall = nlocal + atom->nghost; + const int nthreads = comm->nthreads; + + if (atom->nmax > maxlocal) { + maxlocal = atom->nmax; + memory->destroy(REBO_numneigh); + memory->sfree(REBO_firstneigh); + memory->destroy(nC); + memory->destroy(nH); + memory->create(REBO_numneigh,maxlocal,"AIREBO:numneigh"); + REBO_firstneigh = (int **) memory->smalloc(maxlocal*sizeof(int *), + "AIREBO:firstneigh"); + memory->create(nC,maxlocal,"AIREBO:nC"); + memory->create(nH,maxlocal,"AIREBO:nH"); + } + + if (nthreads > maxpage) + add_pages(nthreads - maxpage); + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + int i,j,ii,jj,n,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq,dS; + int *ilist,*jlist,*numneigh,**firstneigh; + int *neighptr; + + double **x = atom->x; + int *type = atom->type; + + const int allnum = list->inum + list->gnum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + + const int iidelta = 1 + allnum/nthreads; + const int iifrom = tid*iidelta; + int iito = iifrom + iidelta; + if (iito > allnum) + iito = allnum; + + // store all REBO neighs of owned and ghost atoms + // scan full neighbor list of I + + int npage = tid; + int npnt = 0; + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + +#if defined(_OPENMP) +#pragma omp critical +#endif + if (pgsize - npnt < oneatom) { + npnt = 0; + npage += nthreads; + if (npage >= maxpage) add_pages(nthreads); + } + neighptr = &(pages[npage][npnt]); + n = 0; + + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = map[type[i]]; + nC[i] = nH[i] = 0.0; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtype = map[type[j]]; + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < rcmaxsq[itype][jtype]) { + neighptr[n++] = j; + if (jtype == 0) + nC[i] += Sp(sqrt(rsq),rcmin[itype][jtype],rcmax[itype][jtype],dS); + else + nH[i] += Sp(sqrt(rsq),rcmin[itype][jtype],rcmax[itype][jtype],dS); + } + } + + REBO_firstneigh[i] = neighptr; + REBO_numneigh[i] = n; + npnt += n; + + if (npnt >= pgsize) + error->one(FLERR,"REBO list overflow, boost neigh_modify one or page"); + } + } +} + +/* ---------------------------------------------------------------------- */ + +double PairAIREBOOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairAIREBO::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_airebo_omp.h b/src/USER-OMP/pair_airebo_omp.h new file mode 100644 index 0000000000..9ba598fa8f --- /dev/null +++ b/src/USER-OMP/pair_airebo_omp.h @@ -0,0 +1,53 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(airebo/omp,PairAIREBOOMP) + +#else + +#ifndef LMP_PAIR_AIREBO_OMP_H +#define LMP_PAIR_AIREBO_OMP_H + +#include "pair_airebo.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairAIREBOOMP : public PairAIREBO, public ThrOMP { + + public: + PairAIREBOOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + protected: + double bondorder_thr(int i, int j, double rij[3], double rijmag, + double VA, int vflag_atom, ThrData * const thr); + double bondorderLJ_thr(int i, int j, double rij[3], double rijmag, + double VA, double rij0[3], double rijmag0, + int vflag_atom, ThrData * const thr); + + void FREBO_thr(int ifrom, int ito, int evflag, int eflag, + int vflag_atom, ThrData * const thr); + void FLJ_thr(int ifrom, int ito, int evflag, int eflag, + int vflag_atom, ThrData * const thr); + void TORSION_thr(int ifrom, int ito, int evflag, int eflag, ThrData * const thr); + void REBO_neigh_thr(); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_comb_omp.cpp b/src/USER-OMP/pair_comb_omp.cpp new file mode 100644 index 0000000000..80607b7225 --- /dev/null +++ b/src/USER-OMP/pair_comb_omp.cpp @@ -0,0 +1,647 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_comb_omp.h" +#include "atom.h" +#include "comm.h" +#include "group.h" +#include "force.h" +#include "memory.h" +#include "neighbor.h" +#include "neigh_list.h" + +#if defined(_OPENMP) +#include +#endif + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairCombOMP::PairCombOMP(LAMMPS *lmp) : + PairComb(lmp), ThrOMP(lmp, THR_PAIR) +{ + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairCombOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = vflag_atom = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + + // grow coordination array if necessary + + if (atom->nmax > nmax) { + memory->destroy(NCo); + memory->destroy(bbij); + nmax = atom->nmax; + memory->create(NCo,nmax,"pair:NCo"); + memory->create(bbij,nmax,nmax,"pair:bbij"); + } + + // Build short range neighbor list + Short_neigh_thr(); + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (vflag_atom) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (vflag_atom) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else eval<0,0,0>(ifrom, ito, thr); + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairCombOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,k,ii,jj,kk,jnum,iparam_i; + int itag,jtag,itype,jtype,ktype,iparam_ij,iparam_ijk; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double rsq,rsq1,rsq2; + double delr1[3],delr2[3],fi[3],fj[3],fk[3]; + double zeta_ij,prefactor; + int *ilist,*jlist,*numneigh,**firstneigh; + int mr1,mr2,mr3; + int rsc,inty; + double elp_ij,filp[3],fjlp[3],fklp[3]; + double iq,jq; + double yaself; + double potal,fac11,fac11e; + double vionij,fvionij,sr1,sr2,sr3,Eov,Fov; + int sht_jnum, *sht_jlist; + + evdwl = ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const tag = atom->tag; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + yaself = vionij = fvionij = Eov = Fov = 0.0; + + double fxtmp,fytmp,fztmp; + double fjxtmp,fjytmp,fjztmp; + + // self energy correction term: potal + + potal_calc(potal,fac11,fac11e); + + // loop over full neighbor list of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + itag = tag[i]; + itype = map[type[i]]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + fxtmp = fytmp = fztmp = 0.0; + + iq = q[i]; + NCo[i] = 0; + iparam_i = elem2param[itype][itype][itype]; + + // self energy, only on i atom + + yaself = self(¶ms[iparam_i],iq,potal); + + if (EVFLAG) ev_tally_thr(this,i,i,nlocal,0,yaself, + 0.0,0.0,0.0,0.0,0.0,thr); + + // two-body interactions (long and short repulsive) + + jlist = firstneigh[i]; + jnum = numneigh[i]; + sht_jlist = sht_first[i]; + sht_jnum = sht_num[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtag = tag[j]; + + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (x[j][2] < ztmp) continue; + if (x[j][2] == ztmp && x[j][1] < ytmp) continue; + if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue; + } + + // Qj calculates 2-body Coulombic + + jtype = map[type[j]]; + jq = q[j]; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + iparam_ij = elem2param[itype][jtype][jtype]; + + // long range q-dependent + + if (rsq > params[iparam_ij].lcutsq) continue; + + inty = intype[itype][jtype]; + + // polynomial three-point interpolation + + tri_point(rsq, mr1, mr2, mr3, sr1, sr2, sr3, itype); + + // 1/r energy and forces + + direct(inty,mr1,mr2,mr3,rsq,sr1,sr2,sr3,iq,jq, + potal,fac11,fac11e,vionij,fvionij); + + // field correction to self energy + + field(¶ms[iparam_ij],rsq,iq,jq,vionij,fvionij); + + // polarization field + // sums up long range forces + + fxtmp += delx*fvionij; + fytmp += dely*fvionij; + fztmp += delz*fvionij; + f[j][0] -= delx*fvionij; + f[j][1] -= dely*fvionij; + f[j][2] -= delz*fvionij; + + if (EVFLAG) + ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1, + 0.0,vionij,fvionij,delx,dely,delz,thr); + + // short range q-independent + + if (rsq > params[iparam_ij].cutsq) continue; + + repulsive(¶ms[iparam_ij],rsq,fpair,EFLAG,evdwl,iq,jq); + + // repulsion is pure two-body, sums up pair repulsive forces + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + + if (EVFLAG) + ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + + // accumulate coordination number information + + if (cor_flag) { + int numcoor = 0; + for (jj = 0; jj < sht_jnum; jj++) { + j = sht_jlist[jj]; + j &= NEIGHMASK; + jtype = map[type[j]]; + iparam_ij = elem2param[itype][jtype][jtype]; + + if(params[iparam_ij].hfocor > 0.0 ) { + delr1[0] = x[j][0] - xtmp; + delr1[1] = x[j][1] - ytmp; + delr1[2] = x[j][2] - ztmp; + rsq1 = vec3_dot(delr1,delr1); + + if (rsq1 > params[iparam_ij].cutsq) continue; + ++numcoor; + } + } + NCo[i] = numcoor; + } + + // three-body interactions + // half i-j loop + + for (jj = 0; jj < sht_jnum; jj++) { + j = sht_jlist[jj]; + j &= NEIGHMASK; + + jtype = map[type[j]]; + iparam_ij = elem2param[itype][jtype][jtype]; + + // this Qj for q-dependent BSi + + jq = q[j]; + + delr1[0] = x[j][0] - xtmp; + delr1[1] = x[j][1] - ytmp; + delr1[2] = x[j][2] - ztmp; + rsq1 = vec3_dot(delr1,delr1); + + if (rsq1 > params[iparam_ij].cutsq) continue; + + // accumulate bondorder zeta for each i-j interaction via loop over k + + fjxtmp = fjytmp = fjztmp = 0.0; + zeta_ij = 0.0; + cuo_flag1 = 0; cuo_flag2 = 0; + + for (kk = 0; kk < sht_jnum; kk++) { + k = sht_jlist[kk]; + if (j == k) continue; + k &= NEIGHMASK; + ktype = map[type[k]]; + iparam_ijk = elem2param[itype][jtype][ktype]; + + delr2[0] = x[k][0] - xtmp; + delr2[1] = x[k][1] - ytmp; + delr2[2] = x[k][2] - ztmp; + rsq2 = vec3_dot(delr2,delr2); + + if (rsq2 > params[iparam_ijk].cutsq) continue; + + zeta_ij += zeta(¶ms[iparam_ijk],rsq1,rsq2,delr1,delr2); + + if (params[iparam_ijk].hfocor == -2.0) cuo_flag1 = 1; + if (params[iparam_ijk].hfocor == -1.0) cuo_flag2 = 1; + } + + if (cuo_flag1 && cuo_flag2) cuo_flag = 1; + else cuo_flag = 0; + + force_zeta(¶ms[iparam_ij],EFLAG,i,j,rsq1,zeta_ij, + iq,jq,fpair,prefactor,evdwl); + + // over-coordination correction for HfO2 + + if (cor_flag && NCo[i] != 0) + Over_cor(¶ms[iparam_ij],rsq1,NCo[i],Eov, Fov); + evdwl += Eov; + fpair += Fov; + + fxtmp += delr1[0]*fpair; + fytmp += delr1[1]*fpair; + fztmp += delr1[2]*fpair; + fjxtmp -= delr1[0]*fpair; + fjytmp -= delr1[1]*fpair; + fjztmp -= delr1[2]*fpair; + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,evdwl,0.0, + -fpair,-delr1[0],-delr1[1],-delr1[2],thr); + + // attractive term via loop over k (3-body forces) + + for (kk = 0; kk < sht_jnum; kk++) { + k = sht_jlist[kk]; + if (j == k) continue; + k &= NEIGHMASK; + ktype = map[type[k]]; + iparam_ijk = elem2param[itype][jtype][ktype]; + + delr2[0] = x[k][0] - xtmp; + delr2[1] = x[k][1] - ytmp; + delr2[2] = x[k][2] - ztmp; + rsq2 = vec3_dot(delr2,delr2); + if (rsq2 > params[iparam_ijk].cutsq) continue; + + for (rsc = 0; rsc < 3; rsc++) + fi[rsc] = fj[rsc] = fk[rsc] = 0.0; + + attractive(¶ms[iparam_ijk],prefactor, + rsq1,rsq2,delr1,delr2,fi,fj,fk); + + // 3-body LP and BB correction and forces + + elp_ij = elp(¶ms[iparam_ijk],rsq1,rsq2,delr1,delr2); + flp(¶ms[iparam_ijk],rsq1,rsq2,delr1,delr2,filp,fjlp,fklp); + + fxtmp += fi[0] + filp[0]; + fytmp += fi[1] + filp[1]; + fztmp += fi[2] + filp[2]; + fjxtmp += fj[0] + fjlp[0]; + fjytmp += fj[1] + fjlp[1]; + fjztmp += fj[2] + fjlp[2]; + f[k][0] += fk[0] + fklp[0]; + f[k][1] += fk[1] + fklp[1]; + f[k][2] += fk[2] + fklp[2]; + + if (EVFLAG) + ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1, + elp_ij,0.0,0.0,0.0,0.0,0.0, thr); + if (VFLAG_ATOM) v_tally3_thr(i,j,k,fj,fk,delr1,delr2,thr); + } + f[j][0] += fjxtmp; + f[j][1] += fjytmp; + f[j][2] += fjztmp; + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + + if (cuo_flag) params[iparam_i].cutsq *= 0.65; + } + cuo_flag = 0; +} + +/* ---------------------------------------------------------------------- */ + +double PairCombOMP::yasu_char(double *qf_fix, int &igroup) +{ + int ii; + double potal,fac11,fac11e; + + const double * const * const x = atom->x; + const double * const q = atom->q; + const int * const type = atom->type; + const int * const tag = atom->tag; + + const int inum = list->inum; + const int * const ilist = list->ilist; + const int * const numneigh = list->numneigh; + const int * const * const firstneigh = list->firstneigh; + + const int * const mask = atom->mask; + const int groupbit = group->bitmask[igroup]; + + qf = qf_fix; + for (ii = 0; ii < inum; ii++) { + const int i = ilist[ii]; + if (mask[i] & groupbit) + qf[i] = 0.0; + } + + // communicating charge force to all nodes, first forward then reverse + + comm->forward_comm_pair(this); + + // self energy correction term: potal + + potal_calc(potal,fac11,fac11e); + + // loop over full neighbor list of my atoms +#if defined(_OPENMP) +#pragma omp parallel for private(ii) default(none) shared(potal,fac11e) +#endif + for (ii = 0; ii < inum; ii ++) { + double fqi,fqj,fqij,fqji,fqjj,delr1[3]; + double sr1,sr2,sr3; + int mr1,mr2,mr3; + + const int i = ilist[ii]; + const int itag = tag[i]; + + if (mask[i] & groupbit) { + fqi = fqj = fqij = fqji = fqjj = 0.0; // should not be needed. + int itype = map[type[i]]; + const double xtmp = x[i][0]; + const double ytmp = x[i][1]; + const double ztmp = x[i][2]; + const double iq = q[i]; + const int iparam_i = elem2param[itype][itype][itype]; + + // charge force from self energy + + fqi = qfo_self(¶ms[iparam_i],iq,potal); + + // two-body interactions + + const int * const jlist = firstneigh[i]; + const int jnum = numneigh[i]; + + for (int jj = 0; jj < jnum; jj++) { + const int j = jlist[jj] & NEIGHMASK; + const int jtag = tag[j]; + + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (x[j][2] < ytmp) continue; + if (x[j][2] == ztmp && x[j][1] < ytmp) continue; + if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue; + } + + const int jtype = map[type[j]]; + double jq = q[j]; + + delr1[0] = x[j][0] - xtmp; + delr1[1] = x[j][1] - ytmp; + delr1[2] = x[j][2] - ztmp; + double rsq1 = vec3_dot(delr1,delr1); + + const int iparam_ij = elem2param[itype][jtype][jtype]; + + // long range q-dependent + + if (rsq1 > params[iparam_ij].lcutsq) continue; + + const int inty = intype[itype][jtype]; + + // polynomial three-point interpolation + + tri_point(rsq1,mr1,mr2,mr3,sr1,sr2,sr3,itype); + + // 1/r charge forces + + qfo_direct(inty,mr1,mr2,mr3,rsq1,sr1,sr2,sr3,fac11e,fqij); + + // field correction to self energy and charge force + + qfo_field(¶ms[iparam_ij],rsq1,iq,jq,fqji,fqjj); + fqi += jq * fqij + fqji; +#if defined(_OPENMP) +#pragma omp atomic +#endif + qf[j] += (iq * fqij + fqjj); + } + + // three-body interactions + + for (int jj = 0; jj < jnum; jj++) { + const int j = jlist[jj] & NEIGHMASK; + const int jtype = map[type[j]]; + const double jq = q[j]; + + delr1[0] = x[j][0] - xtmp; + delr1[1] = x[j][1] - ytmp; + delr1[2] = x[j][2] - ztmp; + double rsq1 = vec3_dot(delr1,delr1); + + const int iparam_ij = elem2param[itype][jtype][jtype]; + + if (rsq1 > params[iparam_ij].cutsq) continue; + + // charge force in Aij and Bij + + qfo_short(¶ms[iparam_ij],i,j,rsq1,iq,jq,fqij,fqjj); + fqi += fqij; +#if defined(_OPENMP) +#pragma omp atomic +#endif + qf[j] += fqjj; + } + +#if defined(_OPENMP) && 0 +#pragma omp atomic +#endif + qf[i] += fqi; + } + } + + comm->reverse_comm_pair(this); + + // sum charge force on each node and return it + + double eneg = 0.0; + for (ii = 0; ii < inum; ii++) { + const int i = ilist[ii]; + if (mask[i] & groupbit) + eneg += qf[i]; + } + double enegtot; + MPI_Allreduce(&eneg,&enegtot,1,MPI_DOUBLE,MPI_SUM,world); + return enegtot; +} + +/* ---------------------------------------------------------------------- */ + +double PairCombOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairComb::memory_usage(); + + return bytes; +} +/* ---------------------------------------------------------------------- */ + +void PairCombOMP::Short_neigh_thr() +{ + + if (atom->nmax > nmax) { + nmax = int(1.0 * atom->nmax); + memory->sfree(sht_num); + memory->sfree(sht_first); + memory->create(sht_num,nmax,"pair:sht_num"); + sht_first = (int **) memory->smalloc(nmax*sizeof(int *), + "pair:sht_first"); + } + + const int nthreads = comm->nthreads; + if (nthreads > maxpage) + add_pages(nthreads - maxpage); + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + int nj,npntj,*neighptrj,itype,jtype; + int iparam_ij,*ilist,*jlist,*numneigh,**firstneigh; + int jnum,i,j,ii,jj; + double xtmp,ytmp,ztmp,rsq,delrj[3]; + double **x = atom->x; + int *type = atom->type; + + const int inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + + const int iidelta = 1 + inum/nthreads; + const int iifrom = tid*iidelta; + int iito = iifrom + iidelta; + if (iito > inum) iito = inum; + + int npage = tid; + npntj = 0; + + for (ii = iifrom; ii < iito; ii++) { + i = ilist[ii]; + itype = type[i]; + +#if defined(_OPENMP) +#pragma omp critical +#endif + if(pgsize - npntj < oneatom) { + npntj = 0; + npage++; + if (npage == maxpage) add_pages(nthreads); + } + + neighptrj = &pages[npage][npntj]; + nj = 0; + + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtype = type[j]; + iparam_ij = elem2param[itype][jtype][jtype]; + + delrj[0] = xtmp - x[j][0]; + delrj[1] = ytmp - x[j][1]; + delrj[2] = ztmp - x[j][2]; + rsq = vec3_dot(delrj,delrj); + + if (rsq > cutmin) continue; + neighptrj[nj++] = j; + } + sht_first[i] = neighptrj; + sht_num[i] = nj; + npntj += nj; + } + } +} diff --git a/src/USER-OMP/pair_comb_omp.h b/src/USER-OMP/pair_comb_omp.h new file mode 100644 index 0000000000..85011064e7 --- /dev/null +++ b/src/USER-OMP/pair_comb_omp.h @@ -0,0 +1,47 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(comb/omp,PairCombOMP) + +#else + +#ifndef LMP_PAIR_COMB_OMP_H +#define LMP_PAIR_COMB_OMP_H + +#include "pair_comb.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairCombOMP : public PairComb, public ThrOMP { + + public: + PairCombOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + virtual double yasu_char(double *, int &); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); + + void Short_neigh_thr(); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_coul_diel_omp.cpp b/src/USER-OMP/pair_coul_diel_omp.cpp new file mode 100644 index 0000000000..e1e8690245 --- /dev/null +++ b/src/USER-OMP/pair_coul_diel_omp.cpp @@ -0,0 +1,165 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_coul_diel_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairCoulDielOMP::PairCoulDielOMP(LAMMPS *lmp) : + PairCoulDiel(lmp), ThrOMP(lmp, THR_PAIR) +{ + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairCoulDielOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairCoulDielOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair; + double rsq,r,rarg,th,depsdr,epsr,forcecoul,factor_coul; + int *ilist,*jlist,*numneigh,**firstneigh; + + ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r = sqrt(rsq); + rarg = (r-rme[itype][jtype])/sigmae[itype][jtype]; + th=tanh(rarg); + epsr=a_eps+b_eps*th; + depsdr=b_eps * (1.0 - th*th) / sigmae[itype][jtype]; + + forcecoul = qqrd2e*qtmp*q[j]*((eps_s*(epsr+r*depsdr)/epsr/epsr) -1.)/rsq; + fpair = factor_coul*forcecoul/r; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + ecoul = (qqrd2e*qtmp*q[j]*((eps_s/epsr) -1.)/r) - offset[itype][jtype]; + ecoul *= factor_coul; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + 0.0,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairCoulDielOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairCoulDiel::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_coul_diel_omp.h b/src/USER-OMP/pair_coul_diel_omp.h new file mode 100644 index 0000000000..2dc0083ae3 --- /dev/null +++ b/src/USER-OMP/pair_coul_diel_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(coul/diel/omp,PairCoulDielOMP) + +#else + +#ifndef LMP_PAIR_COUL_DIEL_OMP_H +#define LMP_PAIR_COUL_DIEL_OMP_H + +#include "pair_coul_diel.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairCoulDielOMP : public PairCoulDiel, public ThrOMP { + + public: + PairCoulDielOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_gauss_cut_omp.cpp b/src/USER-OMP/pair_gauss_cut_omp.cpp new file mode 100644 index 0000000000..5499fab3bc --- /dev/null +++ b/src/USER-OMP/pair_gauss_cut_omp.cpp @@ -0,0 +1,156 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_gauss_cut_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairGaussCutOMP::PairGaussCutOMP(LAMMPS *lmp) : + PairGaussCut(lmp), ThrOMP(lmp, THR_PAIR) +{ + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairGaussCutOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairGaussCutOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r,rexp,ugauss,factor_lj; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r = sqrt(rsq); + rexp = (r-rmh[itype][jtype])/sigmah[itype][jtype]; + ugauss = pgauss[itype][jtype]*exp(-0.5*rexp*rexp); + fpair = factor_lj*rexp/r*ugauss/sigmah[itype][jtype]; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + evdwl = ugauss - offset[itype][jtype]; + evdwl *= factor_lj; + } + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairGaussCutOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairGaussCut::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_gauss_cut_omp.h b/src/USER-OMP/pair_gauss_cut_omp.h new file mode 100644 index 0000000000..d3da26abed --- /dev/null +++ b/src/USER-OMP/pair_gauss_cut_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(gauss/cut/omp,PairGaussCutOMP) + +#else + +#ifndef LMP_PAIR_GAUSS_CUT_OMP_H +#define LMP_PAIR_GAUSS_CUT_OMP_H + +#include "pair_gauss_cut.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairGaussCutOMP : public PairGaussCut, public ThrOMP { + + public: + PairGaussCutOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_line_lj_omp.cpp b/src/USER-OMP/pair_line_lj_omp.cpp new file mode 100644 index 0000000000..59982735d8 --- /dev/null +++ b/src/USER-OMP/pair_line_lj_omp.cpp @@ -0,0 +1,339 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_line_lj_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "memory.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairLineLJOMP::PairLineLJOMP(LAMMPS *lmp) : + PairLineLJ(lmp), ThrOMP(lmp, THR_PAIR) +{ + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairLineLJOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + const int * const line = atom->line; + const int * const type = atom->type; + + // grow discrete list if necessary and initialize + + if (nall > nmax) { + nmax = nall; + memory->destroy(dnum); + memory->destroy(dfirst); + memory->create(dnum,nall,"pair:dnum"); + memory->create(dfirst,nall,"pair:dfirst"); + } + memset(dnum,0,nall*sizeof(int)); + ndiscrete = 0; + + // need to discretize the system ahead of time + // until we find a good way to multi-thread it. + for (int i = 0; i < nall; ++i) + if (line[i] >= 0) + if (dnum[i] == 0) + discretize(i,sigma[type[i]][type[i]]); + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairLineLJOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + int ni,nj,npi,npj,ifirst,jfirst; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r2inv,r6inv,term1,term2,sig,sig3,forcelj; + double xi[2],xj[2],fi[2],fj[2],dxi,dxj,dyi,dyj,ti,tj; + int *ilist,*jlist,*numneigh,**firstneigh; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const * const torque = thr->get_torque(); + const int * const line = atom->line; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq >= cutsq[itype][jtype]) continue; + + // line/line interactions = NxN particles + + evdwl = 0.0; + if (line[i] >= 0 && line[j] >= 0) { + npi = dnum[i]; + ifirst = dfirst[i]; + npj = dnum[j]; + jfirst = dfirst[j]; + + fi[0] = fi[1] = fj[0] = fj[1] = ti = tj = 0.0; + + for (ni = 0; ni < npi; ni++) { + dxi = discrete[ifirst+ni].dx; + dyi = discrete[ifirst+ni].dy; + + for (nj = 0; nj < npj; nj++) { + dxj = discrete[jfirst+nj].dx; + dyj = discrete[jfirst+nj].dy; + + xi[0] = x[i][0] + dxi; + xi[1] = x[i][1] + dyi; + xj[0] = x[j][0] + dxj; + xj[1] = x[j][1] + dyj; + + delx = xi[0] - xj[0]; + dely = xi[1] - xj[1]; + rsq = delx*delx + dely*dely; + + sig = 0.5 * (discrete[ifirst+ni].sigma+discrete[jfirst+nj].sigma); + sig3 = sig*sig*sig; + term2 = 24.0*epsilon[itype][jtype] * sig3*sig3; + term1 = 2.0 * term2 * sig3*sig3; + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (term1*r6inv - term2); + fpair = forcelj*r2inv; + + if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0); + + fi[0] += delx*fpair; + fi[1] += dely*fpair; + ti += fpair*(dxi*dely - dyi*delx); + + if (NEWTON_PAIR || j < nlocal) { + fj[0] -= delx*fpair; + fj[1] -= dely*fpair; + tj += fpair*(dxj*dely - dyj*delx); + } + } + } + + f[i][0] += fi[0]; + f[i][1] += fi[1]; + f[j][0] += fj[0]; + f[j][1] += fj[1]; + torque[i][2] += ti; + torque[j][2] += tj; + + // line/particle interaction = Nx1 particles + // convert line into Np particles based on sigma and line length + + } else if (line[i] >= 0) { + npi = dnum[i]; + ifirst = dfirst[i]; + + fi[0] = fi[1] = fj[0] = fj[1] = ti = tj = 0.0; + + for (ni = 0; ni < npi; ni++) { + dxi = discrete[ifirst+ni].dx; + dyi = discrete[ifirst+ni].dy; + + xi[0] = x[i][0] + dxi; + xi[1] = x[i][1] + dyi; + xj[0] = x[j][0]; + xj[1] = x[j][1]; + + delx = xi[0] - xj[0]; + dely = xi[1] - xj[1]; + rsq = delx*delx + dely*dely; + + sig = 0.5 * (discrete[ifirst+ni].sigma+sigma[jtype][jtype]); + sig3 = sig*sig*sig; + term2 = 24.0*epsilon[itype][jtype] * sig3*sig3; + term1 = 2.0 * term2 * sig3*sig3; + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (term1*r6inv - term2); + fpair = forcelj*r2inv; + + if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0); + + fi[0] += delx*fpair; + fi[1] += dely*fpair; + ti += fpair*(dxi*dely - dyi*delx); + + if (NEWTON_PAIR || j < nlocal) { + fj[0] -= delx*fpair; + fj[1] -= dely*fpair; + tj += fpair*(dxj*dely - dyj*delx); + } + } + + f[i][0] += fi[0]; + f[i][1] += fi[1]; + f[j][0] += fj[0]; + f[j][1] += fj[1]; + torque[i][2] += ti; + torque[j][2] += tj; + + // particle/line interaction = Nx1 particles + // convert line into Np particles based on sigma and line length + + } else if (line[j] >= 0) { + npj = dnum[j]; + jfirst = dfirst[j]; + + fi[0] = fi[1] = fj[0] = fj[1] = ti = tj = 0.0; + + for (nj = 0; nj < npj; nj++) { + dxj = discrete[jfirst+nj].dx; + dyj = discrete[jfirst+nj].dy; + + xi[0] = x[i][0]; + xi[1] = x[i][1]; + xj[0] = x[j][0] + dxj; + xj[1] = x[j][1] + dyj; + + delx = xi[0] - xj[0]; + dely = xi[1] - xj[1]; + rsq = delx*delx + dely*dely; + + sig = 0.5 * (sigma[itype][itype]+discrete[jfirst+nj].sigma); + sig3 = sig*sig*sig; + term2 = 24.0*epsilon[itype][jtype] * sig3*sig3; + term1 = 2.0 * term2 * sig3*sig3; + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (term1*r6inv - term2); + fpair = forcelj*r2inv; + + if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0); + + fi[0] += delx*fpair; + fi[1] += dely*fpair; + ti += fpair*(dxi*dely - dyi*delx); + + if (NEWTON_PAIR || j < nlocal) { + fj[0] -= delx*fpair; + fj[1] -= dely*fpair; + tj -= fpair*(dxj*dely - dyj*delx); + } + } + + f[i][0] += fi[0]; + f[i][1] += fi[1]; + f[j][0] += fj[0]; + f[j][1] += fj[1]; + torque[i][2] += ti; + torque[j][2] += tj; + + // particle/particle interaction = 1x1 particles + + } else { + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + fpair = forcelj*r2inv; + + if (EFLAG) + evdwl += r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + } + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLineLJOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLineLJ::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_line_lj_omp.h b/src/USER-OMP/pair_line_lj_omp.h new file mode 100644 index 0000000000..93b38afaf0 --- /dev/null +++ b/src/USER-OMP/pair_line_lj_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(line/lj/omp,PairLineLJOMP) + +#else + +#ifndef LMP_PAIR_LINE_LJ_OMP_H +#define LMP_PAIR_LINE_LJ_OMP_H + +#include "pair_line_lj.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLineLJOMP : public PairLineLJ, public ThrOMP { + + public: + PairLineLJOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_charmm_coul_pppm_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_pppm_omp.cpp new file mode 100644 index 0000000000..3b12668549 --- /dev/null +++ b/src/USER-OMP/pair_lj_charmm_coul_pppm_omp.cpp @@ -0,0 +1,260 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_charmm_coul_pppm_omp.h" +#include "pppm_proxy.h" +#include "atom.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "update.h" + +#include + +using namespace LAMMPS_NS; + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + +/* ---------------------------------------------------------------------- */ + +PairLJCharmmCoulPPPMOMP::PairLJCharmmCoulPPPMOMP(LAMMPS *lmp) : + PairLJCharmmCoulLong(lmp), ThrOMP(lmp, THR_PAIR|THR_PROXY) +{ + respa_enable = 0; + nproxy=1; + + kspace = NULL; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCharmmCoulPPPMOMP::init_style() +{ + if (comm->nthreads < 2) + error->all(FLERR,"need at least two threads per MPI task for this pair style"); + + if (strcmp(force->kspace_style,"pppm/proxy") != 0) + error->all(FLERR,"kspace style pppm/proxy is required with this pair style"); + + kspace = static_cast(force->kspace); + + PairLJCharmmCoulLong::init_style(); +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCharmmCoulPPPMOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads, nproxy); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + // thread id 0 runs pppm, the rest the pair style + if (tid < nproxy) { + if (update->setupflag) kspace->setup_proxy(); + kspace->compute_proxy(eflag,vflag); + } else { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + } + + sync_threads(); + reduce_thr(this, eflag, vflag, thr, nproxy); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJCharmmCoulPPPMOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype,itable; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double fraction,table; + double r,rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + double grij,expm2,prefactor,t,erfc; + double philj,switch1,switch2; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + + if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) { + r = sqrt(rsq); + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + prefactor = qqrd2e * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + } else { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; + itable >>= ncoulshiftbits; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; + table = ftable[itable] + fraction*dftable[itable]; + forcecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + table = ctable[itable] + fraction*dctable[itable]; + prefactor = qtmp*q[j] * table; + forcecoul -= (1.0-factor_coul)*prefactor; + } + } + } else forcecoul = 0.0; + + if (rsq < cut_ljsq) { + r6inv = r2inv*r2inv*r2inv; + jtype = type[j]; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + if (rsq > cut_lj_innersq) { + switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; + switch2 = 12.0*rsq * (cut_ljsq-rsq) * + (rsq-cut_lj_innersq) / denom_lj; + philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); + forcelj = forcelj*switch1 + philj*switch2; + } + forcelj *= factor_lj; + } else forcelj = 0.0; + + fpair = (forcecoul + forcelj) * r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) + ecoul = prefactor*erfc; + else { + table = etable[itable] + fraction*detable[itable]; + ecoul = qtmp*q[j] * table; + } + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + + if (rsq < cut_ljsq) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); + if (rsq > cut_lj_innersq) { + switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) * + (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj; + evdwl *= switch1; + } + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJCharmmCoulPPPMOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJCharmmCoulLong::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_charmm_coul_pppm_omp.h b/src/USER-OMP/pair_lj_charmm_coul_pppm_omp.h new file mode 100644 index 0000000000..1faee2c479 --- /dev/null +++ b/src/USER-OMP/pair_lj_charmm_coul_pppm_omp.h @@ -0,0 +1,54 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/charmm/coul/pppm/omp,PairLJCharmmCoulPPPMOMP) + +#else + +#ifndef LMP_PAIR_LJ_CHARMM_COUL_PPPM_OMP_H +#define LMP_PAIR_LJ_CHARMM_COUL_PPPM_OMP_H + +#include "pair_lj_charmm_coul_long.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJCharmmCoulPPPMOMP : public PairLJCharmmCoulLong, public ThrOMP { + + public: + PairLJCharmmCoulPPPMOMP(class LAMMPS *); + virtual ~PairLJCharmmCoulPPPMOMP() {}; + + virtual void init_style(); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); + + class PPPMProxy *kspace; + int nproxy; +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_coul_pppm_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_pppm_omp.cpp new file mode 100644 index 0000000000..c048db9d4c --- /dev/null +++ b/src/USER-OMP/pair_lj_cut_coul_pppm_omp.cpp @@ -0,0 +1,245 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_cut_coul_pppm_omp.h" +#include "pppm_proxy.h" +#include "atom.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "update.h" + +#include + +using namespace LAMMPS_NS; + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + +/* ---------------------------------------------------------------------- */ + +PairLJCutCoulPPPMOMP::PairLJCutCoulPPPMOMP(LAMMPS *lmp) : + PairLJCutCoulLong(lmp), ThrOMP(lmp, THR_PAIR|THR_PROXY) +{ + respa_enable = 0; + nproxy=1; + + kspace = NULL; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCutCoulPPPMOMP::init_style() +{ + if (comm->nthreads < 2) + error->all(FLERR,"need at least two threads per MPI task for this pair style"); + + if (strcmp(force->kspace_style,"pppm/proxy") != 0) + error->all(FLERR,"kspace style pppm/proxy is required with this pair style"); + + kspace = static_cast(force->kspace); + + PairLJCutCoulLong::init_style(); +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCutCoulPPPMOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads, nproxy); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + // thread id 0 runs pppm, the rest the pair style + if (tid < nproxy) { + if (update->setupflag) kspace->setup_proxy(); + kspace->compute_proxy(eflag,vflag); + } else { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + } + + sync_threads(); + reduce_thr(this, eflag, vflag, thr, nproxy); + } // end of omp parallel region +} +/* ---------------------------------------------------------------------- */ + +template +void PairLJCutCoulPPPMOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype,itable; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double fraction,table; + double r,rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + double grij,expm2,prefactor,t,erfc; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + + if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) { + r = sqrt(rsq); + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + prefactor = qqrd2e * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + } else { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; + itable >>= ncoulshiftbits; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; + table = ftable[itable] + fraction*dftable[itable]; + forcecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + table = ctable[itable] + fraction*dctable[itable]; + prefactor = qtmp*q[j] * table; + forcecoul -= (1.0-factor_coul)*prefactor; + } + } + } else forcecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + forcelj *= factor_lj; + } else forcelj = 0.0; + + fpair = (forcecoul + forcelj) * r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) + ecoul = prefactor*erfc; + else { + table = etable[itable] + fraction*detable[itable]; + ecoul = qtmp*q[j] * table; + } + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJCutCoulPPPMOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJCutCoulLong::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_cut_coul_pppm_omp.h b/src/USER-OMP/pair_lj_cut_coul_pppm_omp.h new file mode 100644 index 0000000000..010b7818b2 --- /dev/null +++ b/src/USER-OMP/pair_lj_cut_coul_pppm_omp.h @@ -0,0 +1,54 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/cut/coul/pppm/omp,PairLJCutCoulPPPMOMP) + +#else + +#ifndef LMP_PAIR_LJ_CUT_COUL_PPPM_OMP_H +#define LMP_PAIR_LJ_CUT_COUL_PPPM_OMP_H + +#include "pair_lj_cut_coul_long.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJCutCoulPPPMOMP : public PairLJCutCoulLong, public ThrOMP { + + public: + PairLJCutCoulPPPMOMP(class LAMMPS *); + virtual ~PairLJCutCoulPPPMOMP() {}; + + virtual void init_style(); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); + + class PPPMProxy *kspace; + int nproxy; +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_coul_pppm_tip4p_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_pppm_tip4p_omp.cpp new file mode 100644 index 0000000000..add0e295e2 --- /dev/null +++ b/src/USER-OMP/pair_lj_cut_coul_pppm_tip4p_omp.cpp @@ -0,0 +1,487 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_cut_coul_pppm_tip4p_omp.h" +#include "pppm_proxy.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "error.h" +#include "force.h" +#include "memory.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "update.h" + +#include + +using namespace LAMMPS_NS; + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + +/* ---------------------------------------------------------------------- */ + +PairLJCutCoulPPPMTIP4POMP::PairLJCutCoulPPPMTIP4POMP(LAMMPS *lmp) : + PairLJCutCoulLongTIP4P(lmp), ThrOMP(lmp, THR_PAIR|THR_PROXY) +{ + respa_enable = 0; + nproxy=1; + + kspace = NULL; + + // for caching m-shift corrected positions + maxmpos = 0; + h1idx = h2idx = NULL; + mpos = NULL; +} + +/* ---------------------------------------------------------------------- */ + +PairLJCutCoulPPPMTIP4POMP::~PairLJCutCoulPPPMTIP4POMP() +{ + memory->destroy(h1idx); + memory->destroy(h2idx); + memory->destroy(mpos); +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCutCoulPPPMTIP4POMP::init_style() +{ + if (comm->nthreads < 2) + error->all(FLERR,"need at least two threads per MPI task for this pair style"); + + if (strcmp(force->kspace_style,"pppm/tip4p/proxy") != 0) + error->all(FLERR,"kspace style pppm/tip4p/proxy is required with this pair style"); + + kspace = static_cast(force->kspace); + + PairLJCutCoulLongTIP4P::init_style(); +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCutCoulPPPMTIP4POMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nlocal = atom->nlocal; + const int nall = nlocal + atom->nghost; + + // reallocate per-atom arrays, if necessary + if (nall > maxmpos) { + maxmpos = nall; + memory->grow(mpos,maxmpos,3,"pair:mpos"); + memory->grow(h1idx,maxmpos,"pair:h1idx"); + memory->grow(h2idx,maxmpos,"pair:h2idx"); + } + + // cache corrected M positions in mpos[] + const double * const * const x = atom->x; + const int * const type = atom->type; + for (int i = 0; i < nlocal; i++) { + if (type[i] == typeO) { + find_M(i,h1idx[i],h2idx[i],mpos[i]); + } else { + mpos[i][0] = x[i][0]; + mpos[i][1] = x[i][1]; + mpos[i][2] = x[i][2]; + } + } + for (int i = nlocal; i < nall; i++) { + if (type[i] == typeO) { + find_M_permissive(i,h1idx[i],h2idx[i],mpos[i]); + } else { + mpos[i][0] = x[i][0]; + mpos[i][1] = x[i][1]; + mpos[i][2] = x[i][2]; + } + } + + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads, nproxy); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + // thread id 0 runs pppm, the rest the pair style + if (tid < nproxy) { + if (update->setupflag) kspace->setup_proxy(); + kspace->compute_proxy(eflag,vflag); + } else { + if (evflag) { + if (eflag) { + if (vflag) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (vflag) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + eval<0,0,0>(ifrom, ito, thr); + } + } + + sync_threads(); + reduce_thr(this, eflag, vflag, thr, nproxy); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJCutCoulPPPMTIP4POMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype,itable; + int n,vlist[6]; + int iH1,iH2,jH1,jH2; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul; + double fraction,table; + double delxOM,delyOM,delzOM; + double r,rsq,r2inv,r6inv,forcecoul,forcelj,cforce; + double factor_coul,factor_lj; + double grij,expm2,prefactor,t,erfc,ddotf; + double v[6],xH1[3],xH2[3]; + double fdx,fdy,fdz,f1x,f1y,f1z,fOx,fOy,fOz,fHx,fHy,fHz; + double *x1,*x2; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + x1 = mpos[i]; + iH1 = h1idx[i]; + iH2 = h2idx[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + + r2inv = 1.0/rsq; + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + forcelj *= factor_lj * r2inv; + + fxtmp += delx*forcelj; + fytmp += dely*forcelj; + fztmp += delz*forcelj; + f[j][0] -= delx*forcelj; + f[j][1] -= dely*forcelj; + f[j][2] -= delz*forcelj; + + if (EFLAG) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal, /* newton_pair = */ 1, + evdwl,0.0,forcelj,delx,dely,delz,thr); + } + + // adjust rsq and delxyz for off-site O charge(s) + + if (itype == typeO || jtype == typeO) { + x2 = mpos[j]; + jH1 = h1idx[j]; + jH2 = h2idx[j]; + if (jtype == typeO && ( jH1 < 0 || jH2 < 0 )) + error->one(FLERR,"TIP4P hydrogen is missing"); + delx = x1[0] - x2[0]; + dely = x1[1] - x2[1]; + delz = x1[2] - x2[2]; + rsq = delx*delx + dely*dely + delz*delz; + } + + // test current rsq against cutoff and compute Coulombic force + + if (rsq < cut_coulsq) { + r2inv = 1 / rsq; + if (!ncoultablebits || rsq <= tabinnersq) { + r = sqrt(rsq); + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + prefactor = qqrd2e * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) { + forcecoul -= (1.0-factor_coul)*prefactor; + } + } else { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; + itable >>= ncoulshiftbits; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; + table = ftable[itable] + fraction*dftable[itable]; + forcecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + table = ctable[itable] + fraction*dctable[itable]; + prefactor = qtmp*q[j] * table; + forcecoul -= (1.0-factor_coul)*prefactor; + } + } + + cforce = forcecoul * r2inv; + + // if i,j are not O atoms, force is applied directly + // if i or j are O atoms, force is on fictitious atom & partitioned + // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999) + // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f + // preserves total force and torque on water molecule + // virial = sum(r x F) where each water's atoms are near xi and xj + // vlist stores 2,4,6 atoms whose forces contribute to virial + + n = 0; + + if (itype != typeO) { + fxtmp += delx * cforce; + fytmp += dely * cforce; + fztmp += delz * cforce; + + if (VFLAG) { + v[0] = x[i][0] * delx * cforce; + v[1] = x[i][1] * dely * cforce; + v[2] = x[i][2] * delz * cforce; + v[3] = x[i][0] * dely * cforce; + v[4] = x[i][0] * delz * cforce; + v[5] = x[i][1] * delz * cforce; + vlist[n++] = i; + } + + } else { + + fdx = delx*cforce; + fdy = dely*cforce; + fdz = delz*cforce; + + delxOM = x[i][0] - x1[0]; + delyOM = x[i][1] - x1[1]; + delzOM = x[i][2] - x1[2]; + + ddotf = (delxOM * fdx + delyOM * fdy + delzOM * fdz) / + (qdist*qdist); + + f1x = alpha * (fdx - ddotf * delxOM); + f1y = alpha * (fdy - ddotf * delyOM); + f1z = alpha * (fdz - ddotf * delzOM); + + fOx = fdx - f1x; + fOy = fdy - f1y; + fOz = fdz - f1z; + + fHx = 0.5 * f1x; + fHy = 0.5 * f1y; + fHz = 0.5 * f1z; + + fxtmp += fOx; + fytmp += fOy; + fztmp += fOz; + + f[iH1][0] += fHx; + f[iH1][1] += fHy; + f[iH1][2] += fHz; + + f[iH2][0] += fHx; + f[iH2][1] += fHy; + f[iH2][2] += fHz; + + if (VFLAG) { + domain->closest_image(x[i],x[iH1],xH1); + domain->closest_image(x[i],x[iH2],xH2); + + v[0] = x[i][0]*fOx + xH1[0]*fHx + xH2[0]*fHx; + v[1] = x[i][1]*fOy + xH1[1]*fHy + xH2[1]*fHy; + v[2] = x[i][2]*fOz + xH1[2]*fHz + xH2[2]*fHz; + v[3] = x[i][0]*fOy + xH1[0]*fHy + xH2[0]*fHy; + v[4] = x[i][0]*fOz + xH1[0]*fHz + xH2[0]*fHz; + v[5] = x[i][1]*fOz + xH1[1]*fHz + xH2[1]*fHz; + + vlist[n++] = i; + vlist[n++] = iH1; + vlist[n++] = iH2; + } + } + + if (jtype != typeO) { + f[j][0] -= delx * cforce; + f[j][1] -= dely * cforce; + f[j][2] -= delz * cforce; + + if (VFLAG) { + v[0] -= x[j][0] * delx * cforce; + v[1] -= x[j][1] * dely * cforce; + v[2] -= x[j][2] * delz * cforce; + v[3] -= x[j][0] * dely * cforce; + v[4] -= x[j][0] * delz * cforce; + v[5] -= x[j][1] * delz * cforce; + vlist[n++] = j; + } + + } else { + + fdx = -delx*cforce; + fdy = -dely*cforce; + fdz = -delz*cforce; + + delxOM = x[j][0] - x2[0]; + delyOM = x[j][1] - x2[1]; + delzOM = x[j][2] - x2[2]; + + ddotf = (delxOM * fdx + delyOM * fdy + delzOM * fdz) / + (qdist*qdist); + + f1x = alpha * (fdx - ddotf * delxOM); + f1y = alpha * (fdy - ddotf * delyOM); + f1z = alpha * (fdz - ddotf * delzOM); + + fOx = fdx - f1x; + fOy = fdy - f1y; + fOz = fdz - f1z; + + fHx = 0.5 * f1x; + fHy = 0.5 * f1y; + fHz = 0.5 * f1z; + + f[j][0] += fOx; + f[j][1] += fOy; + f[j][2] += fOz; + + f[jH1][0] += fHx; + f[jH1][1] += fHy; + f[jH1][2] += fHz; + + f[jH2][0] += fHx; + f[jH2][1] += fHy; + f[jH2][2] += fHz; + + if (VFLAG) { + domain->closest_image(x[j],x[jH1],xH1); + domain->closest_image(x[j],x[jH2],xH2); + + v[0] += x[j][0]*fOx + xH1[0]*fHx + xH2[0]*fHx; + v[1] += x[j][1]*fOy + xH1[1]*fHy + xH2[1]*fHy; + v[2] += x[j][2]*fOz + xH1[2]*fHz + xH2[2]*fHz; + v[3] += x[j][0]*fOy + xH1[0]*fHy + xH2[0]*fHy; + v[4] += x[j][0]*fOz + xH1[0]*fHz + xH2[0]*fHz; + v[5] += x[j][1]*fOz + xH1[1]*fHz + xH2[1]*fHz; + + vlist[n++] = j; + vlist[n++] = jH1; + vlist[n++] = jH2; + } + } + + if (EFLAG) { + if (!ncoultablebits || rsq <= tabinnersq) + ecoul = prefactor*erfc; + else { + table = etable[itable] + fraction*detable[itable]; + ecoul = qtmp*q[j] * table; + } + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + + if (EVFLAG) ev_tally_list_thr(this,n,vlist,ecoul,v,thr); + } + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCutCoulPPPMTIP4POMP::find_M_permissive(int i, int &iH1, int &iH2, double *xM) +{ + // test that O is correctly bonded to 2 succesive H atoms + + iH1 = atom->map(atom->tag[i] + 1); + iH2 = atom->map(atom->tag[i] + 2); + + if (iH1 == -1 || iH2 == -1) + return; + else + find_M(i,iH1,iH2,xM); +} + +/* ---------------------------------------------------------------------- */ + +double PairLJCutCoulPPPMTIP4POMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJCutCoulLongTIP4P::memory_usage(); + bytes += 2 * maxmpos * sizeof(int); + bytes += 3 * maxmpos * sizeof(double); + bytes += maxmpos * sizeof(double *); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_cut_coul_pppm_tip4p_omp.h b/src/USER-OMP/pair_lj_cut_coul_pppm_tip4p_omp.h new file mode 100644 index 0000000000..71da06eb9b --- /dev/null +++ b/src/USER-OMP/pair_lj_cut_coul_pppm_tip4p_omp.h @@ -0,0 +1,61 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/cut/coul/pppm/tip4p/omp,PairLJCutCoulPPPMTIP4POMP) + +#else + +#ifndef LMP_PAIR_LJ_CUT_COUL_PPPM_TIP4P_OMP_H +#define LMP_PAIR_LJ_CUT_COUL_PPPM_TIP4P_OMP_H + +#include "pair_lj_cut_coul_long_tip4p.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJCutCoulPPPMTIP4POMP : public PairLJCutCoulLongTIP4P, public ThrOMP { + + public: + PairLJCutCoulPPPMTIP4POMP(class LAMMPS *); + virtual ~PairLJCutCoulPPPMTIP4POMP(); + + virtual void init_style(); + + virtual void compute(int, int); + virtual double memory_usage(); + + protected: + // this is to cache m-shift corrected positions. + int maxmpos; // size of the following arrays + int *h1idx, *h2idx; // local index of hydrogen atoms + double **mpos; // coordinates corrected for m-shift. + void find_M_permissive(int, int &, int &, double *); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); + + class PPPMProxy *kspace; + int nproxy; +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp b/src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp new file mode 100644 index 0000000000..0e120a8639 --- /dev/null +++ b/src/USER-OMP/pair_lj_sdk_coul_long_omp.cpp @@ -0,0 +1,248 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_sdk_coul_long_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "lj_sdk_common.h" + +using namespace LAMMPS_NS; +using namespace LJSDKParms; + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + +/* ---------------------------------------------------------------------- */ + +PairLJSDKCoulLongOMP::PairLJSDKCoulLongOMP(LAMMPS *lmp) : + PairLJSDKCoulLong(lmp), ThrOMP(lmp, THR_PAIR) +{ + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJSDKCoulLongOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval_thr<1,1,1>(ifrom, ito, thr); + else eval_thr<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval_thr<1,0,1>(ifrom, ito, thr); + else eval_thr<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval_thr<0,0,1>(ifrom, ito, thr); + else eval_thr<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJSDKCoulLongOMP::eval_thr(int iifrom, int iito, ThrData * const thr) +{ + int i,ii,j,jj,jtype,itable; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double fraction,table; + double r,rsq,r2inv,forcecoul,forcelj,factor_coul,factor_lj; + double grij,expm2,prefactor,t,erfc; + + evdwl = ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp; + + const int * const ilist = list->ilist; + const int * const numneigh = list->numneigh; + const int * const * const firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + fxtmp=fytmp=fztmp=0.0; + + const int itype = type[i]; + const int * const jlist = firstneigh[i]; + const int jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + const int ljt = lj_type[itype][jtype]; + + if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) { + r = sqrt(rsq); + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + prefactor = qqrd2e * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + } else { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; + itable >>= ncoulshiftbits; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; + table = ftable[itable] + fraction*dftable[itable]; + forcecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + table = ctable[itable] + fraction*dctable[itable]; + prefactor = qtmp*q[j] * table; + forcecoul -= (1.0-factor_coul)*prefactor; + } + } + } else { + forcecoul = 0.0; + ecoul = 0.0; + } + + if (rsq < cut_ljsq[itype][jtype]) { + + if (ljt == LJ12_4) { + const double r4inv=r2inv*r2inv; + forcelj = r4inv*(lj1[itype][jtype]*r4inv*r4inv + - lj2[itype][jtype]); + + if (EFLAG) + evdwl = r4inv*(lj3[itype][jtype]*r4inv*r4inv + - lj4[itype][jtype]) - offset[itype][jtype]; + + } else if (ljt == LJ9_6) { + const double r3inv = r2inv*sqrt(r2inv); + const double r6inv = r3inv*r3inv; + forcelj = r6inv*(lj1[itype][jtype]*r3inv + - lj2[itype][jtype]); + if (EFLAG) + evdwl = r6inv*(lj3[itype][jtype]*r3inv + - lj4[itype][jtype]) - offset[itype][jtype]; + + } else if (ljt == LJ12_6) { + const double r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv*(lj1[itype][jtype]*r6inv + - lj2[itype][jtype]); + if (EFLAG) + evdwl = r6inv*(lj3[itype][jtype]*r6inv + - lj4[itype][jtype]) - offset[itype][jtype]; + } + } else { + forcelj=0.0; + evdwl = 0.0; + } + + fpair = (forcecoul + factor_lj*forcelj) * r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) + ecoul = prefactor*erfc; + else { + table = etable[itable] + fraction*detable[itable]; + ecoul = qtmp*q[j] * table; + } + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJSDKCoulLongOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJSDKCoulLong::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_sdk_coul_long_omp.h b/src/USER-OMP/pair_lj_sdk_coul_long_omp.h new file mode 100644 index 0000000000..01912535b1 --- /dev/null +++ b/src/USER-OMP/pair_lj_sdk_coul_long_omp.h @@ -0,0 +1,49 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/sdk/coul/long/omp,PairLJSDKCoulLongOMP) +PairStyle(cg/cmm/coul/long/omp,PairLJSDKCoulLongOMP) + +#else + +#ifndef LMP_PAIR_LJ_SDK_COUL_LONG_OMP_H +#define LMP_PAIR_LJ_SDK_COUL_LONG_OMP_H + +#include "pair_lj_sdk_coul_long.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJSDKCoulLongOMP : public PairLJSDKCoulLong, public ThrOMP { + + public: + PairLJSDKCoulLongOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval_thr(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_sdk_omp.cpp b/src/USER-OMP/pair_lj_sdk_omp.cpp new file mode 100644 index 0000000000..bc5faafad5 --- /dev/null +++ b/src/USER-OMP/pair_lj_sdk_omp.cpp @@ -0,0 +1,184 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) + This style is a simplified re-implementation of the CG/CMM pair style +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_sdk_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "lj_sdk_common.h" + +using namespace LAMMPS_NS; +using namespace LJSDKParms; + +/* ---------------------------------------------------------------------- */ + +PairLJSDKOMP::PairLJSDKOMP(LAMMPS *lmp) : + PairLJSDK(lmp), ThrOMP(lmp, THR_PAIR) +{ + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJSDKOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval_thr<1,1,1>(ifrom, ito, thr); + else eval_thr<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval_thr<1,0,1>(ifrom, ito, thr); + else eval_thr<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval_thr<0,0,1>(ifrom, ito, thr); + else eval_thr<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJSDKOMP::eval_thr(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r2inv,forcelj,factor_lj; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_lj = force->special_lj; + double fxtmp,fytmp,fztmp; + + const int * const ilist = list->ilist; + const int * const numneigh = list->numneigh; + const int * const * const firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + fxtmp=fytmp=fztmp=0.0; + + const int itype = type[i]; + const int * const jlist = firstneigh[i]; + const int jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + const int ljt = lj_type[itype][jtype]; + + if (ljt == LJ12_4) { + const double r4inv=r2inv*r2inv; + forcelj = r4inv*(lj1[itype][jtype]*r4inv*r4inv + - lj2[itype][jtype]); + + if (EFLAG) + evdwl = r4inv*(lj3[itype][jtype]*r4inv*r4inv + - lj4[itype][jtype]) - offset[itype][jtype]; + + } else if (ljt == LJ9_6) { + const double r3inv = r2inv*sqrt(r2inv); + const double r6inv = r3inv*r3inv; + forcelj = r6inv*(lj1[itype][jtype]*r3inv + - lj2[itype][jtype]); + if (EFLAG) + evdwl = r6inv*(lj3[itype][jtype]*r3inv + - lj4[itype][jtype]) - offset[itype][jtype]; + + } else if (ljt == LJ12_6) { + const double r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv*(lj1[itype][jtype]*r6inv + - lj2[itype][jtype]); + if (EFLAG) + evdwl = r6inv*(lj3[itype][jtype]*r6inv + - lj4[itype][jtype]) - offset[itype][jtype]; + } else continue; + + fpair = factor_lj*forcelj*r2inv; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) evdwl *= factor_lj; + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJSDKOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJSDK::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_sdk_omp.h b/src/USER-OMP/pair_lj_sdk_omp.h new file mode 100644 index 0000000000..f2c9e5ff1f --- /dev/null +++ b/src/USER-OMP/pair_lj_sdk_omp.h @@ -0,0 +1,49 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/sdk/omp,PairLJSDKOMP) +PairStyle(cg/cmm/omp,PairLJSDKOMP) + +#else + +#ifndef LMP_PAIR_LJ_SDK_OMP_H +#define LMP_PAIR_LJ_SDK_OMP_H + +#include "pair_lj_sdk.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJSDKOMP : public PairLJSDK, public ThrOMP { + + public: + PairLJSDKOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval_thr(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_rebo_omp.cpp b/src/USER-OMP/pair_rebo_omp.cpp new file mode 100644 index 0000000000..70b5c4e8ae --- /dev/null +++ b/src/USER-OMP/pair_rebo_omp.cpp @@ -0,0 +1,33 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "pair_rebo_omp.h" +#include "error.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairREBOOMP::PairREBOOMP(LAMMPS *lmp) : PairAIREBOOMP(lmp) {} + +/* ---------------------------------------------------------------------- + global settings +------------------------------------------------------------------------- */ + +void PairREBOOMP::settings(int narg, char **arg) +{ + if (narg != 0) error->all(FLERR,"Illegal pair_style command"); + + cutlj = 0.0; + ljflag = torflag = 0; +} diff --git a/src/USER-OMP/pair_rebo_omp.h b/src/USER-OMP/pair_rebo_omp.h new file mode 100644 index 0000000000..4606e56ae1 --- /dev/null +++ b/src/USER-OMP/pair_rebo_omp.h @@ -0,0 +1,36 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(rebo/omp,PairREBOOMP) + +#else + +#ifndef LMP_PAIR_REBO_OMP_H +#define LMP_PAIR_REBO_OMP_H + +#include "pair_airebo_omp.h" + +namespace LAMMPS_NS { + +class PairREBOOMP : public PairAIREBOOMP { + public: + PairREBOOMP(class LAMMPS *); + virtual void settings(int, char **); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_tersoff_table_omp.cpp b/src/USER-OMP/pair_tersoff_table_omp.cpp new file mode 100644 index 0000000000..6c9a680f09 --- /dev/null +++ b/src/USER-OMP/pair_tersoff_table_omp.cpp @@ -0,0 +1,496 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_tersoff_table_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairTersoffTableOMP::PairTersoffTableOMP(LAMMPS *lmp) : + PairTersoffTable(lmp), ThrOMP(lmp, THR_PAIR) +{ + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairTersoffTableOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = vflag_atom = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) + if (vflag_atom) eval<1,1>(ifrom, ito, thr); + else eval<1,0>(ifrom, ito, thr); + else eval<0,0>(ifrom, ito, thr); + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairTersoffTableOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,k,ii,inum,jnum; + int itype,jtype,ktype,ijparam,ikparam,ijkparam; + double xtmp,ytmp,ztmp; + double fxtmp,fytmp,fztmp; + int *ilist,*jlist,*numneigh,**firstneigh; + + int interpolIDX; + double r_ik_x, r_ik_y, r_ik_z; + double directorCos_ij_x, directorCos_ij_y, directorCos_ij_z, directorCos_ik_x, directorCos_ik_y, directorCos_ik_z; + double invR_ij, invR_ik, cosTeta; + double repulsivePotential, attractivePotential; + double exponentRepulsivePotential, exponentAttractivePotential,interpolTMP,interpolDeltaX,interpolY1; + double interpolY2, cutoffFunctionIJ, attractiveExponential, repulsiveExponential, cutoffFunctionDerivedIJ,zeta; + double gtetaFunctionIJK,gtetaFunctionDerivedIJK,cutoffFunctionIK; + double cutoffFunctionDerivedIK,factor_force3_ij,factor_1_force3_ik; + double factor_2_force3_ik,betaZetaPowerIJK,betaZetaPowerDerivedIJK,factor_force_tot; + double factor_force_ij; + double gtetaFunctionDerived_temp,gtetaFunction_temp; + + double evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const int * const type = atom->type; + const int nlocal = atom->nlocal; + + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over full neighbor list of my atoms + for (ii = iifrom; ii < iito; ii++) { + + i = ilist[ii]; + itype = map[type[i]]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + fxtmp = fytmp = fztmp = 0.0; + + jlist = firstneigh[i]; + jnum = numneigh[i]; + + // Pre-calculate gteta and cutoff function + for (int neighbor_j = 0; neighbor_j < jnum; neighbor_j++) { + + double dr_ij[3], r_ij; + + j = jlist[neighbor_j]; + j &= NEIGHMASK; + + dr_ij[0] = xtmp - x[j][0]; + dr_ij[1] = ytmp - x[j][1]; + dr_ij[2] = ztmp - x[j][2]; + r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2]; + + jtype = map[type[j]]; + ijparam = elem2param[itype][jtype][jtype]; + + if (r_ij > params[ijparam].cutsq) continue; + + r_ij = sqrt(r_ij); + + invR_ij = 1.0 / r_ij; + + directorCos_ij_x = invR_ij * dr_ij[0]; + directorCos_ij_y = invR_ij * dr_ij[1]; + directorCos_ij_z = invR_ij * dr_ij[2]; + + // preCutoffFunction + interpolDeltaX = r_ij - GRIDSTART; + interpolTMP = (interpolDeltaX * GRIDDENSITY_FCUTOFF); + interpolIDX = (int) interpolTMP; + interpolY1 = cutoffFunction[itype][jtype][interpolIDX]; + interpolY2 = cutoffFunction[itype][jtype][interpolIDX+1]; + preCutoffFunction[neighbor_j] = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX); + // preCutoffFunctionDerived + interpolY1 = cutoffFunctionDerived[itype][jtype][interpolIDX]; + interpolY2 = cutoffFunctionDerived[itype][jtype][interpolIDX+1]; + preCutoffFunctionDerived[neighbor_j] = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX); + + + for (int neighbor_k = neighbor_j + 1; neighbor_k < jnum; neighbor_k++) { + double dr_ik[3], r_ik; + + k = jlist[neighbor_k]; + k &= NEIGHMASK; + ktype = map[type[k]]; + ikparam = elem2param[itype][ktype][ktype]; + ijkparam = elem2param[itype][jtype][ktype]; + + dr_ik[0] = xtmp -x[k][0]; + dr_ik[1] = ytmp -x[k][1]; + dr_ik[2] = ztmp -x[k][2]; + r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2]; + + if (r_ik > params[ikparam].cutsq) continue; + + r_ik = sqrt(r_ik); + + invR_ik = 1.0 / r_ik; + + directorCos_ik_x = invR_ik * dr_ik[0]; + directorCos_ik_y = invR_ik * dr_ik[1]; + directorCos_ik_z = invR_ik * dr_ik[2]; + + cosTeta = directorCos_ij_x * directorCos_ik_x + directorCos_ij_y * directorCos_ik_y + directorCos_ij_z * directorCos_ik_z; + + // preGtetaFunction + interpolDeltaX=cosTeta+1.0; + interpolTMP = (interpolDeltaX * GRIDDENSITY_GTETA); + interpolIDX = (int) interpolTMP; + interpolY1 = gtetaFunction[itype][interpolIDX]; + interpolY2 = gtetaFunction[itype][interpolIDX+1]; + gtetaFunction_temp = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX); + // preGtetaFunctionDerived + interpolY1 = gtetaFunctionDerived[itype][interpolIDX]; + interpolY2 = gtetaFunctionDerived[itype][interpolIDX+1]; + gtetaFunctionDerived_temp = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX); + + preGtetaFunction[neighbor_j][neighbor_k]=params[ijkparam].gamma*gtetaFunction_temp; + preGtetaFunctionDerived[neighbor_j][neighbor_k]=params[ijkparam].gamma*gtetaFunctionDerived_temp; + preGtetaFunction[neighbor_k][neighbor_j]=params[ijkparam].gamma*gtetaFunction_temp; + preGtetaFunctionDerived[neighbor_k][neighbor_j]=params[ijkparam].gamma*gtetaFunctionDerived_temp; + + } // loop on K + + } // loop on J + + + // loop over neighbours of atom i + for (int neighbor_j = 0; neighbor_j < jnum; neighbor_j++) { + + double dr_ij[3], r_ij, f_ij[3]; + + j = jlist[neighbor_j]; + j &= NEIGHMASK; + + dr_ij[0] = xtmp - x[j][0]; + dr_ij[1] = ytmp - x[j][1]; + dr_ij[2] = ztmp - x[j][2]; + r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2]; + + jtype = map[type[j]]; + ijparam = elem2param[itype][jtype][jtype]; + + if (r_ij > params[ijparam].cutsq) continue; + + r_ij = sqrt(r_ij); + invR_ij = 1.0 / r_ij; + + directorCos_ij_x = invR_ij * dr_ij[0]; + directorCos_ij_y = invR_ij * dr_ij[1]; + directorCos_ij_z = invR_ij * dr_ij[2]; + + exponentRepulsivePotential = params[ijparam].lam1 * r_ij; + exponentAttractivePotential = params[ijparam].lam2 * r_ij; + + // repulsiveExponential + interpolDeltaX = exponentRepulsivePotential - minArgumentExponential; + interpolTMP = (interpolDeltaX * GRIDDENSITY_EXP); + interpolIDX = (int) interpolTMP; + interpolY1 = exponential[interpolIDX]; + interpolY2 = exponential[interpolIDX+1]; + repulsiveExponential = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX); + // attractiveExponential + interpolDeltaX = exponentAttractivePotential - minArgumentExponential; + interpolTMP = (interpolDeltaX * GRIDDENSITY_EXP); + interpolIDX = (int) interpolTMP; + interpolY1 = exponential[interpolIDX]; + interpolY2 = exponential[interpolIDX+1]; + attractiveExponential = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX); + + repulsivePotential = params[ijparam].biga * repulsiveExponential; + attractivePotential = -params[ijparam].bigb * attractiveExponential; + + cutoffFunctionIJ = preCutoffFunction[neighbor_j]; + cutoffFunctionDerivedIJ = preCutoffFunctionDerived[neighbor_j]; + + zeta = 0.0; + + // first loop over neighbours of atom i except j - part 1/2 + for (int neighbor_k = 0; neighbor_k < neighbor_j; neighbor_k++) { + double dr_ik[3], r_ik; + + k = jlist[neighbor_k]; + k &= NEIGHMASK; + ktype = map[type[k]]; + ikparam = elem2param[itype][ktype][ktype]; + ijkparam = elem2param[itype][jtype][ktype]; + + dr_ik[0] = xtmp -x[k][0]; + dr_ik[1] = ytmp -x[k][1]; + dr_ik[2] = ztmp -x[k][2]; + r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2]; + + if (r_ik > params[ikparam].cutsq) continue; + + r_ik = sqrt(r_ik); + + invR_ik = 1.0 / r_ik; + + directorCos_ik_x = invR_ik * r_ik_x; + directorCos_ik_y = invR_ik * r_ik_y; + directorCos_ik_z = invR_ik * r_ik_z; + + gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k]; + + cutoffFunctionIK = preCutoffFunction[neighbor_k]; + + zeta += cutoffFunctionIK * gtetaFunctionIJK; + + } + + // first loop over neighbours of atom i except j - part 2/2 + for (int neighbor_k = neighbor_j+1; neighbor_k < jnum; neighbor_k++) { + double dr_ik[3], r_ik; + + k = jlist[neighbor_k]; + k &= NEIGHMASK; + ktype = map[type[k]]; + ikparam = elem2param[itype][ktype][ktype]; + ijkparam = elem2param[itype][jtype][ktype]; + + dr_ik[0] = xtmp -x[k][0]; + dr_ik[1] = ytmp -x[k][1]; + dr_ik[2] = ztmp -x[k][2]; + r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2]; + + if (r_ik > params[ikparam].cutsq) continue; + + r_ik = sqrt(r_ik); + invR_ik = 1.0 / r_ik; + + directorCos_ik_x = invR_ik * dr_ik[0]; + directorCos_ik_y = invR_ik * dr_ik[1]; + directorCos_ik_z = invR_ik * dr_ik[2]; + + gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k]; + + cutoffFunctionIK = preCutoffFunction[neighbor_k]; + + zeta += cutoffFunctionIK * gtetaFunctionIJK; + } + + // betaZetaPowerIJK + interpolDeltaX= params[ijparam].beta * zeta; + interpolTMP = (interpolDeltaX * GRIDDENSITY_BIJ); + interpolIDX = (int) interpolTMP; + interpolY1 = betaZetaPower[itype][interpolIDX]; + interpolY2 = betaZetaPower[itype][interpolIDX+1]; + betaZetaPowerIJK = (interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX)); + // betaZetaPowerDerivedIJK + interpolY1 = betaZetaPowerDerived[itype][interpolIDX]; + interpolY2 = betaZetaPowerDerived[itype][interpolIDX+1]; + betaZetaPowerDerivedIJK = params[ijparam].beta*(interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX)); + + // Forces and virial + factor_force_ij = 0.5*cutoffFunctionDerivedIJ*(repulsivePotential + attractivePotential * betaZetaPowerIJK)+0.5*cutoffFunctionIJ*(-repulsivePotential*params[ijparam].lam1-betaZetaPowerIJK*attractivePotential*params[ijparam].lam2); + + f_ij[0] = factor_force_ij * directorCos_ij_x; + f_ij[1] = factor_force_ij * directorCos_ij_y; + f_ij[2] = factor_force_ij * directorCos_ij_z; + + f[j][0] += f_ij[0]; + f[j][1] += f_ij[1]; + f[j][2] += f_ij[2]; + + fxtmp -= f_ij[0]; + fytmp -= f_ij[1]; + fztmp -= f_ij[2]; + + // potential energy + evdwl = cutoffFunctionIJ * repulsivePotential + + cutoffFunctionIJ * attractivePotential * betaZetaPowerIJK; + + if (EVFLAG) ev_tally_thr(this,i, j, nlocal, /* newton_pair */ 1, 0.5 * evdwl, 0.0, + -factor_force_ij*invR_ij, dr_ij[0], dr_ij[1], dr_ij[2],thr); + + factor_force_tot= 0.5*cutoffFunctionIJ*attractivePotential*betaZetaPowerDerivedIJK; + + // second loop over neighbours of atom i except j, forces and virial only - part 1/2 + for (int neighbor_k = 0; neighbor_k < neighbor_j; neighbor_k++) { + double dr_ik[3], r_ik, f_ik[3]; + + k = jlist[neighbor_k]; + k &= NEIGHMASK; + ktype = map[type[k]]; + ikparam = elem2param[itype][ktype][ktype]; + ijkparam = elem2param[itype][jtype][ktype]; + + dr_ik[0] = xtmp -x[k][0]; + dr_ik[1] = ytmp -x[k][1]; + dr_ik[2] = ztmp -x[k][2]; + r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2]; + + if (r_ik > params[ikparam].cutsq) continue; + + r_ik = sqrt(r_ik); + invR_ik = 1.0 / r_ik; + + directorCos_ik_x = invR_ik * dr_ik[0]; + directorCos_ik_y = invR_ik * dr_ik[1]; + directorCos_ik_z = invR_ik * dr_ik[2]; + + cosTeta = directorCos_ij_x * directorCos_ik_x + directorCos_ij_y * directorCos_ik_y + + directorCos_ij_z * directorCos_ik_z; + + gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k]; + + gtetaFunctionDerivedIJK = preGtetaFunctionDerived[neighbor_j][neighbor_k]; + + cutoffFunctionIK = preCutoffFunction[neighbor_k]; + + cutoffFunctionDerivedIK = preCutoffFunctionDerived[neighbor_k]; + + factor_force3_ij= cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ij *factor_force_tot; + + f_ij[0] = factor_force3_ij * (directorCos_ij_x*cosTeta - directorCos_ik_x); + f_ij[1] = factor_force3_ij * (directorCos_ij_y*cosTeta - directorCos_ik_y); + f_ij[2] = factor_force3_ij * (directorCos_ij_z*cosTeta - directorCos_ik_z); + + factor_1_force3_ik = (cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ik)*factor_force_tot; + factor_2_force3_ik = -(cutoffFunctionDerivedIK * gtetaFunctionIJK)*factor_force_tot; + + f_ik[0] = factor_1_force3_ik * (directorCos_ik_x*cosTeta - directorCos_ij_x) + + factor_2_force3_ik * directorCos_ik_x; + f_ik[1] = factor_1_force3_ik * (directorCos_ik_y*cosTeta - directorCos_ij_y) + + factor_2_force3_ik * directorCos_ik_y; + f_ik[2] = factor_1_force3_ik * (directorCos_ik_z*cosTeta - directorCos_ij_z) + + factor_2_force3_ik * directorCos_ik_z; + + f[j][0] -= f_ij[0]; + f[j][1] -= f_ij[1]; + f[j][2] -= f_ij[2]; + + f[k][0] -= f_ik[0]; + f[k][1] -= f_ik[1]; + f[k][2] -= f_ik[2]; + + fxtmp += f_ij[0] + f_ik[0]; + fytmp += f_ij[1] + f_ik[1]; + fztmp += f_ij[2] + f_ik[2]; + + if (VFLAG_ATOM) v_tally3_thr(i,j,k,f_ij,f_ik,dr_ij,dr_ik,thr); + } + + // second loop over neighbours of atom i except j, forces and virial only - part 2/2 + for (int neighbor_k = neighbor_j+1; neighbor_k < jnum; neighbor_k++) { + double dr_ik[3], r_ik, f_ik[3]; + + k = jlist[neighbor_k]; + k &= NEIGHMASK; + ktype = map[type[k]]; + ikparam = elem2param[itype][ktype][ktype]; + ijkparam = elem2param[itype][jtype][ktype]; + + dr_ik[0] = xtmp -x[k][0]; + dr_ik[1] = ytmp -x[k][1]; + dr_ik[2] = ztmp -x[k][2]; + r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2]; + + if (r_ik > params[ikparam].cutsq) continue; + + r_ik = sqrt(r_ik); + invR_ik = 1.0 / r_ik; + + directorCos_ik_x = invR_ik * dr_ik[0]; + directorCos_ik_y = invR_ik * dr_ik[1]; + directorCos_ik_z = invR_ik * dr_ik[2]; + + cosTeta = directorCos_ij_x * directorCos_ik_x + directorCos_ij_y * directorCos_ik_y + + directorCos_ij_z * directorCos_ik_z; + + gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k]; + + gtetaFunctionDerivedIJK = preGtetaFunctionDerived[neighbor_j][neighbor_k]; + + cutoffFunctionIK = preCutoffFunction[neighbor_k]; + + cutoffFunctionDerivedIK = preCutoffFunctionDerived[neighbor_k]; + + factor_force3_ij= cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ij *factor_force_tot; + + f_ij[0] = factor_force3_ij * (directorCos_ij_x*cosTeta - directorCos_ik_x); + f_ij[1] = factor_force3_ij * (directorCos_ij_y*cosTeta - directorCos_ik_y); + f_ij[2] = factor_force3_ij * (directorCos_ij_z*cosTeta - directorCos_ik_z); + + factor_1_force3_ik = (cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ik)*factor_force_tot; + factor_2_force3_ik = -(cutoffFunctionDerivedIK * gtetaFunctionIJK)*factor_force_tot; + + f_ik[0] = factor_1_force3_ik * (directorCos_ik_x*cosTeta - directorCos_ij_x) + + factor_2_force3_ik * directorCos_ik_x; + f_ik[1] = factor_1_force3_ik * (directorCos_ik_y*cosTeta - directorCos_ij_y) + + factor_2_force3_ik * directorCos_ik_y; + f_ik[2] = factor_1_force3_ik * (directorCos_ik_z*cosTeta - directorCos_ij_z) + + factor_2_force3_ik * directorCos_ik_z; + + f[j][0] -= f_ij[0]; + f[j][1] -= f_ij[1]; + f[j][2] -= f_ij[2]; + + f[k][0] -= f_ik[0]; + f[k][1] -= f_ik[1]; + f[k][2] -= f_ik[2]; + + fxtmp += f_ij[0] + f_ik[0]; + fytmp += f_ij[1] + f_ik[1]; + fztmp += f_ij[2] + f_ik[2]; + + if (VFLAG_ATOM) v_tally3_thr(i,j,k,f_ij,f_ik,dr_ij,dr_ik,thr); + + } + } // loop on J + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } // loop on I +} + +/* ---------------------------------------------------------------------- */ + +double PairTersoffTableOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairTersoffTable::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_tersoff_table_omp.h b/src/USER-OMP/pair_tersoff_table_omp.h new file mode 100644 index 0000000000..ffde58215d --- /dev/null +++ b/src/USER-OMP/pair_tersoff_table_omp.h @@ -0,0 +1,43 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(tersoff/table/omp,PairTersoffTableOMP) + +#else + +#ifndef LMP_PAIR_TERSOFF_TABLE_OMP_H +#define LMP_PAIR_TERSOFF_TABLE_OMP_H + +#include "pair_tersoff_table.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairTersoffTableOMP : public PairTersoffTable, public ThrOMP { + + public: + PairTersoffTableOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_tri_lj_omp.cpp b/src/USER-OMP/pair_tri_lj_omp.cpp new file mode 100644 index 0000000000..97f58a62d5 --- /dev/null +++ b/src/USER-OMP/pair_tri_lj_omp.cpp @@ -0,0 +1,409 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_tri_lj_omp.h" +#include "math_extra.h" +#include "atom.h" +#include "atom_vec_tri.h" +#include "comm.h" +#include "force.h" +#include "memory.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairTriLJOMP::PairTriLJOMP(LAMMPS *lmp) : + PairTriLJ(lmp), ThrOMP(lmp, THR_PAIR) +{ + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairTriLJOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + const int * const tri = atom->tri; + const int * const type = atom->type; + AtomVecTri::Bonus * const bonus = avec->bonus; + + // grow discrete list if necessary and initialize + + if (nall > nmax) { + nmax = nall; + memory->destroy(dnum); + memory->destroy(dfirst); + memory->create(dnum,nall,"pair:dnum"); + memory->create(dfirst,nall,"pair:dfirst"); + } + memset(dnum,0,nall*sizeof(int)); + ndiscrete = 0; + + // need to discretize the system ahead of time + // until we find a good way to multi-thread it. + for (int i = 0; i < nall; ++i) { + double dc1[3],dc2[3],dc3[3],p[3][3]; + + if (tri[i] >= 0) { + if (dnum[i] == 0) { + MathExtra::quat_to_mat(bonus[tri[i]].quat,p); + MathExtra::matvec(p,bonus[tri[i]].c1,dc1); + MathExtra::matvec(p,bonus[tri[i]].c2,dc2); + MathExtra::matvec(p,bonus[tri[i]].c3,dc3); + dfirst[i] = ndiscrete; + discretize(i,sigma[type[i]][type[i]],dc1,dc2,dc3); + dnum[i] = ndiscrete - dfirst[i]; + } + } + } + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairTriLJOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + int ni,nj,npi,npj,ifirst,jfirst; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r2inv,r6inv,term1,term2,sig,sig3,forcelj; + double dxi,dxj,dyi,dyj,dzi,dzj; + double xi[3],xj[3],fi[3],fj[3],ti[3],tj[3]; + int *ilist,*jlist,*numneigh,**firstneigh; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + double * const * const torque = thr->get_torque(); + const int * const tri = atom->tri; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq >= cutsq[itype][jtype]) continue; + + // tri/tri interactions = NxN particles + // c1,c2,c3 = corner pts of triangle I or J + + evdwl = 0.0; + if (tri[i] >= 0 && tri[j] >= 0) { + npi = dnum[i]; + ifirst = dfirst[i]; + npj = dnum[j]; + jfirst = dfirst[j]; + + fi[0]=fi[1]=fi[2]=fj[0]=fj[1]=fj[2]=0.0; + ti[0]=ti[1]=ti[2]=tj[0]=tj[1]=tj[2]=0.0; + + for (ni = 0; ni < npi; ni++) { + dxi = discrete[ifirst+ni].dx; + dyi = discrete[ifirst+ni].dy; + dzi = discrete[ifirst+ni].dz; + + for (nj = 0; nj < npj; nj++) { + dxj = discrete[jfirst+nj].dx; + dyj = discrete[jfirst+nj].dy; + dzj = discrete[jfirst+nj].dz; + + xi[0] = x[i][0] + dxi; + xi[1] = x[i][1] + dyi; + xi[2] = x[i][2] + dzi; + xj[0] = x[j][0] + dxj; + xj[1] = x[j][1] + dyj; + xj[2] = x[j][2] + dzj; + + delx = xi[0] - xj[0]; + dely = xi[1] - xj[1]; + delz = xi[2] - xj[2]; + rsq = delx*delx + dely*dely + delz*delz; + + sig = 0.5 * (discrete[ifirst+ni].sigma+discrete[jfirst+nj].sigma); + sig3 = sig*sig*sig; + term2 = 24.0*epsilon[itype][jtype] * sig3*sig3; + term1 = 2.0 * term2 * sig3*sig3; + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (term1*r6inv - term2); + fpair = forcelj*r2inv; + + if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0); + + fi[0] += delx*fpair; + fi[1] += dely*fpair; + fi[2] += delz*fpair; + ti[0] += fpair*(dyi*delz - dzi*dely); + ti[1] += fpair*(dzi*delx - dxi*delz); + ti[2] += fpair*(dxi*dely - dyi*delx); + + if (NEWTON_PAIR || j < nlocal) { + fj[0] -= delx*fpair; + fj[1] -= dely*fpair; + fj[2] -= delz*fpair; + tj[0] -= fpair*(dyj*delz - dzj*dely); + tj[1] -= fpair*(dzj*delx - dxj*delz); + tj[2] -= fpair*(dxj*dely - dyj*delx); + } + } + } + + f[i][0] += fi[0]; + f[i][1] += fi[1]; + f[i][2] += fi[2]; + f[j][0] += fj[0]; + f[j][1] += fj[1]; + f[j][2] += fj[2]; + torque[i][0] += ti[0]; + torque[i][1] += ti[1]; + torque[i][2] += ti[2]; + torque[j][0] += tj[0]; + torque[j][1] += tj[1]; + torque[j][2] += tj[2]; + + // tri/particle interaction = Nx1 particles + // c1,c2,c3 = corner pts of triangle I + + } else if (tri[i] >= 0) { + npi = dnum[i]; + ifirst = dfirst[i]; + + fi[0]=fi[1]=fi[2]=fj[0]=fj[1]=fj[2]=0.0; + ti[0]=ti[1]=ti[2]=tj[0]=tj[1]=tj[2]=0.0; + + for (ni = 0; ni < npi; ni++) { + dxi = discrete[ifirst+ni].dx; + dyi = discrete[ifirst+ni].dy; + dzi = discrete[ifirst+ni].dz; + + xi[0] = x[i][0] + dxi; + xi[1] = x[i][1] + dyi; + xi[2] = x[i][2] + dzi; + xj[0] = x[j][0]; + xj[1] = x[j][1]; + xj[2] = x[j][2]; + + delx = xi[0] - xj[0]; + dely = xi[1] - xj[1]; + delz = xi[2] - xj[2]; + rsq = delx*delx + dely*dely + delz*delz; + + sig = 0.5 * (discrete[ifirst+ni].sigma+sigma[jtype][jtype]); + sig3 = sig*sig*sig; + term2 = 24.0*epsilon[itype][jtype] * sig3*sig3; + term1 = 2.0 * term2 * sig3*sig3; + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (term1*r6inv - term2); + fpair = forcelj*r2inv; + + if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0); + + fi[0] += delx*fpair; + fi[1] += dely*fpair; + fi[2] += delz*fpair; + ti[0] += fpair*(dyi*delz - dzi*dely); + ti[1] += fpair*(dzi*delx - dxi*delz); + ti[2] += fpair*(dxi*dely - dyi*delx); + + if (NEWTON_PAIR || j < nlocal) { + fj[0] -= delx*fpair; + fj[1] -= dely*fpair; + fj[2] -= delz*fpair; + tj[0] -= fpair*(dyj*delz - dzj*dely); + tj[1] -= fpair*(dzj*delx - dxj*delz); + tj[2] -= fpair*(dxj*dely - dyj*delx); + } + } + + f[i][0] += fi[0]; + f[i][1] += fi[1]; + f[i][2] += fi[2]; + f[j][0] += fj[0]; + f[j][1] += fj[1]; + f[j][2] += fj[2]; + torque[i][0] += ti[0]; + torque[i][1] += ti[1]; + torque[i][2] += ti[2]; + torque[j][0] += tj[0]; + torque[j][1] += tj[1]; + torque[j][2] += tj[2]; + + // particle/tri interaction = Nx1 particles + // c1,c2,c3 = corner pts of triangle J + + } else if (tri[j] >= 0) { + npj = dnum[j]; + jfirst = dfirst[j]; + + fi[0]=fi[1]=fi[2]=fj[0]=fj[1]=fj[2]=0.0; + ti[0]=ti[1]=ti[2]=tj[0]=tj[1]=tj[2]=0.0; + + for (nj = 0; nj < npj; nj++) { + dxj = discrete[jfirst+nj].dx; + dyj = discrete[jfirst+nj].dy; + dzj = discrete[jfirst+nj].dz; + + xi[0] = x[i][0]; + xi[1] = x[i][1]; + xi[2] = x[i][2]; + xj[0] = x[j][0] + dxj; + xj[1] = x[j][1] + dyj; + xj[2] = x[j][2] + dzj; + + delx = xi[0] - xj[0]; + dely = xi[1] - xj[1]; + delz = xi[2] - xj[2]; + rsq = delx*delx + dely*dely + delz*delz; + + sig = 0.5 * (sigma[itype][itype]+discrete[jfirst+nj].sigma); + sig3 = sig*sig*sig; + term2 = 24.0*epsilon[itype][jtype] * sig3*sig3; + term1 = 2.0 * term2 * sig3*sig3; + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (term1*r6inv - term2); + fpair = forcelj*r2inv; + + if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0); + + if (EFLAG) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0); + + fi[0] += delx*fpair; + fi[1] += dely*fpair; + fi[2] += delz*fpair; + ti[0] += fpair*(dyi*delz - dzi*dely); + ti[1] += fpair*(dzi*delx - dxi*delz); + ti[2] += fpair*(dxi*dely - dyi*delx); + + if (NEWTON_PAIR || j < nlocal) { + fj[0] -= delx*fpair; + fj[1] -= dely*fpair; + fj[2] -= delz*fpair; + tj[0] -= fpair*(dyj*delz - dzj*dely); + tj[1] -= fpair*(dzj*delx - dxj*delz); + tj[2] -= fpair*(dxj*dely - dyj*delx); + } + } + + f[i][0] += fi[0]; + f[i][1] += fi[1]; + f[i][2] += fi[2]; + f[j][0] += fj[0]; + f[j][1] += fj[1]; + f[j][2] += fj[2]; + torque[i][0] += ti[0]; + torque[i][1] += ti[1]; + torque[i][2] += ti[2]; + torque[j][0] += tj[0]; + torque[j][1] += tj[1]; + torque[j][2] += tj[2]; + + // particle/particle interaction = 1x1 particles + + } else { + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + fpair = forcelj*r2inv; + + if (EFLAG) + evdwl += r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]); + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + } + + if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } +} + +/* ---------------------------------------------------------------------- */ + +double PairTriLJOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairTriLJ::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_tri_lj_omp.h b/src/USER-OMP/pair_tri_lj_omp.h new file mode 100644 index 0000000000..d21c562a80 --- /dev/null +++ b/src/USER-OMP/pair_tri_lj_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(tri/lj/omp,PairTriLJOMP) + +#else + +#ifndef LMP_PAIR_TRI_LJ_OMP_H +#define LMP_PAIR_TRI_LJ_OMP_H + +#include "pair_tri_lj.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairTriLJOMP : public PairTriLJ, public ThrOMP { + + public: + PairTriLJOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pppm_omp.cpp b/src/USER-OMP/pppm_omp.cpp new file mode 100644 index 0000000000..50c0b5422b --- /dev/null +++ b/src/USER-OMP/pppm_omp.cpp @@ -0,0 +1,323 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "pppm_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "memory.h" + +#include + +using namespace LAMMPS_NS; + +#ifdef FFT_SINGLE +#define ZEROF 0.0f +#define ONEF 1.0f +#else +#define ZEROF 0.0 +#define ONEF 1.0 +#endif + +/* ---------------------------------------------------------------------- */ + +PPPMOMP::PPPMOMP(LAMMPS *lmp, int narg, char **arg) : + PPPM(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE) +{ +} + + +/* ---------------------------------------------------------------------- + allocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMOMP::allocate() +{ + PPPM::allocate(); + + const int nthreads = comm->nthreads; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + + FFT_SCALAR **rho1d_thr; + memory->create2d_offset(rho1d_thr,3,-order/2,order/2,"pppm:rho1d_thr"); + ThrData *thr = fix->get_thr(tid); + thr->init_pppm(static_cast(rho1d_thr)); + } + + const int nzend = (nzhi_out-nzlo_out+1)*nthreads + nzlo_out -1; + + // reallocate density brick, so it fits our needs + memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out); + memory->create3d_offset(density_brick,nzlo_out,nzend,nylo_out,nyhi_out, + nxlo_out,nxhi_out,"pppm:density_brick"); +} + +// NOTE: special version of reduce_data for FFT_SCALAR data type. +// reduce per thread data into the first part of the data +// array that is used for the non-threaded parts and reset +// the temporary storage to 0.0. this routine depends on +// multi-dimensional arrays like force stored in this order +// x1,y1,z1,x2,y2,z2,... +// we need to post a barrier to wait until all threads are done +// with writing to the array . +static void data_reduce_fft(FFT_SCALAR *dall, int nall, int nthreads, int ndim, int tid) +{ +#if defined(_OPENMP) + // NOOP in non-threaded execution. + if (nthreads == 1) return; +#pragma omp barrier + { + const int nvals = ndim*nall; + const int idelta = nvals/nthreads + 1; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > nvals) ? nvals : (ifrom + idelta); + + for (int m = ifrom; m < ito; ++m) { + for (int n = 1; n < nthreads; ++n) { + dall[m] += dall[n*nvals + m]; + dall[n*nvals + m] = 0.0; + } + } + } +#else + // NOOP in non-threaded execution. + return; +#endif +} + +/* ---------------------------------------------------------------------- + free memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMOMP::deallocate() +{ + PPPM::deallocate(); + for (int i=0; i < comm->nthreads; ++i) { + ThrData * thr = fix->get_thr(i); + FFT_SCALAR ** rho1d_thr = static_cast(thr->get_rho1d()); + memory->destroy2d_offset(rho1d_thr,-order/2); + } +} + +void PPPMOMP::compute(int eflag, int vflag) +{ + + PPPM::compute(eflag,vflag); + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid +------------------------------------------------------------------------- */ + +void PPPMOMP::make_rho() +{ + const double * const q = atom->q; + const double * const * const x = atom->x; + const int nthreads = comm->nthreads; + +#if defined(_OPENMP) +#pragma omp parallel default(none) + { + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = atom->nlocal; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; +#else + const int tid = 0; + const int ifrom = 0; + const int ito = atom->nlocal; +#endif + + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + + // set up clear 3d density array + const int nzoffs = (nzhi_out-nzlo_out+1)*tid; + FFT_SCALAR * const * const * const db = &(density_brick[nzoffs]); + memset(&(db[nzlo_out][nylo_out][nxlo_out]),0,ngrid*sizeof(FFT_SCALAR)); + + ThrData *thr = fix->get_thr(tid); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d()); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + for (int i = ifrom; i < ito; i++) { + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d_thr(r1d,dx,dy,dz); + + z0 = delvolinv * q[i]; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + y0 = z0*r1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + x0 = y0*r1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + db[mz][my][mx] += x0*r1d[0][l]; + } + } + } + } +#if defined(_OPENMP) + // reduce 3d density array + if (nthreads > 1) { + sync_threads(); + data_reduce_fft(&(density_brick[nzlo_out][nylo_out][nxlo_out]),ngrid,nthreads,1,tid); + } + } +#endif +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles +------------------------------------------------------------------------- */ + +void PPPMOMP::fieldforce() +{ + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + const double * const q = atom->q; + const double * const * const x = atom->x; + const int nthreads = comm->nthreads; + const double qqrd2e = force->qqrd2e; + +#if defined(_OPENMP) +#pragma omp parallel default(none) + { + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = atom->nlocal; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; +#else + const int ifrom = 0; + const int ito = atom->nlocal; + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + double * const * const f = thr->get_f(); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d()); + + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; + + for (int i = ifrom; i < ito; i++) { + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d_thr(r1d,dx,dy,dz); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = r1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*r1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*r1d[0][l]; + ekx -= x0*vdx_brick[mz][my][mx]; + eky -= x0*vdy_brick[mz][my][mx]; + ekz -= x0*vdz_brick[mz][my][mx]; + } + } + } + + // convert E-field to force + const double qfactor = qqrd2e*scale*q[i]; + f[i][0] += qfactor*ekx; + f[i][1] += qfactor*eky; + f[i][2] += qfactor*ekz; + } +#if defined(_OPENMP) + } +#endif +} + +/* ---------------------------------------------------------------------- + charge assignment into rho1d + dx,dy,dz = distance of particle from "lower left" grid point +------------------------------------------------------------------------- */ +void PPPMOMP::compute_rho1d_thr(FFT_SCALAR * const * const r1d, const FFT_SCALAR &dx, + const FFT_SCALAR &dy, const FFT_SCALAR &dz) +{ + int k,l; + FFT_SCALAR r1,r2,r3; + + for (k = (1-order)/2; k <= order/2; k++) { + r1 = r2 = r3 = ZEROF; + + for (l = order-1; l >= 0; l--) { + r1 = rho_coeff[l][k] + r1*dx; + r2 = rho_coeff[l][k] + r2*dy; + r3 = rho_coeff[l][k] + r3*dz; + } + r1d[0][k] = r1; + r1d[1][k] = r2; + r1d[2][k] = r3; + } +} diff --git a/src/USER-OMP/pppm_omp.h b/src/USER-OMP/pppm_omp.h new file mode 100644 index 0000000000..0701cb3396 --- /dev/null +++ b/src/USER-OMP/pppm_omp.h @@ -0,0 +1,49 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef KSPACE_CLASS + +KSpaceStyle(pppm/omp,PPPMOMP) + +#else + +#ifndef LMP_PPPM_OMP_H +#define LMP_PPPM_OMP_H + +#include "pppm.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + + class PPPMOMP : public PPPM, public ThrOMP { + public: + PPPMOMP(class LAMMPS *, int, char **); + virtual ~PPPMOMP () {}; + + protected: + virtual void allocate(); + virtual void deallocate(); + virtual void compute(int, int); + virtual void fieldforce(); + virtual void make_rho(); + void compute_rho1d_thr(FFT_SCALAR * const * const, const FFT_SCALAR &, + const FFT_SCALAR &, const FFT_SCALAR &); +// void compute_rho_coeff(); +// void slabcorr(int); + +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pppm_proxy.cpp b/src/USER-OMP/pppm_proxy.cpp new file mode 100644 index 0000000000..1b8a9c5453 --- /dev/null +++ b/src/USER-OMP/pppm_proxy.cpp @@ -0,0 +1,61 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "pppm_proxy.h" + +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PPPMProxy::PPPMProxy(LAMMPS *lmp, int narg, char **arg) : + PPPM(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE|THR_PROXY) {} + +/* ---------------------------------------------------------------------- */ + +void PPPMProxy::setup_proxy() +{ + PPPM::setup(); +} + +/* ---------------------------------------------------------------------- */ + +void PPPMProxy::compute(int eflag, int vflag) +{ +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + + ThrData *thr = fix->get_thr(tid); + reduce_thr(this, eflag,vflag,thr); + } +} + +/* ---------------------------------------------------------------------- */ + +void PPPMProxy::compute_proxy(int eflag, int vflag) +{ + PPPM::compute(eflag,vflag); +} + diff --git a/src/USER-OMP/pppm_proxy.h b/src/USER-OMP/pppm_proxy.h new file mode 100644 index 0000000000..e7387569d8 --- /dev/null +++ b/src/USER-OMP/pppm_proxy.h @@ -0,0 +1,44 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef KSPACE_CLASS + +KSpaceStyle(pppm/proxy,PPPMProxy) + +#else + +#ifndef LMP_PPPM_PROXY_H +#define LMP_PPPM_PROXY_H + +#include "pppm.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + + class PPPMProxy : public PPPM, public ThrOMP { + public: + PPPMProxy(class LAMMPS *, int, char **); + virtual ~PPPMProxy () {}; + + virtual void compute(int, int); + virtual void compute_proxy(int eflag, int vflag); + + // nothing to do in setup + virtual void setup() {}; + virtual void setup_proxy(); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pppm_tip4p_proxy.cpp b/src/USER-OMP/pppm_tip4p_proxy.cpp new file mode 100644 index 0000000000..fa8d382c27 --- /dev/null +++ b/src/USER-OMP/pppm_tip4p_proxy.cpp @@ -0,0 +1,61 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "pppm_tip4p_proxy.h" + +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PPPMTIP4PProxy::PPPMTIP4PProxy(LAMMPS *lmp, int narg, char **arg) : + PPPMTIP4P(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE|THR_PROXY) {} + +/* ---------------------------------------------------------------------- */ + +void PPPMTIP4PProxy::setup_proxy() +{ + PPPMTIP4P::setup(); +} + +/* ---------------------------------------------------------------------- */ + +void PPPMTIP4PProxy::compute(int eflag, int vflag) +{ +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + + ThrData *thr = fix->get_thr(tid); + reduce_thr(this, eflag,vflag,thr); + } +} + +/* ---------------------------------------------------------------------- */ + +void PPPMTIP4PProxy::compute_proxy(int eflag, int vflag) +{ + PPPMTIP4P::compute(eflag,vflag); +} + diff --git a/src/USER-OMP/pppm_tip4p_proxy.h b/src/USER-OMP/pppm_tip4p_proxy.h new file mode 100644 index 0000000000..b69cab2ad6 --- /dev/null +++ b/src/USER-OMP/pppm_tip4p_proxy.h @@ -0,0 +1,44 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef KSPACE_CLASS + +KSpaceStyle(pppm/tip4p/proxy,PPPMTIP4PProxy) + +#else + +#ifndef LMP_PPPM_TIP4P_PROXY_H +#define LMP_PPPM_TIP4P_PROXY_H + +#include "pppm_tip4p.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + + class PPPMTIP4PProxy : public PPPMTIP4P, public ThrOMP { + public: + PPPMTIP4PProxy(class LAMMPS *, int, char **); + virtual ~PPPMTIP4PProxy () {}; + + virtual void compute(int, int); + virtual void compute_proxy(int eflag, int vflag); + + // nothing to do in setup + virtual void setup() {}; + virtual void setup_proxy(); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/thr_data.cpp b/src/USER-OMP/thr_data.cpp new file mode 100644 index 0000000000..d6830e64bf --- /dev/null +++ b/src/USER-OMP/thr_data.cpp @@ -0,0 +1,219 @@ +/* ------------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + per-thread data management for LAMMPS + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "thr_data.h" + +#include +#include + +using namespace LAMMPS_NS; + + +ThrData::ThrData(int tid) + : _f(NULL), _torque(NULL), _erforce(NULL), _de(NULL), _drho(NULL), _mu(NULL), + _lambda(NULL), _rhoB(NULL), _D_values(NULL), _rho(NULL), _fp(NULL), + _rho1d(NULL), _tid(tid) +{ + // nothing else to do here. +} + + +/* ---------------------------------------------------------------------- */ + +void ThrData::check_tid(int tid) +{ + if (tid != _tid) + fprintf(stderr,"WARNING: external and internal tid mismatch %d != %d\n",tid,_tid); +} + +/* ---------------------------------------------------------------------- */ + +void ThrData::init_force(int nall, double **f, double **torque, + double *erforce, double *de, double *drho) +{ + eng_vdwl=eng_coul=eng_bond=eng_angle=eng_dihed=eng_imprp=eng_kspce=0.0; + memset(virial_pair,0,6*sizeof(double)); + memset(virial_bond,0,6*sizeof(double)); + memset(virial_angle,0,6*sizeof(double)); + memset(virial_dihed,0,6*sizeof(double)); + memset(virial_imprp,0,6*sizeof(double)); + memset(virial_kspce,0,6*sizeof(double)); + + eatom_pair=eatom_bond=eatom_angle=eatom_dihed=eatom_imprp=eatom_kspce=NULL; + vatom_pair=vatom_bond=vatom_angle=vatom_dihed=vatom_imprp=vatom_kspce=NULL; + + _f = f + _tid*nall; + memset(&(_f[0][0]),0,nall*3*sizeof(double)); + + if (torque) { + _torque = torque + _tid*nall; + memset(&(_torque[0][0]),0,nall*3*sizeof(double)); + } else _torque = NULL; + + if (erforce) { + _erforce = erforce + _tid*nall; + memset(&(_erforce[0]),0,nall*sizeof(double)); + } else _erforce = NULL; + + if (de) { + _de = de + _tid*nall; + memset(&(_de[0]),0,nall*sizeof(double)); + } else _de = NULL; + + if (drho) { + _drho = drho + _tid*nall; + memset(&(_drho[0]),0,nall*sizeof(double)); + } else _drho = NULL; +} + +/* ---------------------------------------------------------------------- + set up and clear out locally managed per atom arrays +------------------------------------------------------------------------- */ + +void ThrData::init_eam(int nall, double *rho) +{ + _rho = rho + _tid*nall; + memset(_rho, 0, nall*sizeof(double)); +} + +/* ---------------------------------------------------------------------- */ + +void ThrData::init_adp(int nall, double *rho, double **mu, double **lambda) +{ + init_eam(nall, rho); + + _mu = mu + _tid*nall; + _lambda = lambda + _tid*nall; + memset(&(_mu[0][0]), 0, nall*3*sizeof(double)); + memset(&(_lambda[0][0]), 0, nall*6*sizeof(double)); +} + +/* ---------------------------------------------------------------------- */ + +void ThrData::init_cdeam(int nall, double *rho, double *rhoB, double *D_values) +{ + init_eam(nall, rho); + + _rhoB = rhoB + _tid*nall; + _D_values = D_values + _tid*nall; + memset(_rhoB, 0, nall*sizeof(double)); + memset(_D_values, 0, nall*sizeof(double)); +} + +/* ---------------------------------------------------------------------- */ + +void ThrData::init_eim(int nall, double *rho, double *fp) +{ + init_eam(nall, rho); + + _fp = fp + _tid*nall; + memset(_fp,0,nall*sizeof(double)); +} + +/* ---------------------------------------------------------------------- + compute global pair virial via summing F dot r over own & ghost atoms + at this point, only pairwise forces have been accumulated in atom->f +------------------------------------------------------------------------- */ + +void ThrData::virial_fdotr_compute(double **x, int nlocal, int nghost, int nfirst) +{ + + // sum over force on all particles including ghosts + + if (nfirst < 0) { + int nall = nlocal + nghost; + for (int i = 0; i < nall; i++) { + virial_pair[0] += _f[i][0]*x[i][0]; + virial_pair[1] += _f[i][1]*x[i][1]; + virial_pair[2] += _f[i][2]*x[i][2]; + virial_pair[3] += _f[i][1]*x[i][0]; + virial_pair[4] += _f[i][2]*x[i][0]; + virial_pair[5] += _f[i][2]*x[i][1]; + } + + // neighbor includegroup flag is set + // sum over force on initial nfirst particles and ghosts + + } else { + int nall = nfirst; + for (int i = 0; i < nall; i++) { + virial_pair[0] += _f[i][0]*x[i][0]; + virial_pair[1] += _f[i][1]*x[i][1]; + virial_pair[2] += _f[i][2]*x[i][2]; + virial_pair[3] += _f[i][1]*x[i][0]; + virial_pair[4] += _f[i][2]*x[i][0]; + virial_pair[5] += _f[i][2]*x[i][1]; + } + + nall = nlocal + nghost; + for (int i = nlocal; i < nall; i++) { + virial_pair[0] += _f[i][0]*x[i][0]; + virial_pair[1] += _f[i][1]*x[i][1]; + virial_pair[2] += _f[i][2]*x[i][2]; + virial_pair[3] += _f[i][1]*x[i][0]; + virial_pair[4] += _f[i][2]*x[i][0]; + virial_pair[5] += _f[i][2]*x[i][1]; + } + } +} + +/* ---------------------------------------------------------------------- */ + +double ThrData::memory_usage() +{ + double bytes = (7 + 6*6) * sizeof(double); + bytes += 2 * sizeof(double*); + bytes += 4 * sizeof(int); + + return bytes; +} + +/* additional helper functions */ + +// reduce per thread data into the first part of the data +// array that is used for the non-threaded parts and reset +// the temporary storage to 0.0. this routine depends on +// multi-dimensional arrays like force stored in this order +// x1,y1,z1,x2,y2,z2,... +// we need to post a barrier to wait until all threads are done +// with writing to the array . +void LAMMPS_NS::data_reduce_thr(double *dall, int nall, int nthreads, int ndim, int tid) +{ +#if defined(_OPENMP) + // NOOP in non-threaded execution. + if (nthreads == 1) return; +#pragma omp barrier + { + const int nvals = ndim*nall; + const int idelta = nvals/nthreads + 1; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > nvals) ? nvals : (ifrom + idelta); + + for (int m = ifrom; m < ito; ++m) { + for (int n = 1; n < nthreads; ++n) { + dall[m] += dall[n*nvals + m]; + dall[n*nvals + m] = 0.0; + } + } + } +#else + // NOOP in non-threaded execution. + return; +#endif +} + diff --git a/src/USER-OMP/thr_data.h b/src/USER-OMP/thr_data.h new file mode 100644 index 0000000000..400d5716c5 --- /dev/null +++ b/src/USER-OMP/thr_data.h @@ -0,0 +1,132 @@ +/* -*- c++ -*- ------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifndef LMP_THR_DATA_H +#define LMP_THR_DATA_H + +#if defined(_OPENMP) +#include +#endif + +namespace LAMMPS_NS { + +// per thread data accumulators +// there should be one instance +// of this class for each thread. +class ThrData { + friend class FixOMP; + friend class ThrOMP; + + public: + ThrData(int tid); + ~ThrData() {}; + + void check_tid(int); // thread id consistency check + int get_tid() const { return _tid; }; // our thread id. + + // erase accumulator contents and hook up force arrays + void init_force(int, double **, double **, double *, double *, double *); + + // give access to per-thread offset arrays + double **get_f() const { return _f; }; + double **get_torque() const { return _torque; }; + double *get_de() const { return _de; }; + double *get_drho() const { return _drho; }; + + // setup and erase per atom arrays + void init_adp(int, double *, double **, double **); // ADP (+ EAM) + void init_cdeam(int, double *, double *, double *); // CDEAM (+ EAM) + void init_eam(int, double *); // EAM + void init_eim(int, double *, double *); // EIM (+ EAM) + + void init_pppm(void *r1d) { _rho1d = r1d; }; + + // access methods for arrays that we handle in this class + double **get_lambda() const { return _lambda; }; + double **get_mu() const { return _mu; }; + double *get_D_values() const { return _D_values; }; + double *get_fp() const { return _fp; }; + double *get_rho() const { return _rho; }; + double *get_rhoB() const { return _rhoB; }; + void *get_rho1d() const { return _rho1d; }; + + private: + double eng_vdwl; // non-bonded non-coulomb energy + double eng_coul; // non-bonded coulomb energy + double virial_pair[6]; // virial contribution from non-bonded + double *eatom_pair; + double **vatom_pair; + double eng_bond; // bond energy + double virial_bond[6]; // virial contribution from bonds + double *eatom_bond; + double **vatom_bond; + double eng_angle; // angle energy + double virial_angle[6]; // virial contribution from angles + double *eatom_angle; + double **vatom_angle; + double eng_dihed; // dihedral energy + double virial_dihed[6]; // virial contribution from dihedrals + double *eatom_dihed; + double **vatom_dihed; + double eng_imprp; // improper energy + double virial_imprp[6]; // virial contribution from impropers + double *eatom_imprp; + double **vatom_imprp; + double eng_kspce; // kspace energy + double virial_kspce[6]; // virial contribution from kspace + double *eatom_kspce; + double **vatom_kspce; + + // per thread segments of various force or similar arrays + + // these are maintained by atom styles + double **_f; + double **_torque; + double *_erforce; + double *_de; + double *_drho; + + // these are maintained by individual pair styles + double **_mu, **_lambda; // ADP (+ EAM) + double *_rhoB, *_D_values; // CDEAM (+ EAM) + double *_rho; // EAM + double *_fp; // EIM (+ EAM) + + // this is for pppm/omp + void *_rho1d; + + // my thread id + const int _tid; + + public: + // compute global per thread virial contribution from per-thread force + void virial_fdotr_compute(double **, int, int, int); + + double memory_usage(); + + // disabled default methods + private: + ThrData() : _tid(-1) {}; +}; + +//////////////////////////////////////////////////////////////////////// +// helper functions operating on data replicated for thread support // +//////////////////////////////////////////////////////////////////////// +// generic per thread data reduction for continous arrays of nthreads*nmax size +void data_reduce_thr(double *, int, int, int, int); +} +#endif