mirror of https://github.com/lammps/lammps.git
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7883 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
parent
8705054819
commit
b2ccacbdc0
|
@ -0,0 +1,173 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Axel Kohlmeyer (Temple U)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "math.h"
|
||||
#include "pair_beck_omp.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
|
||||
#include "suffix.h"
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PairBeckOMP::PairBeckOMP(LAMMPS *lmp) :
|
||||
PairBeck(lmp), ThrOMP(lmp, THR_PAIR)
|
||||
{
|
||||
suffix_flag |= Suffix::OMP;
|
||||
respa_enable = 0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairBeckOMP::compute(int eflag, int vflag)
|
||||
{
|
||||
if (eflag || vflag) {
|
||||
ev_setup(eflag,vflag);
|
||||
} else evflag = vflag_fdotr = 0;
|
||||
|
||||
const int nall = atom->nlocal + atom->nghost;
|
||||
const int nthreads = comm->nthreads;
|
||||
const int inum = list->inum;
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) shared(eflag,vflag)
|
||||
#endif
|
||||
{
|
||||
int ifrom, ito, tid;
|
||||
|
||||
loop_setup_thr(ifrom, ito, tid, inum, nthreads);
|
||||
ThrData *thr = fix->get_thr(tid);
|
||||
ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
|
||||
|
||||
if (evflag) {
|
||||
if (eflag) {
|
||||
if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
|
||||
else eval<1,1,0>(ifrom, ito, thr);
|
||||
} else {
|
||||
if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
|
||||
else eval<1,0,0>(ifrom, ito, thr);
|
||||
}
|
||||
} else {
|
||||
if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
|
||||
else eval<0,0,0>(ifrom, ito, thr);
|
||||
}
|
||||
|
||||
reduce_thr(this, eflag, vflag, thr);
|
||||
} // end of omp parallel region
|
||||
}
|
||||
|
||||
template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
|
||||
void PairBeckOMP::eval(int iifrom, int iito, ThrData * const thr)
|
||||
{
|
||||
int i,j,ii,jj,jnum,itype,jtype;
|
||||
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
|
||||
double rsq,r5,force_beck,factor_lj;
|
||||
double r,rinv;
|
||||
double aaij,alphaij,betaij;
|
||||
double term1,term1inv,term2,term3,term4,term5,term6;
|
||||
int *ilist,*jlist,*numneigh,**firstneigh;
|
||||
|
||||
evdwl = 0.0;
|
||||
|
||||
const double * const * const x = atom->x;
|
||||
double * const * const f = thr->get_f();
|
||||
int *type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
double *special_lj = force->special_lj;
|
||||
double fxtmp,fytmp,fztmp;
|
||||
|
||||
ilist = list->ilist;
|
||||
numneigh = list->numneigh;
|
||||
firstneigh = list->firstneigh;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
for (ii = iifrom; ii < iito; ++ii) {
|
||||
|
||||
i = ilist[ii];
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
itype = type[i];
|
||||
jlist = firstneigh[i];
|
||||
jnum = numneigh[i];
|
||||
fxtmp=fytmp=fztmp=0.0;
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
factor_lj = special_lj[sbmask(j)];
|
||||
j &= NEIGHMASK;
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
jtype = type[j];
|
||||
|
||||
if (rsq < cutsq[itype][jtype]) {
|
||||
r = sqrt(rsq);
|
||||
r5 = rsq*rsq*r;
|
||||
aaij = aa[itype][jtype];
|
||||
alphaij = alpha[itype][jtype];
|
||||
betaij = beta[itype][jtype];
|
||||
term1 = aaij*aaij + rsq;
|
||||
term2 = 1.0/pow(term1,5);
|
||||
term3 = 21.672 + 30.0*aaij*aaij + 6.0*rsq;
|
||||
term4 = alphaij + r5*betaij;
|
||||
term5 = alphaij + 6.0*r5*betaij;
|
||||
rinv = 1.0/r;
|
||||
force_beck = AA[itype][jtype]*exp(-1.0*r*term4)*term5;
|
||||
force_beck -= BB[itype][jtype]*r*term2*term3;
|
||||
|
||||
fpair = factor_lj*force_beck*rinv;
|
||||
|
||||
f[i][0] += delx*fpair;
|
||||
f[i][1] += dely*fpair;
|
||||
f[i][2] += delz*fpair;
|
||||
if (NEWTON_PAIR || j < nlocal) {
|
||||
f[j][0] -= delx*fpair;
|
||||
f[j][1] -= dely*fpair;
|
||||
f[j][2] -= delz*fpair;
|
||||
}
|
||||
|
||||
if (EFLAG) {
|
||||
term6 = 1.0/pow(term1,3);
|
||||
term1inv = 1.0/term1;
|
||||
evdwl = AA[itype][jtype]*exp(-1.0*r*term4);
|
||||
evdwl -= BB[itype][jtype]*term6*(1.0+(2.709+3.0*aaij*aaij)*term1inv);
|
||||
}
|
||||
|
||||
if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
|
||||
evdwl,0.0,fpair,delx,dely,delz,thr);
|
||||
}
|
||||
}
|
||||
f[i][0] += fxtmp;
|
||||
f[i][1] += fytmp;
|
||||
f[i][2] += fztmp;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
double PairBeckOMP::memory_usage()
|
||||
{
|
||||
double bytes = memory_usage_thr();
|
||||
bytes += PairBeck::memory_usage();
|
||||
|
||||
return bytes;
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Axel Kohlmeyer (Temple U)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(beck/omp,PairBeckOMP)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_BECK_OMP_H
|
||||
#define LMP_PAIR_BECK_OMP_H
|
||||
|
||||
#include "pair_beck.h"
|
||||
#include "thr_omp.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairBeckOMP : public PairBeck, public ThrOMP {
|
||||
|
||||
public:
|
||||
PairBeckOMP(class LAMMPS *);
|
||||
|
||||
virtual void compute(int, int);
|
||||
virtual double memory_usage();
|
||||
|
||||
private:
|
||||
template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
|
||||
void eval(int ifrom, int ito, ThrData * const thr);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,324 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Axel Kohlmeyer (Temple U)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "math.h"
|
||||
#include "string.h"
|
||||
|
||||
#include "pair_meam_spline_omp.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "memory.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
|
||||
#include "suffix.h"
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PairMEAMSplineOMP::PairMEAMSplineOMP(LAMMPS *lmp) :
|
||||
PairMEAMSpline(lmp), ThrOMP(lmp, THR_PAIR)
|
||||
{
|
||||
suffix_flag |= Suffix::OMP;
|
||||
respa_enable = 0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairMEAMSplineOMP::compute(int eflag, int vflag)
|
||||
{
|
||||
if (eflag || vflag) {
|
||||
ev_setup(eflag,vflag);
|
||||
} else evflag = vflag_fdotr = eflag_global = vflag_global = eflag_atom = vflag_atom = 0;
|
||||
|
||||
const int nall = atom->nlocal + atom->nghost;
|
||||
const int nthreads = comm->nthreads;
|
||||
const int inum = listfull->inum;
|
||||
|
||||
if (listhalf->inum != inum)
|
||||
error->warning(FLERR,"inconsistent half and full neighborlist");
|
||||
|
||||
// Grow per-atom array if necessary.
|
||||
|
||||
if (atom->nmax > nmax) {
|
||||
memory->destroy(Uprime_values);
|
||||
nmax = atom->nmax;
|
||||
memory->create(Uprime_values,nmax*nthreads,"pair:Uprime");
|
||||
}
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) shared(eflag,vflag)
|
||||
#endif
|
||||
{
|
||||
int ifrom, ito, tid;
|
||||
|
||||
loop_setup_thr(ifrom, ito, tid, inum, nthreads);
|
||||
ThrData *thr = fix->get_thr(tid);
|
||||
ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
|
||||
|
||||
thr->init_eam(nall,Uprime_values);
|
||||
|
||||
if (evflag) {
|
||||
if (eflag) {
|
||||
eval<1,1>(ifrom, ito, thr);
|
||||
} else {
|
||||
eval<1,0>(ifrom, ito, thr);
|
||||
}
|
||||
} else {
|
||||
eval<0,0>(ifrom, ito, thr);
|
||||
}
|
||||
|
||||
reduce_thr(this, eflag, vflag, thr);
|
||||
} // end of omp parallel region
|
||||
}
|
||||
|
||||
template <int EVFLAG, int EFLAG>
|
||||
void PairMEAMSplineOMP::eval(int iifrom, int iito, ThrData * const thr)
|
||||
{
|
||||
const int* const ilist_full = listfull->ilist;
|
||||
const int* const numneigh_full = listfull->numneigh;
|
||||
const int* const * const firstneigh_full = listfull->firstneigh;
|
||||
|
||||
// Determine the maximum number of neighbors a single atom has.
|
||||
int myMaxNeighbors = 0;
|
||||
for(int ii = iifrom; ii < iito; ii++) {
|
||||
int jnum = numneigh_full[ilist_full[ii]];
|
||||
if(jnum > myMaxNeighbors) myMaxNeighbors = jnum;
|
||||
}
|
||||
|
||||
// Allocate array for temporary bond info.
|
||||
MEAM2Body *myTwoBodyInfo = new MEAM2Body[myMaxNeighbors];
|
||||
|
||||
const double * const * const x = atom->x;
|
||||
double * const * const forces = thr->get_f();
|
||||
double * const Uprime_thr = thr->get_rho();
|
||||
const int tid = thr->get_tid();
|
||||
const int nthreads = comm->nthreads;
|
||||
const int nlocal = atom->nlocal;
|
||||
const int nall = nlocal + atom->nghost;
|
||||
|
||||
const double cutforcesq = cutoff*cutoff;
|
||||
|
||||
// Sum three-body contributions to charge density and compute embedding energies.
|
||||
for(int ii = iifrom; ii < iito; ii++) {
|
||||
|
||||
const int i = ilist_full[ii];
|
||||
const double xtmp = x[i][0];
|
||||
const double ytmp = x[i][1];
|
||||
const double ztmp = x[i][2];
|
||||
const int* const jlist = firstneigh_full[i];
|
||||
const int jnum = numneigh_full[i];
|
||||
double rho_value = 0;
|
||||
int numBonds = 0;
|
||||
MEAM2Body* nextTwoBodyInfo = myTwoBodyInfo;
|
||||
|
||||
for(int jj = 0; jj < jnum; jj++) {
|
||||
const int j = jlist[jj] & NEIGHMASK;
|
||||
|
||||
const double jdelx = x[j][0] - xtmp;
|
||||
const double jdely = x[j][1] - ytmp;
|
||||
const double jdelz = x[j][2] - ztmp;
|
||||
const double rij_sq = jdelx*jdelx + jdely*jdely + jdelz*jdelz;
|
||||
|
||||
if (rij_sq < cutforcesq) {
|
||||
const double rij = sqrt(rij_sq);
|
||||
double partial_sum = 0;
|
||||
|
||||
nextTwoBodyInfo->tag = j;
|
||||
nextTwoBodyInfo->r = rij;
|
||||
nextTwoBodyInfo->f = f.eval(rij, nextTwoBodyInfo->fprime);
|
||||
nextTwoBodyInfo->del[0] = jdelx / rij;
|
||||
nextTwoBodyInfo->del[1] = jdely / rij;
|
||||
nextTwoBodyInfo->del[2] = jdelz / rij;
|
||||
|
||||
for(int kk = 0; kk < numBonds; kk++) {
|
||||
const MEAM2Body& bondk = myTwoBodyInfo[kk];
|
||||
double cos_theta = (nextTwoBodyInfo->del[0]*bondk.del[0] + nextTwoBodyInfo->del[1]*bondk.del[1] + nextTwoBodyInfo->del[2]*bondk.del[2]);
|
||||
partial_sum += bondk.f * g.eval(cos_theta);
|
||||
}
|
||||
|
||||
rho_value += nextTwoBodyInfo->f * partial_sum;
|
||||
rho_value += rho.eval(rij);
|
||||
|
||||
numBonds++;
|
||||
nextTwoBodyInfo++;
|
||||
}
|
||||
}
|
||||
|
||||
// Compute embedding energy and its derivative.
|
||||
double Uprime_i;
|
||||
double embeddingEnergy = U.eval(rho_value, Uprime_i) - zero_atom_energy;
|
||||
Uprime_thr[i] = Uprime_i;
|
||||
if (EFLAG) {
|
||||
if (eflag_global) eng_vdwl += embeddingEnergy;
|
||||
if (eflag_atom) eatom[i] += embeddingEnergy;
|
||||
}
|
||||
|
||||
double forces_i[3] = {0.0, 0.0, 0.0};
|
||||
|
||||
// Compute three-body contributions to force.
|
||||
for(int jj = 0; jj < numBonds; jj++) {
|
||||
const MEAM2Body bondj = myTwoBodyInfo[jj];
|
||||
const double rij = bondj.r;
|
||||
const int j = bondj.tag;
|
||||
|
||||
const double f_rij_prime = bondj.fprime;
|
||||
const double f_rij = bondj.f;
|
||||
|
||||
double forces_j[3] = {0.0, 0.0, 0.0};
|
||||
|
||||
MEAM2Body const* bondk = myTwoBodyInfo;
|
||||
for(int kk = 0; kk < jj; kk++, ++bondk) {
|
||||
const double rik = bondk->r;
|
||||
|
||||
const double cos_theta = (bondj.del[0]*bondk->del[0] + bondj.del[1]*bondk->del[1] + bondj.del[2]*bondk->del[2]);
|
||||
double g_prime;
|
||||
double g_value = g.eval(cos_theta, g_prime);
|
||||
const double f_rik_prime = bondk->fprime;
|
||||
const double f_rik = bondk->f;
|
||||
|
||||
double fij = -Uprime_i * g_value * f_rik * f_rij_prime;
|
||||
double fik = -Uprime_i * g_value * f_rij * f_rik_prime;
|
||||
|
||||
const double prefactor = Uprime_i * f_rij * f_rik * g_prime;
|
||||
const double prefactor_ij = prefactor / rij;
|
||||
const double prefactor_ik = prefactor / rik;
|
||||
fij += prefactor_ij * cos_theta;
|
||||
fik += prefactor_ik * cos_theta;
|
||||
|
||||
double fj[3], fk[3];
|
||||
|
||||
fj[0] = bondj.del[0] * fij - bondk->del[0] * prefactor_ij;
|
||||
fj[1] = bondj.del[1] * fij - bondk->del[1] * prefactor_ij;
|
||||
fj[2] = bondj.del[2] * fij - bondk->del[2] * prefactor_ij;
|
||||
forces_j[0] += fj[0];
|
||||
forces_j[1] += fj[1];
|
||||
forces_j[2] += fj[2];
|
||||
|
||||
fk[0] = bondk->del[0] * fik - bondj.del[0] * prefactor_ik;
|
||||
fk[1] = bondk->del[1] * fik - bondj.del[1] * prefactor_ik;
|
||||
fk[2] = bondk->del[2] * fik - bondj.del[2] * prefactor_ik;
|
||||
forces_i[0] -= fk[0];
|
||||
forces_i[1] -= fk[1];
|
||||
forces_i[2] -= fk[2];
|
||||
|
||||
const int k = bondk->tag;
|
||||
forces[k][0] += fk[0];
|
||||
forces[k][1] += fk[1];
|
||||
forces[k][2] += fk[2];
|
||||
|
||||
if(EVFLAG) {
|
||||
double delta_ij[3];
|
||||
double delta_ik[3];
|
||||
delta_ij[0] = bondj.del[0] * rij;
|
||||
delta_ij[1] = bondj.del[1] * rij;
|
||||
delta_ij[2] = bondj.del[2] * rij;
|
||||
delta_ik[0] = bondk->del[0] * rik;
|
||||
delta_ik[1] = bondk->del[1] * rik;
|
||||
delta_ik[2] = bondk->del[2] * rik;
|
||||
ev_tally3_thr(this,i,j,k,0.0,0.0,fj,fk,delta_ij,delta_ik,thr);
|
||||
}
|
||||
}
|
||||
|
||||
forces[i][0] -= forces_j[0];
|
||||
forces[i][1] -= forces_j[1];
|
||||
forces[i][2] -= forces_j[2];
|
||||
forces[j][0] += forces_j[0];
|
||||
forces[j][1] += forces_j[1];
|
||||
forces[j][2] += forces_j[2];
|
||||
}
|
||||
|
||||
forces[i][0] += forces_i[0];
|
||||
forces[i][1] += forces_i[1];
|
||||
forces[i][2] += forces_i[2];
|
||||
}
|
||||
|
||||
delete[] myTwoBodyInfo;
|
||||
|
||||
sync_threads();
|
||||
|
||||
// reduce per thread density
|
||||
data_reduce_thr(Uprime_values, nall, nthreads, 1, tid);
|
||||
|
||||
// wait until reduction is complete so that master thread
|
||||
// can communicate U'(rho) values.
|
||||
sync_threads();
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp master
|
||||
#endif
|
||||
{ comm->forward_comm_pair(this); }
|
||||
|
||||
// wait until master thread is done with communication
|
||||
sync_threads();
|
||||
|
||||
const int* const ilist_half = listhalf->ilist;
|
||||
const int* const numneigh_half = listhalf->numneigh;
|
||||
const int* const * const firstneigh_half = listhalf->firstneigh;
|
||||
|
||||
// Compute two-body pair interactions.
|
||||
for(int ii = iifrom; ii < iito; ii++) {
|
||||
const int i = ilist_half[ii];
|
||||
const double xtmp = x[i][0];
|
||||
const double ytmp = x[i][1];
|
||||
const double ztmp = x[i][2];
|
||||
const int* const jlist = firstneigh_half[i];
|
||||
const int jnum = numneigh_half[i];
|
||||
|
||||
for(int jj = 0; jj < jnum; jj++) {
|
||||
const int j = jlist[jj] & NEIGHMASK;
|
||||
|
||||
double jdel[3];
|
||||
jdel[0] = x[j][0] - xtmp;
|
||||
jdel[1] = x[j][1] - ytmp;
|
||||
jdel[2] = x[j][2] - ztmp;
|
||||
double rij_sq = jdel[0]*jdel[0] + jdel[1]*jdel[1] + jdel[2]*jdel[2];
|
||||
|
||||
if(rij_sq < cutforcesq) {
|
||||
double rij = sqrt(rij_sq);
|
||||
|
||||
double rho_prime;
|
||||
rho.eval(rij, rho_prime);
|
||||
double fpair = rho_prime * (Uprime_values[i] + Uprime_values[j]);
|
||||
|
||||
double pair_pot_deriv;
|
||||
double pair_pot = phi.eval(rij, pair_pot_deriv);
|
||||
fpair += pair_pot_deriv;
|
||||
|
||||
// Divide by r_ij to get forces from gradient.
|
||||
fpair /= rij;
|
||||
|
||||
forces[i][0] += jdel[0]*fpair;
|
||||
forces[i][1] += jdel[1]*fpair;
|
||||
forces[i][2] += jdel[2]*fpair;
|
||||
forces[j][0] -= jdel[0]*fpair;
|
||||
forces[j][1] -= jdel[1]*fpair;
|
||||
forces[j][2] -= jdel[2]*fpair;
|
||||
if (EVFLAG) ev_tally_thr(this,i,j,nlocal, 1 /* newton_pair */,
|
||||
pair_pot,0.0,-fpair,jdel[0],jdel[1],jdel[2],thr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
double PairMEAMSplineOMP::memory_usage()
|
||||
{
|
||||
double bytes = memory_usage_thr();
|
||||
bytes += PairMEAMSpline::memory_usage();
|
||||
|
||||
return bytes;
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Axel Kohlmeyer (Temple U)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(meam/spline/omp,PairMEAMSplineOMP)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_MEAM_SPLINE_OMP_H
|
||||
#define LMP_PAIR_MEAM_SPLINE_OMP_H
|
||||
|
||||
#include "pair_meam_spline.h"
|
||||
#include "thr_omp.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairMEAMSplineOMP : public PairMEAMSpline, public ThrOMP {
|
||||
|
||||
public:
|
||||
PairMEAMSplineOMP(class LAMMPS *);
|
||||
|
||||
virtual void compute(int, int);
|
||||
virtual double memory_usage();
|
||||
|
||||
private:
|
||||
template <int EVFLAG, int EFLAG>
|
||||
void eval(int iifrom, int iito, ThrData * const thr);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,398 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Axel Kohlmeyer (Temple U)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "pppm_tip4p_omp.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "domain.h"
|
||||
#include "force.h"
|
||||
#include "memory.h"
|
||||
#include "math_const.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace MathConst;
|
||||
|
||||
#define OFFSET 16384
|
||||
|
||||
#ifdef FFT_SINGLE
|
||||
#define ZEROF 0.0f
|
||||
#define ONEF 1.0f
|
||||
#else
|
||||
#define ZEROF 0.0
|
||||
#define ONEF 1.0
|
||||
#endif
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PPPMTIP4POMP::PPPMTIP4POMP(LAMMPS *lmp, int narg, char **arg) :
|
||||
PPPMOMP(lmp, narg, arg) {}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// NOTE: special version of reduce_data for FFT_SCALAR data type.
|
||||
// reduce per thread data into the first part of the data
|
||||
// array that is used for the non-threaded parts and reset
|
||||
// the temporary storage to 0.0. this routine depends on
|
||||
// multi-dimensional arrays like force stored in this order
|
||||
// x1,y1,z1,x2,y2,z2,...
|
||||
// we need to post a barrier to wait until all threads are done
|
||||
// writing to the array.
|
||||
static void data_reduce_fft(FFT_SCALAR *dall, int nall, int nthreads, int ndim, int tid)
|
||||
{
|
||||
#if defined(_OPENMP)
|
||||
// NOOP in non-threaded execution.
|
||||
if (nthreads == 1) return;
|
||||
#pragma omp barrier
|
||||
{
|
||||
const int nvals = ndim*nall;
|
||||
const int idelta = nvals/nthreads + 1;
|
||||
const int ifrom = tid*idelta;
|
||||
const int ito = ((ifrom + idelta) > nvals) ? nvals : (ifrom + idelta);
|
||||
|
||||
// this if protects against having more threads than atoms
|
||||
if (ifrom < nall) {
|
||||
for (int m = ifrom; m < ito; ++m) {
|
||||
for (int n = 1; n < nthreads; ++n) {
|
||||
dall[m] += dall[n*nvals + m];
|
||||
dall[n*nvals + m] = 0.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
// NOOP in non-threaded execution.
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PPPMTIP4POMP::init()
|
||||
{
|
||||
// TIP4P PPPM requires newton on, b/c it computes forces on ghost atoms
|
||||
|
||||
if (force->newton == 0)
|
||||
error->all(FLERR,"Kspace style pppm/tip4p/omp requires newton on");
|
||||
|
||||
PPPMOMP::init();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
find center grid pt for each of my particles
|
||||
check that full stencil for the particle will fit in my 3d brick
|
||||
store central grid pt indices in part2grid array
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PPPMTIP4POMP::particle_map()
|
||||
{
|
||||
const int * const type = atom->type;
|
||||
const double * const * const x = atom->x;
|
||||
const int nlocal = atom->nlocal;
|
||||
|
||||
int i, flag = 0;
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel for private(i) default(none) reduction(+:flag) schedule(static)
|
||||
#endif
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
int nx,ny,nz,iH1,iH2;
|
||||
double xM[3];
|
||||
|
||||
if (type[i] == typeO) {
|
||||
find_M(i,iH1,iH2,xM);
|
||||
} else {
|
||||
xM[0] = x[i][0];
|
||||
xM[1] = x[i][1];
|
||||
xM[2] = x[i][2];
|
||||
}
|
||||
|
||||
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
||||
// current particle coord can be outside global and local box
|
||||
// add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
|
||||
|
||||
nx = static_cast<int> ((xM[0]-boxlo[0])*delxinv+shift) - OFFSET;
|
||||
ny = static_cast<int> ((xM[1]-boxlo[1])*delyinv+shift) - OFFSET;
|
||||
nz = static_cast<int> ((xM[2]-boxlo[2])*delzinv+shift) - OFFSET;
|
||||
|
||||
part2grid[i][0] = nx;
|
||||
part2grid[i][1] = ny;
|
||||
part2grid[i][2] = nz;
|
||||
|
||||
// check that entire stencil around nx,ny,nz will fit in my 3d brick
|
||||
|
||||
if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
|
||||
ny+nlower < nylo_out || ny+nupper > nyhi_out ||
|
||||
nz+nlower < nzlo_out || nz+nupper > nzhi_out) flag++;
|
||||
}
|
||||
|
||||
int flag_all;
|
||||
MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
|
||||
if (flag_all) error->all(FLERR,"Out of range atoms - cannot compute PPPM");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
create discretized "density" on section of global grid due to my particles
|
||||
density(x,y,z) = charge "density" at grid points of my 3d brick
|
||||
(nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
|
||||
in global grid
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PPPMTIP4POMP::make_rho()
|
||||
{
|
||||
const double * const q = atom->q;
|
||||
const double * const * const x = atom->x;
|
||||
const int * const type = atom->type;
|
||||
const int nthreads = comm->nthreads;
|
||||
const int nlocal = atom->nlocal;
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none)
|
||||
#endif
|
||||
{
|
||||
#if defined(_OPENMP)
|
||||
// each thread works on a fixed chunk of atoms.
|
||||
const int tid = omp_get_thread_num();
|
||||
const int inum = nlocal;
|
||||
const int idelta = 1 + inum/nthreads;
|
||||
const int ifrom = tid*idelta;
|
||||
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
|
||||
#else
|
||||
const int tid = 0;
|
||||
const int ifrom = 0;
|
||||
const int ito = nlocal;
|
||||
#endif
|
||||
|
||||
int l,m,n,nx,ny,nz,mx,my,mz,iH1,iH2;
|
||||
FFT_SCALAR dx,dy,dz,x0,y0,z0;
|
||||
double xM[3];
|
||||
|
||||
// set up clear 3d density array
|
||||
const int nzoffs = (nzhi_out-nzlo_out+1)*tid;
|
||||
FFT_SCALAR * const * const * const db = &(density_brick[nzoffs]);
|
||||
memset(&(db[nzlo_out][nylo_out][nxlo_out]),0,ngrid*sizeof(FFT_SCALAR));
|
||||
|
||||
ThrData *thr = fix->get_thr(tid);
|
||||
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
|
||||
|
||||
// loop over my charges, add their contribution to nearby grid points
|
||||
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
||||
// (dx,dy,dz) = distance to "lower left" grid pt
|
||||
// (mx,my,mz) = global coords of moving stencil pt
|
||||
|
||||
// this if protects against having more threads than local atoms
|
||||
if (ifrom < nlocal) {
|
||||
for (int i = ifrom; i < ito; i++) {
|
||||
|
||||
if (type[i] == typeO) {
|
||||
find_M(i,iH1,iH2,xM);
|
||||
} else {
|
||||
xM[0] = x[i][0];
|
||||
xM[1] = x[i][1];
|
||||
xM[2] = x[i][2];
|
||||
}
|
||||
|
||||
nx = part2grid[i][0];
|
||||
ny = part2grid[i][1];
|
||||
nz = part2grid[i][2];
|
||||
dx = nx+shiftone - (xM[0]-boxlo[0])*delxinv;
|
||||
dy = ny+shiftone - (xM[1]-boxlo[1])*delyinv;
|
||||
dz = nz+shiftone - (xM[2]-boxlo[2])*delzinv;
|
||||
|
||||
compute_rho1d_thr(r1d,dx,dy,dz);
|
||||
|
||||
z0 = delvolinv * q[i];
|
||||
for (n = nlower; n <= nupper; n++) {
|
||||
mz = n+nz;
|
||||
y0 = z0*r1d[2][n];
|
||||
for (m = nlower; m <= nupper; m++) {
|
||||
my = m+ny;
|
||||
x0 = y0*r1d[1][m];
|
||||
for (l = nlower; l <= nupper; l++) {
|
||||
mx = l+nx;
|
||||
db[mz][my][mx] += x0*r1d[0][l];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#if defined(_OPENMP)
|
||||
// reduce 3d density array
|
||||
if (nthreads > 1) {
|
||||
data_reduce_fft(&(density_brick[nzlo_out][nylo_out][nxlo_out]),ngrid,nthreads,1,tid);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
interpolate from grid to get electric field & force on my particles
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PPPMTIP4POMP::fieldforce()
|
||||
{
|
||||
// loop over my charges, interpolate electric field from nearby grid points
|
||||
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
||||
// (dx,dy,dz) = distance to "lower left" grid pt
|
||||
// (mx,my,mz) = global coords of moving stencil pt
|
||||
// ek = 3 components of E-field on particle
|
||||
|
||||
const double * const q = atom->q;
|
||||
const double * const * const x = atom->x;
|
||||
const int * const type = atom->type;
|
||||
const int nthreads = comm->nthreads;
|
||||
const int nlocal = atom->nlocal;
|
||||
const double qqrd2e = force->qqrd2e;
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none)
|
||||
#endif
|
||||
{
|
||||
#if defined(_OPENMP)
|
||||
// each thread works on a fixed chunk of atoms.
|
||||
const int tid = omp_get_thread_num();
|
||||
const int inum = nlocal;
|
||||
const int idelta = 1 + inum/nthreads;
|
||||
const int ifrom = tid*idelta;
|
||||
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
|
||||
#else
|
||||
const int ifrom = 0;
|
||||
const int ito = nlocal;
|
||||
const int tid = 0;
|
||||
#endif
|
||||
ThrData *thr = fix->get_thr(tid);
|
||||
double * const * const f = thr->get_f();
|
||||
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
|
||||
|
||||
int l,m,n,nx,ny,nz,mx,my,mz;
|
||||
FFT_SCALAR dx,dy,dz,x0,y0,z0;
|
||||
FFT_SCALAR ekx,eky,ekz;
|
||||
int iH1,iH2;
|
||||
double xM[3], fx,fy,fz;
|
||||
double ddotf, rOMx, rOMy, rOMz, f1x, f1y, f1z;
|
||||
|
||||
// this if protects against having more threads than local atoms
|
||||
if (ifrom < nlocal) {
|
||||
for (int i = ifrom; i < ito; i++) {
|
||||
|
||||
if (type[i] == typeO) {
|
||||
find_M(i,iH1,iH2,xM);
|
||||
} else {
|
||||
xM[0] = x[i][0];
|
||||
xM[1] = x[i][1];
|
||||
xM[2] = x[i][2];
|
||||
}
|
||||
|
||||
nx = part2grid[i][0];
|
||||
ny = part2grid[i][1];
|
||||
nz = part2grid[i][2];
|
||||
dx = nx+shiftone - (xM[0]-boxlo[0])*delxinv;
|
||||
dy = ny+shiftone - (xM[1]-boxlo[1])*delyinv;
|
||||
dz = nz+shiftone - (xM[2]-boxlo[2])*delzinv;
|
||||
|
||||
compute_rho1d_thr(r1d,dx,dy,dz);
|
||||
|
||||
ekx = eky = ekz = ZEROF;
|
||||
for (n = nlower; n <= nupper; n++) {
|
||||
mz = n+nz;
|
||||
z0 = r1d[2][n];
|
||||
for (m = nlower; m <= nupper; m++) {
|
||||
my = m+ny;
|
||||
y0 = z0*r1d[1][m];
|
||||
for (l = nlower; l <= nupper; l++) {
|
||||
mx = l+nx;
|
||||
x0 = y0*r1d[0][l];
|
||||
ekx -= x0*vdx_brick[mz][my][mx];
|
||||
eky -= x0*vdy_brick[mz][my][mx];
|
||||
ekz -= x0*vdz_brick[mz][my][mx];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// convert E-field to force
|
||||
const double qfactor = qqrd2e*scale*q[i];
|
||||
|
||||
if (type[i] != typeO) {
|
||||
f[i][0] += qfactor*ekx;
|
||||
f[i][1] += qfactor*eky;
|
||||
f[i][2] += qfactor*ekz;
|
||||
|
||||
} else {
|
||||
fx = qfactor * ekx;
|
||||
fy = qfactor * eky;
|
||||
fz = qfactor * ekz;
|
||||
find_M(i,iH1,iH2,xM);
|
||||
|
||||
rOMx = xM[0] - x[i][0];
|
||||
rOMy = xM[1] - x[i][1];
|
||||
rOMz = xM[2] - x[i][2];
|
||||
|
||||
ddotf = (rOMx * fx + rOMy * fy + rOMz * fz) / (qdist * qdist);
|
||||
|
||||
f1x = ddotf * rOMx;
|
||||
f1y = ddotf * rOMy;
|
||||
f1z = ddotf * rOMz;
|
||||
|
||||
f[i][0] += fx - alpha * (fx - f1x);
|
||||
f[i][1] += fy - alpha * (fy - f1y);
|
||||
f[i][2] += fz - alpha * (fz - f1z);
|
||||
|
||||
f[iH1][0] += 0.5*alpha*(fx - f1x);
|
||||
f[iH1][1] += 0.5*alpha*(fy - f1y);
|
||||
f[iH1][2] += 0.5*alpha*(fz - f1z);
|
||||
|
||||
f[iH2][0] += 0.5*alpha*(fx - f1x);
|
||||
f[iH2][1] += 0.5*alpha*(fy - f1y);
|
||||
f[iH2][2] += 0.5*alpha*(fz - f1z);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
find 2 H atoms bonded to O atom i
|
||||
compute position xM of fictitious charge site for O atom
|
||||
also return local indices iH1,iH2 of H atoms
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PPPMTIP4POMP::find_M(int i, int &iH1, int &iH2, double *xM)
|
||||
{
|
||||
iH1 = atom->map(atom->tag[i] + 1);
|
||||
iH2 = atom->map(atom->tag[i] + 2);
|
||||
|
||||
if (iH1 == -1 || iH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing");
|
||||
if (atom->type[iH1] != typeH || atom->type[iH2] != typeH)
|
||||
error->one(FLERR,"TIP4P hydrogen has incorrect atom type");
|
||||
|
||||
const double * const * const x = atom->x;
|
||||
|
||||
double delx1 = x[iH1][0] - x[i][0];
|
||||
double dely1 = x[iH1][1] - x[i][1];
|
||||
double delz1 = x[iH1][2] - x[i][2];
|
||||
domain->minimum_image(delx1,dely1,delz1);
|
||||
|
||||
double delx2 = x[iH2][0] - x[i][0];
|
||||
double dely2 = x[iH2][1] - x[i][1];
|
||||
double delz2 = x[iH2][2] - x[i][2];
|
||||
domain->minimum_image(delx2,dely2,delz2);
|
||||
|
||||
xM[0] = x[i][0] + alpha * 0.5 * (delx1 + delx2);
|
||||
xM[1] = x[i][1] + alpha * 0.5 * (dely1 + dely2);
|
||||
xM[2] = x[i][2] + alpha * 0.5 * (delz1 + delz2);
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef KSPACE_CLASS
|
||||
|
||||
KSpaceStyle(pppm/tip4p/omp,PPPMTIP4POMP)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PPPM_TIP4P_OMP_H
|
||||
#define LMP_PPPM_TIP4P_OMP_H
|
||||
|
||||
#include "pppm_omp.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PPPMTIP4POMP : public PPPMOMP {
|
||||
public:
|
||||
PPPMTIP4POMP(class LAMMPS *, int, char **);
|
||||
virtual ~PPPMTIP4POMP () {};
|
||||
virtual void init();
|
||||
|
||||
protected:
|
||||
virtual void particle_map();
|
||||
virtual void fieldforce();
|
||||
virtual void make_rho();
|
||||
|
||||
private:
|
||||
void find_M(int, int &, int &, double *);
|
||||
|
||||
// void slabcorr(int);
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: Kspace style pppm/tip4p/omp requires newton on
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
*/
|
Loading…
Reference in New Issue