git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7466 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp 2012-01-09 15:22:33 +00:00
parent 619f3e50e2
commit 9e4fd659e9
16 changed files with 3035 additions and 0 deletions

View File

@ -0,0 +1,202 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
This software is distributed under the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#include "math.h"
#include "pair_born_coul_wolf_omp.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "math_const.h"
using namespace LAMMPS_NS;
using namespace MathConst;
/* ---------------------------------------------------------------------- */
PairBornCoulWolfOMP::PairBornCoulWolfOMP(LAMMPS *lmp) :
PairBornCoulWolf(lmp), ThrOMP(lmp, THR_PAIR)
{
respa_enable = 0;
}
/* ---------------------------------------------------------------------- */
void PairBornCoulWolfOMP::compute(int eflag, int vflag)
{
if (eflag || vflag) {
ev_setup(eflag,vflag);
} else evflag = vflag_fdotr = 0;
const int nall = atom->nlocal + atom->nghost;
const int nthreads = comm->nthreads;
const int inum = list->inum;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
{
int ifrom, ito, tid;
loop_setup_thr(ifrom, ito, tid, inum, nthreads);
ThrData *thr = fix->get_thr(tid);
ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
if (evflag) {
if (eflag) {
if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
else eval<1,1,0>(ifrom, ito, thr);
} else {
if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
else eval<1,0,0>(ifrom, ito, thr);
}
} else {
if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
else eval<0,0,0>(ifrom, ito, thr);
}
reduce_thr(this, eflag, vflag, thr);
} // end of omp parallel region
}
/* ---------------------------------------------------------------------- */
template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
void PairBornCoulWolfOMP::eval(int iifrom, int iito, ThrData * const thr)
{
int i,j,ii,jj,jnum,itype,jtype;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
double rsq,r2inv,r6inv,forcecoul,forceborn,factor_coul,factor_lj;
double prefactor;
double r,rexp;
int *ilist,*jlist,*numneigh,**firstneigh;
double erfcc,erfcd,v_sh,dvdrr,e_self,e_shift,f_shift,qisq;
evdwl = ecoul = 0.0;
const double * const * const x = atom->x;
double * const * const f = thr->get_f();
const double * const q = atom->q;
const int * const type = atom->type;
int nlocal = atom->nlocal;
double *special_coul = force->special_coul;
double *special_lj = force->special_lj;
double qqrd2e = force->qqrd2e;
double fxtmp,fytmp,fztmp;
// self and shifted coulombic energy
e_self = v_sh = 0.0;
e_shift = erfc(alf*cut_coul)/cut_coul;
f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
cut_coul;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
// loop over neighbors of my atoms
for (ii = iifrom; ii < iito; ++ii) {
i = ilist[ii];
qtmp = q[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
itype = type[i];
jlist = firstneigh[i];
jnum = numneigh[i];
fxtmp=fytmp=fztmp=0.0;
qisq = qtmp*qtmp;
e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e;
if (EVFLAG) ev_tally_thr(this,i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0,thr);
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
factor_lj = special_lj[sbmask(j)];
factor_coul = special_coul[sbmask(j)];
j &= NEIGHMASK;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
r = sqrt(rsq);
if (rsq < cut_coulsq) {
prefactor = qqrd2e*qtmp*q[j]/r;
erfcc = erfc(alf*r);
erfcd = exp(-alf*alf*r*r);
v_sh = (erfcc - e_shift*r) * prefactor;
dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
forcecoul = dvdrr*rsq*prefactor;
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
} else forcecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
r6inv = r2inv*r2inv*r2inv;
rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv
+ born3[itype][jtype]*r2inv*r6inv;
} else forceborn = 0.0;
fpair = (forcecoul + factor_lj*forceborn)*r2inv;
fxtmp += delx*fpair;
fytmp += dely*fpair;
fztmp += delz*fpair;
if (NEWTON_PAIR || j < nlocal) {
f[j][0] -= delx*fpair;
f[j][1] -= dely*fpair;
f[j][2] -= delz*fpair;
}
if (EFLAG) {
if (rsq < cut_coulsq) {
ecoul = v_sh;
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
} else ecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv +
d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
evdwl *= factor_lj;
} else evdwl = 0.0;
}
if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
evdwl,ecoul,fpair,delx,dely,delz,thr);
}
}
f[i][0] += fxtmp;
f[i][1] += fytmp;
f[i][2] += fztmp;
}
}
/* ---------------------------------------------------------------------- */
double PairBornCoulWolfOMP::memory_usage()
{
double bytes = memory_usage_thr();
bytes += PairBornCoulWolf::memory_usage();
return bytes;
}

View File

@ -0,0 +1,48 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(born/coul/wolf/omp,PairBornCoulWolfOMP)
#else
#ifndef LMP_PAIR_BORN_COUL_WOLF_OMP_H
#define LMP_PAIR_BORN_COUL_WOLF_OMP_H
#include "pair_born_coul_wolf.h"
#include "thr_omp.h"
namespace LAMMPS_NS {
class PairBornCoulWolfOMP : public PairBornCoulWolf, public ThrOMP {
public:
PairBornCoulWolfOMP(class LAMMPS *);
virtual void compute(int, int);
virtual double memory_usage();
private:
template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
void eval(int ifrom, int ito, ThrData * const thr);
};
}
#endif
#endif

View File

@ -0,0 +1,343 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
This software is distributed under the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#include "math.h"
#include "pair_brownian_omp.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "update.h"
#include "random_mars.h"
#include "math_const.h"
using namespace LAMMPS_NS;
using namespace MathConst;
#define EPSILON 1.0e-10
/* ---------------------------------------------------------------------- */
PairBrownianOMP::PairBrownianOMP(LAMMPS *lmp) :
PairBrownian(lmp), ThrOMP(lmp, THR_PAIR)
{
respa_enable = 0;
random_thr = NULL;
}
/* ---------------------------------------------------------------------- */
PairBrownianOMP::~PairBrownianOMP()
{
if (random_thr) {
for (int i=1; i < comm->nthreads; ++i)
delete random_thr[i];
delete[] random_thr;
random_thr = NULL;
}
}
/* ---------------------------------------------------------------------- */
void PairBrownianOMP::compute(int eflag, int vflag)
{
if (eflag || vflag) {
ev_setup(eflag,vflag);
} else evflag = vflag_fdotr = 0;
const int nall = atom->nlocal + atom->nghost;
const int nthreads = comm->nthreads;
const int inum = list->inum;
if (!random_thr)
random_thr = new RanMars*[nthreads];
// to ensure full compatibility with the serial Brownian style
// we use is random number generator instance for thread 0
random_thr[0] = random;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
{
int ifrom, ito, tid;
loop_setup_thr(ifrom, ito, tid, inum, nthreads);
ThrData *thr = fix->get_thr(tid);
ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
// generate a random number generator instance for
// all threads != 0. make sure we use unique seeds.
if (random_thr && tid > 0)
random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me
+ comm->nprocs*tid);
if (flaglog) {
if (evflag) {
if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
else eval<1,1,0>(ifrom, ito, thr);
} else {
if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
else eval<1,0,0>(ifrom, ito, thr);
}
} else {
if (evflag) {
if (force->newton_pair) eval<0,1,1>(ifrom, ito, thr);
else eval<0,1,0>(ifrom, ito, thr);
} else {
if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
else eval<0,0,0>(ifrom, ito, thr);
}
}
reduce_thr(this, eflag, vflag, thr);
} // end of omp parallel region
}
template <int FLAGLOG, int EVFLAG, int NEWTON_PAIR>
void PairBrownianOMP::eval(int iifrom, int iito, ThrData * const thr)
{
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz,tx,ty,tz;
double rsq,r,h_sep,radi;
int *ilist,*jlist,*numneigh,**firstneigh;
const double * const * const x = atom->x;
double * const * const f = thr->get_f();
double * const * const torque = thr->get_torque();
const double * const radius = atom->radius;
const int * const type = atom->type;
const int nlocal = atom->nlocal;
RanMars &rng = *random_thr[thr->get_tid()];
double vxmu2f = force->vxmu2f;
double randr;
double prethermostat;
double xl[3],a_sq,a_sh,a_pu,Fbmag;
double p1[3],p2[3],p3[3];
int overlaps = 0;
// scale factor for Brownian moments
prethermostat = sqrt(24.0*force->boltz*t_target/update->dt);
prethermostat *= sqrt(force->vxmu2f/force->ftm2v/force->mvv2e);
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
// loop over neighbors of my atoms
for (ii = iifrom; ii < iito; ++ii) {
i = ilist[ii];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
itype = type[i];
radi = radius[i];
jlist = firstneigh[i];
jnum = numneigh[i];
// FLD contribution to force and torque due to isotropic terms
if (flagfld) {
f[i][0] += prethermostat*sqrt(R0)*(rng.uniform()-0.5);
f[i][1] += prethermostat*sqrt(R0)*(rng.uniform()-0.5);
f[i][2] += prethermostat*sqrt(R0)*(rng.uniform()-0.5);
if (FLAGLOG) {
torque[i][0] += prethermostat*sqrt(RT0)*(rng.uniform()-0.5);
torque[i][1] += prethermostat*sqrt(RT0)*(rng.uniform()-0.5);
torque[i][2] += prethermostat*sqrt(RT0)*(rng.uniform()-0.5);
}
}
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
j &= NEIGHMASK;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r = sqrt(rsq);
// scalar resistances a_sq and a_sh
h_sep = r - 2.0*radi;
// check for overlaps
if (h_sep < 0.0) overlaps++;
// if less than minimum gap, use minimum gap instead
if (r < cut_inner[itype][jtype])
h_sep = cut_inner[itype][jtype] - 2.0*radi;
// scale h_sep by radi
h_sep = h_sep/radi;
// scalar resistances
if (FLAGLOG) {
a_sq = 6.0*MY_PI*mu*radi*(1.0/4.0/h_sep + 9.0/40.0*log(1.0/h_sep));
a_sh = 6.0*MY_PI*mu*radi*(1.0/6.0*log(1.0/h_sep));
a_pu = 8.0*MY_PI*mu*pow(radi,3)*(3.0/160.0*log(1.0/h_sep));
} else
a_sq = 6.0*MY_PI*mu*radi*(1.0/4.0/h_sep);
// generate the Pairwise Brownian Force: a_sq
Fbmag = prethermostat*sqrt(a_sq);
// generate a random number
randr = rng.uniform()-0.5;
// contribution due to Brownian motion
fx = Fbmag*randr*delx/r;
fy = Fbmag*randr*dely/r;
fz = Fbmag*randr*delz/r;
// add terms due to a_sh
if (FLAGLOG) {
// generate two orthogonal vectors to the line of centers
p1[0] = delx/r; p1[1] = dely/r; p1[2] = delz/r;
set_3_orthogonal_vectors(p1,p2,p3);
// magnitude
Fbmag = prethermostat*sqrt(a_sh);
// force in each of the two directions
randr = rng.uniform()-0.5;
fx += Fbmag*randr*p2[0];
fy += Fbmag*randr*p2[1];
fz += Fbmag*randr*p2[2];
randr = rng.uniform()-0.5;
fx += Fbmag*randr*p3[0];
fy += Fbmag*randr*p3[1];
fz += Fbmag*randr*p3[2];
}
// scale forces to appropriate units
fx = vxmu2f*fx;
fy = vxmu2f*fy;
fz = vxmu2f*fz;
// sum to total force
f[i][0] -= fx;
f[i][1] -= fy;
f[i][2] -= fz;
if (NEWTON_PAIR || j < nlocal) {
//randr = rng.uniform()-0.5;
//fx = Fbmag*randr*delx/r;
//fy = Fbmag*randr*dely/r;
//fz = Fbmag*randr*delz/r;
f[j][0] += fx;
f[j][1] += fy;
f[j][2] += fz;
}
// torque due to the Brownian Force
if (FLAGLOG) {
// location of the point of closest approach on I from its center
xl[0] = -delx/r*radi;
xl[1] = -dely/r*radi;
xl[2] = -delz/r*radi;
// torque = xl_cross_F
tx = xl[1]*fz - xl[2]*fy;
ty = xl[2]*fx - xl[0]*fz;
tz = xl[0]*fy - xl[1]*fx;
// torque is same on both particles
torque[i][0] -= tx;
torque[i][1] -= ty;
torque[i][2] -= tz;
if (NEWTON_PAIR || j < nlocal) {
torque[j][0] -= tx;
torque[j][1] -= ty;
torque[j][2] -= tz;
}
// torque due to a_pu
Fbmag = prethermostat*sqrt(a_pu);
// force in each direction
randr = rng.uniform()-0.5;
tx = Fbmag*randr*p2[0];
ty = Fbmag*randr*p2[1];
tz = Fbmag*randr*p2[2];
randr = rng.uniform()-0.5;
tx += Fbmag*randr*p3[0];
ty += Fbmag*randr*p3[1];
tz += Fbmag*randr*p3[2];
// torque has opposite sign on two particles
torque[i][0] -= tx;
torque[i][1] -= ty;
torque[i][2] -= tz;
if (NEWTON_PAIR || j < nlocal) {
torque[j][0] += tx;
torque[j][1] += ty;
torque[j][2] += tz;
}
}
if (EVFLAG) ev_tally_xyz(i,j,nlocal,NEWTON_PAIR,
0.0,0.0,-fx,-fy,-fz,delx,dely,delz);
}
}
}
}
/* ---------------------------------------------------------------------- */
double PairBrownianOMP::memory_usage()
{
double bytes = memory_usage_thr();
bytes += PairBrownian::memory_usage();
bytes += comm->nthreads * sizeof(RanMars*);
bytes += comm->nthreads * sizeof(RanMars);
return bytes;
}

View File

@ -0,0 +1,52 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(brownian/omp,PairBrownianOMP)
#else
#ifndef LMP_PAIR_BROWNIAN_OMP_H
#define LMP_PAIR_BROWNIAN_OMP_H
#include "pair_brownian.h"
#include "thr_omp.h"
namespace LAMMPS_NS {
class PairBrownianOMP : public PairBrownian, public ThrOMP {
public:
PairBrownianOMP(class LAMMPS *);
virtual ~PairBrownianOMP();
virtual void compute(int, int);
virtual double memory_usage();
protected:
class RanMars **random_thr;
private:
template <int LOGFLAG, int EVFLAG, int NEWTON_PAIR>
void eval(int ifrom, int ito, ThrData * const thr);
};
}
#endif
#endif

View File

@ -0,0 +1,330 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
This software is distributed under the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#include "math.h"
#include "pair_brownian_poly_omp.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "update.h"
#include "random_mars.h"
#include "math_const.h"
using namespace LAMMPS_NS;
using namespace MathConst;
#define EPSILON 1.0e-10
/* ---------------------------------------------------------------------- */
PairBrownianPolyOMP::PairBrownianPolyOMP(LAMMPS *lmp) :
PairBrownianPoly(lmp), ThrOMP(lmp, THR_PAIR)
{
respa_enable = 0;
random_thr = NULL;
}
/* ---------------------------------------------------------------------- */
PairBrownianPolyOMP::~PairBrownianPolyOMP()
{
if (random_thr) {
for (int i=1; i < comm->nthreads; ++i)
delete random_thr[i];
delete[] random_thr;
random_thr = NULL;
}
}
/* ---------------------------------------------------------------------- */
void PairBrownianPolyOMP::compute(int eflag, int vflag)
{
if (eflag || vflag) {
ev_setup(eflag,vflag);
} else evflag = vflag_fdotr = 0;
const int nall = atom->nlocal + atom->nghost;
const int nthreads = comm->nthreads;
const int inum = list->inum;
if (!random_thr)
random_thr = new RanMars*[nthreads];
// to ensure full compatibility with the serial BrownianPoly style
// we use is random number generator instance for thread 0
random_thr[0] = random;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
{
int ifrom, ito, tid;
loop_setup_thr(ifrom, ito, tid, inum, nthreads);
ThrData *thr = fix->get_thr(tid);
ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
// generate a random number generator instance for
// all threads != 0. make sure we use unique seeds.
if (random_thr && tid > 0)
random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me
+ comm->nprocs*tid);
if (flaglog) {
if (evflag)
eval<1,1>(ifrom, ito, thr);
else
eval<1,0>(ifrom, ito, thr);
} else {
if (evflag)
eval<0,1>(ifrom, ito, thr);
else eval<0,0>(ifrom, ito, thr);
}
reduce_thr(this, eflag, vflag, thr);
} // end of omp parallel region
}
template <int FLAGLOG, int EVFLAG>
void PairBrownianPolyOMP::eval(int iifrom, int iito, ThrData * const thr)
{
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz,tx,ty,tz;
double rsq,r,h_sep,beta0,beta1,radi,radj;
int *ilist,*jlist,*numneigh,**firstneigh;
const double * const * const x = atom->x;
double * const * const f = thr->get_f();
double * const * const torque = thr->get_torque();
const double * const radius = atom->radius;
const int * const type = atom->type;
const int nlocal = atom->nlocal;
RanMars &rng = *random_thr[thr->get_tid()];
double vxmu2f = force->vxmu2f;
int overlaps = 0;
double randr;
double prethermostat;
double xl[3],a_sq,a_sh,a_pu,Fbmag;
double p1[3],p2[3],p3[3];
// scale factor for Brownian moments
prethermostat = sqrt(24.0*force->boltz*t_target/update->dt);
prethermostat *= sqrt(force->vxmu2f/force->ftm2v/force->mvv2e);
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
// loop over neighbors of my atoms
for (ii = iifrom; ii < iito; ++ii) {
i = ilist[ii];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
itype = type[i];
radi = radius[i];
jlist = firstneigh[i];
jnum = numneigh[i];
// FLD contribution to force and torque due to isotropic terms
if (flagfld) {
f[i][0] += prethermostat*sqrt(R0)*(rng.uniform()-0.5);
f[i][1] += prethermostat*sqrt(R0)*(rng.uniform()-0.5);
f[i][2] += prethermostat*sqrt(R0)*(rng.uniform()-0.5);
if (FLAGLOG) {
const double rad3 = radi*radi*radi;
torque[i][0] += prethermostat*sqrt(RT0*rad3)*(rng.uniform()-0.5);
torque[i][1] += prethermostat*sqrt(RT0*rad3)*(rng.uniform()-0.5);
torque[i][2] += prethermostat*sqrt(RT0*rad3)*(rng.uniform()-0.5);
}
}
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
j &= NEIGHMASK;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
jtype = type[j];
radj = radius[j];
if (rsq < cutsq[itype][jtype]) {
r = sqrt(rsq);
// scalar resistances a_sq and a_sh
h_sep = r - radi-radj;
// check for overlaps
if (h_sep < 0.0) overlaps++;
// if less than minimum gap, use minimum gap instead
if (r < cut_inner[itype][jtype])
h_sep = cut_inner[itype][jtype] - radi-radj;
// scale h_sep by radi
h_sep = h_sep/radi;
beta0 = radj/radi;
beta1 = 1.0 + beta0;
// scalar resistances
if (FLAGLOG) {
a_sq = beta0*beta0/beta1/beta1/h_sep +
(1.0+7.0*beta0+beta0*beta0)/5.0/pow(beta1,3)*log(1.0/h_sep);
a_sq += (1.0+18.0*beta0-29.0*beta0*beta0+18.0*pow(beta0,3) +
pow(beta0,4))/21.0/pow(beta1,4)*h_sep*log(1.0/h_sep);
a_sq *= 6.0*MY_PI*mu*radi;
a_sh = 4.0*beta0*(2.0+beta0+2.0*beta0*beta0)/15.0/pow(beta1,3) *
log(1.0/h_sep);
a_sh += 4.0*(16.0-45.0*beta0+58.0*beta0*beta0-45.0*pow(beta0,3) +
16.0*pow(beta0,4))/375.0/pow(beta1,4) *
h_sep*log(1.0/h_sep);
a_sh *= 6.0*MY_PI*mu*radi;
a_pu = beta0*(4.0+beta0)/10.0/beta1/beta1*log(1.0/h_sep);
a_pu += (32.0-33.0*beta0+83.0*beta0*beta0+43.0 *
pow(beta0,3))/250.0/pow(beta1,3)*h_sep*log(1.0/h_sep);
a_pu *= 8.0*MY_PI*mu*pow(radi,3);
} else a_sq = 6.0*MY_PI*mu*radi*(beta0*beta0/beta1/beta1/h_sep);
// generate the Pairwise Brownian Force: a_sq
Fbmag = prethermostat*sqrt(a_sq);
// generate a random number
randr = rng.uniform()-0.5;
// contribution due to Brownian motion
fx = Fbmag*randr*delx/r;
fy = Fbmag*randr*dely/r;
fz = Fbmag*randr*delz/r;
// add terms due to a_sh
if (FLAGLOG) {
// generate two orthogonal vectors to the line of centers
p1[0] = delx/r; p1[1] = dely/r; p1[2] = delz/r;
set_3_orthogonal_vectors(p1,p2,p3);
// magnitude
Fbmag = prethermostat*sqrt(a_sh);
// force in each of the two directions
randr = rng.uniform()-0.5;
fx += Fbmag*randr*p2[0];
fy += Fbmag*randr*p2[1];
fz += Fbmag*randr*p2[2];
randr = rng.uniform()-0.5;
fx += Fbmag*randr*p3[0];
fy += Fbmag*randr*p3[1];
fz += Fbmag*randr*p3[2];
}
// scale forces to appropriate units
fx = vxmu2f*fx;
fy = vxmu2f*fy;
fz = vxmu2f*fz;
// sum to total force
f[i][0] -= fx;
f[i][1] -= fy;
f[i][2] -= fz;
// torque due to the Brownian Force
if (FLAGLOG) {
// location of the point of closest approach on I from its center
xl[0] = -delx/r*radi;
xl[1] = -dely/r*radi;
xl[2] = -delz/r*radi;
// torque = xl_cross_F
tx = xl[1]*fz - xl[2]*fy;
ty = xl[2]*fx - xl[0]*fz;
tz = xl[0]*fy - xl[1]*fx;
// torque is same on both particles
torque[i][0] -= tx;
torque[i][1] -= ty;
torque[i][2] -= tz;
// torque due to a_pu
Fbmag = prethermostat*sqrt(a_pu);
// force in each direction
randr = rng.uniform()-0.5;
tx = Fbmag*randr*p2[0];
ty = Fbmag*randr*p2[1];
tz = Fbmag*randr*p2[2];
randr = rng.uniform()-0.5;
tx += Fbmag*randr*p3[0];
ty += Fbmag*randr*p3[1];
tz += Fbmag*randr*p3[2];
// torque has opposite sign on two particles
torque[i][0] -= tx;
torque[i][1] -= ty;
torque[i][2] -= tz;
}
if (EVFLAG) ev_tally_xyz(i,j,nlocal,/* newton_pair */ 0,
0.0,0.0,-fx,-fy,-fz,delx,dely,delz);
}
}
}
}
/* ---------------------------------------------------------------------- */
double PairBrownianPolyOMP::memory_usage()
{
double bytes = memory_usage_thr();
bytes += PairBrownianPoly::memory_usage();
bytes += comm->nthreads * sizeof(RanMars*);
bytes += comm->nthreads * sizeof(RanMars);
return bytes;
}

View File

@ -0,0 +1,52 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(brownian/poly/omp,PairBrownianPolyOMP)
#else
#ifndef LMP_PAIR_BROWNIAN_POLY_OMP_H
#define LMP_PAIR_BROWNIAN_POLY_OMP_H
#include "pair_brownian_poly.h"
#include "thr_omp.h"
namespace LAMMPS_NS {
class PairBrownianPolyOMP : public PairBrownianPoly, public ThrOMP {
public:
PairBrownianPolyOMP(class LAMMPS *);
virtual ~PairBrownianPolyOMP();
virtual void compute(int, int);
virtual double memory_usage();
protected:
class RanMars **random_thr;
private:
template <int LOGFLAG, int EVFLAG>
void eval(int ifrom, int ito, ThrData * const thr);
};
}
#endif
#endif

View File

@ -0,0 +1,183 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
This software is distributed under the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#include "math.h"
#include "pair_coul_wolf_omp.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "math_const.h"
using namespace LAMMPS_NS;
using namespace MathConst;
/* ---------------------------------------------------------------------- */
PairCoulWolfOMP::PairCoulWolfOMP(LAMMPS *lmp) :
PairCoulWolf(lmp), ThrOMP(lmp, THR_PAIR)
{
respa_enable = 0;
}
/* ---------------------------------------------------------------------- */
void PairCoulWolfOMP::compute(int eflag, int vflag)
{
if (eflag || vflag) {
ev_setup(eflag,vflag);
} else evflag = vflag_fdotr = 0;
const int nall = atom->nlocal + atom->nghost;
const int nthreads = comm->nthreads;
const int inum = list->inum;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
{
int ifrom, ito, tid;
loop_setup_thr(ifrom, ito, tid, inum, nthreads);
ThrData *thr = fix->get_thr(tid);
ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
if (evflag) {
if (eflag) {
if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
else eval<1,1,0>(ifrom, ito, thr);
} else {
if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
else eval<1,0,0>(ifrom, ito, thr);
}
} else {
if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
else eval<0,0,0>(ifrom, ito, thr);
}
reduce_thr(this, eflag, vflag, thr);
} // end of omp parallel region
}
/* ---------------------------------------------------------------------- */
template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
void PairCoulWolfOMP::eval(int iifrom, int iito, ThrData * const thr)
{
int i,j,ii,jj,jnum,itype,jtype;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
double rsq,forcecoul,factor_coul;
double prefactor;
double r,rexp;
int *ilist,*jlist,*numneigh,**firstneigh;
double erfcc,erfcd,v_sh,dvdrr,e_self,e_shift,f_shift,qisq;
ecoul = 0.0;
const double * const * const x = atom->x;
double * const * const f = thr->get_f();
const double * const q = atom->q;
const int * const type = atom->type;
const int nlocal = atom->nlocal;
const double * const special_coul = force->special_coul;
const double qqrd2e = force->qqrd2e;
double fxtmp,fytmp,fztmp;
// self and shifted coulombic energy
e_self = v_sh = 0.0;
e_shift = erfc(alf*cut_coul)/cut_coul;
f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
cut_coul;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
// loop over neighbors of my atoms
for (ii = iifrom; ii < iito; ++ii) {
i = ilist[ii];
qtmp = q[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
itype = type[i];
jlist = firstneigh[i];
jnum = numneigh[i];
fxtmp=fytmp=fztmp=0.0;
qisq = qtmp*qtmp;
e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e;
if (EVFLAG) ev_tally_thr(this,i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0,thr);
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
factor_coul = special_coul[sbmask(j)];
j &= NEIGHMASK;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
jtype = type[j];
if (rsq < cut_coulsq) {
r = sqrt(rsq);
prefactor = qqrd2e*qtmp*q[j]/r;
erfcc = erfc(alf*r);
erfcd = exp(-alf*alf*r*r);
v_sh = (erfcc - e_shift*r) * prefactor;
dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
forcecoul = dvdrr*rsq*prefactor;
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
fpair = forcecoul / rsq;
fxtmp += delx*fpair;
fytmp += dely*fpair;
fztmp += delz*fpair;
if (NEWTON_PAIR || j < nlocal) {
f[j][0] -= delx*fpair;
f[j][1] -= dely*fpair;
f[j][2] -= delz*fpair;
}
if (EFLAG) {
if (rsq < cut_coulsq) {
ecoul = v_sh;
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
} else ecoul = 0.0;
}
if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
0.0,ecoul,fpair,delx,dely,delz,thr);
}
}
f[i][0] += fxtmp;
f[i][1] += fytmp;
f[i][2] += fztmp;
}
}
/* ---------------------------------------------------------------------- */
double PairCoulWolfOMP::memory_usage()
{
double bytes = memory_usage_thr();
bytes += PairCoulWolf::memory_usage();
return bytes;
}

View File

@ -0,0 +1,48 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(coul/wolf/omp,PairCoulWolfOMP)
#else
#ifndef LMP_PAIR_COUL_WOLF_OMP_H
#define LMP_PAIR_COUL_WOLF_OMP_H
#include "pair_coul_wolf.h"
#include "thr_omp.h"
namespace LAMMPS_NS {
class PairCoulWolfOMP : public PairCoulWolf, public ThrOMP {
public:
PairCoulWolfOMP(class LAMMPS *);
virtual void compute(int, int);
virtual double memory_usage();
private:
template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
void eval(int ifrom, int ito, ThrData * const thr);
};
}
#endif
#endif

View File

@ -0,0 +1,226 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
This software is distributed under the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#include "math.h"
#include "pair_lj_class2_coul_pppm_omp.h"
#include "pppm_proxy.h"
#include "atom.h"
#include "comm.h"
#include "error.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "update.h"
#include <string.h>
using namespace LAMMPS_NS;
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
/* ---------------------------------------------------------------------- */
PairLJClass2CoulPPPMOMP::PairLJClass2CoulPPPMOMP(LAMMPS *lmp) :
PairLJClass2CoulLong(lmp), ThrOMP(lmp, THR_PAIR|THR_PROXY)
{
respa_enable = 0;
nproxy=1;
kspace = NULL;
}
/* ---------------------------------------------------------------------- */
void PairLJClass2CoulPPPMOMP::init_style()
{
if (comm->nthreads < 2)
error->all(FLERR,"need at least two threads per MPI task for this pair style");
if (strcmp(force->kspace_style,"pppm/proxy") != 0)
error->all(FLERR,"kspace style pppm/proxy is required with this pair style");
kspace = static_cast<PPPMProxy *>(force->kspace);
PairLJClass2CoulLong::init_style();
}
/* ---------------------------------------------------------------------- */
void PairLJClass2CoulPPPMOMP::compute(int eflag, int vflag)
{
if (eflag || vflag) {
ev_setup(eflag,vflag);
} else evflag = vflag_fdotr = 0;
const int nall = atom->nlocal + atom->nghost;
const int nthreads = comm->nthreads;
const int inum = list->inum;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
{
int ifrom, ito, tid;
loop_setup_thr(ifrom, ito, tid, inum, nthreads, nproxy);
ThrData *thr = fix->get_thr(tid);
ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
// thread id 0 runs pppm, the rest the pair style
if (tid < nproxy) {
kspace->compute_proxy(eflag,vflag);
} else {
if (evflag) {
if (eflag) {
if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
else eval<1,1,0>(ifrom, ito, thr);
} else {
if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
else eval<1,0,0>(ifrom, ito, thr);
}
} else {
if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
else eval<0,0,0>(ifrom, ito, thr);
}
}
sync_threads();
reduce_thr(this, eflag, vflag, thr, nproxy);
} // end of omp parallel region
}
/* ---------------------------------------------------------------------- */
template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
void PairLJClass2CoulPPPMOMP::eval(int iifrom, int iito, ThrData * const thr)
{
int i,j,ii,jj,jnum,itype,jtype;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
double r,rsq,rinv,r2inv,r3inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
double grij,expm2,prefactor,t,erfc;
int *ilist,*jlist,*numneigh,**firstneigh;
evdwl = ecoul = 0.0;
const double * const * const x = atom->x;
double * const * const f = thr->get_f();
const double * const q = atom->q;
const int * const type = atom->type;
const int nlocal = atom->nlocal;
const double * const special_coul = force->special_coul;
const double * const special_lj = force->special_lj;
const double qqrd2e = force->qqrd2e;
double fxtmp,fytmp,fztmp;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
// loop over neighbors of my atoms
for (ii = iifrom; ii < iito; ++ii) {
i = ilist[ii];
qtmp = q[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
itype = type[i];
jlist = firstneigh[i];
jnum = numneigh[i];
fxtmp=fytmp=fztmp=0.0;
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
factor_lj = special_lj[sbmask(j)];
factor_coul = special_coul[sbmask(j)];
j &= NEIGHMASK;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
if (rsq < cut_coulsq) {
r = sqrt(rsq);
grij = g_ewald * r;
expm2 = exp(-grij*grij);
t = 1.0 / (1.0 + EWALD_P*grij);
erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
prefactor = qqrd2e * qtmp*q[j]/r;
forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
} else forcecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
rinv = sqrt(r2inv);
r3inv = r2inv*rinv;
r6inv = r3inv*r3inv;
forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
forcelj *= factor_lj;
} else forcelj = 0.0;
fpair = (forcecoul + forcelj) * r2inv;
fxtmp += delx*fpair;
fytmp += dely*fpair;
fztmp += delz*fpair;
if (NEWTON_PAIR || j < nlocal) {
f[j][0] -= delx*fpair;
f[j][1] -= dely*fpair;
f[j][2] -= delz*fpair;
}
if (EFLAG) {
if (rsq < cut_coulsq) {
ecoul = prefactor*erfc;
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
} else ecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
offset[itype][jtype];
evdwl *= factor_lj;
} else evdwl = 0.0;
}
if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
evdwl,ecoul,fpair,delx,dely,delz,thr);
}
}
f[i][0] += fxtmp;
f[i][1] += fytmp;
f[i][2] += fztmp;
}
}
/* ---------------------------------------------------------------------- */
double PairLJClass2CoulPPPMOMP::memory_usage()
{
double bytes = memory_usage_thr();
bytes += PairLJClass2CoulLong::memory_usage();
return bytes;
}

View File

@ -0,0 +1,54 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(lj/class2/coul/pppm/omp,PairLJClass2CoulPPPMOMP)
#else
#ifndef LMP_PAIR_LJ_CLASS2_COUL_PPPM_OMP_H
#define LMP_PAIR_LJ_CLASS2_COUL_PPPM_OMP_H
#include "pair_lj_class2_coul_long.h"
#include "thr_omp.h"
namespace LAMMPS_NS {
class PairLJClass2CoulPPPMOMP : public PairLJClass2CoulLong, public ThrOMP {
public:
PairLJClass2CoulPPPMOMP(class LAMMPS *);
virtual ~PairLJClass2CoulPPPMOMP() {};
virtual void init_style();
virtual void compute(int, int);
virtual double memory_usage();
private:
template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
void eval(int ifrom, int ito, ThrData * const thr);
class PPPMProxy *kspace;
int nproxy;
};
}
#endif
#endif

View File

@ -0,0 +1,413 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
This software is distributed under the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#include "math.h"
#include "pair_lubricate_omp.h"
#include "atom.h"
#include "comm.h"
#include "domain.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "update.h"
#include "random_mars.h"
#include "math_const.h"
using namespace LAMMPS_NS;
using namespace MathConst;
/* ---------------------------------------------------------------------- */
PairLubricateOMP::PairLubricateOMP(LAMMPS *lmp) :
PairLubricate(lmp), ThrOMP(lmp, THR_PAIR)
{
respa_enable = 0;
}
/* ---------------------------------------------------------------------- */
PairLubricateOMP::~PairLubricateOMP()
{}
/* ---------------------------------------------------------------------- */
void PairLubricateOMP::compute(int eflag, int vflag)
{
if (eflag || vflag) {
ev_setup(eflag,vflag);
} else evflag = vflag_fdotr = 0;
const int nall = atom->nlocal + atom->nghost;
const int nthreads = comm->nthreads;
const int inum = list->inum;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
{
int ifrom, ito, tid;
loop_setup_thr(ifrom, ito, tid, inum, nthreads);
ThrData *thr = fix->get_thr(tid);
ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
if (flaglog) {
if (evflag) {
if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
else eval<1,1,0>(ifrom, ito, thr);
} else {
if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
else eval<1,0,0>(ifrom, ito, thr);
}
} else {
if (evflag) {
if (force->newton_pair) eval<0,1,1>(ifrom, ito, thr);
else eval<0,1,0>(ifrom, ito, thr);
} else {
if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
else eval<0,0,0>(ifrom, ito, thr);
}
}
reduce_thr(this, eflag, vflag, thr);
} // end of omp parallel region
}
template <int FLAGLOG, int EVFLAG, int NEWTON_PAIR>
void PairLubricateOMP::eval(int iifrom, int iito, ThrData * const thr)
{
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz,tx,ty,tz;
double rsq,r,h_sep,radi;
double vr1,vr2,vr3,vnnr,vn1,vn2,vn3;
double vt1,vt2,vt3,wt1,wt2,wt3,wdotn;
double vRS0;
double vi[3],vj[3],wi[3],wj[3],xl[3];
double a_sq,a_sh,a_pu;
int *ilist,*jlist,*numneigh,**firstneigh;
double lamda[3],vstream[3];
double vxmu2f = force->vxmu2f;
double * const * const x = atom->x;
double * const * const v = atom->v;
double * const * const f = thr->get_f();
double * const * const omega = atom->omega;
double * const * const torque = thr->get_torque();
const double * const radius = atom->radius;
const int * const type = atom->type;
const int nlocal = atom->nlocal;
int overlaps = 0;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
// subtract streaming component of velocity, omega, angmom
// assume fluid streaming velocity = box deformation rate
// vstream = (ux,uy,uz)
// ux = h_rate[0]*x + h_rate[5]*y + h_rate[4]*z
// uy = h_rate[1]*y + h_rate[3]*z
// uz = h_rate[2]*z
// omega_new = omega - curl(vstream)/2
// angmom_new = angmom - I*curl(vstream)/2
// Ef = (grad(vstream) + (grad(vstream))^T) / 2
if (shearing) {
double *h_rate = domain->h_rate;
double *h_ratelo = domain->h_ratelo;
for (ii = iifrom; ii < iito; ii++) {
i = ilist[ii];
itype = type[i];
radi = radius[i];
domain->x2lamda(x[i],lamda);
vstream[0] = h_rate[0]*lamda[0] + h_rate[5]*lamda[1] +
h_rate[4]*lamda[2] + h_ratelo[0];
vstream[1] = h_rate[1]*lamda[1] + h_rate[3]*lamda[2] + h_ratelo[1];
vstream[2] = h_rate[2]*lamda[2] + h_ratelo[2];
v[i][0] -= vstream[0];
v[i][1] -= vstream[1];
v[i][2] -= vstream[2];
omega[i][0] += 0.5*h_rate[3];
omega[i][1] -= 0.5*h_rate[4];
omega[i][2] += 0.5*h_rate[5];
}
// set Ef from h_rate in strain units
Ef[0][0] = h_rate[0]/domain->xprd;
Ef[1][1] = h_rate[1]/domain->yprd;
Ef[2][2] = h_rate[2]/domain->zprd;
Ef[0][1] = Ef[1][0] = 0.5 * h_rate[5]/domain->yprd;
Ef[0][2] = Ef[2][0] = 0.5 * h_rate[4]/domain->zprd;
Ef[1][2] = Ef[2][1] = 0.5 * h_rate[3]/domain->zprd;
// copy updated velocity/omega/angmom to the ghost particles
// no need to do this if not shearing since comm->ghost_velocity is set
sync_threads();
comm->forward_comm_pair(this);
}
// loop over neighbors of my atoms
for (ii = iifrom; ii < iito; ++ii) {
i = ilist[ii];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
itype = type[i];
radi = radius[i];
jlist = firstneigh[i];
jnum = numneigh[i];
// angular velocity
wi[0] = omega[i][0];
wi[1] = omega[i][1];
wi[2] = omega[i][2];
// FLD contribution to force and torque due to isotropic terms
// FLD contribution to stress from isotropic RS0
if (flagfld) {
f[i][0] -= vxmu2f*R0*v[i][0];
f[i][1] -= vxmu2f*R0*v[i][1];
f[i][2] -= vxmu2f*R0*v[i][2];
torque[i][0] -= vxmu2f*RT0*wi[0];
torque[i][1] -= vxmu2f*RT0*wi[1];
torque[i][2] -= vxmu2f*RT0*wi[2];
if (shearing && vflag_either) {
vRS0 = -vxmu2f * RS0;
v_tally_tensor(i,i,nlocal,NEWTON_PAIR,
vRS0*Ef[0][0],vRS0*Ef[1][1],vRS0*Ef[2][2],
vRS0*Ef[0][1],vRS0*Ef[0][2],vRS0*Ef[1][2]);
}
}
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
j &= NEIGHMASK;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r = sqrt(rsq);
// angular momentum = I*omega = 2/5 * M*R^2 * omega
wj[0] = omega[j][0];
wj[1] = omega[j][1];
wj[2] = omega[j][2];
// xl = point of closest approach on particle i from its center
xl[0] = -delx/r*radi;
xl[1] = -dely/r*radi;
xl[2] = -delz/r*radi;
// velocity at the point of closest approach on both particles
// v = v + omega_cross_xl - Ef.xl
// particle i
vi[0] = v[i][0] + (wi[1]*xl[2] - wi[2]*xl[1])
- (Ef[0][0]*xl[0] + Ef[0][1]*xl[1] + Ef[0][2]*xl[2]);
vi[1] = v[i][1] + (wi[2]*xl[0] - wi[0]*xl[2])
- (Ef[1][0]*xl[0] + Ef[1][1]*xl[1] + Ef[1][2]*xl[2]);
vi[2] = v[i][2] + (wi[0]*xl[1] - wi[1]*xl[0])
- (Ef[2][0]*xl[0] + Ef[2][1]*xl[1] + Ef[2][2]*xl[2]);
// particle j
vj[0] = v[j][0] - (wj[1]*xl[2] - wj[2]*xl[1])
+ (Ef[0][0]*xl[0] + Ef[0][1]*xl[1] + Ef[0][2]*xl[2]);
vj[1] = v[j][1] - (wj[2]*xl[0] - wj[0]*xl[2])
+ (Ef[1][0]*xl[0] + Ef[1][1]*xl[1] + Ef[1][2]*xl[2]);
vj[2] = v[j][2] - (wj[0]*xl[1] - wj[1]*xl[0])
+ (Ef[2][0]*xl[0] + Ef[2][1]*xl[1] + Ef[2][2]*xl[2]);
// scalar resistances XA and YA
h_sep = r - 2.0*radi;
// check for overlaps
if (h_sep < 0.0) overlaps++;
// if less than the minimum gap use the minimum gap instead
if (r < cut_inner[itype][jtype])
h_sep = cut_inner[itype][jtype] - 2.0*radi;
// scale h_sep by radi
h_sep = h_sep/radi;
// scalar resistances
if (FLAGLOG) {
a_sq = 6.0*MY_PI*mu*radi*(1.0/4.0/h_sep + 9.0/40.0*log(1.0/h_sep));
a_sh = 6.0*MY_PI*mu*radi*(1.0/6.0*log(1.0/h_sep));
a_pu = 8.0*MY_PI*mu*pow(radi,3)*(3.0/160.0*log(1.0/h_sep));
} else
a_sq = 6.0*MY_PI*mu*radi*(1.0/4.0/h_sep);
// relative velocity at the point of closest approach
// includes fluid velocity
vr1 = vi[0] - vj[0];
vr2 = vi[1] - vj[1];
vr3 = vi[2] - vj[2];
// normal component (vr.n)n
vnnr = (vr1*delx + vr2*dely + vr3*delz)/r;
vn1 = vnnr*delx/r;
vn2 = vnnr*dely/r;
vn3 = vnnr*delz/r;
// tangential component vr - (vr.n)n
vt1 = vr1 - vn1;
vt2 = vr2 - vn2;
vt3 = vr3 - vn3;
// force due to squeeze type motion
fx = a_sq*vn1;
fy = a_sq*vn2;
fz = a_sq*vn3;
// force due to all shear kind of motions
if (FLAGLOG) {
fx = fx + a_sh*vt1;
fy = fy + a_sh*vt2;
fz = fz + a_sh*vt3;
}
// scale forces for appropriate units
fx *= vxmu2f;
fy *= vxmu2f;
fz *= vxmu2f;
// add to total force
f[i][0] -= fx;
f[i][1] -= fy;
f[i][2] -= fz;
if (NEWTON_PAIR || j < nlocal) {
f[j][0] += fx;
f[j][1] += fy;
f[j][2] += fz;
}
// torque due to this force
if (FLAGLOG) {
tx = xl[1]*fz - xl[2]*fy;
ty = xl[2]*fx - xl[0]*fz;
tz = xl[0]*fy - xl[1]*fx;
torque[i][0] -= vxmu2f*tx;
torque[i][1] -= vxmu2f*ty;
torque[i][2] -= vxmu2f*tz;
if (NEWTON_PAIR || j < nlocal) {
torque[j][0] -= vxmu2f*tx;
torque[j][1] -= vxmu2f*ty;
torque[j][2] -= vxmu2f*tz;
}
// torque due to a_pu
wdotn = ((wi[0]-wj[0])*delx + (wi[1]-wj[1])*dely +
(wi[2]-wj[2])*delz)/r;
wt1 = (wi[0]-wj[0]) - wdotn*delx/r;
wt2 = (wi[1]-wj[1]) - wdotn*dely/r;
wt3 = (wi[2]-wj[2]) - wdotn*delz/r;
tx = a_pu*wt1;
ty = a_pu*wt2;
tz = a_pu*wt3;
torque[i][0] -= vxmu2f*tx;
torque[i][1] -= vxmu2f*ty;
torque[i][2] -= vxmu2f*tz;
if (NEWTON_PAIR || j < nlocal) {
torque[j][0] += vxmu2f*tx;
torque[j][1] += vxmu2f*ty;
torque[j][2] += vxmu2f*tz;
}
}
if (EVFLAG) ev_tally_xyz(i,j,nlocal,NEWTON_PAIR,
0.0,0.0,-fx,-fy,-fz,delx,dely,delz);
}
}
}
// restore streaming component of velocity, omega, angmom
if (shearing) {
double *h_rate = domain->h_rate;
double *h_ratelo = domain->h_ratelo;
for (ii = iifrom; ii < iito; ii++) {
i = ilist[ii];
itype = type[i];
radi = radius[i];
domain->x2lamda(x[i],lamda);
vstream[0] = h_rate[0]*lamda[0] + h_rate[5]*lamda[1] +
h_rate[4]*lamda[2] + h_ratelo[0];
vstream[1] = h_rate[1]*lamda[1] + h_rate[3]*lamda[2] + h_ratelo[1];
vstream[2] = h_rate[2]*lamda[2] + h_ratelo[2];
v[i][0] += vstream[0];
v[i][1] += vstream[1];
v[i][2] += vstream[2];
omega[i][0] -= 0.5*h_rate[3];
omega[i][1] += 0.5*h_rate[4];
omega[i][2] -= 0.5*h_rate[5];
}
}
}
/* ---------------------------------------------------------------------- */
double PairLubricateOMP::memory_usage()
{
double bytes = memory_usage_thr();
bytes += PairLubricate::memory_usage();
return bytes;
}

View File

@ -0,0 +1,49 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(lubricate/omp,PairLubricateOMP)
#else
#ifndef LMP_PAIR_LUBRICATE_OMP_H
#define LMP_PAIR_LUBRICATE_OMP_H
#include "pair_lubricate.h"
#include "thr_omp.h"
namespace LAMMPS_NS {
class PairLubricateOMP : public PairLubricate, public ThrOMP {
public:
PairLubricateOMP(class LAMMPS *);
virtual ~PairLubricateOMP();
virtual void compute(int, int);
virtual double memory_usage();
private:
template <int LOGFLAG, int EVFLAG, int NEWTON_PAIR>
void eval(int ifrom, int ito, ThrData * const thr);
};
}
#endif
#endif

View File

@ -0,0 +1,418 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
This software is distributed under the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#include "math.h"
#include "pair_lubricate_poly_omp.h"
#include "atom.h"
#include "comm.h"
#include "domain.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "update.h"
#include "random_mars.h"
#include "math_const.h"
using namespace LAMMPS_NS;
using namespace MathConst;
/* ---------------------------------------------------------------------- */
PairLubricatePolyOMP::PairLubricatePolyOMP(LAMMPS *lmp) :
PairLubricatePoly(lmp), ThrOMP(lmp, THR_PAIR)
{
respa_enable = 0;
}
/* ---------------------------------------------------------------------- */
PairLubricatePolyOMP::~PairLubricatePolyOMP()
{}
/* ---------------------------------------------------------------------- */
void PairLubricatePolyOMP::compute(int eflag, int vflag)
{
if (eflag || vflag) {
ev_setup(eflag,vflag);
} else evflag = vflag_fdotr = 0;
const int nall = atom->nlocal + atom->nghost;
const int nthreads = comm->nthreads;
const int inum = list->inum;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
{
int ifrom, ito, tid;
loop_setup_thr(ifrom, ito, tid, inum, nthreads);
ThrData *thr = fix->get_thr(tid);
ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
if (flaglog) {
if (shearing) {
if (evflag)
eval<1,1,1>(ifrom, ito, thr);
else eval<1,1,0>(ifrom, ito, thr);
} else {
if (evflag)
eval<1,0,1>(ifrom, ito, thr);
else eval<1,0,0>(ifrom, ito, thr);
}
} else {
if (shearing) {
if (evflag)
eval<0,1,1>(ifrom, ito, thr);
else eval<0,1,0>(ifrom, ito, thr);
} else {
if (evflag)
eval<0,0,1>(ifrom, ito, thr);
else eval<0,0,0>(ifrom, ito, thr);
}
}
reduce_thr(this, eflag, vflag, thr);
} // end of omp parallel region
}
template <int FLAGLOG, int SHEARING, int EVFLAG>
void PairLubricatePolyOMP::eval(int iifrom, int iito, ThrData * const thr)
{
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz,tx,ty,tz;
double rsq,r,h_sep,beta0,beta1,radi,radj;
double vr1,vr2,vr3,vnnr,vn1,vn2,vn3;
double vt1,vt2,vt3,wt1,wt2,wt3,wdotn;
double vRS0;
double vi[3],vj[3],wi[3],wj[3],xl[3],jl[3];
double a_sq,a_sh,a_pu;
int *ilist,*jlist,*numneigh,**firstneigh;
double lamda[3],vstream[3];
double vxmu2f = force->vxmu2f;
double * const * const x = atom->x;
double * const * const v = atom->v;
double * const * const f = thr->get_f();
double * const * const omega = atom->omega;
double * const * const torque = thr->get_torque();
const double * const radius = atom->radius;
const int * const type = atom->type;
const int nlocal = atom->nlocal;
int overlaps = 0;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
// subtract streaming component of velocity, omega, angmom
// assume fluid streaming velocity = box deformation rate
// vstream = (ux,uy,uz)
// ux = h_rate[0]*x + h_rate[5]*y + h_rate[4]*z
// uy = h_rate[1]*y + h_rate[3]*z
// uz = h_rate[2]*z
// omega_new = omega - curl(vstream)/2
// angmom_new = angmom - I*curl(vstream)/2
// Ef = (grad(vstream) + (grad(vstream))^T) / 2
if (shearing) {
double *h_rate = domain->h_rate;
double *h_ratelo = domain->h_ratelo;
for (ii = iifrom; ii < iito; ii++) {
i = ilist[ii];
itype = type[i];
radi = radius[i];
domain->x2lamda(x[i],lamda);
vstream[0] = h_rate[0]*lamda[0] + h_rate[5]*lamda[1] +
h_rate[4]*lamda[2] + h_ratelo[0];
vstream[1] = h_rate[1]*lamda[1] + h_rate[3]*lamda[2] + h_ratelo[1];
vstream[2] = h_rate[2]*lamda[2] + h_ratelo[2];
v[i][0] -= vstream[0];
v[i][1] -= vstream[1];
v[i][2] -= vstream[2];
omega[i][0] += 0.5*h_rate[3];
omega[i][1] -= 0.5*h_rate[4];
omega[i][2] += 0.5*h_rate[5];
}
// set Ef from h_rate in strain units
Ef[0][0] = h_rate[0]/domain->xprd;
Ef[1][1] = h_rate[1]/domain->yprd;
Ef[2][2] = h_rate[2]/domain->zprd;
Ef[0][1] = Ef[1][0] = 0.5 * h_rate[5]/domain->yprd;
Ef[0][2] = Ef[2][0] = 0.5 * h_rate[4]/domain->zprd;
Ef[1][2] = Ef[2][1] = 0.5 * h_rate[3]/domain->zprd;
// copy updated omega to the ghost particles
// no need to do this if not shearing since comm->ghost_velocity is set
sync_threads();
comm->forward_comm_pair(this);
}
// loop over neighbors of my atoms
for (ii = iifrom; ii < iito; ++ii) {
i = ilist[ii];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
itype = type[i];
radi = radius[i];
jlist = firstneigh[i];
jnum = numneigh[i];
// angular velocity
wi[0] = omega[i][0];
wi[1] = omega[i][1];
wi[2] = omega[i][2];
// FLD contribution to force and torque due to isotropic terms
// FLD contribution to stress from isotropic RS0
if (flagfld) {
f[i][0] -= vxmu2f*R0*radi*v[i][0];
f[i][1] -= vxmu2f*R0*radi*v[i][1];
f[i][2] -= vxmu2f*R0*radi*v[i][2];
const double rad3 = radi*radi*radi;
torque[i][0] -= vxmu2f*RT0*rad3*wi[0];
torque[i][1] -= vxmu2f*RT0*rad3*wi[1];
torque[i][2] -= vxmu2f*RT0*rad3*wi[2];
if (SHEARING && vflag_either) {
vRS0 = -vxmu2f * RS0*rad3;
v_tally_tensor(i,i,nlocal,/* newton_pair */ 0,
vRS0*Ef[0][0],vRS0*Ef[1][1],vRS0*Ef[2][2],
vRS0*Ef[0][1],vRS0*Ef[0][2],vRS0*Ef[1][2]);
}
}
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
j &= NEIGHMASK;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
jtype = type[j];
radj = atom->radius[j];
if (rsq < cutsq[itype][jtype]) {
r = sqrt(rsq);
// angular momentum = I*omega = 2/5 * M*R^2 * omega
wj[0] = omega[j][0];
wj[1] = omega[j][1];
wj[2] = omega[j][2];
// xl = point of closest approach on particle i from its center
xl[0] = -delx/r*radi;
xl[1] = -dely/r*radi;
xl[2] = -delz/r*radi;
jl[0] = -delx/r*radj;
jl[1] = -dely/r*radj;
jl[2] = -delz/r*radj;
// velocity at the point of closest approach on both particles
// v = v + omega_cross_xl - Ef.xl
// particle i
vi[0] = v[i][0] + (wi[1]*xl[2] - wi[2]*xl[1])
- (Ef[0][0]*xl[0] + Ef[0][1]*xl[1] + Ef[0][2]*xl[2]);
vi[1] = v[i][1] + (wi[2]*xl[0] - wi[0]*xl[2])
- (Ef[1][0]*xl[0] + Ef[1][1]*xl[1] + Ef[1][2]*xl[2]);
vi[2] = v[i][2] + (wi[0]*xl[1] - wi[1]*xl[0])
- (Ef[2][0]*xl[0] + Ef[2][1]*xl[1] + Ef[2][2]*xl[2]);
// particle j
vj[0] = v[j][0] - (wj[1]*jl[2] - wj[2]*jl[1])
+ (Ef[0][0]*jl[0] + Ef[0][1]*jl[1] + Ef[0][2]*jl[2]);
vj[1] = v[j][1] - (wj[2]*jl[0] - wj[0]*jl[2])
+ (Ef[1][0]*jl[0] + Ef[1][1]*jl[1] + Ef[1][2]*jl[2]);
vj[2] = v[j][2] - (wj[0]*jl[1] - wj[1]*jl[0])
+ (Ef[2][0]*jl[0] + Ef[2][1]*jl[1] + Ef[2][2]*jl[2]);
// scalar resistances XA and YA
h_sep = r - radi-radj;
// check for overlaps
if (h_sep < 0.0) overlaps++;
// if less than the minimum gap use the minimum gap instead
if (r < cut_inner[itype][jtype])
h_sep = cut_inner[itype][jtype] - radi-radj;
// scale h_sep by radi
h_sep = h_sep/radi;
beta0 = radj/radi;
beta1 = 1.0 + beta0;
// scalar resistances
if (FLAGLOG) {
a_sq = beta0*beta0/beta1/beta1/h_sep +
(1.0+7.0*beta0+beta0*beta0)/5.0/pow(beta1,3)*log(1.0/h_sep);
a_sq += (1.0+18.0*beta0-29.0*beta0*beta0+18.0 *
pow(beta0,3)+pow(beta0,4))/21.0/pow(beta1,4) *
h_sep*log(1.0/h_sep);
a_sq *= 6.0*MY_PI*mu*radi;
a_sh = 4.0*beta0*(2.0+beta0+2.0*beta0*beta0)/15.0/pow(beta1,3) *
log(1.0/h_sep);
a_sh += 4.0*(16.0-45.0*beta0+58.0*beta0*beta0-45.0*pow(beta0,3) +
16.0*pow(beta0,4))/375.0/pow(beta1,4) *
h_sep*log(1.0/h_sep);
a_sh *= 6.0*MY_PI*mu*radi;
a_pu = beta0*(4.0+beta0)/10.0/beta1/beta1*log(1.0/h_sep);
a_pu += (32.0-33.0*beta0+83.0*beta0*beta0+43.0 *
pow(beta0,3))/250.0/pow(beta1,3)*h_sep*log(1.0/h_sep);
a_pu *= 8.0*MY_PI*mu*pow(radi,3);
} else a_sq = 6.0*MY_PI*mu*radi*(beta0*beta0/beta1/beta1/h_sep);
// relative velocity at the point of closest approach
// includes fluid velocity
vr1 = vi[0] - vj[0];
vr2 = vi[1] - vj[1];
vr3 = vi[2] - vj[2];
// normal component (vr.n)n
vnnr = (vr1*delx + vr2*dely + vr3*delz)/r;
vn1 = vnnr*delx/r;
vn2 = vnnr*dely/r;
vn3 = vnnr*delz/r;
// tangential component vr - (vr.n)n
vt1 = vr1 - vn1;
vt2 = vr2 - vn2;
vt3 = vr3 - vn3;
// force due to squeeze type motion
fx = a_sq*vn1;
fy = a_sq*vn2;
fz = a_sq*vn3;
// force due to all shear kind of motions
if (FLAGLOG) {
fx = fx + a_sh*vt1;
fy = fy + a_sh*vt2;
fz = fz + a_sh*vt3;
}
// scale forces for appropriate units
fx *= vxmu2f;
fy *= vxmu2f;
fz *= vxmu2f;
// add to total force
f[i][0] -= fx;
f[i][1] -= fy;
f[i][2] -= fz;
// torque due to this force
if (FLAGLOG) {
tx = xl[1]*fz - xl[2]*fy;
ty = xl[2]*fx - xl[0]*fz;
tz = xl[0]*fy - xl[1]*fx;
torque[i][0] -= vxmu2f*tx;
torque[i][1] -= vxmu2f*ty;
torque[i][2] -= vxmu2f*tz;
// torque due to a_pu
wdotn = ((wi[0]-wj[0])*delx + (wi[1]-wj[1])*dely +
(wi[2]-wj[2])*delz)/r;
wt1 = (wi[0]-wj[0]) - wdotn*delx/r;
wt2 = (wi[1]-wj[1]) - wdotn*dely/r;
wt3 = (wi[2]-wj[2]) - wdotn*delz/r;
tx = a_pu*wt1;
ty = a_pu*wt2;
tz = a_pu*wt3;
torque[i][0] -= vxmu2f*tx;
torque[i][1] -= vxmu2f*ty;
torque[i][2] -= vxmu2f*tz;
}
if (EVFLAG) ev_tally_xyz(i,nlocal,nlocal, /* newton_pair */ 0,
0.0,0.0,-fx,-fy,-fz,delx,dely,delz);
}
}
}
// restore streaming component of velocity, omega, angmom
if (SHEARING) {
double *h_rate = domain->h_rate;
double *h_ratelo = domain->h_ratelo;
for (ii = iifrom; ii < iito; ii++) {
i = ilist[ii];
itype = type[i];
radi = radius[i];
domain->x2lamda(x[i],lamda);
vstream[0] = h_rate[0]*lamda[0] + h_rate[5]*lamda[1] +
h_rate[4]*lamda[2] + h_ratelo[0];
vstream[1] = h_rate[1]*lamda[1] + h_rate[3]*lamda[2] + h_ratelo[1];
vstream[2] = h_rate[2]*lamda[2] + h_ratelo[2];
v[i][0] += vstream[0];
v[i][1] += vstream[1];
v[i][2] += vstream[2];
omega[i][0] -= 0.5*h_rate[3];
omega[i][1] += 0.5*h_rate[4];
omega[i][2] -= 0.5*h_rate[5];
}
}
}
/* ---------------------------------------------------------------------- */
double PairLubricatePolyOMP::memory_usage()
{
double bytes = memory_usage_thr();
bytes += PairLubricatePoly::memory_usage();
return bytes;
}

View File

@ -0,0 +1,49 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(lubricate/poly/omp,PairLubricateOMP)
#else
#ifndef LMP_PAIR_LUBRICATE_POLY_OMP_H
#define LMP_PAIR_LUBRICATE_POLY_OMP_H
#include "pair_lubricate_poly.h"
#include "thr_omp.h"
namespace LAMMPS_NS {
class PairLubricatePolyOMP : public PairLubricatePoly, public ThrOMP {
public:
PairLubricatePolyOMP(class LAMMPS *);
virtual ~PairLubricatePolyOMP();
virtual void compute(int, int);
virtual double memory_usage();
private:
template <int LOGFLAG, int EVFLAG, int NEWTON_PAIR>
void eval(int ifrom, int ito, ThrData * const thr);
};
}
#endif
#endif

View File

@ -0,0 +1,517 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#include "pppm_cg_omp.h"
#include "atom.h"
#include "comm.h"
#include "domain.h"
#include "force.h"
#include "memory.h"
#include "math_const.h"
#include <string.h>
#include <math.h>
using namespace LAMMPS_NS;
using namespace MathConst;
#define SMALLQ 0.00001
#if defined(FFT_SINGLE)
#define ZEROF 0.0f
#else
#define ZEROF 0.0
#endif
#define EPS_HOC 1.0e-7
/* ---------------------------------------------------------------------- */
PPPMCGOMP::PPPMCGOMP(LAMMPS *lmp, int narg, char **arg) :
PPPMCG(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE)
{
}
/* ----------------------------------------------------------------------
allocate memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */
void PPPMCGOMP::allocate()
{
PPPMCG::allocate();
const int nthreads = comm->nthreads;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
const int tid = omp_get_thread_num();
#else
const int tid = 0;
#endif
FFT_SCALAR **rho1d_thr;
memory->create2d_offset(rho1d_thr,3,-order/2,order/2,"pppm:rho1d_thr");
ThrData *thr = fix->get_thr(tid);
thr->init_pppm(static_cast<void *>(rho1d_thr));
}
const int nzend = (nzhi_out-nzlo_out+1)*nthreads + nzlo_out -1;
// reallocate density brick, so it fits our needs
memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
memory->create3d_offset(density_brick,nzlo_out,nzend,nylo_out,nyhi_out,
nxlo_out,nxhi_out,"pppm:density_brick");
}
// NOTE: special version of reduce_data for FFT_SCALAR data type.
// reduce per thread data into the first part of the data
// array that is used for the non-threaded parts and reset
// the temporary storage to 0.0. this routine depends on
// multi-dimensional arrays like force stored in this order
// x1,y1,z1,x2,y2,z2,...
// we need to post a barrier to wait until all threads are done
// writing to the array.
static void data_reduce_fft(FFT_SCALAR *dall, int nall, int nthreads, int ndim, int tid)
{
#if defined(_OPENMP)
// NOOP in non-threaded execution.
if (nthreads == 1) return;
#pragma omp barrier
{
const int nvals = ndim*nall;
const int idelta = nvals/nthreads + 1;
const int ifrom = tid*idelta;
const int ito = ((ifrom + idelta) > nvals) ? nvals : (ifrom + idelta);
// this if protects against having more threads than atoms
if (ifrom < nall) {
for (int m = ifrom; m < ito; ++m) {
for (int n = 1; n < nthreads; ++n) {
dall[m] += dall[n*nvals + m];
dall[n*nvals + m] = 0.0;
}
}
}
}
#else
// NOOP in non-threaded execution.
return;
#endif
}
/* ----------------------------------------------------------------------
free memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */
void PPPMCGOMP::deallocate()
{
PPPMCG::deallocate();
for (int i=0; i < comm->nthreads; ++i) {
ThrData * thr = fix->get_thr(i);
FFT_SCALAR ** rho1d_thr = static_cast<FFT_SCALAR **>(thr->get_rho1d());
memory->destroy2d_offset(rho1d_thr,-order/2);
}
}
/* ----------------------------------------------------------------------
adjust PPPMCG coeffs, called initially and whenever volume has changed
------------------------------------------------------------------------- */
void PPPMCGOMP::setup()
{
int i,j,k,n;
double *prd;
// volume-dependent factors
// adjust z dimension for 2d slab PPPM
// z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
const double xprd = prd[0];
const double yprd = prd[1];
const double zprd = prd[2];
const double zprd_slab = zprd*slab_volfactor;
volume = xprd * yprd * zprd_slab;
delxinv = nx_pppm/xprd;
delyinv = ny_pppm/yprd;
delzinv = nz_pppm/zprd_slab;
delvolinv = delxinv*delyinv*delzinv;
const double unitkx = (2.0*MY_PI/xprd);
const double unitky = (2.0*MY_PI/yprd);
const double unitkz = (2.0*MY_PI/zprd_slab);
// fkx,fky,fkz for my FFT grid pts
double per;
for (i = nxlo_fft; i <= nxhi_fft; i++) {
per = i - nx_pppm*(2*i/nx_pppm);
fkx[i] = unitkx*per;
}
for (i = nylo_fft; i <= nyhi_fft; i++) {
per = i - ny_pppm*(2*i/ny_pppm);
fky[i] = unitky*per;
}
for (i = nzlo_fft; i <= nzhi_fft; i++) {
per = i - nz_pppm*(2*i/nz_pppm);
fkz[i] = unitkz*per;
}
// virial coefficients
double sqk,vterm;
n = 0;
for (k = nzlo_fft; k <= nzhi_fft; k++) {
for (j = nylo_fft; j <= nyhi_fft; j++) {
for (i = nxlo_fft; i <= nxhi_fft; i++) {
sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k];
if (sqk == 0.0) {
vg[n][0] = 0.0;
vg[n][1] = 0.0;
vg[n][2] = 0.0;
vg[n][3] = 0.0;
vg[n][4] = 0.0;
vg[n][5] = 0.0;
} else {
vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald));
vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i];
vg[n][1] = 1.0 + vterm*fky[j]*fky[j];
vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k];
vg[n][3] = vterm*fkx[i]*fky[j];
vg[n][4] = vterm*fkx[i]*fkz[k];
vg[n][5] = vterm*fky[j]*fkz[k];
}
n++;
}
}
}
// modified (Hockney-Eastwood) Coulomb Green's function
const int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
pow(-log(EPS_HOC),0.25));
const int nby = static_cast<int> ((g_ewald*yprd/(MY_PI*ny_pppm)) *
pow(-log(EPS_HOC),0.25));
const int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) *
pow(-log(EPS_HOC),0.25));
const double form = 1.0;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
int tid,nn,nnfrom,nnto,nx,ny,nz,k,l,m;
double snx,sny,snz,sqk;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double sum1,dot1,dot2;
double numerator,denominator;
const double gew2 = -4.0*g_ewald*g_ewald;
const int nnx = nxhi_fft-nxlo_fft+1;
const int nny = nyhi_fft-nylo_fft+1;
loop_setup_thr(nnfrom, nnto, tid, nfft, comm->nthreads);
for (m = nzlo_fft; m <= nzhi_fft; m++) {
const double fkzm = fkz[m];
snz = sin(0.5*fkzm*zprd_slab/nz_pppm);
snz *= snz;
for (l = nylo_fft; l <= nyhi_fft; l++) {
const double fkyl = fky[l];
sny = sin(0.5*fkyl*yprd/ny_pppm);
sny *= sny;
for (k = nxlo_fft; k <= nxhi_fft; k++) {
/* only compute the part designated to this thread */
nn = k-nxlo_fft + nnx*(l-nylo_fft + nny*(m-nzlo_fft));
if ((nn < nnfrom) || (nn >=nnto)) continue;
const double fkxk = fkx[k];
snx = sin(0.5*fkxk*xprd/nx_pppm);
snx *= snx;
sqk = fkxk*fkxk + fkyl*fkyl + fkzm*fkzm;
if (sqk != 0.0) {
numerator = form*MY_4PI/sqk;
denominator = gf_denom(snx,sny,snz);
sum1 = 0.0;
for (nx = -nbx; nx <= nbx; nx++) {
qx = fkxk + unitkx*nx_pppm*nx;
sx = exp(qx*qx/gew2);
wx = 1.0;
argx = 0.5*qx*xprd/nx_pppm;
if (argx != 0.0) wx = pow(sin(argx)/argx,order);
wx *=wx;
for (ny = -nby; ny <= nby; ny++) {
qy = fkyl + unitky*ny_pppm*ny;
sy = exp(qy*qy/gew2);
wy = 1.0;
argy = 0.5*qy*yprd/ny_pppm;
if (argy != 0.0) wy = pow(sin(argy)/argy,order);
wy *= wy;
for (nz = -nbz; nz <= nbz; nz++) {
qz = fkzm + unitkz*nz_pppm*nz;
sz = exp(qz*qz/gew2);
wz = 1.0;
argz = 0.5*qz*zprd_slab/nz_pppm;
if (argz != 0.0) wz = pow(sin(argz)/argz,order);
wz *= wz;
dot1 = fkxk*qx + fkyl*qy + fkzm*qz;
dot2 = qx*qx+qy*qy+qz*qz;
sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
}
}
}
greensfn[nn] = numerator*sum1/denominator;
} else greensfn[nn] = 0.0;
}
}
}
}
}
/* ----------------------------------------------------------------------
run the regular toplevel compute method from plain PPPPMCG
which will have individual methods replaced by our threaded
versions and then call the obligatory force reduction.
------------------------------------------------------------------------- */
void PPPMCGOMP::compute(int eflag, int vflag)
{
PPPMCG::compute(eflag,vflag);
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
{
#if defined(_OPENMP)
const int tid = omp_get_thread_num();
#else
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
reduce_thr(this, eflag, vflag, thr);
} // end of omp parallel region
}
/* ----------------------------------------------------------------------
create discretized "density" on section of global grid due to my particles
density(x,y,z) = charge "density" at grid points of my 3d brick
(nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
in global grid
------------------------------------------------------------------------- */
void PPPMCGOMP::make_rho()
{
const double * const q = atom->q;
const double * const * const x = atom->x;
const int nthreads = comm->nthreads;
#if defined(_OPENMP)
#pragma omp parallel default(none)
{
// each thread works on a fixed chunk of atoms.
const int tid = omp_get_thread_num();
const int inum = num_charged;
const int idelta = 1 + inum/nthreads;
const int ifrom = tid*idelta;
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
const int tid = 0;
const int ifrom = 0;
const int ito = num_charged;
#endif
int i,j,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
// set up clear 3d density array
const int nzoffs = (nzhi_out-nzlo_out+1)*tid;
FFT_SCALAR * const * const * const db = &(density_brick[nzoffs]);
memset(&(db[nzlo_out][nylo_out][nxlo_out]),0,ngrid*sizeof(FFT_SCALAR));
ThrData *thr = fix->get_thr(tid);
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
// loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// this if protects against having more threads than charged local atoms
if (ifrom < num_charged) {
for (j = ifrom; j < ito; j++) {
i = is_charged[j];
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
compute_rho1d_thr(r1d,dx,dy,dz);
z0 = delvolinv * q[i];
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
y0 = z0*r1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
x0 = y0*r1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
db[mz][my][mx] += x0*r1d[0][l];
}
}
}
}
}
#if defined(_OPENMP)
// reduce 3d density array
if (nthreads > 1) {
data_reduce_fft(&(density_brick[nzlo_out][nylo_out][nxlo_out]),ngrid,nthreads,1,tid);
}
}
#endif
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles
------------------------------------------------------------------------- */
void PPPMCGOMP::fieldforce()
{
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
const double * const q = atom->q;
const double * const * const x = atom->x;
const int nthreads = comm->nthreads;
const double qqrd2e = force->qqrd2e;
#if defined(_OPENMP)
#pragma omp parallel default(none)
{
// each thread works on a fixed chunk of atoms.
const int tid = omp_get_thread_num();
const int inum = num_charged;
const int idelta = 1 + inum/nthreads;
const int ifrom = tid*idelta;
const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
const int ifrom = 0;
const int ito = num_charged;
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
double * const * const f = thr->get_f();
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
int i,j,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR ekx,eky,ekz;
// this if protects against having more threads than charged local atoms
if (ifrom < num_charged) {
for (j = ifrom; j < ito; j++) {
i = is_charged[j];
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
compute_rho1d_thr(r1d,dx,dy,dz);
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = r1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*r1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*r1d[0][l];
ekx -= x0*vdx_brick[mz][my][mx];
eky -= x0*vdy_brick[mz][my][mx];
ekz -= x0*vdz_brick[mz][my][mx];
}
}
}
// convert E-field to force
const double qfactor = qqrd2e*scale*q[i];
f[i][0] += qfactor*ekx;
f[i][1] += qfactor*eky;
f[i][2] += qfactor*ekz;
}
}
#if defined(_OPENMP)
}
#endif
}
/* ----------------------------------------------------------------------
charge assignment into rho1d
dx,dy,dz = distance of particle from "lower left" grid point
------------------------------------------------------------------------- */
void PPPMCGOMP::compute_rho1d_thr(FFT_SCALAR * const * const r1d, const FFT_SCALAR &dx,
const FFT_SCALAR &dy, const FFT_SCALAR &dz)
{
int k,l;
FFT_SCALAR r1,r2,r3;
for (k = (1-order)/2; k <= order/2; k++) {
r1 = r2 = r3 = ZEROF;
for (l = order-1; l >= 0; l--) {
r1 = rho_coeff[l][k] + r1*dx;
r2 = rho_coeff[l][k] + r2*dy;
r3 = rho_coeff[l][k] + r3*dz;
}
r1d[0][k] = r1;
r1d[1][k] = r2;
r1d[2][k] = r3;
}
}

View File

@ -0,0 +1,51 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef KSPACE_CLASS
KSpaceStyle(pppm/cg/omp,PPPMCGOMP)
#else
#ifndef LMP_PPPM_CG_OMP_H
#define LMP_PPPM_CG_OMP_H
#include "pppm_cg.h"
#include "thr_omp.h"
namespace LAMMPS_NS {
class PPPMCGOMP : public PPPMCG, public ThrOMP {
public:
PPPMCGOMP(class LAMMPS *, int, char **);
virtual void compute(int, int);
virtual void setup();
virtual ~PPPMCGOMP () {};
protected:
virtual void allocate();
virtual void deallocate();
virtual void fieldforce();
virtual void make_rho();
void compute_rho1d_thr(FFT_SCALAR * const * const, const FFT_SCALAR &,
const FFT_SCALAR &, const FFT_SCALAR &);
// void compute_rho_coeff();
// void slabcorr(int);
};
}
#endif
#endif