mirror of https://github.com/lammps/lammps.git
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@6816 f3b2605a-c512-4ea7-a41b-209d697bcdaa
parent 62126a8f6d
commit 1d743442b3
src/Makefile
@@ -18,7 +18,7 @@ PACKAGE = asphere class2 colloid dipole gpu granular \
 	  shock srd xtc
 
 PACKUSER = user-misc user-atc user-awpmd user-cg-cmm \
-	   user-cuda user-eff user-ewaldn user-reaxc
+	   user-cuda user-eff user-ewaldn user-omp user-reaxc
 
 PACKALL = $(PACKAGE) $(PACKUSER)
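The new user-omp entry makes the package visible to the standard package targets generated by src/Makefile. A minimal sketch of the expected workflow, assuming the usual LAMMPS make yes-<package>/no-<package> conventions; the machine target name used below is only an example and not part of this commit:

# enable the USER-OMP package; this runs the package's Install.sh with
# argument 1, which copies the package sources into src/
cd src
make yes-user-omp
make package-status    # list which packages are currently installed

# build with a machine makefile of your choice (name is an example)
make openmpi

# disable the package again; runs Install.sh with argument 0
make no-user-omp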
src/USER-OMP/Install.sh (new file)
@@ -0,0 +1,32 @@
# Install/unInstall package files in LAMMPS
# do not install child files if parent does not exist

if (test $1 = 1) then

#  if (test -e ../pair_lj_cut_coul_long.cpp) then
#    cp pair_lj_cut_coul_long_omp.cpp ..
#    cp pair_lj_cut_coul_long_omp.h ..
#  fi

  cp pair_lj_cut_omp.cpp ..

  cp thr_omp.cpp ..

  cp pair_lj_cut_omp.h ..

  cp thr_omp.h ..

elif (test $1 = 0) then

#  rm -f ../pair_lj_cut_coul_long_omp.cpp
  rm -f ../pair_lj_cut_omp.cpp

  rm -f ../thr_omp.cpp

#  rm -f ../pair_lj_cut_coul_long_omp.h
  rm -f ../pair_lj_cut_omp.h

  rm -f ../thr_omp.h

fi
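Install.sh follows the usual LAMMPS package convention: it is called with a single argument, 1 to copy the package files into src/ and 0 to remove them again. A small sketch of a direct invocation from inside the package directory (the src/USER-OMP location is an assumption based on the user-omp entry above; the diff itself does not show file paths):

# from src/USER-OMP: install the package files into ../ (i.e. src/)
sh Install.sh 1
ls ../pair_lj_cut_omp.cpp ../thr_omp.cpp ../pair_lj_cut_omp.h ../thr_omp.h

# remove them again
sh Install.sh 0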
src/USER-OMP/Package.sh (new file)
@@ -0,0 +1,21 @@
#!/bin/sh
# Update package files in LAMMPS
# copy package file to src if it doesn't exist or is different
# do not copy OpenMP style files if the non-OMP version does not exist
for file in *_omp.cpp *_omp.h; do
  # let us see if the "rain man" can count the toothpicks...
  ofile=`echo $file | sed -e s,\\\\\\(.\\*\\\\\\)_omp\\\\.\\\\\\(h\\\\\\|cpp\\\\\\),\\\\1.\\\\2,`
  if (test $file = "thr_omp.h") || (test $file = "thr_omp.cpp") then
    : # always handle these support files
  elif (test ! -e ../$ofile) then
    continue
  fi
  if (test ! -e ../$file) then
    echo " creating src/$file"
    cp $file ..
  elif ! cmp -s $file ../$file ; then
    echo " updating src/$file"
    cp $file ..
  fi
done
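The heavily escaped sed expression above simply strips the _omp suffix from a style file name so the script can test whether the corresponding plain (non-OMP) source exists in src/. A sketch of the intended mapping, using a plainly escaped equivalent of the same substitution (the escaping differs here only because this version is not embedded in backquotes; like the original, it relies on GNU sed's \| alternation):

# pair_lj_cut_omp.cpp -> pair_lj_cut.cpp, thr_omp.h -> thr.h, etc.
for file in pair_lj_cut_omp.cpp pair_lj_cut_omp.h thr_omp.h; do
  ofile=$(echo "$file" | sed -e 's,\(.*\)_omp\.\(h\|cpp\),\1.\2,')
  echo "$file -> $ofile"
done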
src/USER-OMP/pair_lj_cut_omp.cpp (new file)
@@ -0,0 +1,163 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   This software is distributed under the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */

#include "math.h"
#include "pair_lj_cut_omp.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "memory.h"
#include "error.h"

using namespace LAMMPS_NS;

/* ---------------------------------------------------------------------- */

PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
  PairLJCut(lmp), ThrOMP(lmp, PAIR)
{
  respa_enable = 0;
}

/* ---------------------------------------------------------------------- */

void PairLJCutOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
    ev_setup_thr(this);
  } else evflag = vflag_fdotr = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;

#if defined(_OPENMP)
#pragma omp parallel default(shared)
#endif
  {
    int ifrom, ito, tid;
    double **f;

    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);

    if (evflag) {
      if (eflag) {
        if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
        else eval<1,1,0>(f, ifrom, ito, tid);
      } else {
        if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
        else eval<1,0,0>(f, ifrom, ito, tid);
      }
    } else {
      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
      else eval<0,0,0>(f, ifrom, ito, tid);
    }

    // reduce per thread forces into global force array.
    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
  } // end of omp parallel region

  // reduce per thread energy and virial, if requested.
  if (evflag) ev_reduce_thr(this);
  if (vflag_fdotr) virial_fdotr_compute();
}

template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
{
  int i,j,ii,jj,jnum,itype,jtype;
  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
  double rsq,r2inv,r6inv,forcelj,factor_lj;
  int *ilist,*jlist,*numneigh,**firstneigh;

  evdwl = 0.0;

  double **x = atom->x;
  int *type = atom->type;
  int nlocal = atom->nlocal;
  double *special_lj = force->special_lj;
  double fxtmp,fytmp,fztmp;

  ilist = list->ilist;
  numneigh = list->numneigh;
  firstneigh = list->firstneigh;

  // loop over neighbors of my atoms

  for (ii = iifrom; ii < iito; ++ii) {

    i = ilist[ii];
    xtmp = x[i][0];
    ytmp = x[i][1];
    ztmp = x[i][2];
    itype = type[i];
    jlist = firstneigh[i];
    jnum = numneigh[i];
    fxtmp=fytmp=fztmp=0.0;

    for (jj = 0; jj < jnum; jj++) {
      j = jlist[jj];
      factor_lj = special_lj[sbmask(j)];
      j &= NEIGHMASK;

      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
      rsq = delx*delx + dely*dely + delz*delz;
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
        r2inv = 1.0/rsq;
        r6inv = r2inv*r2inv*r2inv;
        forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
        fpair = factor_lj*forcelj*r2inv;

        fxtmp += delx*fpair;
        fytmp += dely*fpair;
        fztmp += delz*fpair;
        if (NEWTON_PAIR || j < nlocal) {
          f[j][0] -= delx*fpair;
          f[j][1] -= dely*fpair;
          f[j][2] -= delz*fpair;
        }

        if (EFLAG) {
          evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
            - offset[itype][jtype];
          evdwl *= factor_lj;
        }

        if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
                                 evdwl,0.0,fpair,delx,dely,delz,tid);
      }
    }
    f[i][0] += fxtmp;
    f[i][1] += fytmp;
    f[i][2] += fztmp;
  }
}

/* ---------------------------------------------------------------------- */

double PairLJCutOMP::memory_usage()
{
  double bytes = memory_usage_thr();
  bytes += PairLJCut::memory_usage();

  return bytes;
}
src/USER-OMP/pair_lj_cut_omp.h (new file)
@@ -0,0 +1,48 @@
/* -*- c++ -*- ----------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */

#ifdef PAIR_CLASS

PairStyle(lj/cut/omp,PairLJCutOMP)

#else

#ifndef LMP_PAIR_LJ_CUT_OMP_H
#define LMP_PAIR_LJ_CUT_OMP_H

#include "pair_lj_cut.h"
#include "thr_omp.h"

namespace LAMMPS_NS {

class PairLJCutOMP : public PairLJCut, public ThrOMP {

 public:
  PairLJCutOMP(class LAMMPS *);

  virtual void compute(int, int);
  virtual double memory_usage();

 private:
  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
  void eval(double **f, int ifrom, int ito, int tid);
};

}

#endif
#endif
src/USER-OMP/thr_omp.cpp (new file)
@@ -0,0 +1,392 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   OpenMP based threading support for LAMMPS
   Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */

#include "thr_omp.h"

#include "memory.h"

#include "atom.h"
#include "comm.h"
#include "force.h"

#include "pair.h"
#include "dihedral.h"

#if defined(_OPENMP)
#include <omp.h>
#endif

using namespace LAMMPS_NS;

/* ---------------------------------------------------------------------- */

ThrOMP::ThrOMP(LAMMPS *ptr, int style) : thr_style(style), lmp(ptr)
{
  // initialize fixed size per thread storage
  eng_vdwl_thr = eng_coul_thr = eng_bond_thr = NULL;
  virial_thr = NULL;
  lmp->memory->create(eng_vdwl_thr,lmp->comm->nthreads,"thr_omp:eng_vdwl_thr");
  lmp->memory->create(eng_coul_thr,lmp->comm->nthreads,"thr_omp:eng_coul_thr");
  lmp->memory->create(eng_bond_thr,lmp->comm->nthreads,"thr_omp:eng_bond_thr");
  lmp->memory->create(virial_thr,lmp->comm->nthreads,6,"thr_omp:virial_thr");

  // variable size per thread, per atom storage
  // the actual allocation happens via memory->grow() in ev_setup_thr()
  maxeatom_thr = maxvatom_thr = 0;
  eatom_thr = NULL;
  vatom_thr = NULL;
}

/* ---------------------------------------------------------------------- */

ThrOMP::~ThrOMP()
{
  lmp->memory->destroy(eng_vdwl_thr);
  lmp->memory->destroy(eng_coul_thr);
  lmp->memory->destroy(eng_bond_thr);
  lmp->memory->destroy(virial_thr);
  lmp->memory->destroy(eatom_thr);
  lmp->memory->destroy(vatom_thr);
}

/* ---------------------------------------------------------------------- */

void ThrOMP::ev_zero_acc_thr(int ntotal, int eflag_global, int vflag_global,
                             int eflag_atom, int vflag_atom, int nthreads)
{
  int t,i;

  for (t = 0; t < nthreads; ++t) {

    if (eflag_global)
      eng_vdwl_thr[t] = eng_coul_thr[t] = eng_bond_thr[t] = 0.0;

    if (vflag_global)
      for (i = 0; i < 6; ++i)
        virial_thr[t][i] = 0.0;

    if (eflag_atom)
      for (i = 0; i < ntotal; ++i)
        eatom_thr[t][i] = 0.0;

    if (vflag_atom)
      for (i = 0; i < ntotal; ++i) {
        vatom_thr[t][i][0] = 0.0;
        vatom_thr[t][i][1] = 0.0;
        vatom_thr[t][i][2] = 0.0;
        vatom_thr[t][i][3] = 0.0;
        vatom_thr[t][i][4] = 0.0;
        vatom_thr[t][i][5] = 0.0;
      }
  }
}

/* ---------------------------------------------------------------------- */

void ThrOMP::ev_setup_thr(Dihedral *dihed)
{
  int nthreads = lmp->comm->nthreads;

  // reallocate per-atom arrays if necessary
  if (dihed->eflag_atom && lmp->atom->nmax > maxeatom_thr) {
    maxeatom_thr = lmp->atom->nmax;
    lmp->memory->grow(eatom_thr,nthreads,maxeatom_thr,"thr_omp:eatom_thr");
  }
  if (dihed->vflag_atom && lmp->atom->nmax > maxvatom_thr) {
    maxvatom_thr = lmp->atom->nmax;
    lmp->memory->grow(vatom_thr,nthreads,maxvatom_thr,6,"thr_omp:vatom_thr");
  }

  int ntotal = (lmp->force->newton_bond) ?
    (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal;

  // zero per thread accumulators
  ev_zero_acc_thr(ntotal, dihed->eflag_global, dihed->vflag_global,
                  dihed->eflag_atom, dihed->vflag_atom, nthreads);
}

/* ---------------------------------------------------------------------- */

void ThrOMP::ev_setup_thr(Pair *pair)
{
  int nthreads = lmp->comm->nthreads;

  // reallocate per-atom arrays if necessary
  if (pair->eflag_atom && lmp->atom->nmax > maxeatom_thr) {
    maxeatom_thr = lmp->atom->nmax;
    lmp->memory->grow(eatom_thr,nthreads,maxeatom_thr,"thr_omp:eatom_thr");
  }
  if (pair->vflag_atom && lmp->atom->nmax > maxvatom_thr) {
    maxvatom_thr = lmp->atom->nmax;
    lmp->memory->grow(vatom_thr,nthreads,maxvatom_thr,6,"thr_omp:vatom_thr");
  }

  int ntotal = (lmp->force->newton) ?
    (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal;

  // zero per thread accumulators
  ev_zero_acc_thr(ntotal, pair->eflag_global, pair->vflag_global,
                  pair->eflag_atom, pair->vflag_atom, nthreads);
}

/* ----------------------------------------------------------------------
   reduce the per thread accumulated E/V data into the canonical accumulators.
------------------------------------------------------------------------- */
void ThrOMP::ev_reduce_thr(Dihedral *dihed)
{
  int nthreads = lmp->comm->nthreads;
  int ntotal = (lmp->force->newton_bond) ?
    (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal;

  for (int n = 0; n < nthreads; ++n) {
    dihed->energy += eng_bond_thr[n];
    if (dihed->vflag_either) {
      dihed->virial[0] += virial_thr[n][0];
      dihed->virial[1] += virial_thr[n][1];
      dihed->virial[2] += virial_thr[n][2];
      dihed->virial[3] += virial_thr[n][3];
      dihed->virial[4] += virial_thr[n][4];
      dihed->virial[5] += virial_thr[n][5];
      if (dihed->vflag_atom) {
        for (int i = 0; i < ntotal; ++i) {
          dihed->vatom[i][0] += vatom_thr[n][i][0];
          dihed->vatom[i][1] += vatom_thr[n][i][1];
          dihed->vatom[i][2] += vatom_thr[n][i][2];
          dihed->vatom[i][3] += vatom_thr[n][i][3];
          dihed->vatom[i][4] += vatom_thr[n][i][4];
          dihed->vatom[i][5] += vatom_thr[n][i][5];
        }
      }
    }
    if (dihed->eflag_atom) {
      for (int i = 0; i < ntotal; ++i) {
        dihed->eatom[i] += eatom_thr[n][i];
      }
    }
  }
}

/* ----------------------------------------------------------------------
   tally eng_vdwl and virial into per thread global and per-atom accumulators
   need i < nlocal test since called by bond_quartic and dihedral_charmm
------------------------------------------------------------------------- */

void ThrOMP::ev_tally_thr(Pair *pair, int i, int j, int nlocal,
                          int newton_pair, double evdwl, double ecoul,
                          double fpair, double delx, double dely,
                          double delz, int tid)
{
  double evdwlhalf,ecoulhalf,epairhalf,v[6];

  if (pair->eflag_either) {
    if (pair->eflag_global) {
      if (newton_pair) {
        eng_vdwl_thr[tid] += evdwl;
        eng_coul_thr[tid] += ecoul;
      } else {
        evdwlhalf = 0.5*evdwl;
        ecoulhalf = 0.5*ecoul;
        if (i < nlocal) {
          eng_vdwl_thr[tid] += evdwlhalf;
          eng_coul_thr[tid] += ecoulhalf;
        }
        if (j < nlocal) {
          eng_vdwl_thr[tid] += evdwlhalf;
          eng_coul_thr[tid] += ecoulhalf;
        }
      }
    }
    if (pair->eflag_atom) {
      epairhalf = 0.5 * (evdwl + ecoul);
      if (newton_pair || i < nlocal) eatom_thr[tid][i] += epairhalf;
      if (newton_pair || j < nlocal) eatom_thr[tid][j] += epairhalf;
    }
  }

  if (pair->vflag_either) {
    v[0] = delx*delx*fpair;
    v[1] = dely*dely*fpair;
    v[2] = delz*delz*fpair;
    v[3] = delx*dely*fpair;
    v[4] = delx*delz*fpair;
    v[5] = dely*delz*fpair;

    if (pair->vflag_global) {
      if (newton_pair) {
        virial_thr[tid][0] += v[0];
        virial_thr[tid][1] += v[1];
        virial_thr[tid][2] += v[2];
        virial_thr[tid][3] += v[3];
        virial_thr[tid][4] += v[4];
        virial_thr[tid][5] += v[5];
      } else {
        if (i < nlocal) {
          virial_thr[tid][0] += 0.5*v[0];
          virial_thr[tid][1] += 0.5*v[1];
          virial_thr[tid][2] += 0.5*v[2];
          virial_thr[tid][3] += 0.5*v[3];
          virial_thr[tid][4] += 0.5*v[4];
          virial_thr[tid][5] += 0.5*v[5];
        }
        if (j < nlocal) {
          virial_thr[tid][0] += 0.5*v[0];
          virial_thr[tid][1] += 0.5*v[1];
          virial_thr[tid][2] += 0.5*v[2];
          virial_thr[tid][3] += 0.5*v[3];
          virial_thr[tid][4] += 0.5*v[4];
          virial_thr[tid][5] += 0.5*v[5];
        }
      }
    }

    if (pair->vflag_atom) {
      if (newton_pair || i < nlocal) {
        vatom_thr[tid][i][0] += 0.5*v[0];
        vatom_thr[tid][i][1] += 0.5*v[1];
        vatom_thr[tid][i][2] += 0.5*v[2];
        vatom_thr[tid][i][3] += 0.5*v[3];
        vatom_thr[tid][i][4] += 0.5*v[4];
        vatom_thr[tid][i][5] += 0.5*v[5];
      }
      if (newton_pair || j < nlocal) {
        vatom_thr[tid][j][0] += 0.5*v[0];
        vatom_thr[tid][j][1] += 0.5*v[1];
        vatom_thr[tid][j][2] += 0.5*v[2];
        vatom_thr[tid][j][3] += 0.5*v[3];
        vatom_thr[tid][j][4] += 0.5*v[4];
        vatom_thr[tid][j][5] += 0.5*v[5];
      }
    }
  }
}

/* ----------------------------------------------------------------------
   reduce the per thread accumulated E/V data into the canonical accumulators.
------------------------------------------------------------------------- */
void ThrOMP::ev_reduce_thr(Pair *pair)
{
  const int nthreads = lmp->comm->nthreads;
  const int ntotal = (lmp->force->newton) ?
    (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal;

  for (int n = 0; n < nthreads; ++n) {
    pair->eng_vdwl += eng_vdwl_thr[n];
    pair->eng_coul += eng_coul_thr[n];
    if (pair->vflag_either) {
      pair->virial[0] += virial_thr[n][0];
      pair->virial[1] += virial_thr[n][1];
      pair->virial[2] += virial_thr[n][2];
      pair->virial[3] += virial_thr[n][3];
      pair->virial[4] += virial_thr[n][4];
      pair->virial[5] += virial_thr[n][5];
      if (pair->vflag_atom) {
        for (int i = 0; i < ntotal; ++i) {
          pair->vatom[i][0] += vatom_thr[n][i][0];
          pair->vatom[i][1] += vatom_thr[n][i][1];
          pair->vatom[i][2] += vatom_thr[n][i][2];
          pair->vatom[i][3] += vatom_thr[n][i][3];
          pair->vatom[i][4] += vatom_thr[n][i][4];
          pair->vatom[i][5] += vatom_thr[n][i][5];
        }
      }
    }
    if (pair->eflag_atom) {
      for (int i = 0; i < ntotal; ++i) {
        pair->eatom[i] += eatom_thr[n][i];
      }
    }
  }
}

/* ---------------------------------------------------------------------- */

// set loop range, thread id, and force array offset for threaded runs.
double **ThrOMP::loop_setup_thr(double **f, int &ifrom, int &ito, int &tid,
                                int inum, int nall, int nthreads)
{
#if defined(_OPENMP)
  if (nthreads > 1) {
    tid = omp_get_thread_num();

    // each thread works on a fixed chunk of atoms.
    const int idelta = 1 + inum/nthreads;
    ifrom = tid*idelta;
    ito   = ifrom + idelta;
    if (ito > inum)
      ito = inum;

    return f + nall*tid;

  } else {
#endif
    tid = 0;
    ifrom = 0;
    ito = inum;
    return f;
#if defined(_OPENMP)
  }
#endif
}

/* ---------------------------------------------------------------------- */

// reduce per thread forces into the first part of the force
// array that is used for the non-threaded parts and reset
// the temporary storage to 0.0. this routine depends on the
// force arrays being stored in the order x1,y1,z1,x2,y2,z2,...
// we need to post a barrier to wait until all threads are done
// with computing forces.
void ThrOMP::force_reduce_thr(double *fall, int nall,
                              int nthreads, int tid)
{
#if defined(_OPENMP)
  // NOOP in non-threaded execution.
  if (nthreads == 1) return;
#pragma omp barrier
  {
    double *f;
    const int idelta = 1 + nall/nthreads;
    const int ifrom = 3*tid*idelta;
    const int ito   = 3*(((tid+1)*idelta > nall) ? nall : ((tid+1)*idelta));

    for (int n = 1; n < nthreads; ++n) {
      const int toffs = 3*n*nall;
      f = fall + toffs;
      for (int m = ifrom; m < ito; ++m) {
        fall[m] += f[m];
        f[m] = 0.0;
      }
    }
  }
#else
  // NOOP in non-threaded execution.
  return;
#endif
}

/* ---------------------------------------------------------------------- */

double ThrOMP::memory_usage_thr()
{
  const int nthreads=lmp->comm->nthreads;

  double bytes = nthreads * (3 + 7) * sizeof(double);
  bytes += nthreads * maxeatom_thr * sizeof(double);
  bytes += nthreads * maxvatom_thr * 6 * sizeof(double);
  return bytes;
}
src/USER-OMP/thr_omp.h (new file)
@@ -0,0 +1,79 @@
/* -*- c++ -*- -------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */

#ifndef LMP_THR_OMP_H
#define LMP_THR_OMP_H

#include "pointers.h"

namespace LAMMPS_NS {

// forward declarations
class Pair;
class Dihedral;

class ThrOMP {

 protected:
  const int thr_style;
  enum {PAIR=1, BOND, ANGLE, DIHEDRAL, IMPROPER, KSPACE, FIX, COMPUTE};

  LAMMPS *lmp;           // reference to base lammps object.

  double *eng_vdwl_thr;  // per thread accumulated vdw energy
  double *eng_coul_thr;  // per thread accumulated coulomb energies
  double *eng_bond_thr;  // per thread accumulated bonded energy

  double **virial_thr;   // per thread virial
  double **eatom_thr;    // per thread per atom energy
  double ***vatom_thr;   // per thread per atom virial

  int maxeatom_thr, maxvatom_thr;

 public:
  ThrOMP(LAMMPS *, int);
  virtual ~ThrOMP();

  double memory_usage_thr();

 protected:
  // extra ev_tally work for threaded styles
  void ev_setup_thr(Pair *);
  void ev_setup_thr(Dihedral *);

  void ev_reduce_thr(Pair *);
  void ev_reduce_thr(Dihedral *);

 private:
  // internal method to be used by multiple ev_setup_thr() methods
  void ev_zero_acc_thr(int, int, int, int, int, int);

 protected:
  // threading adapted versions of the ev_tally infrastructure
  void ev_tally_thr(Pair *, int, int, int, int, double, double,
                    double, double, double, double, int);

 protected:
  // set loop range, thread id, and force array offset for threaded runs.
  double **loop_setup_thr(double **, int &, int &, int &, int, int, int);

  // reduce per thread forces into the first part of the force array
  void force_reduce_thr(double *, int, int, int);
};

}
#endif
src/dihedral.h
@@ -20,6 +20,8 @@
 namespace LAMMPS_NS {
 
 class Dihedral : protected Pointers {
+  friend class ThrOMP;
+
  public:
   int allocated;
   int *setflag;
src/pair.h
@@ -22,6 +22,7 @@ class Pair : protected Pointers {
   friend class BondQuartic;
   friend class DihedralCharmm;
   friend class FixGPU;
+  friend class ThrOMP;
 
  public:
   double eng_vdwl,eng_coul;            // accumulated energies