forked from lijiext/lammps
393 lines
12 KiB
C++
393 lines
12 KiB
C++
/* -------------------------------------------------------------------------
|
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
|
http://lammps.sandia.gov, Sandia National Laboratories
|
|
Steve Plimpton, sjplimp@sandia.gov
|
|
|
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
|
certain rights in this software. This software is distributed under
|
|
the GNU General Public License.
|
|
|
|
See the README file in the top-level LAMMPS directory.
|
|
------------------------------------------------------------------------- */
|
|
|
|
/* ----------------------------------------------------------------------
|
|
OpenMP based threading support for LAMMPS
|
|
Contributing author: Axel Kohlmeyer (Temple U)
|
|
------------------------------------------------------------------------- */
|
|
|
|
#include "thr_omp.h"
|
|
|
|
#include "memory.h"
|
|
|
|
#include "atom.h"
|
|
#include "comm.h"
|
|
#include "force.h"
|
|
|
|
#include "pair.h"
|
|
#include "dihedral.h"
|
|
|
|
#if defined(_OPENMP)
|
|
#include <omp.h>
|
|
#endif
|
|
|
|
using namespace LAMMPS_NS;
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
ThrOMP::ThrOMP(LAMMPS *ptr, int style) : thr_style(style), lmp(ptr)
|
|
{
|
|
// initialize fixed size per thread storage
|
|
eng_vdwl_thr = eng_coul_thr = eng_bond_thr = NULL;
|
|
virial_thr = NULL;
|
|
lmp->memory->create(eng_vdwl_thr,lmp->comm->nthreads,"thr_omp:eng_vdwl_thr");
|
|
lmp->memory->create(eng_coul_thr,lmp->comm->nthreads,"thr_omp:eng_coul_thr");
|
|
lmp->memory->create(eng_bond_thr,lmp->comm->nthreads,"thr_omp:eng_bond_thr");
|
|
lmp->memory->create(virial_thr,lmp->comm->nthreads,6,"thr_omp:virial_thr");
|
|
|
|
// variable size per thread, per atom storage
|
|
// the actually allocation happens via memory->grow() in ev_steup_thr()
|
|
maxeatom_thr = maxvatom_thr = 0;
|
|
eatom_thr = NULL;
|
|
vatom_thr = NULL;
|
|
}
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
ThrOMP::~ThrOMP()
|
|
{
|
|
lmp->memory->destroy(eng_vdwl_thr);
|
|
lmp->memory->destroy(eng_coul_thr);
|
|
lmp->memory->destroy(eng_bond_thr);
|
|
lmp->memory->destroy(virial_thr);
|
|
lmp->memory->destroy(eatom_thr);
|
|
lmp->memory->destroy(vatom_thr);
|
|
}
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
void ThrOMP::ev_zero_acc_thr(int ntotal, int eflag_global, int vflag_global,
|
|
int eflag_atom, int vflag_atom, int nthreads)
|
|
{
|
|
int t,i;
|
|
|
|
for (t = 0; t < nthreads; ++t) {
|
|
|
|
if (eflag_global)
|
|
eng_vdwl_thr[t] = eng_coul_thr[t] = eng_bond_thr[t] = 0.0;
|
|
|
|
if (vflag_global)
|
|
for (i = 0; i < 6; ++i)
|
|
virial_thr[t][i] = 0.0;
|
|
|
|
if (eflag_atom)
|
|
for (i = 0; i < ntotal; ++i)
|
|
eatom_thr[t][i] = 0.0;
|
|
|
|
if (vflag_atom)
|
|
for (i = 0; i < ntotal; ++i) {
|
|
vatom_thr[t][i][0] = 0.0;
|
|
vatom_thr[t][i][1] = 0.0;
|
|
vatom_thr[t][i][2] = 0.0;
|
|
vatom_thr[t][i][3] = 0.0;
|
|
vatom_thr[t][i][4] = 0.0;
|
|
vatom_thr[t][i][5] = 0.0;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
void ThrOMP::ev_setup_thr(Dihedral *dihed)
|
|
{
|
|
int nthreads = lmp->comm->nthreads;
|
|
|
|
// reallocate per-atom arrays if necessary
|
|
if (dihed->eflag_atom && lmp->atom->nmax > maxeatom_thr) {
|
|
maxeatom_thr = lmp->atom->nmax;
|
|
lmp->memory->grow(eatom_thr,nthreads,maxeatom_thr,"thr_omp:eatom_thr");
|
|
}
|
|
if (dihed->vflag_atom && lmp->atom->nmax > maxvatom_thr) {
|
|
maxvatom_thr = lmp->atom->nmax;
|
|
lmp->memory->grow(vatom_thr,nthreads,maxeatom_thr,6,"thr_omp:vatom_thr");
|
|
}
|
|
|
|
int ntotal = (lmp->force->newton_bond) ?
|
|
(lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal;
|
|
|
|
// zero per thread accumulators
|
|
ev_zero_acc_thr(ntotal, dihed->eflag_global, dihed->vflag_global,
|
|
dihed->eflag_atom, dihed->vflag_atom, nthreads);
|
|
}
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
void ThrOMP::ev_setup_thr(Pair *pair)
|
|
{
|
|
int nthreads = lmp->comm->nthreads;
|
|
|
|
// reallocate per-atom arrays if necessary
|
|
if (pair->eflag_atom && lmp->atom->nmax > maxeatom_thr) {
|
|
maxeatom_thr = lmp->atom->nmax;
|
|
lmp->memory->grow(eatom_thr,nthreads,maxeatom_thr,"thr_omp:eatom_thr");
|
|
}
|
|
if (pair->vflag_atom && lmp->atom->nmax > maxvatom_thr) {
|
|
maxvatom_thr = lmp->atom->nmax;
|
|
lmp->memory->grow(vatom_thr,nthreads,maxeatom_thr,6,"thr_omp:vatom_thr");
|
|
}
|
|
|
|
int ntotal = (lmp->force->newton) ?
|
|
(lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal;
|
|
|
|
// zero per thread accumulators
|
|
ev_zero_acc_thr(ntotal, pair->eflag_global, pair->vflag_global,
|
|
pair->eflag_atom, pair->vflag_atom, nthreads);
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
reduce the per thread accumulated E/V data into the canonical accumulators.
|
|
------------------------------------------------------------------------- */
|
|
void ThrOMP::ev_reduce_thr(Dihedral *dihed)
|
|
{
|
|
int nthreads = lmp->comm->nthreads;
|
|
int ntotal = (lmp->force->newton_bond) ?
|
|
(lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal;
|
|
|
|
for (int n = 0; n < nthreads; ++n) {
|
|
dihed->energy += eng_bond_thr[n];
|
|
if (dihed->vflag_either) {
|
|
dihed->virial[0] += virial_thr[n][0];
|
|
dihed->virial[1] += virial_thr[n][1];
|
|
dihed->virial[2] += virial_thr[n][2];
|
|
dihed->virial[3] += virial_thr[n][3];
|
|
dihed->virial[4] += virial_thr[n][4];
|
|
dihed->virial[5] += virial_thr[n][5];
|
|
if (dihed->vflag_atom) {
|
|
for (int i = 0; i < ntotal; ++i) {
|
|
dihed->vatom[i][0] += vatom_thr[n][i][0];
|
|
dihed->vatom[i][1] += vatom_thr[n][i][1];
|
|
dihed->vatom[i][2] += vatom_thr[n][i][2];
|
|
dihed->vatom[i][3] += vatom_thr[n][i][3];
|
|
dihed->vatom[i][4] += vatom_thr[n][i][4];
|
|
dihed->vatom[i][5] += vatom_thr[n][i][5];
|
|
}
|
|
}
|
|
}
|
|
if (dihed->eflag_atom) {
|
|
for (int i = 0; i < ntotal; ++i) {
|
|
dihed->eatom[i] += eatom_thr[n][i];
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
tally eng_vdwl and virial into per thread global and per-atom accumulators
|
|
need i < nlocal test since called by bond_quartic and dihedral_charmm
|
|
------------------------------------------------------------------------- */
|
|
|
|
void ThrOMP::ev_tally_thr(Pair *pair, int i, int j, int nlocal,
|
|
int newton_pair, double evdwl, double ecoul,
|
|
double fpair, double delx, double dely,
|
|
double delz, int tid)
|
|
{
|
|
double evdwlhalf,ecoulhalf,epairhalf,v[6];
|
|
|
|
if (pair->eflag_either) {
|
|
if (pair->eflag_global) {
|
|
if (newton_pair) {
|
|
eng_vdwl_thr[tid] += evdwl;
|
|
eng_coul_thr[tid] += ecoul;
|
|
} else {
|
|
evdwlhalf = 0.5*evdwl;
|
|
ecoulhalf = 0.5*ecoul;
|
|
if (i < nlocal) {
|
|
eng_vdwl_thr[tid] += evdwlhalf;
|
|
eng_coul_thr[tid] += ecoulhalf;
|
|
}
|
|
if (j < nlocal) {
|
|
eng_vdwl_thr[tid] += evdwlhalf;
|
|
eng_coul_thr[tid] += ecoulhalf;
|
|
}
|
|
}
|
|
}
|
|
if (pair->eflag_atom) {
|
|
epairhalf = 0.5 * (evdwl + ecoul);
|
|
if (newton_pair || i < nlocal) eatom_thr[tid][i] += epairhalf;
|
|
if (newton_pair || j < nlocal) eatom_thr[tid][j] += epairhalf;
|
|
}
|
|
}
|
|
|
|
if (pair->vflag_either) {
|
|
v[0] = delx*delx*fpair;
|
|
v[1] = dely*dely*fpair;
|
|
v[2] = delz*delz*fpair;
|
|
v[3] = delx*dely*fpair;
|
|
v[4] = delx*delz*fpair;
|
|
v[5] = dely*delz*fpair;
|
|
|
|
if (pair->vflag_global) {
|
|
if (newton_pair) {
|
|
virial_thr[tid][0] += v[0];
|
|
virial_thr[tid][1] += v[1];
|
|
virial_thr[tid][2] += v[2];
|
|
virial_thr[tid][3] += v[3];
|
|
virial_thr[tid][4] += v[4];
|
|
virial_thr[tid][5] += v[5];
|
|
} else {
|
|
if (i < nlocal) {
|
|
virial_thr[tid][0] += 0.5*v[0];
|
|
virial_thr[tid][1] += 0.5*v[1];
|
|
virial_thr[tid][2] += 0.5*v[2];
|
|
virial_thr[tid][3] += 0.5*v[3];
|
|
virial_thr[tid][4] += 0.5*v[4];
|
|
virial_thr[tid][5] += 0.5*v[5];
|
|
}
|
|
if (j < nlocal) {
|
|
virial_thr[tid][0] += 0.5*v[0];
|
|
virial_thr[tid][1] += 0.5*v[1];
|
|
virial_thr[tid][2] += 0.5*v[2];
|
|
virial_thr[tid][3] += 0.5*v[3];
|
|
virial_thr[tid][4] += 0.5*v[4];
|
|
virial_thr[tid][5] += 0.5*v[5];
|
|
}
|
|
}
|
|
}
|
|
|
|
if (pair->vflag_atom) {
|
|
if (newton_pair || i < nlocal) {
|
|
vatom_thr[tid][i][0] += 0.5*v[0];
|
|
vatom_thr[tid][i][1] += 0.5*v[1];
|
|
vatom_thr[tid][i][2] += 0.5*v[2];
|
|
vatom_thr[tid][i][3] += 0.5*v[3];
|
|
vatom_thr[tid][i][4] += 0.5*v[4];
|
|
vatom_thr[tid][i][5] += 0.5*v[5];
|
|
}
|
|
if (newton_pair || j < nlocal) {
|
|
vatom_thr[tid][j][0] += 0.5*v[0];
|
|
vatom_thr[tid][j][1] += 0.5*v[1];
|
|
vatom_thr[tid][j][2] += 0.5*v[2];
|
|
vatom_thr[tid][j][3] += 0.5*v[3];
|
|
vatom_thr[tid][j][4] += 0.5*v[4];
|
|
vatom_thr[tid][j][5] += 0.5*v[5];
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
reduce the per thread accumulated E/V data into the canonical accumulators.
|
|
------------------------------------------------------------------------- */
|
|
void ThrOMP::ev_reduce_thr(Pair *pair)
|
|
{
|
|
const int nthreads = lmp->comm->nthreads;
|
|
const int ntotal = (lmp->force->newton) ?
|
|
(lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal;
|
|
|
|
for (int n = 0; n < nthreads; ++n) {
|
|
pair->eng_vdwl += eng_vdwl_thr[n];
|
|
pair->eng_coul += eng_coul_thr[n];
|
|
if (pair->vflag_either) {
|
|
pair->virial[0] += virial_thr[n][0];
|
|
pair->virial[1] += virial_thr[n][1];
|
|
pair->virial[2] += virial_thr[n][2];
|
|
pair->virial[3] += virial_thr[n][3];
|
|
pair->virial[4] += virial_thr[n][4];
|
|
pair->virial[5] += virial_thr[n][5];
|
|
if (pair->vflag_atom) {
|
|
for (int i = 0; i < ntotal; ++i) {
|
|
pair->vatom[i][0] += vatom_thr[n][i][0];
|
|
pair->vatom[i][1] += vatom_thr[n][i][1];
|
|
pair->vatom[i][2] += vatom_thr[n][i][2];
|
|
pair->vatom[i][3] += vatom_thr[n][i][3];
|
|
pair->vatom[i][4] += vatom_thr[n][i][4];
|
|
pair->vatom[i][5] += vatom_thr[n][i][5];
|
|
}
|
|
}
|
|
}
|
|
if (pair->eflag_atom) {
|
|
for (int i = 0; i < ntotal; ++i) {
|
|
pair->eatom[i] += eatom_thr[n][i];
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
// set loop range thread id, and force array offset for threaded runs.
|
|
double **ThrOMP::loop_setup_thr(double **f, int &ifrom, int &ito, int &tid,
|
|
int inum, int nall, int nthreads)
|
|
{
|
|
#if defined(_OPENMP)
|
|
if (nthreads > 1) {
|
|
tid = omp_get_thread_num();
|
|
|
|
// each thread works on a fixed chunk of atoms.
|
|
const int idelta = 1 + inum/nthreads;
|
|
ifrom = tid*idelta;
|
|
ito = ifrom + idelta;
|
|
if (ito > inum)
|
|
ito = inum;
|
|
|
|
return f + nall*tid;
|
|
|
|
} else {
|
|
#endif
|
|
tid = 0;
|
|
ifrom = 0;
|
|
ito = inum;
|
|
return f;
|
|
#if defined(_OPENMP)
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
// reduce per thread forces into the first part of the force
|
|
// array that is used for the non-threaded parts and reset
|
|
// the temporary storage to 0.0. this routine depends on the
|
|
// forces arrays stored in this order x1,y1,z1,x2,y2,z2,...
|
|
// we need to post a barrier to wait until all threads are done
|
|
// with computing forces.
|
|
void ThrOMP::force_reduce_thr(double *fall, int nall,
|
|
int nthreads, int tid)
|
|
{
|
|
#if defined(_OPENMP)
|
|
// NOOP in non-threaded execution.
|
|
if (nthreads == 1) return;
|
|
#pragma omp barrier
|
|
{
|
|
double *f;
|
|
const int idelta = 1 + nall/nthreads;
|
|
const int ifrom = 3*tid*idelta;
|
|
const int ito = 3*(((ifrom + idelta) > nall) ? nall : (ifrom + idelta));
|
|
|
|
for (int n = 1; n < nthreads; ++n) {
|
|
const int toffs = 3*n*nall;
|
|
f = fall + toffs;
|
|
for (int m = ifrom; m < ito; ++m) {
|
|
fall[m] += f[m];
|
|
f[m] = 0.0;
|
|
}
|
|
}
|
|
}
|
|
#else
|
|
// NOOP in non-threaded execution.
|
|
return;
|
|
#endif
|
|
}
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
double ThrOMP::memory_usage_thr()
|
|
{
|
|
const int nthreads=lmp->comm->nthreads;
|
|
|
|
double bytes = nthreads * (3 + 7) * sizeof(double);
|
|
bytes += nthreads * maxeatom_thr * sizeof(double);
|
|
bytes += nthreads * maxvatom_thr * 6 * sizeof(double);
|
|
return bytes;
|
|
}
|