refactoring of USER-TALLY computes to handle sparse and hybrid systems

with sparse and hybrid systems, Pair::ev_tally() may not be called on
every processor and thus the computes in USER-TALLY may hang during
reverse communication because of the error->all() call after checking
whether callback from Pair::ev_tally() has been called at least once.
To address this cleanly, a second callback function needs to be added,
which is run during Pair::ev_setup() and will now handle all memory
re-allocation and clearing of accumulators, just like it is done for
regular tallied data.
This commit is contained in:
Axel Kohlmeyer 2017-06-12 22:12:12 -04:00
parent e530ba46f4
commit b01143102d
12 changed files with 147 additions and 176 deletions

View File

@ -44,8 +44,7 @@ ComputeForceTally::ComputeForceTally(LAMMPS *lmp, int narg, char **arg) :
extscalar = 1;
peflag = 1; // we need Pair::ev_tally() to be run
did_compute = 0;
invoked_peratom = invoked_scalar = -1;
did_setup = invoked_peratom = invoked_scalar = -1;
nmax = -1;
fatom = NULL;
vector = new double[size_peratom_cols];
@ -77,15 +76,17 @@ void ComputeForceTally::init()
|| force->improper || force->kspace)
error->warning(FLERR,"Compute force/tally only called from pair style");
}
did_compute = -1;
did_setup = -1;
}
/* ---------------------------------------------------------------------- */
void ComputeForceTally::setup()
void ComputeForceTally::pair_setup_callback(int, int)
{
const int ntotal = atom->nlocal + atom->nghost;
// grow per-atom storage, if needed
if (atom->nmax > nmax) {
memory->destroy(fatom);
nmax = atom->nmax;
@ -93,7 +94,7 @@ void ComputeForceTally::setup()
array_atom = fatom;
}
// clear storage as needed
// clear storage
for (int i=0; i < ntotal; ++i)
for (int j=0; j < size_peratom_cols; ++j)
@ -101,10 +102,11 @@ void ComputeForceTally::setup()
for (int i=0; i < size_peratom_cols; ++i)
vector[i] = ftotal[i] = 0.0;
did_setup = update->ntimestep;
}
/* ---------------------------------------------------------------------- */
void ComputeForceTally::pair_tally_callback(int i, int j, int nlocal, int newton,
double, double, double fpair,
double dx, double dy, double dz)
@ -112,37 +114,6 @@ void ComputeForceTally::pair_tally_callback(int i, int j, int nlocal, int newton
const int ntotal = atom->nlocal + atom->nghost;
const int * const mask = atom->mask;
// do setup work that needs to be done only once per timestep
if (did_compute != update->ntimestep) {
did_compute = update->ntimestep;
// grow local force array if necessary
// needs to be atom->nmax in length
if (atom->nmax > nmax) {
memory->destroy(fatom);
nmax = atom->nmax;
memory->create(fatom,nmax,size_peratom_cols,"force/tally:fatom");
array_atom = fatom;
}
// clear storage as needed
if (newton) {
for (int i=0; i < ntotal; ++i)
for (int j=0; j < size_peratom_cols; ++j)
fatom[i][j] = 0.0;
} else {
for (int i=0; i < atom->nlocal; ++i)
for (int j=0; j < size_peratom_cols; ++j)
fatom[i][j] = 0.0;
}
for (int i=0; i < size_peratom_cols; ++i)
vector[i] = ftotal[i] = 0.0;
}
if ( ((mask[i] & groupbit) && (mask[j] & groupbit2))
|| ((mask[i] & groupbit2) && (mask[j] & groupbit)) ) {
@ -205,7 +176,8 @@ void ComputeForceTally::unpack_reverse_comm(int n, int *list, double *buf)
double ComputeForceTally::compute_scalar()
{
invoked_scalar = update->ntimestep;
if ((did_compute != invoked_scalar) || (update->eflag_global != invoked_scalar))
if ((did_setup != invoked_scalar)
|| (update->eflag_global != invoked_scalar))
error->all(FLERR,"Energy was not tallied on needed timestep");
// sum accumulated forces across procs
@ -221,7 +193,8 @@ double ComputeForceTally::compute_scalar()
void ComputeForceTally::compute_peratom()
{
invoked_peratom = update->ntimestep;
if (((atom->nlocal > 0) && (did_compute != invoked_peratom)) || (update->eflag_global != invoked_peratom))
if ((did_setup != invoked_peratom)
|| (update->eflag_global != invoked_peratom))
error->all(FLERR,"Energy was not tallied on needed timestep");
// collect contributions from ghost atoms
@ -229,6 +202,7 @@ void ComputeForceTally::compute_peratom()
if (force->newton_pair) {
comm->reverse_comm_compute(this);
// clear out ghost atom data after it has been collected to local atoms
const int nall = atom->nlocal + atom->nghost;
for (int i = atom->nlocal; i < nall; ++i)
for (int j = 0; j < size_peratom_cols; ++j)

View File

@ -31,7 +31,6 @@ class ComputeForceTally : public Compute {
virtual ~ComputeForceTally();
void init();
void setup();
double compute_scalar();
void compute_peratom();
@ -40,12 +39,12 @@ class ComputeForceTally : public Compute {
void unpack_reverse_comm(int, int *, double *);
double memory_usage();
void pair_setup_callback(int, int);
void pair_tally_callback(int, int, int, int,
double, double, double,
double, double, double);
private:
bigint did_compute;
bigint did_setup;
int nmax,igroup2,groupbit2;
double **fatom;
double ftotal[3];

View File

@ -43,7 +43,7 @@ ComputeHeatFluxTally::ComputeHeatFluxTally(LAMMPS *lmp, int narg, char **arg) :
size_vector = 6;
peflag = 1; // we need Pair::ev_tally() to be run
did_compute = 0;
did_setup = 0;
invoked_peratom = invoked_scalar = -1;
nmax = -1;
stress = NULL;
@ -80,9 +80,41 @@ void ComputeHeatFluxTally::init()
|| force->improper || force->kspace)
error->warning(FLERR,"Compute heat/flux/tally only called from pair style");
}
did_compute = -1;
did_setup = -1;
}
/* ---------------------------------------------------------------------- */
void ComputeHeatFluxTally::pair_setup_callback(int, int)
{
const int ntotal = atom->nlocal + atom->nghost;
// grow per-atom storage, if needed
if (atom->nmax > nmax) {
memory->destroy(stress);
memory->destroy(eatom);
nmax = atom->nmax;
memory->create(stress,nmax,6,"heat/flux/tally:stress");
memory->create(eatom,nmax,"heat/flux/tally:eatom");
}
// clear storage
for (int i=0; i < ntotal; ++i) {
eatom[i] = 0.0;
stress[i][0] = 0.0;
stress[i][1] = 0.0;
stress[i][2] = 0.0;
stress[i][3] = 0.0;
stress[i][4] = 0.0;
stress[i][5] = 0.0;
}
for (int i=0; i < size_vector; ++i)
vector[i] = heatj[i] = 0.0;
did_setup = update->ntimestep;
}
/* ---------------------------------------------------------------------- */
void ComputeHeatFluxTally::pair_tally_callback(int i, int j, int nlocal, int newton,
@ -92,52 +124,6 @@ void ComputeHeatFluxTally::pair_tally_callback(int i, int j, int nlocal, int new
const int ntotal = atom->nlocal + atom->nghost;
const int * const mask = atom->mask;
// do setup work that needs to be done only once per timestep
if (did_compute != update->ntimestep) {
did_compute = update->ntimestep;
// grow local stress and eatom arrays if necessary
// needs to be atom->nmax in length
if (atom->nmax > nmax) {
memory->destroy(stress);
nmax = atom->nmax;
memory->create(stress,nmax,6,"heat/flux/tally:stress");
memory->destroy(eatom);
nmax = atom->nmax;
memory->create(eatom,nmax,"heat/flux/tally:eatom");
}
// clear storage as needed
if (newton) {
for (int i=0; i < ntotal; ++i) {
eatom[i] = 0.0;
stress[i][0] = 0.0;
stress[i][1] = 0.0;
stress[i][2] = 0.0;
stress[i][3] = 0.0;
stress[i][4] = 0.0;
stress[i][5] = 0.0;
}
} else {
for (int i=0; i < atom->nlocal; ++i) {
eatom[i] = 0.0;
stress[i][0] = 0.0;
stress[i][1] = 0.0;
stress[i][2] = 0.0;
stress[i][3] = 0.0;
stress[i][4] = 0.0;
stress[i][5] = 0.0;
}
}
for (int i=0; i < size_vector; ++i)
vector[i] = heatj[i] = 0.0;
}
if ( ((mask[i] & groupbit) && (mask[j] & groupbit2))
|| ((mask[i] & groupbit2) && (mask[j] & groupbit)) ) {
@ -215,7 +201,7 @@ void ComputeHeatFluxTally::unpack_reverse_comm(int n, int *list, double *buf)
void ComputeHeatFluxTally::compute_vector()
{
invoked_vector = update->ntimestep;
if ((did_compute != invoked_vector) || (update->eflag_global != invoked_vector))
if ((did_setup != invoked_vector) || (update->eflag_global != invoked_vector))
error->all(FLERR,"Energy was not tallied on needed timestep");
// collect contributions from ghost atoms

View File

@ -38,12 +38,13 @@ class ComputeHeatFluxTally : public Compute {
void unpack_reverse_comm(int, int *, double *);
double memory_usage();
void pair_setup_callback(int, int);
void pair_tally_callback(int, int, int, int,
double, double, double,
double, double, double);
private:
bigint did_compute;
bigint did_setup;
int nmax,igroup2,groupbit2;
double **stress,*eatom;
double *heatj;

View File

@ -42,7 +42,7 @@ ComputePEMolTally::ComputePEMolTally(LAMMPS *lmp, int narg, char **arg) :
extvector = 1;
peflag = 1; // we need Pair::ev_tally() to be run
did_compute = invoked_vector = -1;
did_setup = invoked_vector = -1;
vector = new double[size_vector];
}
@ -74,9 +74,16 @@ void ComputePEMolTally::init()
|| force->improper || force->kspace)
error->warning(FLERR,"Compute pe/mol/tally only called from pair style");
}
did_compute = -1;
did_setup = -1;
}
/* ---------------------------------------------------------------------- */
void ComputePEMolTally::pair_setup_callback(int, int)
{
etotal[0] = etotal[1] = etotal[2] = etotal[3] = 0.0;
did_setup = update->ntimestep;
}
/* ---------------------------------------------------------------------- */
void ComputePEMolTally::pair_tally_callback(int i, int j, int nlocal, int newton,
@ -86,14 +93,6 @@ void ComputePEMolTally::pair_tally_callback(int i, int j, int nlocal, int newton
const int * const mask = atom->mask;
const tagint * const molid = atom->molecule;
// do setup work that needs to be done only once per timestep
if (did_compute != update->ntimestep) {
did_compute = update->ntimestep;
etotal[0] = etotal[1] = etotal[2] = etotal[3] = 0.0;
}
if ( ((mask[i] & groupbit) && (mask[j] & groupbit2))
|| ((mask[i] & groupbit2) && (mask[j] & groupbit)) ){
@ -120,7 +119,7 @@ void ComputePEMolTally::pair_tally_callback(int i, int j, int nlocal, int newton
void ComputePEMolTally::compute_vector()
{
invoked_vector = update->ntimestep;
if ((did_compute != invoked_vector) || (update->eflag_global != invoked_vector))
if ((did_setup != invoked_vector) || (update->eflag_global != invoked_vector))
error->all(FLERR,"Energy was not tallied on needed timestep");
// sum accumulated energies across procs

View File

@ -33,12 +33,13 @@ class ComputePEMolTally : public Compute {
void init();
void compute_vector();
void pair_setup_callback(int, int);
void pair_tally_callback(int, int, int, int,
double, double, double,
double, double, double);
private:
bigint did_compute;
bigint did_setup;
int igroup2,groupbit2;
double etotal[4];
};

View File

@ -44,7 +44,7 @@ ComputePETally::ComputePETally(LAMMPS *lmp, int narg, char **arg) :
extscalar = 1;
peflag = 1; // we need Pair::ev_tally() to be run
did_compute = invoked_peratom = invoked_scalar = -1;
did_setup = invoked_peratom = invoked_scalar = -1;
nmax = -1;
eatom = NULL;
vector = new double[size_peratom_cols];
@ -76,9 +76,33 @@ void ComputePETally::init()
|| force->improper || force->kspace)
error->warning(FLERR,"Compute pe/tally only called from pair style");
}
did_compute = -1;
did_setup = -1;
}
/* ---------------------------------------------------------------------- */
void ComputePETally::pair_setup_callback(int, int)
{
const int ntotal = atom->nlocal + atom->nghost;
// grow per-atom storage, if needed
if (atom->nmax > nmax) {
memory->destroy(eatom);
nmax = atom->nmax;
memory->create(eatom,nmax,size_peratom_cols,"pe/tally:eatom");
array_atom = eatom;
}
// clear storage
for (int i=0; i < ntotal; ++i)
eatom[i][0] = eatom[i][1] = 0.0;
vector[0] = etotal[0] = vector[1] = etotal[1] = 0.0;
did_setup = update->ntimestep;
}
/* ---------------------------------------------------------------------- */
void ComputePETally::pair_tally_callback(int i, int j, int nlocal, int newton,
@ -88,36 +112,8 @@ void ComputePETally::pair_tally_callback(int i, int j, int nlocal, int newton,
const int ntotal = atom->nlocal + atom->nghost;
const int * const mask = atom->mask;
// do setup work that needs to be done only once per timestep
if (did_compute != update->ntimestep) {
did_compute = update->ntimestep;
// grow local eatom array if necessary
// needs to be atom->nmax in length
if (atom->nmax > nmax) {
memory->destroy(eatom);
nmax = atom->nmax;
memory->create(eatom,nmax,size_peratom_cols,"pe/tally:eatom");
array_atom = eatom;
}
// clear storage as needed
if (newton) {
for (int i=0; i < ntotal; ++i)
eatom[i][0] = eatom[i][1] = 0.0;
} else {
for (int i=0; i < atom->nlocal; ++i)
eatom[i][0] = eatom[i][1] = 0.0;
}
vector[0] = etotal[0] = vector[1] = etotal[1] = 0.0;
}
if ( ((mask[i] & groupbit) && (mask[j] & groupbit2))
|| ((mask[i] & groupbit2) && (mask[j] & groupbit)) ){
|| ((mask[i] & groupbit2) && (mask[j] & groupbit)) ) {
evdwl *= 0.5; ecoul *= 0.5;
if (newton || i < nlocal) {
@ -165,7 +161,8 @@ void ComputePETally::unpack_reverse_comm(int n, int *list, double *buf)
double ComputePETally::compute_scalar()
{
invoked_scalar = update->ntimestep;
if ((did_compute != invoked_scalar) || (update->eflag_global != invoked_scalar))
if ((did_setup != invoked_scalar)
|| (update->eflag_global != invoked_scalar))
error->all(FLERR,"Energy was not tallied on needed timestep");
// sum accumulated energies across procs
@ -181,13 +178,16 @@ double ComputePETally::compute_scalar()
void ComputePETally::compute_peratom()
{
invoked_peratom = update->ntimestep;
if ((did_compute != invoked_peratom) || (update->eflag_global != invoked_peratom))
if ((did_setup != invoked_peratom)
|| (update->eflag_global != invoked_peratom))
error->all(FLERR,"Energy was not tallied on needed timestep");
// collect contributions from ghost atoms
if (force->newton_pair) {
comm->reverse_comm_compute(this);
// clear out ghost atom data after it has been collected to local atoms
const int nall = atom->nlocal + atom->nghost;
for (int i = atom->nlocal; i < nall; ++i)
eatom[i][0] = eatom[i][1] = 0.0;

View File

@ -39,12 +39,13 @@ class ComputePETally : public Compute {
void unpack_reverse_comm(int, int *, double *);
double memory_usage();
void pair_setup_callback(int, int);
void pair_tally_callback(int, int, int, int,
double, double, double,
double, double, double);
private:
bigint did_compute;
bigint did_setup;
int nmax,igroup2,groupbit2;
double **eatom;
double etotal[2];

View File

@ -44,11 +44,11 @@ ComputeStressTally::ComputeStressTally(LAMMPS *lmp, int narg, char **arg) :
extscalar = 0;
peflag = 1; // we need Pair::ev_tally() to be run
did_compute = 0;
invoked_peratom = invoked_scalar = -1;
did_setup = invoked_peratom = invoked_scalar = -1;
nmax = -1;
stress = NULL;
vector = new double[size_peratom_cols];
virial = new double[size_peratom_cols];
}
/* ---------------------------------------------------------------------- */
@ -57,6 +57,7 @@ ComputeStressTally::~ComputeStressTally()
{
if (force && force->pair) force->pair->del_tally_callback(this);
memory->destroy(stress);
delete[] virial;
delete[] vector;
}
@ -77,9 +78,35 @@ void ComputeStressTally::init()
|| force->improper || force->kspace)
error->warning(FLERR,"Compute stress/tally only called from pair style");
}
did_compute = -1;
did_setup = -1;
}
/* ---------------------------------------------------------------------- */
void ComputeStressTally::pair_setup_callback(int, int)
{
const int ntotal = atom->nlocal + atom->nghost;
// grow per-atom storage, if needed
if (atom->nmax > nmax) {
memory->destroy(stress);
nmax = atom->nmax;
memory->create(stress,nmax,size_peratom_cols,"stress/tally:stress");
array_atom = stress;
}
// clear storage
for (int i=0; i < ntotal; ++i)
for (int j=0; j < size_peratom_cols; ++j)
stress[i][j] = 0.0;
for (int i=0; i < size_peratom_cols; ++i)
vector[i] = virial[i] = 0.0;
did_setup = update->ntimestep;
}
/* ---------------------------------------------------------------------- */
void ComputeStressTally::pair_tally_callback(int i, int j, int nlocal, int newton,
@ -89,37 +116,6 @@ void ComputeStressTally::pair_tally_callback(int i, int j, int nlocal, int newto
const int ntotal = atom->nlocal + atom->nghost;
const int * const mask = atom->mask;
// do setup work that needs to be done only once per timestep
if (did_compute != update->ntimestep) {
did_compute = update->ntimestep;
// grow local stress array if necessary
// needs to be atom->nmax in length
if (atom->nmax > nmax) {
memory->destroy(stress);
nmax = atom->nmax;
memory->create(stress,nmax,size_peratom_cols,"stress/tally:stress");
array_atom = stress;
}
// clear storage as needed
if (newton) {
for (int i=0; i < ntotal; ++i)
for (int j=0; j < size_peratom_cols; ++j)
stress[i][j] = 0.0;
} else {
for (int i=0; i < atom->nlocal; ++i)
for (int j=0; j < size_peratom_cols; ++j)
stress[i][j] = 0.0;
}
for (int i=0; i < size_peratom_cols; ++i)
vector[i] = virial[i] = 0.0;
}
if ( ((mask[i] & groupbit) && (mask[j] & groupbit2))
|| ((mask[i] & groupbit2) && (mask[j] & groupbit)) ) {
@ -192,7 +188,8 @@ void ComputeStressTally::unpack_reverse_comm(int n, int *list, double *buf)
double ComputeStressTally::compute_scalar()
{
invoked_scalar = update->ntimestep;
if ((did_compute != invoked_scalar) || (update->eflag_global != invoked_scalar))
if ((did_setup != invoked_scalar)
|| (update->eflag_global != invoked_scalar))
error->all(FLERR,"Energy was not tallied on needed timestep");
// sum accumulated forces across procs
@ -212,7 +209,8 @@ double ComputeStressTally::compute_scalar()
void ComputeStressTally::compute_peratom()
{
invoked_peratom = update->ntimestep;
if ((did_compute != invoked_peratom) || (update->eflag_global != invoked_peratom))
if ((did_setup != invoked_peratom)
|| (update->eflag_global != invoked_peratom))
error->all(FLERR,"Energy was not tallied on needed timestep");
// collect contributions from ghost atoms

View File

@ -39,15 +39,16 @@ class ComputeStressTally : public Compute {
void unpack_reverse_comm(int, int *, double *);
double memory_usage();
void pair_setup_callback(int, int);
void pair_tally_callback(int, int, int, int,
double, double, double,
double, double, double);
private:
bigint did_compute;
bigint did_setup;
int nmax,igroup2,groupbit2;
double **stress;
double virial[6];
double *virial;
};
}

View File

@ -135,6 +135,7 @@ class Compute : protected Pointers {
virtual double memory_usage() {return 0.0;}
virtual void pair_setup_callback(int, int) {}
virtual void pair_tally_callback(int, int, int, int,
double, double, double,
double, double, double) {}

View File

@ -814,6 +814,16 @@ void Pair::ev_setup(int eflag, int vflag, int alloc)
if (vflag_atom == 0) vflag_either = 0;
if (vflag_either == 0 && eflag_either == 0) evflag = 0;
} else vflag_fdotr = 0;
// run ev_setup option for USER-TALLY computes
if (num_tally_compute > 0) {
for (int k=0; k < num_tally_compute; ++k) {
Compute *c = list_tally_compute[k];
c->pair_setup_callback(eflag,vflag);
}
}
}
/* ----------------------------------------------------------------------