lammps/src/GPU/fix_nh_gpu.cpp

/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://lammps.sandia.gov/, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */

#include "fix_nh_gpu.h"

#include "atom.h"
#include "domain.h"
#include "error.h"
#include "force.h"
#include "gpu_extra.h"
#include "memory.h"
#include "modify.h"
#include "neighbor.h"
#include "update.h"

#include <cstring>
#include <cmath>

using namespace LAMMPS_NS;
using namespace FixConst;

#define TILTMAX 1.5

enum{NOBIAS,BIAS};
enum{ISO,ANISO,TRICLINIC};

typedef struct { double x,y,z; } dbl3_t;

/* ----------------------------------------------------------------------
   NVT,NPH,NPT integrators for improved Nose-Hoover equations of motion
 ---------------------------------------------------------------------- */

FixNHGPU::FixNHGPU(LAMMPS *lmp, int narg, char **arg) :
  FixNH(lmp, narg, arg)
{
  _dtfm = 0;
  _nlocal3 = 0;
  _nlocal_max = 0;
}

/* ---------------------------------------------------------------------- */

FixNHGPU::~FixNHGPU()
{
}

/* ---------------------------------------------------------------------- */

void FixNHGPU::setup(int vflag)
{
  FixNH::setup(vflag);
  if (utils::strmatch(update->integrate_style,"^respa"))
    _respa_on = 1;
  else
    _respa_on = 0;
  reset_dt();
}

/* ----------------------------------------------------------------------
   change box size
   remap all atoms or dilate group atoms depending on allremap flag
   if rigid bodies exist, scale rigid body centers-of-mass
------------------------------------------------------------------------- */

void FixNHGPU::remap()
{
  if (_respa_on) { FixNH::remap(); return; }

  double oldlo,oldhi;
  double expfac;

  dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
  int *mask = atom->mask;
  int nlocal = atom->nlocal;
  double *h = domain->h;

  // omega is not used, except for book-keeping

  for (int i = 0; i < 6; i++) omega[i] += dto*omega_dot[i];

  // convert pertinent atoms and rigid bodies to lamda coords
  const double hi0 = domain->h_inv[0];
  const double hi1 = domain->h_inv[1];
  const double hi2 = domain->h_inv[2];
  const double hi3 = domain->h_inv[3];
  const double hi4 = domain->h_inv[4];
  const double hi5 = domain->h_inv[5];
  const double b0 = domain->boxlo[0];
  const double b1 = domain->boxlo[1];
  const double b2 = domain->boxlo[2];

  if (allremap) {
    #if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
    #pragma omp parallel for simd schedule(static)
    #elif (LAL_USE_OMP_SIMD == 1)
    #pragma omp simd
    #endif
    for (int i = 0; i < nlocal; i++) {
      const double d0 = x[i].x - b0;
      const double d1 = x[i].y - b1;
      const double d2 = x[i].z - b2;
      x[i].x = hi0*d0 + hi5*d1 + hi4*d2;
      x[i].y = hi1*d1 + hi3*d2;
      x[i].z = hi2*d2;
    }
  } else {
    #if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
    #pragma omp parallel for simd schedule(static)
    #elif (LAL_USE_OMP_SIMD == 1)
    #pragma omp simd
    #endif
    for (int i = 0; i < nlocal; i++) {
      if (mask[i] & dilate_group_bit) {
        const double d0 = x[i].x - b0;
        const double d1 = x[i].y - b1;
        const double d2 = x[i].z - b2;
        x[i].x = hi0*d0 + hi5*d1 + hi4*d2;
        x[i].y = hi1*d1 + hi3*d2;
        x[i].z = hi2*d2;
      }
    }
  }

  if (nrigid)
    for (int i = 0; i < nrigid; i++)
      modify->fix[rfix[i]]->deform(0);

  // reset global and local box to new size/shape

  // this operation corresponds to applying the
  // translate and scale operations
  // corresponding to the solution of the following ODE:
  //
  // h_dot = omega_dot * h
  //
  // where h_dot, omega_dot and h are all upper-triangular
  // 3x3 tensors. In Voigt notation, the elements of the
  // RHS product tensor are:
  // h_dot = [0*0, 1*1, 2*2, 1*3+3*2, 0*4+5*3+4*2, 0*5+5*1]
  //
  // Ordering of operations preserves time symmetry.

  double dto2 = dto/2.0;
  double dto4 = dto/4.0;
  double dto8 = dto/8.0;

  // off-diagonal components, first half

  if (pstyle == TRICLINIC) {

    if (p_flag[4]) {
      expfac = exp(dto8*omega_dot[0]);
      h[4] *= expfac;
      h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]);
      h[4] *= expfac;
    }

    if (p_flag[3]) {
      expfac = exp(dto4*omega_dot[1]);
      h[3] *= expfac;
      h[3] += dto2*(omega_dot[3]*h[2]);
      h[3] *= expfac;
    }

    if (p_flag[5]) {
      expfac = exp(dto4*omega_dot[0]);
      h[5] *= expfac;
      h[5] += dto2*(omega_dot[5]*h[1]);
      h[5] *= expfac;
    }

    if (p_flag[4]) {
      expfac = exp(dto8*omega_dot[0]);
      h[4] *= expfac;
      h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]);
      h[4] *= expfac;
    }
  }

  // scale diagonal components
  // scale tilt factors with cell, if set

  if (p_flag[0]) {
    oldlo = domain->boxlo[0];
    oldhi = domain->boxhi[0];
    expfac = exp(dto*omega_dot[0]);
    domain->boxlo[0] = (oldlo-fixedpoint[0])*expfac + fixedpoint[0];
    domain->boxhi[0] = (oldhi-fixedpoint[0])*expfac + fixedpoint[0];
  }

  if (p_flag[1]) {
    oldlo = domain->boxlo[1];
    oldhi = domain->boxhi[1];
    expfac = exp(dto*omega_dot[1]);
    domain->boxlo[1] = (oldlo-fixedpoint[1])*expfac + fixedpoint[1];
    domain->boxhi[1] = (oldhi-fixedpoint[1])*expfac + fixedpoint[1];
    if (scalexy) h[5] *= expfac;
  }

  if (p_flag[2]) {
    oldlo = domain->boxlo[2];
    oldhi = domain->boxhi[2];
    expfac = exp(dto*omega_dot[2]);
    domain->boxlo[2] = (oldlo-fixedpoint[2])*expfac + fixedpoint[2];
    domain->boxhi[2] = (oldhi-fixedpoint[2])*expfac + fixedpoint[2];
    if (scalexz) h[4] *= expfac;
    if (scaleyz) h[3] *= expfac;
  }

  // off-diagonal components, second half

  if (pstyle == TRICLINIC) {

    if (p_flag[4]) {
      expfac = exp(dto8*omega_dot[0]);
      h[4] *= expfac;
      h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]);
      h[4] *= expfac;
    }

    if (p_flag[3]) {
      expfac = exp(dto4*omega_dot[1]);
      h[3] *= expfac;
      h[3] += dto2*(omega_dot[3]*h[2]);
      h[3] *= expfac;
    }

    if (p_flag[5]) {
      expfac = exp(dto4*omega_dot[0]);
      h[5] *= expfac;
      h[5] += dto2*(omega_dot[5]*h[1]);
      h[5] *= expfac;
    }

    if (p_flag[4]) {
      expfac = exp(dto8*omega_dot[0]);
      h[4] *= expfac;
      h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]);
      h[4] *= expfac;
    }

  }

  domain->yz = h[3];
  domain->xz = h[4];
  domain->xy = h[5];

  // tilt factor to cell length ratio can not exceed TILTMAX in one step

  if (domain->yz < -TILTMAX*domain->yprd ||
      domain->yz > TILTMAX*domain->yprd ||
      domain->xz < -TILTMAX*domain->xprd ||
      domain->xz > TILTMAX*domain->xprd ||
      domain->xy < -TILTMAX*domain->xprd ||
      domain->xy > TILTMAX*domain->xprd)
    error->all(FLERR,"Fix npt/nph has tilted box too far in one step - "
               "periodic cell is too far from equilibrium state");

  domain->set_global_box();
  domain->set_local_box();

  // convert pertinent atoms and rigid bodies back to box coords
  const double h0 = domain->h[0];
  const double h1 = domain->h[1];
  const double h2 = domain->h[2];
  const double h3 = domain->h[3];
  const double h4 = domain->h[4];
  const double h5 = domain->h[5];
  const double nb0 = domain->boxlo[0];
  const double nb1 = domain->boxlo[1];
  const double nb2 = domain->boxlo[2];

  if (allremap) {
    #if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
    #pragma omp parallel for simd schedule(static)
    #elif (LAL_USE_OMP_SIMD == 1)
    #pragma omp simd
    #endif
    for (int i = 0; i < nlocal; i++) {
      x[i].x = h0*x[i].x + h5*x[i].y + h4*x[i].z + nb0;
      x[i].y = h1*x[i].y + h3*x[i].z + nb1;
      x[i].z = h2*x[i].z + nb2;
    }
  } else {
    #if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
    #pragma omp parallel for simd schedule(static)
    #elif (LAL_USE_OMP_SIMD == 1)
    #pragma omp simd
    #endif
    for (int i = 0; i < nlocal; i++) {
      if (mask[i] & dilate_group_bit) {
        x[i].x = h0*x[i].x + h5*x[i].y + h4*x[i].z + nb0;
        x[i].y = h1*x[i].y + h3*x[i].z + nb1;
        x[i].z = h2*x[i].z + nb2;
      }
    }
  }

  if (nrigid)
    for (int i = 0; i < nrigid; i++)
      modify->fix[rfix[i]]->deform(1);
}

/* ----------------------------------------------------------------------
   2nd half of Verlet update
------------------------------------------------------------------------- */

void FixNHGPU::final_integrate() {
  if (neighbor->ago == 0 && _respa_on == 0) reset_dt();
  FixNH::final_integrate();
}

/* ---------------------------------------------------------------------- */

void FixNHGPU::reset_dt()
{
  if (_respa_on) { FixNH::reset_dt(); return; }
  dtv = update->dt;
  dtf = 0.5 * update->dt * force->ftm2v;
  dthalf = 0.5 * update->dt;
  dt4 = 0.25 * update->dt;
  dt8 = 0.125 * update->dt;
  dto = dthalf;

  if (pstat_flag)
    pdrag_factor = 1.0 - (update->dt * p_freq_max * drag / nc_pchain);

  if (tstat_flag)
    tdrag_factor = 1.0 - (update->dt * t_freq * drag / nc_tchain);

  const int * const mask = atom->mask;
  const int nlocal = (igroup == atom->firstgroup) ? atom->nfirst :
    atom->nlocal;

  if (nlocal > _nlocal_max) {
    if (_nlocal_max) memory->destroy(_dtfm);
    _nlocal_max = static_cast<int>(1.20 * nlocal);
    memory->create(_dtfm, _nlocal_max * 3, "fix_nh_gpu:dtfm");
  }

  _nlocal3 = nlocal * 3;

  if (igroup == 0) {
    if (atom->rmass) {
      const double * const rmass = atom->rmass;
      int n = 0;
      for (int i = 0; i < nlocal; i++) {
        const double dtfir = dtf / rmass[i];
        _dtfm[n++] = dtfir;
        _dtfm[n++] = dtfir;
        _dtfm[n++] = dtfir;
      }
    } else {
      const double * const mass = atom->mass;
      const int * const type = atom->type;
      int n = 0;
      for (int i = 0; i < nlocal; i++) {
        const double dtfim = dtf / mass[type[i]];
        _dtfm[n++] = dtfim;
        _dtfm[n++] = dtfim;
        _dtfm[n++] = dtfim;
      }
    }
  } else {
    if (atom->rmass) {
      const double * const rmass = atom->rmass;
      int n = 0;
      for (int i = 0; i < nlocal; i++)
        if (mask[i] & groupbit) {
          const double dtfir = dtf / rmass[i];
          _dtfm[n++] = dtfir;
          _dtfm[n++] = dtfir;
          _dtfm[n++] = dtfir;
        } else {
          _dtfm[n++] = 0.0;
          _dtfm[n++] = 0.0;
          _dtfm[n++] = 0.0;
        }
    } else {
      const double * const mass = atom->mass;
      const int * const type = atom->type;
      int n = 0;
      for (int i = 0; i < nlocal; i++)
        if (mask[i] & groupbit) {
          const double dtfim = dtf / mass[type[i]];
          _dtfm[n++] = dtfim;
          _dtfm[n++] = dtfim;
          _dtfm[n++] = dtfim;
        } else {
          _dtfm[n++] = 0.0;
          _dtfm[n++] = 0.0;
          _dtfm[n++] = 0.0;
        }
    }
  }
}

/* ----------------------------------------------------------------------
   perform half-step barostat scaling of velocities
-----------------------------------------------------------------------*/

void FixNHGPU::nh_v_press()
{
  if (pstyle == TRICLINIC || which == BIAS || _respa_on) {
    FixNH::nh_v_press();
    return;
  }

  dbl3_t * _noalias const v = (dbl3_t *)atom->v[0];
  int *mask = atom->mask;
  int nlocal = atom->nlocal;
  if (igroup == atom->firstgroup) nlocal = atom->nfirst;

  double f0 = exp(-dt4*(omega_dot[0]+mtk_term2));
  double f1 = exp(-dt4*(omega_dot[1]+mtk_term2));
  double f2 = exp(-dt4*(omega_dot[2]+mtk_term2));
  f0 *= f0;
  f1 *= f1;
  f2 *= f2;

  if (igroup == 0) {
    #if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
    #pragma omp parallel for simd schedule(static)
    #elif (LAL_USE_OMP_SIMD == 1)
    #pragma omp simd
    #endif
    for (int i = 0; i < nlocal; i++) {
      v[i].x *= f0;
      v[i].y *= f1;
      v[i].z *= f2;
    }
  } else {
    #if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
    #pragma omp parallel for simd schedule(static)
    #elif (LAL_USE_OMP_SIMD == 1)
    #pragma omp simd
    #endif
    for (int i = 0; i < nlocal; i++) {
      if (mask[i] & groupbit) {
        v[i].x *= f0;
        v[i].y *= f1;
        v[i].z *= f2;
      }
    }
  }
}

/* ----------------------------------------------------------------------
   perform half-step update of velocities
-----------------------------------------------------------------------*/

void FixNHGPU::nve_v()
{
  if (_respa_on) { FixNH::nve_v(); return; }

  double * _noalias const v = atom->v[0];
  const double * _noalias const f = atom->f[0];
  #if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
  #pragma omp parallel for simd schedule(static)
  #elif (LAL_USE_OMP_SIMD == 1)
  #pragma omp simd
  #endif
  for (int i = 0; i < _nlocal3; i++)
    v[i] += _dtfm[i] * f[i];
}

/* ----------------------------------------------------------------------
   perform full-step update of positions
-----------------------------------------------------------------------*/

void FixNHGPU::nve_x()
{
  if (_respa_on) { FixNH::nve_x(); return; }

  double * _noalias const x = atom->x[0];
  double * _noalias const v = atom->v[0];

  // x update by full step only for atoms in group

  if (igroup == 0) {
    #if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
    #pragma omp parallel for simd schedule(static)
    #elif (LAL_USE_OMP_SIMD == 1)
    #pragma omp simd
    #endif
    for (int i = 0; i < _nlocal3; i++)
      x[i] += dtv * v[i];
  } else {
    #if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
    #pragma omp parallel for simd schedule(static)
    #elif (LAL_USE_OMP_SIMD == 1)
    #pragma omp simd
    #endif
    for (int i = 0; i < _nlocal3; i++) {
      if (_dtfm[i] != 0.0)
        x[i] += dtv * v[i];
    }
  }
}

/* ----------------------------------------------------------------------
   perform half-step thermostat scaling of velocities
-----------------------------------------------------------------------*/

void FixNHGPU::nh_v_temp()
{
  if (which == BIAS || _respa_on) {
    FixNH::nh_v_temp();
    return;
  }

  double * _noalias const v = atom->v[0];

  if (igroup == 0) {
    #if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
    #pragma omp parallel for simd schedule(static)
    #elif (LAL_USE_OMP_SIMD == 1)
    #pragma omp simd
    #endif
    for (int i = 0; i < _nlocal3; i++)
        v[i] *= factor_eta;
  } else {
    #if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
    #pragma omp parallel for simd schedule(static)
    #elif (LAL_USE_OMP_SIMD == 1)
    #pragma omp simd
    #endif
    for (int i = 0; i < _nlocal3; i++) {
      if (_dtfm[i] != 0.0)
        v[i] *= factor_eta;
    }
  }
}

double FixNHGPU::memory_usage()
{
  return FixNH::memory_usage() + _nlocal_max * 3 * sizeof(double);
}