Merge pull request #314 from stanmoore1/fix-momentum-kokkos

Fix momentum kokkos
This commit is contained in:
sjplimp 2017-01-06 10:01:17 -07:00 committed by GitHub
commit 467377094a
11 changed files with 485 additions and 3 deletions

1
src/.gitignore vendored
View File

@ -24,6 +24,7 @@
/kokkos.cpp
/kokkos.h
/kokkos_type.h
/kokkos_few.h
/manifold*.cpp
/manifold*.h

View File

@ -91,6 +91,8 @@ action fix_reaxc_species_kokkos.cpp fix_reaxc_species.cpp
action fix_reaxc_species_kokkos.h fix_reaxc_species.h
action fix_setforce_kokkos.cpp
action fix_setforce_kokkos.h
action fix_momentum_kokkos.cpp
action fix_momentum_kokkos.h
action fix_wall_reflect_kokkos.cpp
action fix_wall_reflect_kokkos.h
action gridcomm_kokkos.cpp gridcomm.cpp
@ -100,6 +102,7 @@ action improper_harmonic_kokkos.h improper_harmonic.h
action kokkos.cpp
action kokkos.h
action kokkos_type.h
action kokkos_few.h
action memory_kokkos.h
action modify_kokkos.cpp
action modify_kokkos.h

View File

@ -16,6 +16,7 @@
#include "domain.h"
#include "kokkos_type.h"
#include "kokkos_few.h"
namespace LAMMPS_NS {
@ -35,6 +36,10 @@ class DomainKokkos : public Domain {
void image_flip(int, int, int);
void x2lamda(int);
void lamda2x(int);
// these lines bring in the x2lamda signatures from Domain
// that are not overloaded here
using Domain::x2lamda;
using Domain::lamda2x;
int closest_image(const int, int) const;
@ -50,6 +55,10 @@ class DomainKokkos : public Domain {
KOKKOS_INLINE_FUNCTION
void operator()(TagDomain_x2lamda, const int&) const;
static KOKKOS_INLINE_FUNCTION
Few<double,3> unmap(Few<double,3> prd, Few<double,6> h, int triclinic,
Few<double,3> x, imageint image);
private:
double lo[3],hi[3],period[3];
int n_flip, m_flip, p_flip;
@ -57,6 +66,26 @@ class DomainKokkos : public Domain {
ArrayTypes<LMPDeviceType>::t_imageint_1d image;
};
KOKKOS_INLINE_FUNCTION
Few<double,3> DomainKokkos::unmap(Few<double,3> prd, Few<double,6> h,
int triclinic, Few<double,3> x, imageint image)
{
int xbox = (image & IMGMASK) - IMGMAX;
int ybox = (image >> IMGBITS & IMGMASK) - IMGMAX;
int zbox = (image >> IMG2BITS) - IMGMAX;
Few<double,3> y;
if (triclinic == 0) {
y[0] = x[0] + xbox*prd[0];
y[1] = x[1] + ybox*prd[1];
y[2] = x[2] + zbox*prd[2];
} else {
y[0] = x[0] + h[0]*xbox + h[5]*ybox + h[4]*zbox;
y[1] = x[1] + h[1]*ybox + h[3]*zbox;
y[2] = x[2] + h[2]*zbox;
}
return y;
}
}
#endif

View File

@ -0,0 +1,204 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include <stdlib.h>
#include <string.h>
#include "fix_momentum_kokkos.h"
#include "atom_kokkos.h"
#include "atom_masks.h"
#include "domain.h"
#include "domain_kokkos.h"
#include "group.h"
#include "error.h"
#include "force.h"
#include "kokkos_few.h"
using namespace LAMMPS_NS;
using namespace FixConst;
/* ----------------------------------------------------------------------
Contributing author: Dan Ibanez (SNL)
------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------- */
template<class DeviceType>
FixMomentumKokkos<DeviceType>::FixMomentumKokkos(LAMMPS *lmp, int narg, char **arg) :
FixMomentum(lmp, narg, arg)
{
kokkosable = 1;
atomKK = (AtomKokkos *) atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = EMPTY_MASK;
datamask_modify = EMPTY_MASK;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
static double get_kinetic_energy(
AtomKokkos* atomKK,
MPI_Comm world,
int groupbit,
int nlocal,
typename ArrayTypes<DeviceType>::t_v_array_randomread v,
typename ArrayTypes<DeviceType>::t_int_1d_randomread mask)
{
using AT = ArrayTypes<DeviceType>;
auto execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
double ke=0.0;
if (atomKK->rmass) {
atomKK->sync(execution_space, RMASS_MASK);
typename AT::t_float_1d_randomread rmass = atomKK->k_rmass.view<DeviceType>();
Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(int i, double& update) {
if (mask(i) & groupbit)
update += rmass(i) *
(v(i,0)*v(i,0) + v(i,1)*v(i,1) + v(i,2)*v(i,2));
}, ke);
} else {
// D.I. : why is there no MASS_MASK ?
atomKK->sync(execution_space, TYPE_MASK);
typename AT::t_int_1d_randomread type = atomKK->k_type.view<DeviceType>();
typename AT::t_float_1d_randomread mass = atomKK->k_mass.view<DeviceType>();
Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(int i, double& update) {
if (mask(i) & groupbit)
update += mass(type(i)) *
(v(i,0)*v(i,0) + v(i,1)*v(i,1) + v(i,2)*v(i,2));
}, ke);
}
double ke_total;
MPI_Allreduce(&ke,&ke_total,1,MPI_DOUBLE,MPI_SUM,world);
return ke_total;
}
template<class DeviceType>
void FixMomentumKokkos<DeviceType>::end_of_step()
{
atomKK->sync(execution_space, V_MASK | MASK_MASK);
typename AT::t_v_array v = atomKK->k_v.view<DeviceType>();
typename AT::t_int_1d_randomread mask = atomKK->k_mask.view<DeviceType>();
const int nlocal = atom->nlocal;
double ekin_old,ekin_new;
ekin_old = ekin_new = 0.0;
if (dynamic)
masstotal = group->mass(igroup); // change once Group is ported to Kokkos
// do nothing if group is empty, i.e. mass is zero;
if (masstotal == 0.0) return;
// compute kinetic energy before momentum removal, if needed
if (rescale) {
ekin_old = get_kinetic_energy<DeviceType>(atomKK, world, groupbit, nlocal, v, mask);
}
auto groupbit2 = groupbit;
if (linear) {
/* this is needed because Group is not Kokkos-aware ! */
atomKK->sync(ExecutionSpaceFromDevice<LMPHostType>::space,
V_MASK | MASK_MASK | TYPE_MASK | RMASS_MASK);
Few<double, 3> vcm;
group->vcm(igroup,masstotal,&vcm[0]);
// adjust velocities by vcm to zero linear momentum
// only adjust a component if flag is set
auto xflag2 = xflag;
auto yflag2 = yflag;
auto zflag2 = zflag;
Kokkos::parallel_for(nlocal, LAMMPS_LAMBDA(int i) {
if (mask(i) & groupbit2) {
if (xflag2) v(i,0) -= vcm[0];
if (yflag2) v(i,1) -= vcm[1];
if (zflag2) v(i,2) -= vcm[2];
}
});
atomKK->modified(execution_space, V_MASK);
}
if (angular) {
Few<double, 3> xcm, angmom, omega;
double inertia[3][3];
/* syncs for each Kokkos-unaware Group method */
atomKK->sync(ExecutionSpaceFromDevice<LMPHostType>::space,
X_MASK | MASK_MASK | TYPE_MASK | IMAGE_MASK | RMASS_MASK);
group->xcm(igroup,masstotal,&xcm[0]);
atomKK->sync(ExecutionSpaceFromDevice<LMPHostType>::space,
X_MASK | V_MASK | MASK_MASK | TYPE_MASK | IMAGE_MASK | RMASS_MASK);
group->angmom(igroup,&xcm[0],&angmom[0]);
atomKK->sync(ExecutionSpaceFromDevice<LMPHostType>::space,
X_MASK | MASK_MASK | TYPE_MASK | IMAGE_MASK | RMASS_MASK);
group->inertia(igroup,&xcm[0],inertia);
group->omega(&angmom[0],inertia,&omega[0]);
// adjust velocities to zero omega
// vnew_i = v_i - w x r_i
// must use unwrapped coords to compute r_i correctly
atomKK->sync(execution_space, X_MASK | IMAGE_MASK);
typename AT::t_x_array_randomread x = atomKK->k_x.view<DeviceType>();
typename AT::t_imageint_1d_randomread image = atomKK->k_image.view<DeviceType>();
int nlocal = atom->nlocal;
auto prd = Few<double,3>(domain->prd);
auto h = Few<double,6>(domain->h);
auto triclinic = domain->triclinic;
Kokkos::parallel_for(nlocal, LAMMPS_LAMBDA(int i) {
if (mask[i] & groupbit2) {
Few<double,3> x_i;
x_i[0] = x(i,0);
x_i[1] = x(i,1);
x_i[2] = x(i,2);
auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,image(i));
auto dx = unwrap[0] - xcm[0];
auto dy = unwrap[1] - xcm[1];
auto dz = unwrap[2] - xcm[2];
v(i,0) -= omega[1]*dz - omega[2]*dy;
v(i,1) -= omega[2]*dx - omega[0]*dz;
v(i,2) -= omega[0]*dy - omega[1]*dx;
}
});
atomKK->modified(execution_space, V_MASK);
}
// compute kinetic energy after momentum removal, if needed
if (rescale) {
ekin_new = get_kinetic_energy<DeviceType>(atomKK, world, groupbit, nlocal, v, mask);
double factor = 1.0;
if (ekin_new != 0.0) factor = sqrt(ekin_old/ekin_new);
Kokkos::parallel_for(nlocal, LAMMPS_LAMBDA(int i) {
if (mask(i) & groupbit2) {
v(i,0) *= factor;
v(i,1) *= factor;
v(i,2) *= factor;
}
});
atomKK->modified(execution_space, V_MASK);
}
}
namespace LAMMPS_NS {
template class FixMomentumKokkos<LMPDeviceType>;
#ifdef KOKKOS_HAVE_CUDA
template class FixMomentumKokkos<LMPHostType>;
#endif
}

View File

@ -0,0 +1,46 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
FixStyle(momentum/kk,FixMomentumKokkos<LMPDeviceType>)
FixStyle(momentum/kk/device,FixMomentumKokkos<LMPDeviceType>)
FixStyle(momentum/kk/host,FixMomentumKokkos<LMPHostType>)
#else
#ifndef LMP_FIX_MOMENTUM_KOKKOS_H
#define LMP_FIX_MOMENTUM_KOKKOS_H
#include "fix_momentum.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
template<class DeviceType>
class FixMomentumKokkos : public FixMomentum {
public:
typedef ArrayTypes<DeviceType> AT;
FixMomentumKokkos(class LAMMPS *, int, char **);
void end_of_step();
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

68
src/KOKKOS/kokkos_few.h Normal file
View File

@ -0,0 +1,68 @@
#ifndef KOKKOS_FEW_H
#define KOKKOS_FEW_H
#include <Kokkos_Core.hpp>
template <typename T, std::size_t n>
class Few {
alignas(T) char array_[n * sizeof(T)];
public:
enum { size = n };
Few(std::initializer_list<T> l) {
std::size_t i = 0;
for (auto it = l.begin(); it != l.end(); ++it) {
new (data() + (i++)) T(*it);
}
}
KOKKOS_INLINE_FUNCTION Few(T const a[]) {
for (std::size_t i = 0; i < n; ++i) new (data() + i) T(a[i]);
}
KOKKOS_INLINE_FUNCTION Few() {
for (std::size_t i = 0; i < n; ++i) new (data() + i) T();
}
KOKKOS_INLINE_FUNCTION ~Few() {
for (std::size_t i = 0; i < n; ++i) (data()[i]).~T();
}
KOKKOS_INLINE_FUNCTION Few(Few<T, n> const& rhs) {
for (std::size_t i = 0; i < n; ++i) new (data() + i) T(rhs[i]);
}
KOKKOS_INLINE_FUNCTION Few(Few<T, n> const volatile& rhs) {
for (std::size_t i = 0; i < n; ++i) new (data() + i) T(rhs[i]);
}
KOKKOS_INLINE_FUNCTION void operator=(Few<T, n> const& rhs) volatile {
for (std::size_t i = 0; i < n; ++i) data()[i] = rhs[i];
}
KOKKOS_INLINE_FUNCTION void operator=(Few<T, n> const& rhs) {
for (std::size_t i = 0; i < n; ++i) data()[i] = rhs[i];
}
KOKKOS_INLINE_FUNCTION void operator=(Few<T, n> const volatile& rhs) {
for (std::size_t i = 0; i < n; ++i) data()[i] = rhs[i];
}
KOKKOS_INLINE_FUNCTION T* data() {
return reinterpret_cast<T*>(array_);
}
KOKKOS_INLINE_FUNCTION T const* data() const {
return reinterpret_cast<T const*>(array_);
}
KOKKOS_INLINE_FUNCTION T volatile* data() volatile {
return reinterpret_cast<T volatile*>(array_);
}
KOKKOS_INLINE_FUNCTION T const volatile* data() const volatile {
return reinterpret_cast<T const volatile*>(array_);
}
KOKKOS_INLINE_FUNCTION T& operator[](std::size_t i) {
return data()[i];
}
KOKKOS_INLINE_FUNCTION T const& operator[](std::size_t i) const {
return data()[i];
}
KOKKOS_INLINE_FUNCTION T volatile& operator[](std::size_t i) volatile {
return data()[i];
}
KOKKOS_INLINE_FUNCTION T const volatile& operator[](std::size_t i) const volatile {
return data()[i];
}
};
#endif

View File

@ -920,4 +920,10 @@ void memset_kokkos (ViewType &view) {
#define ISFINITE(x) std::isfinite(x)
#endif
#ifdef KOKKOS_HAVE_CUDA
#define LAMMPS_LAMBDA [=] __device__
#else
#define LAMMPS_LAMBDA [=]
#endif
#endif

View File

@ -0,0 +1,125 @@
# kokkos_cuda = KOKKOS/CUDA package, OpenMPI with nvcc compiler, Kepler GPU
SHELL = /bin/sh
# ---------------------------------------------------------------------
# compiler/linker settings
# specify flags and libraries needed for your compiler
KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd)
export OMPI_CXX = $(KOKKOS_ABSOLUTE_PATH)/config/nvcc_wrapper
CC = mpicxx
CCFLAGS = -g -O3
SHFLAGS = -fPIC
DEPFLAGS = -M
LINK = mpicxx
LINKFLAGS = -g -O3
LIB =
SIZE = size
ARCHIVE = ar
ARFLAGS = -rc
SHLIBFLAGS = -shared
KOKKOS_DEVICES = Cuda, OpenMP
KOKKOS_ARCH = Kepler35
KOKKOS_CUDA_OPTIONS = enable_lambda
# ---------------------------------------------------------------------
# LAMMPS-specific settings, all OPTIONAL
# specify settings for LAMMPS features you will use
# if you change any -D setting, do full re-compile after "make clean"
# LAMMPS ifdef settings
# see possible settings in Section 2.2 (step 4) of manual
LMP_INC = -DLAMMPS_GZIP
# MPI library
# see discussion in Section 2.2 (step 5) of manual
# MPI wrapper compiler/linker can provide this info
# can point to dummy MPI library in src/STUBS as in Makefile.serial
# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts
# INC = path for mpi.h, MPI compiler settings
# PATH = path for MPI library
# LIB = name of MPI library
MPI_INC = -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1
MPI_PATH =
MPI_LIB =
# FFT library
# see discussion in Section 2.2 (step 6) of manaul
# can be left blank to use provided KISS FFT library
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
# PATH = path for FFT library
# LIB = name of FFT library
FFT_INC =
FFT_PATH =
FFT_LIB =
# JPEG and/or PNG library
# see discussion in Section 2.2 (step 7) of manual
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
# INC = path(s) for jpeglib.h and/or png.h
# PATH = path(s) for JPEG library and/or PNG library
# LIB = name(s) of JPEG library and/or PNG library
JPG_INC =
JPG_PATH =
JPG_LIB =
# ---------------------------------------------------------------------
# build rules and dependencies
# do not edit this section
include Makefile.package.settings
include Makefile.package
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS)
EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS)
# Path to src files
vpath %.cpp ..
vpath %.h ..
# Link target
$(EXE): $(OBJ) $(EXTRA_LINK_DEPENDS)
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
$(SIZE) $(EXE)
# Library targets
lib: $(OBJ) $(EXTRA_LINK_DEPENDS)
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
shlib: $(OBJ) $(EXTRA_LINK_DEPENDS)
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
$(OBJ) $(EXTRA_LIB) $(LIB)
# Compilation rules
%.o:%.cpp $(EXTRA_CPP_DEPENDS)
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
%.d:%.cpp $(EXTRA_CPP_DEPENDS)
$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
%.o:%.cu $(EXTRA_CPP_DEPENDS)
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
# Individual dependencies
depend : fastdep.exe $(SRC)
@./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1
fastdep.exe: ../DEPEND/fastdep.c
gcc -O -o $@ $<
sinclude .depend

View File

@ -1451,7 +1451,7 @@ void Domain::unmap(double *x, imageint image)
for triclinic, use h[] to add in tilt factors in other dims as needed
------------------------------------------------------------------------- */
void Domain::unmap(double *x, imageint image, double *y)
void Domain::unmap(const double *x, imageint image, double *y)
{
int xbox = (image & IMGMASK) - IMGMAX;
int ybox = (image >> IMGBITS & IMGMASK) - IMGMAX;

View File

@ -119,7 +119,7 @@ class Domain : protected Pointers {
void remap(double *);
void remap_near(double *, double *);
void unmap(double *, imageint);
void unmap(double *, imageint, double *);
void unmap(const double *, imageint, double *);
void image_flip(int, int, int);
int ownatom(int, double *, imageint *, int);

View File

@ -31,7 +31,7 @@ class FixMomentum : public Fix {
void init();
void end_of_step();
private:
protected:
int linear,angular,rescale;
int xflag,yflag,zflag;
int dynamic;