forked from lijiext/lammps
Add Kokkos versions of pair_style snap and zbl
This commit is contained in:
parent
1bd9e175e9
commit
275c08453f
|
@ -619,7 +619,7 @@ USER-INTEL, k = KOKKOS, o = USER-OMP, t = OPT.
|
|||
"pour"_fix_pour.html,
|
||||
"press/berendsen"_fix_press_berendsen.html,
|
||||
"print"_fix_print.html,
|
||||
"property/atom"_fix_property_atom.html,
|
||||
"property/atom (k)"_fix_property_atom.html,
|
||||
"python/invoke"_fix_python_invoke.html,
|
||||
"python/move"_fix_python_move.html,
|
||||
"qeq/comb (o)"_fix_qeq_comb.html,
|
||||
|
@ -669,7 +669,7 @@ USER-INTEL, k = KOKKOS, o = USER-OMP, t = OPT.
|
|||
"wall/harmonic"_fix_wall.html,
|
||||
"wall/lj1043"_fix_wall.html,
|
||||
"wall/lj126"_fix_wall.html,
|
||||
"wall/lj93"_fix_wall.html,
|
||||
"wall/lj93 (k)"_fix_wall.html,
|
||||
"wall/piston"_fix_wall_piston.html,
|
||||
"wall/reflect (k)"_fix_wall_reflect.html,
|
||||
"wall/region"_fix_wall_region.html,
|
||||
|
@ -684,14 +684,14 @@ package"_Section_start.html#start_3.
|
|||
"atc"_fix_atc.html,
|
||||
"ave/correlate/long"_fix_ave_correlate_long.html,
|
||||
"colvars"_fix_colvars.html,
|
||||
"dpd/energy"_fix_dpd_energy.html,
|
||||
"dpd/energy (k)"_fix_dpd_energy.html,
|
||||
"drude"_fix_drude.html,
|
||||
"drude/transform/direct"_fix_drude_transform.html,
|
||||
"drude/transform/reverse"_fix_drude_transform.html,
|
||||
"edpd/source"_fix_dpd_source.html,
|
||||
"eos/cv"_fix_eos_cv.html,
|
||||
"eos/table"_fix_eos_table.html,
|
||||
"eos/table/rx"_fix_eos_table_rx.html,
|
||||
"eos/table/rx (k)"_fix_eos_table_rx.html,
|
||||
"filter/corotate"_fix_filter_corotate.html,
|
||||
"flow/gauss"_fix_flow_gauss.html,
|
||||
"gle"_fix_gle.html,
|
||||
|
@ -729,12 +729,12 @@ package"_Section_start.html#start_3.
|
|||
"qeq/reax (ko)"_fix_qeq_reax.html,
|
||||
"qmmm"_fix_qmmm.html,
|
||||
"qtb"_fix_qtb.html,
|
||||
"reax/c/bonds"_fix_reax_bonds.html,
|
||||
"reax/c/species"_fix_reaxc_species.html,
|
||||
"reax/c/bonds (k)"_fix_reax_bonds.html,
|
||||
"reax/c/species (k)"_fix_reaxc_species.html,
|
||||
"rhok"_fix_rhok.html,
|
||||
"rx"_fix_rx.html,
|
||||
"rx (k)"_fix_rx.html,
|
||||
"saed/vtk"_fix_saed_vtk.html,
|
||||
"shardlow"_fix_shardlow.html,
|
||||
"shardlow (k)"_fix_shardlow.html,
|
||||
"smd"_fix_smd.html,
|
||||
"smd/adjust/dt"_fix_smd_adjust_dt.html,
|
||||
"smd/integrate/tlsph"_fix_smd_integrate_tlsph.html,
|
||||
|
@ -907,7 +907,7 @@ KOKKOS, o = USER-OMP, t = OPT.
|
|||
"none"_pair_none.html,
|
||||
"zero"_pair_zero.html,
|
||||
"hybrid"_pair_hybrid.html,
|
||||
"hybrid/overlay"_pair_hybrid.html,
|
||||
"hybrid/overlay (k)"_pair_hybrid.html,
|
||||
"adp (o)"_pair_adp.html,
|
||||
"airebo (oi)"_pair_airebo.html,
|
||||
"airebo/morse (oi)"_pair_airebo.html,
|
||||
|
@ -1026,7 +1026,7 @@ KOKKOS, o = USER-OMP, t = OPT.
|
|||
"vashishta/table (o)"_pair_vashishta.html,
|
||||
"yukawa (gok)"_pair_yukawa.html,
|
||||
"yukawa/colloid (go)"_pair_yukawa_colloid.html,
|
||||
"zbl (go)"_pair_zbl.html :tb(c=4,ea=c)
|
||||
"zbl (gok)"_pair_zbl.html :tb(c=4,ea=c)
|
||||
|
||||
These are additional pair styles in USER packages, which can be used
|
||||
if "LAMMPS is built with the appropriate
|
||||
|
@ -1039,13 +1039,13 @@ package"_Section_start.html#start_3.
|
|||
"coul/diel (o)"_pair_coul_diel.html,
|
||||
"coul/long/soft (o)"_pair_lj_soft.html,
|
||||
"dpd/fdt"_pair_dpd_fdt.html,
|
||||
"dpd/fdt/energy"_pair_dpd_fdt.html,
|
||||
"dpd/fdt/energy (k)"_pair_dpd_fdt.html,
|
||||
"eam/cd (o)"_pair_eam.html,
|
||||
"edip (o)"_pair_edip.html,
|
||||
"edip/multi"_pair_edip.html,
|
||||
"edpd"_pair_meso.html,
|
||||
"eff/cut"_pair_eff.html,
|
||||
"exp6/rx"_pair_exp6_rx.html,
|
||||
"exp6/rx (k)"_pair_exp6_rx.html,
|
||||
"extep"_pair_extep.html,
|
||||
"gauss/cut"_pair_gauss.html,
|
||||
"kolmogorov/crespi/z"_pair_kolmogorov_crespi_z.html,
|
||||
|
@ -1072,7 +1072,7 @@ package"_Section_start.html#start_3.
|
|||
"morse/smooth/linear"_pair_morse.html,
|
||||
"morse/soft"_pair_morse.html,
|
||||
"multi/lucy"_pair_multi_lucy.html,
|
||||
"multi/lucy/rx"_pair_multi_lucy_rx.html,
|
||||
"multi/lucy/rx (k)"_pair_multi_lucy_rx.html,
|
||||
"oxdna/coaxstk"_pair_oxdna.html,
|
||||
"oxdna/excv"_pair_oxdna.html,
|
||||
"oxdna/hbond"_pair_oxdna.html,
|
||||
|
@ -1089,6 +1089,7 @@ package"_Section_start.html#start_3.
|
|||
"smd/triangulated/surface"_pair_smd_triangulated_surface.html,
|
||||
"smd/ulsph"_pair_smd_ulsph.html,
|
||||
"smtbq"_pair_smtbq.html,
|
||||
"snap (k)"_pair_sap.html,
|
||||
"sph/heatconduction"_pair_sph_heatconduction.html,
|
||||
"sph/idealgas"_pair_sph_idealgas.html,
|
||||
"sph/lj"_pair_sph_lj.html,
|
||||
|
@ -1096,7 +1097,7 @@ package"_Section_start.html#start_3.
|
|||
"sph/taitwater"_pair_sph_taitwater.html,
|
||||
"sph/taitwater/morris"_pair_sph_taitwater_morris.html,
|
||||
"srp"_pair_srp.html,
|
||||
"table/rx"_pair_table_rx.html,
|
||||
"table/rx (k)"_pair_table_rx.html,
|
||||
"tdpd"_pair_meso.html,
|
||||
"tersoff/table (o)"_pair_tersoff.html,
|
||||
"thole"_pair_thole.html,
|
||||
|
@ -1251,7 +1252,7 @@ USER-OMP, t = OPT.
|
|||
"ewald/disp"_kspace_style.html,
|
||||
"msm (o)"_kspace_style.html,
|
||||
"msm/cg (o)"_kspace_style.html,
|
||||
"pppm (go)"_kspace_style.html,
|
||||
"pppm (gok)"_kspace_style.html,
|
||||
"pppm/cg (o)"_kspace_style.html,
|
||||
"pppm/disp (i)"_kspace_style.html,
|
||||
"pppm/disp/tip4p"_kspace_style.html,
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
:line
|
||||
|
||||
pair_style snap command :h3
|
||||
pair_style snap/kk command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
||||
|
@ -171,6 +172,29 @@ This pair style can only be used via the {pair} keyword of the
|
|||
|
||||
:line
|
||||
|
||||
Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
|
||||
functionally the same as the corresponding style without the suffix.
|
||||
They have been optimized to run faster, depending on your available
|
||||
hardware, as discussed in "Section 5"_Section_accelerate.html
|
||||
of the manual. The accelerated styles take the same arguments and
|
||||
should produce the same results, except for round-off and precision
|
||||
issues.
|
||||
|
||||
These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
|
||||
USER-OMP and OPT packages, respectively. They are only enabled if
|
||||
LAMMPS was built with those packages. See the "Making
|
||||
LAMMPS"_Section_start.html#start_3 section for more info.
|
||||
|
||||
You can specify the accelerated styles explicitly in your input script
|
||||
by including their suffix, or you can use the "-suffix command-line
|
||||
switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can
|
||||
use the "suffix"_suffix.html command in your input script.
|
||||
|
||||
See "Section 5"_Section_accelerate.html of the manual for
|
||||
more instructions on how to use the accelerated styles effectively.
|
||||
|
||||
:line
|
||||
|
||||
[Restrictions:]
|
||||
|
||||
This style is part of the SNAP package. It is only enabled if
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
|
||||
pair_style zbl command :h3
|
||||
pair_style zbl/gpu command :h3
|
||||
pair_style zbl/kk command :h3
|
||||
pair_style zbl/omp command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
/*_intel.h
|
||||
/*_intel.cpp
|
||||
/*_kokkos.h
|
||||
/*_kokkos_impl.h
|
||||
/*_kokkos.cpp
|
||||
/*_omp.h
|
||||
/*_omp.cpp
|
||||
|
|
|
@ -109,6 +109,10 @@ if (test $1 = "RIGID") then
|
|||
depend USER-OMP
|
||||
fi
|
||||
|
||||
if (test $1 = "SNAP") then
|
||||
depend KOKKOS
|
||||
fi
|
||||
|
||||
if (test $1 = "USER-CGSDK") then
|
||||
depend GPU
|
||||
depend KOKKOS
|
||||
|
|
|
@ -228,6 +228,9 @@ action pair_multi_lucy_rx_kokkos.cpp pair_multi_lucy_rx.cpp
|
|||
action pair_multi_lucy_rx_kokkos.h pair_multi_lucy_rx.h
|
||||
action pair_reaxc_kokkos.cpp pair_reaxc.cpp
|
||||
action pair_reaxc_kokkos.h pair_reaxc.h
|
||||
action pair_snap_kokkos.cpp pair_snap.cpp
|
||||
action pair_snap_kokkos.h pair_snap.h
|
||||
action pair_snap_kokkos_impl.h pair_snap.cpp
|
||||
action pair_sw_kokkos.cpp pair_sw.cpp
|
||||
action pair_sw_kokkos.h pair_sw.h
|
||||
action pair_vashishta_kokkos.cpp pair_vashishta.cpp
|
||||
|
@ -244,12 +247,16 @@ action pair_tersoff_zbl_kokkos.cpp pair_tersoff_zbl.cpp
|
|||
action pair_tersoff_zbl_kokkos.h pair_tersoff_zbl.h
|
||||
action pair_yukawa_kokkos.cpp
|
||||
action pair_yukawa_kokkos.h
|
||||
action pair_zbl_kokkos.cpp
|
||||
action pair_zbl_kokkos.h
|
||||
action pppm_kokkos.cpp pppm.cpp
|
||||
action pppm_kokkos.h pppm.h
|
||||
action rand_pool_wrap_kokkos.cpp
|
||||
action rand_pool_wrap_kokkos.h
|
||||
action region_block_kokkos.cpp
|
||||
action region_block_kokkos.h
|
||||
action sna_kokkos.h sna.h
|
||||
action sna_kokkos_impl.h sna.cpp
|
||||
action verlet_kokkos.cpp
|
||||
action verlet_kokkos.h
|
||||
|
||||
|
|
|
@ -106,6 +106,71 @@ typedef double FFT_SCALAR;
|
|||
}
|
||||
};
|
||||
|
||||
// Three-component value of type Scalar (members x, y, z) together with the
// assignment / accumulation operators, including volatile-qualified
// overloads, and the free arithmetic operators defined below.
template<class Scalar>
struct t_scalar3 {
  Scalar x,y,z;

  // Default construction zeroes all three components.
  KOKKOS_FORCEINLINE_FUNCTION
  t_scalar3() {
    x = 0; y = 0; z = 0;
  }

  // Component-wise copy.
  KOKKOS_FORCEINLINE_FUNCTION
  t_scalar3(const t_scalar3& rhs) {
    x = rhs.x; y = rhs.y; z = rhs.z;
  }

  // Construct from three explicit components.
  KOKKOS_FORCEINLINE_FUNCTION
  t_scalar3(const Scalar& x_, const Scalar& y_, const Scalar& z_ ) {
    x = x_; y = y_; z = z_;
  }

  // Component-wise assignment; returns a copy of the updated value.
  KOKKOS_FORCEINLINE_FUNCTION
  t_scalar3 operator= (const t_scalar3& rhs) {
    x = rhs.x; y = rhs.y; z = rhs.z;
    return *this;
  }

  // Same as above, but reading from a volatile source.
  KOKKOS_FORCEINLINE_FUNCTION
  t_scalar3 operator= (const volatile t_scalar3& rhs) {
    x = rhs.x; y = rhs.y; z = rhs.z;
    return *this;
  }

  // Component-wise accumulation; returns a copy of the updated value.
  KOKKOS_FORCEINLINE_FUNCTION
  t_scalar3 operator+= (const t_scalar3& rhs) {
    x += rhs.x; y += rhs.y; z += rhs.z;
    return *this;
  }

  // Component-wise accumulation on a volatile object from a volatile source.
  KOKKOS_FORCEINLINE_FUNCTION
  t_scalar3 operator+= (const volatile t_scalar3& rhs) volatile {
    x += rhs.x; y += rhs.y; z += rhs.z;
    // *this is volatile in this overload and no constructor accepts a
    // volatile source, so `return *this;` cannot compile once this member
    // is instantiated.  Build the returned copy component by component.
    t_scalar3 result;
    result.x = x; result.y = y; result.z = z;
    return result;
  }
};

// Component-wise sum of two t_scalar3 values.
template<class Scalar>
KOKKOS_FORCEINLINE_FUNCTION
t_scalar3<Scalar> operator +
  (const t_scalar3<Scalar>& a, const t_scalar3<Scalar>& b) {
  return t_scalar3<Scalar>(a.x+b.x,a.y+b.y,a.z+b.z);
}

// Scale every component of a by the scalar b (vector * scalar).
template<class Scalar>
KOKKOS_FORCEINLINE_FUNCTION
t_scalar3<Scalar> operator *
  (const t_scalar3<Scalar>& a, const Scalar& b) {
  return t_scalar3<Scalar>(a.x*b,a.y*b,a.z*b);
}

// Scale every component of a by the scalar b (scalar * vector).
template<class Scalar>
KOKKOS_FORCEINLINE_FUNCTION
t_scalar3<Scalar> operator *
  (const Scalar& b, const t_scalar3<Scalar>& a) {
  return t_scalar3<Scalar>(a.x*b,a.y*b,a.z*b);
}
|
||||
|
||||
#if !defined(__CUDACC__) && !defined(__VECTOR_TYPES_H__)
|
||||
struct double2 {
|
||||
double x, y;
|
||||
|
@ -324,6 +389,8 @@ typedef double2 K_FLOAT2;
|
|||
typedef double4 K_FLOAT4;
|
||||
#endif
|
||||
|
||||
typedef int T_INT;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
// LAMMPS types
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "pair_snap_kokkos.h"
|
||||
#include "pair_snap_kokkos_impl.h"
|
||||
|
||||
// Explicit instantiations of PairSNAPKokkos.  The device-type instantiation
// is always emitted; the host-type one only when KOKKOS_HAVE_CUDA is defined.
// NOTE(review): presumably LMPHostType and LMPDeviceType are the same type
// in non-CUDA builds, which would make a second instantiation a duplicate --
// confirm against kokkos_type.h.
namespace LAMMPS_NS {
|
||||
template class PairSNAPKokkos<LMPDeviceType>;
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template class PairSNAPKokkos<LMPHostType>;
|
||||
#endif
|
||||
}
|
||||
|
|
@ -0,0 +1,138 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(snap/kk,PairSNAPKokkos<LMPDeviceType>)
|
||||
PairStyle(snap/kk/device,PairSNAPKokkos<LMPDeviceType>)
|
||||
PairStyle(snap/kk/host,PairSNAPKokkos<LMPHostType>)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_SNAP_KOKKOS_H
|
||||
#define LMP_PAIR_SNAP_KOKKOS_H
|
||||
|
||||
#include "pair_snap.h"
|
||||
#include "kokkos_type.h"
|
||||
#include "neigh_list_kokkos.h"
|
||||
#include "sna_kokkos.h"
|
||||
#include "pair_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// Empty tag type parameterised on NEIGHFLAG and EVFLAG.  It is the first
// argument of PairSNAPKokkos::operator() and the work tag of the
// Kokkos::TeamPolicy used to launch it, selecting which operator() overload
// instantiation runs.
template<int NEIGHFLAG, int EVFLAG>
|
||||
struct TagPairSNAP{};
|
||||
|
||||
template<class DeviceType>
|
||||
class PairSNAPKokkos : public PairSNAP {
|
||||
public:
|
||||
enum {EnabledNeighFlags=FULL|HALF|HALFTHREAD};
|
||||
enum {COUL_FLAG=0};
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
typedef EV_FLOAT value_type;
|
||||
|
||||
PairSNAPKokkos(class LAMMPS *);
|
||||
~PairSNAPKokkos();
|
||||
|
||||
void coeff(int, char**);
|
||||
void init_style();
|
||||
void compute(int, int);
|
||||
double memory_usage();
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAP<NEIGHFLAG,EVFLAG> >::member_type& team) const;
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAP<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT&) const;
|
||||
|
||||
template<int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void v_tally_xyz(EV_FLOAT &ev, const int &i, const int &j,
|
||||
const F_FLOAT &fx, const F_FLOAT &fy, const F_FLOAT &fz,
|
||||
const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const;
|
||||
|
||||
protected:
|
||||
typename AT::t_neighbors_2d d_neighbors;
|
||||
typename AT::t_int_1d_randomread d_ilist;
|
||||
typename AT::t_int_1d_randomread d_numneigh;
|
||||
|
||||
DAT::tdual_efloat_1d k_eatom;
|
||||
DAT::tdual_virial_array k_vatom;
|
||||
typename AT::t_efloat_1d d_eatom;
|
||||
typename AT::t_virial_array d_vatom;
|
||||
|
||||
typedef Kokkos::View<F_FLOAT**> t_bvec;
|
||||
t_bvec bvec;
|
||||
typedef Kokkos::View<F_FLOAT***> t_dbvec;
|
||||
t_dbvec dbvec;
|
||||
SNAKokkos<DeviceType> snaKK;
|
||||
|
||||
// How much parallelism to use within an interaction
|
||||
int vector_length;
|
||||
|
||||
int eflag,vflag,nlocal;
|
||||
|
||||
void allocate();
|
||||
//void read_files(char *, char *);
|
||||
/*template<class DeviceType>
|
||||
inline int equal(double* x,double* y);
|
||||
template<class DeviceType>
|
||||
inline double dist2(double* x,double* y);
|
||||
double extra_cutoff();
|
||||
void load_balance();
|
||||
void set_sna_to_shared(int snaid,int i);
|
||||
void build_per_atom_arrays();*/
|
||||
|
||||
int neighflag;
|
||||
|
||||
Kokkos::View<T_INT*, DeviceType> ilistmast;
|
||||
Kokkos::View<T_INT*, DeviceType> ghostilist;
|
||||
Kokkos::View<T_INT*, DeviceType> ghostnumneigh;
|
||||
Kokkos::View<T_INT*, DeviceType> ghostneighs;
|
||||
Kokkos::View<T_INT*, DeviceType> ghostfirstneigh;
|
||||
|
||||
Kokkos::View<T_INT**, Kokkos::LayoutRight, DeviceType> i_pairs;
|
||||
Kokkos::View<T_INT***, Kokkos::LayoutRight, DeviceType> i_rij;
|
||||
Kokkos::View<T_INT**, Kokkos::LayoutRight, DeviceType> i_inside;
|
||||
Kokkos::View<F_FLOAT**, Kokkos::LayoutRight, DeviceType> i_wj;
|
||||
Kokkos::View<F_FLOAT***, Kokkos::LayoutRight, DeviceType>i_rcutij;
|
||||
Kokkos::View<T_INT*, DeviceType> i_ninside;
|
||||
Kokkos::View<F_FLOAT****, Kokkos::LayoutRight, DeviceType> i_uarraytot_r, i_uarraytot_i;
|
||||
Kokkos::View<F_FLOAT******, Kokkos::LayoutRight, DeviceType> i_zarray_r, i_zarray_i;
|
||||
|
||||
Kokkos::View<F_FLOAT*, DeviceType> d_radelem; // element radii
|
||||
Kokkos::View<F_FLOAT*, DeviceType> d_wjelem; // elements weights
|
||||
Kokkos::View<F_FLOAT**, Kokkos::LayoutRight, DeviceType> d_coeffelem; // element bispectrum coefficients
|
||||
Kokkos::View<T_INT*, DeviceType> d_map; // mapping from atom types to elements
|
||||
|
||||
typedef Kokkos::View<F_FLOAT**, DeviceType> t_fparams;
|
||||
t_fparams d_cutsq;
|
||||
typedef Kokkos::View<const F_FLOAT**, DeviceType,
|
||||
Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_fparams_rnd;
|
||||
t_fparams_rnd rnd_cutsq;
|
||||
|
||||
typename AT::t_x_array_randomread x;
|
||||
typename AT::t_f_array f;
|
||||
typename AT::t_int_1d_randomread type;
|
||||
|
||||
friend void pair_virial_fdotr_compute<PairSNAPKokkos>(PairSNAPKokkos*);
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,634 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Christian Trott (SNL), Stan Moore (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "pair_snap_kokkos.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "error.h"
|
||||
#include "force.h"
|
||||
#include "atom_masks.h"
|
||||
#include "memory_kokkos.h"
|
||||
#include "neigh_request.h"
|
||||
#include "neighbor_kokkos.h"
|
||||
#include "kokkos.h"
|
||||
#include "sna.h"
|
||||
|
||||
#define MAXLINE 1024
|
||||
#define MAXWORD 3
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
// Outstanding issues with quadratic term
|
||||
// 1. there seems to be a problem with compute_optimized energy calc
|
||||
// it does not match compute_regular, even when quadratic coeffs = 0
|
||||
|
||||
//static double t1 = 0.0;
|
||||
//static double t2 = 0.0;
|
||||
//static double t3 = 0.0;
|
||||
//static double t4 = 0.0;
|
||||
//static double t5 = 0.0;
|
||||
//static double t6 = 0.0;
|
||||
//static double t7 = 0.0;
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
PairSNAPKokkos<DeviceType>::PairSNAPKokkos(LAMMPS *lmp) : PairSNAP(lmp)
|
||||
{
|
||||
respa_enable = 0;
|
||||
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
datamask_read = EMPTY_MASK;
|
||||
datamask_modify = EMPTY_MASK;
|
||||
|
||||
vector_length = 8;
|
||||
d_cutsq = t_fparams("PairSNAPKokkos::cutsq",atom->ntypes+1,atom->ntypes+1);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Destructor.  It performs no explicit cleanup of its own; the copymode
// early return below is left commented out.
template<class DeviceType>
|
||||
PairSNAPKokkos<DeviceType>::~PairSNAPKokkos()
|
||||
{
|
||||
//if (copymode) return;
|
||||
}
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init specific to this pair style
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairSNAPKokkos<DeviceType>::init_style()
|
||||
{
|
||||
if (force->newton_pair == 0)
|
||||
error->all(FLERR,"Pair style SNAP requires newton pair on");
|
||||
|
||||
if (diagonalstyle != 3)
|
||||
error->all(FLERR,"Must use diagonal style = 3 with pair snap/kk");
|
||||
|
||||
// irequest = neigh request made by parent class
|
||||
|
||||
neighflag = lmp->kokkos->neighflag;
|
||||
int irequest = neighbor->request(this,instance_me);
|
||||
|
||||
neighbor->requests[irequest]->
|
||||
kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value &&
|
||||
!Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
|
||||
neighbor->requests[irequest]->
|
||||
kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
|
||||
|
||||
if (neighflag == HALF || neighflag == HALFTHREAD) { // still need atomics, even though using a full neigh list
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Must use half neighbor list style with pair snap/kk");
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
struct FindMaxNumNeighs {
|
||||
typedef DeviceType device_type;
|
||||
NeighListKokkos<DeviceType> k_list;
|
||||
|
||||
FindMaxNumNeighs(NeighListKokkos<DeviceType>* nl): k_list(*nl) {}
|
||||
~FindMaxNumNeighs() {k_list.copymode = 1;}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int& ii, int& max_neighs) const {
|
||||
const int i = k_list.d_ilist[ii];
|
||||
const int num_neighs = k_list.d_numneigh[i];
|
||||
if (max_neighs<num_neighs) max_neighs = num_neighs;
|
||||
}
|
||||
};
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
This version is a straightforward implementation
|
||||
---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
{
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1; // FIX ME??
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag,0);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
|
||||
// reallocate per-atom arrays if necessary
|
||||
|
||||
if (eflag_atom) {
|
||||
memoryKK->destroy_kokkos(k_eatom,eatom);
|
||||
memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
|
||||
d_eatom = k_eatom.view<DeviceType>();
|
||||
}
|
||||
if (vflag_atom) {
|
||||
memoryKK->destroy_kokkos(k_vatom,vatom);
|
||||
memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom");
|
||||
d_vatom = k_vatom.view<DeviceType>();
|
||||
}
|
||||
|
||||
copymode = 1;
|
||||
int newton_pair = force->newton_pair;
|
||||
if (newton_pair == false)
|
||||
error->all(FLERR,"PairSNAPKokkos requires 'newton on'");
|
||||
|
||||
atomKK->sync(execution_space,X_MASK|F_MASK|TYPE_MASK);
|
||||
x = atomKK->k_x.view<DeviceType>();
|
||||
f = atomKK->k_f.view<DeviceType>();
|
||||
type = atomKK->k_type.view<DeviceType>();
|
||||
nlocal = atom->nlocal;
|
||||
|
||||
NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
|
||||
d_numneigh = k_list->d_numneigh;
|
||||
d_neighbors = k_list->d_neighbors;
|
||||
d_ilist = k_list->d_ilist;
|
||||
//int inum = list->inum;
|
||||
|
||||
/*
|
||||
for (int i = 0; i < nlocal; i++) {
|
||||
typename t_neigh_list::t_neighs neighs_i = neigh_list.get_neighs(i);
|
||||
const int num_neighs = neighs_i.get_num_neighs();
|
||||
if (max_neighs<num_neighs) max_neighs = num_neighs;
|
||||
}*/
|
||||
int max_neighs = 0;
|
||||
Kokkos::parallel_reduce("PairSNAPKokkos::find_max_neighs",nlocal, FindMaxNumNeighs<DeviceType>(k_list), Kokkos::Experimental::Max<int>(max_neighs));
|
||||
|
||||
snaKK.nmax = max_neighs;
|
||||
|
||||
T_INT team_scratch_size = snaKK.size_team_scratch_arrays();
|
||||
T_INT thread_scratch_size = snaKK.size_thread_scratch_arrays();
|
||||
|
||||
//printf("Sizes: %i %i\n",team_scratch_size/1024,thread_scratch_size/1024);
|
||||
int team_size_max = Kokkos::TeamPolicy<DeviceType>::team_size_max(*this);
|
||||
int vector_length = 8;
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
int team_size = 20;//max_neighs;
|
||||
if (team_size*vector_length > team_size_max)
|
||||
team_size = team_size_max/vector_length;
|
||||
#else
|
||||
int team_size = 1;
|
||||
#endif
|
||||
|
||||
EV_FLOAT ev;
|
||||
|
||||
if (eflag) {
|
||||
if (neighflag == HALF) {
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAP<HALF,1> > policy(nlocal,team_size,vector_length);
|
||||
Kokkos::parallel_reduce(policy
|
||||
.set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
|
||||
.set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
|
||||
,*this,ev);
|
||||
} else if (neighflag == HALFTHREAD) {
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAP<HALFTHREAD,1> > policy(nlocal,team_size,vector_length);
|
||||
Kokkos::parallel_reduce(policy
|
||||
.set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
|
||||
.set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
|
||||
,*this,ev);
|
||||
}
|
||||
} else {
|
||||
if (neighflag == HALF) {
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAP<HALF,0> > policy(nlocal,team_size,vector_length);
|
||||
Kokkos::parallel_for(policy
|
||||
.set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
|
||||
.set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
|
||||
,*this);
|
||||
} else if (neighflag == HALFTHREAD) {
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAP<HALFTHREAD,0> > policy(nlocal,team_size,vector_length);
|
||||
Kokkos::parallel_for(policy
|
||||
.set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
|
||||
.set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
|
||||
,*this);
|
||||
}
|
||||
}
|
||||
|
||||
//static int step =0;
|
||||
//step++;
|
||||
//if (step%10==0)
|
||||
// printf(" %e %e %e %e %e (%e %e): %e\n",t1,t2,t3,t4,t5,t6,t7,t1+t2+t3+t4+t5);
|
||||
|
||||
if (eflag_global) eng_vdwl += ev.evdwl;
|
||||
if (vflag_global) {
|
||||
virial[0] += ev.v[0];
|
||||
virial[1] += ev.v[1];
|
||||
virial[2] += ev.v[2];
|
||||
virial[3] += ev.v[3];
|
||||
virial[4] += ev.v[4];
|
||||
virial[5] += ev.v[5];
|
||||
}
|
||||
|
||||
if (vflag_fdotr) pair_virial_fdotr_compute(this);
|
||||
|
||||
if (eflag_atom) {
|
||||
k_eatom.template modify<DeviceType>();
|
||||
k_eatom.template sync<LMPHostType>();
|
||||
}
|
||||
|
||||
if (vflag_atom) {
|
||||
k_vatom.template modify<DeviceType>();
|
||||
k_vatom.template sync<LMPHostType>();
|
||||
}
|
||||
|
||||
atomKK->modified(execution_space,F_MASK);
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
allocate all arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairSNAPKokkos<DeviceType>::allocate()
|
||||
{
|
||||
PairSNAP::allocate();
|
||||
|
||||
int n = atom->ntypes;
|
||||
d_map = Kokkos::View<T_INT*, DeviceType>("PairSNAPKokkos::map",n+1);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
set coeffs for one or more type pairs
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairSNAPKokkos<DeviceType>::coeff(int narg, char **arg)
|
||||
{
|
||||
PairSNAP::coeff(narg,arg);
|
||||
|
||||
// Set up element lists
|
||||
|
||||
d_radelem = Kokkos::View<F_FLOAT*, DeviceType>("pair:radelem",nelements);
|
||||
d_wjelem = Kokkos::View<F_FLOAT*, DeviceType>("pair:wjelem",nelements);
|
||||
d_coeffelem = Kokkos::View<F_FLOAT**, Kokkos::LayoutRight, DeviceType>("pair:coeffelem",nelements,ncoeffall);
|
||||
|
||||
auto h_radelem = Kokkos::create_mirror_view(d_radelem);
|
||||
auto h_wjelem = Kokkos::create_mirror_view(d_wjelem);
|
||||
auto h_coeffelem = Kokkos::create_mirror_view(d_coeffelem);
|
||||
auto h_map = Kokkos::create_mirror_view(d_map);
|
||||
auto h_cutsq = Kokkos::create_mirror_view(d_cutsq);
|
||||
|
||||
for (int ielem = 0; ielem < nelements; ielem++) {
|
||||
h_radelem(ielem) = radelem[ielem];
|
||||
h_wjelem(ielem) = wjelem[ielem];
|
||||
for (int jcoeff = 0; jcoeff < ncoeffall; jcoeff++) {
|
||||
h_coeffelem(ielem,jcoeff) = coeffelem[ielem][jcoeff];
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 1; i <= atom->ntypes; i++) {
|
||||
h_map(i) = map[i];
|
||||
for (int j = 1; j <= atom->ntypes; j++) {
|
||||
double cutone = (radelem[map[i]] +
|
||||
radelem[map[j]])*rcutfac;
|
||||
h_cutsq(i,j) = cutone*cutone;
|
||||
}
|
||||
}
|
||||
|
||||
Kokkos::deep_copy(d_radelem,h_radelem);
|
||||
Kokkos::deep_copy(d_wjelem,h_wjelem);
|
||||
Kokkos::deep_copy(d_coeffelem,h_coeffelem);
|
||||
Kokkos::deep_copy(d_map,h_map);
|
||||
Kokkos::deep_copy(d_cutsq,h_cutsq);
|
||||
rnd_cutsq = d_cutsq;
|
||||
|
||||
// deallocate non-kokkos sna
|
||||
|
||||
if (sna) {
|
||||
for (int tid = 0; tid<nthreads; tid++)
|
||||
delete sna[tid];
|
||||
delete [] sna;
|
||||
sna = NULL;
|
||||
}
|
||||
|
||||
// allocate memory for per OpenMP thread data which
|
||||
// is wrapped into the sna class
|
||||
|
||||
snaKK = SNAKokkos<DeviceType>(rfac0,twojmax,
|
||||
diagonalstyle,use_shared_arrays,
|
||||
rmin0,switchflag,bzeroflag);
|
||||
//if (!use_shared_arrays)
|
||||
snaKK.grow_rij(nmax);
|
||||
snaKK.init();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAP<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT& ev) const {
|
||||
// The f array is atomic for Half/Thread neighbor style
|
||||
Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
|
||||
|
||||
const int ii = team.league_rank();
|
||||
const int i = d_ilist[ii];
|
||||
SNAKokkos<DeviceType> my_sna(snaKK,team);
|
||||
const double x_i = x(i,0);
|
||||
const double y_i = x(i,1);
|
||||
const double z_i = x(i,2);
|
||||
const int type_i = type[i];
|
||||
const int elem_i = d_map[type_i];
|
||||
const double radi = d_radelem[elem_i];
|
||||
|
||||
const int num_neighs = d_numneigh[i];
|
||||
|
||||
// rij[][3] = displacements between atom I and those neighbors
|
||||
// inside = indices of neighbors of I within cutoff
|
||||
// wj = weights for neighbors of I within cutoff
|
||||
// rcutij = cutoffs for neighbors of I within cutoff
|
||||
// note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi
|
||||
|
||||
//Kokkos::Timer timer;
|
||||
int ninside = 0;
|
||||
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,num_neighs),
|
||||
[&] (const int jj, int& count) {
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] (){
|
||||
T_INT j = d_neighbors(i,jj);
|
||||
const F_FLOAT dx = x(j,0) - x_i;
|
||||
const F_FLOAT dy = x(j,1) - y_i;
|
||||
const F_FLOAT dz = x(j,2) - z_i;
|
||||
|
||||
const int type_j = type(j);
|
||||
const F_FLOAT rsq = dx*dx + dy*dy + dz*dz;
|
||||
const int elem_j = d_map[type_j];
|
||||
|
||||
if ( rsq < rnd_cutsq(type_i,type_j) )
|
||||
count++;
|
||||
});
|
||||
},ninside);
|
||||
|
||||
//t1 += timer.seconds(); timer.reset();
|
||||
|
||||
if (team.team_rank() == 0)
|
||||
Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,num_neighs),
|
||||
[&] (const int jj, int& offset, bool final){
|
||||
//for (int jj = 0; jj < num_neighs; jj++) {
|
||||
T_INT j = d_neighbors(i,jj);
|
||||
const F_FLOAT dx = x(j,0) - x_i;
|
||||
const F_FLOAT dy = x(j,1) - y_i;
|
||||
const F_FLOAT dz = x(j,2) - z_i;
|
||||
|
||||
const int type_j = type(j);
|
||||
const F_FLOAT rsq = dx*dx + dy*dy + dz*dz;
|
||||
const int elem_j = d_map[type_j];
|
||||
|
||||
if ( rsq < rnd_cutsq(type_i,type_j) ) {
|
||||
if (final) {
|
||||
my_sna.rij(offset,0) = dx;
|
||||
my_sna.rij(offset,1) = dy;
|
||||
my_sna.rij(offset,2) = dz;
|
||||
my_sna.inside[offset] = j;
|
||||
my_sna.wj[offset] = d_wjelem[elem_j];
|
||||
my_sna.rcutij[offset] = (radi + d_radelem[elem_j])*rcutfac;
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
});
|
||||
|
||||
//t2 += timer.seconds(); timer.reset();
|
||||
|
||||
team.team_barrier();
|
||||
// compute Ui, Zi, and Bi for atom I
|
||||
my_sna.compute_ui(team,ninside);
|
||||
//t3 += timer.seconds(); timer.reset();
|
||||
team.team_barrier();
|
||||
my_sna.compute_zi(team);
|
||||
//t4 += timer.seconds(); timer.reset();
|
||||
team.team_barrier();
|
||||
|
||||
// for neighbors of I within cutoff:
|
||||
// compute dUi/drj and dBi/drj
|
||||
// Fij = dEi/dRj = -dEi/dRi => add to Fi, subtract from Fj
|
||||
|
||||
Kokkos::View<double*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Unmanaged>>
|
||||
d_coeffi(d_coeffelem,elem_i,Kokkos::ALL);
|
||||
|
||||
Kokkos::parallel_for (Kokkos::TeamThreadRange(team,ninside),
|
||||
[&] (const int jj) {
|
||||
//for (int jj = 0; jj < ninside; jj++) {
|
||||
int j = my_sna.inside[jj];
|
||||
|
||||
//Kokkos::Timer timer2;
|
||||
my_sna.compute_duidrj(team,&my_sna.rij(jj,0),
|
||||
my_sna.wj[jj],my_sna.rcutij[jj]);
|
||||
//t6 += timer2.seconds(); timer2.reset();
|
||||
my_sna.compute_dbidrj(team);
|
||||
//t7 += timer2.seconds(); timer2.reset();
|
||||
my_sna.copy_dbi2dbvec(team);
|
||||
if (quadraticflag) {
|
||||
my_sna.compute_bi(team);
|
||||
my_sna.copy_bi2bvec(team);
|
||||
}
|
||||
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] (){
|
||||
F_FLOAT fij[3];
|
||||
|
||||
fij[0] = 0.0;
|
||||
fij[1] = 0.0;
|
||||
fij[2] = 0.0;
|
||||
|
||||
// linear contributions
|
||||
|
||||
for (int k = 1; k <= ncoeff; k++) {
|
||||
double bgb = d_coeffi[k];
|
||||
fij[0] += bgb*my_sna.dbvec(k-1,0);
|
||||
fij[1] += bgb*my_sna.dbvec(k-1,1);
|
||||
fij[2] += bgb*my_sna.dbvec(k-1,2);
|
||||
}
|
||||
|
||||
if (quadraticflag) {
|
||||
|
||||
int k = ncoeff+1;
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
|
||||
double bveci = my_sna.bvec[icoeff];
|
||||
double fack = d_coeffi[k]*bveci;
|
||||
double dbvecix = my_sna.dbvec(icoeff,0);
|
||||
double dbveciy = my_sna.dbvec(icoeff,1);
|
||||
double dbveciz = my_sna.dbvec(icoeff,2);
|
||||
fij[0] += fack*dbvecix;
|
||||
fij[1] += fack*dbveciy;
|
||||
fij[2] += fack*dbveciz;
|
||||
k++;
|
||||
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
|
||||
double facki = d_coeffi[k]*bveci;
|
||||
double fackj = d_coeffi[k]*my_sna.bvec[jcoeff];
|
||||
fij[0] += facki*my_sna.dbvec(jcoeff,0)+fackj*dbvecix;
|
||||
fij[1] += facki*my_sna.dbvec(jcoeff,1)+fackj*dbveciy;
|
||||
fij[2] += facki*my_sna.dbvec(jcoeff,2)+fackj*dbveciz;
|
||||
k++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Hard-coded ZBL potential
|
||||
//const double dx = my_sna.rij(jj,0);
|
||||
//const double dy = my_sna.rij(jj,1);
|
||||
//const double dz = my_sna.rij(jj,2);
|
||||
//const double fdivr = -1.5e6/pow(dx*dx + dy*dy + dz*dz,7.0);
|
||||
//fij[0] += dx*fdivr;
|
||||
//fij[1] += dy*fdivr;
|
||||
//fij[2] += dz*fdivr;
|
||||
|
||||
//OK
|
||||
//printf("%lf %lf %lf %lf %lf %lf %lf %lf %lf SNAP-COMPARE: FIJ\n"
|
||||
// ,x(i,0),x(i,1),x(i,2),x(j,0),x(j,1),x(j,2),fij[0],fij[1],fij[2] );
|
||||
a_f(i,0) += fij[0];
|
||||
a_f(i,1) += fij[1];
|
||||
a_f(i,2) += fij[2];
|
||||
a_f(j,0) -= fij[0];
|
||||
a_f(j,1) -= fij[1];
|
||||
a_f(j,2) -= fij[2];
|
||||
|
||||
// tally per-atom virial contribution
|
||||
|
||||
if (EVFLAG) {
|
||||
if (vflag) {
|
||||
v_tally_xyz<NEIGHFLAG>(ev,i,j,
|
||||
fij[0],fij[1],fij[2],
|
||||
-my_sna.rij(jj,0),-my_sna.rij(jj,1),
|
||||
-my_sna.rij(jj,2));
|
||||
}
|
||||
}
|
||||
|
||||
});
|
||||
});
|
||||
//t5 += timer.seconds(); timer.reset();
|
||||
|
||||
// tally energy contribution
|
||||
|
||||
if (EVFLAG) {
|
||||
if (eflag) {
|
||||
|
||||
if (!quadraticflag) {
|
||||
my_sna.compute_bi(team);
|
||||
my_sna.copy_bi2bvec(team);
|
||||
}
|
||||
|
||||
// E = beta.B + 0.5*B^t.alpha.B
|
||||
// coeff[k] = beta[k-1] or
|
||||
// coeff[k] = alpha_ii or
|
||||
// coeff[k] = alpha_ij = alpha_ji, j != i
|
||||
|
||||
if (team.team_rank() == 0)
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] () {
|
||||
|
||||
// evdwl = energy of atom I, sum over coeffs_k * Bi_k
|
||||
|
||||
double evdwl = d_coeffi[0];
|
||||
|
||||
// linear contributions
|
||||
for (int k = 1; k <= ncoeff; k++)
|
||||
evdwl += d_coeffi[k]*my_sna.bvec[k-1];
|
||||
|
||||
// quadratic contributions
|
||||
|
||||
if (quadraticflag) {
|
||||
int k = ncoeff+1;
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
|
||||
double bveci = my_sna.bvec[icoeff];
|
||||
evdwl += 0.5*d_coeffi[k++]*bveci*bveci;
|
||||
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
|
||||
evdwl += d_coeffi[k++]*bveci*my_sna.bvec[jcoeff];
|
||||
}
|
||||
}
|
||||
}
|
||||
// ev_tally_full(i,2.0*evdwl,0.0,0.0,0.0,0.0,0.0);
|
||||
if (eflag_either) {
|
||||
if (eflag_global) ev.evdwl += evdwl;
|
||||
if (eflag_atom) d_eatom[i] += evdwl;
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAP<NEIGHFLAG,EVFLAG> >::member_type& team) const {
|
||||
EV_FLOAT ev;
|
||||
this->template operator()<NEIGHFLAG,EVFLAG>(TagPairSNAP<NEIGHFLAG,EVFLAG>(), team, ev);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType>::v_tally_xyz(EV_FLOAT &ev, const int &i, const int &j,
|
||||
const F_FLOAT &fx, const F_FLOAT &fy, const F_FLOAT &fz,
|
||||
const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const
|
||||
{
|
||||
// The vatom array is atomic for Half/Thread neighbor style
|
||||
Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>();
|
||||
|
||||
const E_FLOAT v0 = delx*fx;
|
||||
const E_FLOAT v1 = dely*fy;
|
||||
const E_FLOAT v2 = delz*fz;
|
||||
const E_FLOAT v3 = delx*fy;
|
||||
const E_FLOAT v4 = delx*fz;
|
||||
const E_FLOAT v5 = dely*fz;
|
||||
|
||||
if (vflag_global) {
|
||||
ev.v[0] += v0;
|
||||
ev.v[1] += v1;
|
||||
ev.v[2] += v2;
|
||||
ev.v[3] += v3;
|
||||
ev.v[4] += v4;
|
||||
ev.v[5] += v5;
|
||||
}
|
||||
|
||||
if (vflag_atom) {
|
||||
v_vatom(i,0) += 0.5*v0;
|
||||
v_vatom(i,1) += 0.5*v1;
|
||||
v_vatom(i,2) += 0.5*v2;
|
||||
v_vatom(i,3) += 0.5*v3;
|
||||
v_vatom(i,4) += 0.5*v4;
|
||||
v_vatom(i,5) += 0.5*v5;
|
||||
v_vatom(j,0) += 0.5*v0;
|
||||
v_vatom(j,1) += 0.5*v1;
|
||||
v_vatom(j,2) += 0.5*v2;
|
||||
v_vatom(j,3) += 0.5*v3;
|
||||
v_vatom(j,4) += 0.5*v4;
|
||||
v_vatom(j,5) += 0.5*v5;
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
memory usage
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
double PairSNAPKokkos<DeviceType>::memory_usage()
|
||||
{
|
||||
double bytes = Pair::memory_usage();
|
||||
int n = atom->ntypes+1;
|
||||
bytes += n*n*sizeof(int);
|
||||
bytes += n*n*sizeof(double);
|
||||
bytes += 3*nmax*sizeof(double);
|
||||
bytes += nmax*sizeof(int);
|
||||
bytes += (2*ncoeffall)*sizeof(double);
|
||||
bytes += (ncoeff*3)*sizeof(double);
|
||||
//bytes += snaKK.memory_usage(); // FIXME
|
||||
return bytes;
|
||||
}
|
|
@ -0,0 +1,438 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Stan Moore (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "pair_zbl_kokkos.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "update.h"
|
||||
#include "integrate.h"
|
||||
#include "respa.h"
|
||||
#include "math_const.h"
|
||||
#include "memory_kokkos.h"
|
||||
#include "error.h"
|
||||
#include "atom_masks.h"
|
||||
#include "kokkos.h"
|
||||
|
||||
// From J.F. Ziegler, J. P. Biersack and U. Littmark,
|
||||
// "The Stopping and Range of Ions in Matter" volume 1, Pergamon, 1985.
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace MathConst;
|
||||
using namespace PairZBLConstants;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
PairZBLKokkos<DeviceType>::PairZBLKokkos(LAMMPS *lmp) : PairZBL(lmp)
|
||||
{
|
||||
respa_enable = 0;
|
||||
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK;
|
||||
datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
PairZBLKokkos<DeviceType>::~PairZBLKokkos()
|
||||
{
|
||||
if (copymode) return;
|
||||
|
||||
if (allocated) {
|
||||
memoryKK->destroy_kokkos(k_eatom,eatom);
|
||||
memoryKK->destroy_kokkos(k_vatom,vatom);
|
||||
memory->sfree(cutsq);
|
||||
eatom = NULL;
|
||||
vatom = NULL;
|
||||
cutsq = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init specific to this pair style
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairZBLKokkos<DeviceType>::init_style()
|
||||
{
|
||||
PairZBL::init_style();
|
||||
|
||||
// error if rRESPA with inner levels
|
||||
|
||||
if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
|
||||
int respa = 0;
|
||||
if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
|
||||
if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
|
||||
if (respa)
|
||||
error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle");
|
||||
}
|
||||
|
||||
// irequest = neigh request made by parent class
|
||||
|
||||
neighflag = lmp->kokkos->neighflag;
|
||||
int irequest = neighbor->nrequest - 1;
|
||||
|
||||
neighbor->requests[irequest]->
|
||||
kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value &&
|
||||
!Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
|
||||
neighbor->requests[irequest]->
|
||||
kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
|
||||
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/kk");
|
||||
}
|
||||
|
||||
Kokkos::deep_copy(d_cutsq,cut_globalsq);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairZBLKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
{
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag,0);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
|
||||
// reallocate per-atom arrays if necessary
|
||||
|
||||
if (eflag_atom) {
|
||||
memoryKK->destroy_kokkos(k_eatom,eatom);
|
||||
memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
|
||||
d_eatom = k_eatom.view<DeviceType>();
|
||||
}
|
||||
if (vflag_atom) {
|
||||
memoryKK->destroy_kokkos(k_vatom,vatom);
|
||||
memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom");
|
||||
d_vatom = k_vatom.view<DeviceType>();
|
||||
}
|
||||
|
||||
atomKK->sync(execution_space,datamask_read);
|
||||
if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
|
||||
else atomKK->modified(execution_space,F_MASK);
|
||||
|
||||
x = atomKK->k_x.view<DeviceType>();
|
||||
f = atomKK->k_f.view<DeviceType>();
|
||||
type = atomKK->k_type.view<DeviceType>();
|
||||
nlocal = atom->nlocal;
|
||||
nall = atom->nlocal + atom->nghost;
|
||||
newton_pair = force->newton_pair;
|
||||
special_lj[0] = force->special_lj[0];
|
||||
special_lj[1] = force->special_lj[1];
|
||||
special_lj[2] = force->special_lj[2];
|
||||
special_lj[3] = force->special_lj[3];
|
||||
|
||||
k_z.sync<DeviceType>();
|
||||
k_d1a.sync<DeviceType>();
|
||||
k_d2a.sync<DeviceType>();
|
||||
k_d3a.sync<DeviceType>();
|
||||
k_d4a.sync<DeviceType>();
|
||||
k_zze.sync<DeviceType>();
|
||||
k_sw1.sync<DeviceType>();
|
||||
k_sw2.sync<DeviceType>();
|
||||
k_sw3.sync<DeviceType>();
|
||||
k_sw4.sync<DeviceType>();
|
||||
k_sw5.sync<DeviceType>();
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
EV_FLOAT ev = pair_compute<PairZBLKokkos<DeviceType>,void >(this,(NeighListKokkos<DeviceType>*)list);
|
||||
|
||||
if (eflag_global) eng_vdwl += ev.evdwl;
|
||||
if (vflag_global) {
|
||||
virial[0] += ev.v[0];
|
||||
virial[1] += ev.v[1];
|
||||
virial[2] += ev.v[2];
|
||||
virial[3] += ev.v[3];
|
||||
virial[4] += ev.v[4];
|
||||
virial[5] += ev.v[5];
|
||||
}
|
||||
|
||||
if (eflag_atom) {
|
||||
k_eatom.template modify<DeviceType>();
|
||||
k_eatom.template sync<LMPHostType>();
|
||||
}
|
||||
|
||||
if (vflag_atom) {
|
||||
k_vatom.template modify<DeviceType>();
|
||||
k_vatom.template sync<LMPHostType>();
|
||||
}
|
||||
|
||||
if (vflag_fdotr) pair_virial_fdotr_compute(this);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT PairZBLKokkos<DeviceType>::
|
||||
compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
|
||||
(void) i;
|
||||
(void) j;
|
||||
const F_FLOAT r = sqrt(rsq);
|
||||
F_FLOAT fpair = dzbldr(r, itype, jtype);
|
||||
|
||||
if (rsq > cut_innersq) {
|
||||
const F_FLOAT t = r - cut_inner;
|
||||
const F_FLOAT fswitch = t*t *
|
||||
(d_sw1(itype,jtype) + d_sw2(itype,jtype)*t);
|
||||
fpair += fswitch;
|
||||
}
|
||||
|
||||
fpair *= -1.0/r;
|
||||
return fpair;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT PairZBLKokkos<DeviceType>::
|
||||
compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
|
||||
(void) i;
|
||||
(void) j;
|
||||
const F_FLOAT r = sqrt(rsq);
|
||||
F_FLOAT evdwl = e_zbl(r, itype, jtype);
|
||||
evdwl += d_sw5(itype,jtype);
|
||||
if (rsq > cut_innersq) {
|
||||
const F_FLOAT t = r - cut_inner;
|
||||
const F_FLOAT eswitch = t*t*t *
|
||||
(d_sw3(itype,jtype) + d_sw4(itype,jtype)*t);
|
||||
evdwl += eswitch;
|
||||
}
|
||||
return evdwl;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
allocate all arrays
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairZBLKokkos<DeviceType>::allocate()
|
||||
{
|
||||
PairZBL::allocate();
|
||||
|
||||
int n = atom->ntypes;
|
||||
|
||||
k_z = DAT::tdual_ffloat_1d("pair_zbl:z ",n+1);
|
||||
k_d1a = DAT::tdual_ffloat_2d_dl("pair_zbl:d1a",n+1,n+1);
|
||||
k_d2a = DAT::tdual_ffloat_2d_dl("pair_zbl:d2a",n+1,n+1);
|
||||
k_d3a = DAT::tdual_ffloat_2d_dl("pair_zbl:d3a",n+1,n+1);
|
||||
k_d4a = DAT::tdual_ffloat_2d_dl("pair_zbl:d4a",n+1,n+1);
|
||||
k_zze = DAT::tdual_ffloat_2d_dl("pair_zbl:zze",n+1,n+1);
|
||||
k_sw1 = DAT::tdual_ffloat_2d_dl("pair_zbl:sw1",n+1,n+1);
|
||||
k_sw2 = DAT::tdual_ffloat_2d_dl("pair_zbl:sw2",n+1,n+1);
|
||||
k_sw3 = DAT::tdual_ffloat_2d_dl("pair_zbl:sw3",n+1,n+1);
|
||||
k_sw4 = DAT::tdual_ffloat_2d_dl("pair_zbl:sw4",n+1,n+1);
|
||||
k_sw5 = DAT::tdual_ffloat_2d_dl("pair_zbl:sw5",n+1,n+1);
|
||||
|
||||
d_z = k_z.view<DeviceType>();
|
||||
d_d1a = k_d1a.view<DeviceType>();
|
||||
d_d2a = k_d2a.view<DeviceType>();
|
||||
d_d3a = k_d3a.view<DeviceType>();
|
||||
d_d4a = k_d4a.view<DeviceType>();
|
||||
d_zze = k_zze.view<DeviceType>();
|
||||
d_sw1 = k_sw1.view<DeviceType>();
|
||||
d_sw2 = k_sw2.view<DeviceType>();
|
||||
d_sw3 = k_sw3.view<DeviceType>();
|
||||
d_sw4 = k_sw4.view<DeviceType>();
|
||||
d_sw5 = k_sw5.view<DeviceType>();
|
||||
|
||||
d_cutsq = typename AT::t_ffloat_2d_dl("pair_zbl:cutsq",n+1,n+1);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init for one type pair i,j and corresponding j,i
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
double PairZBLKokkos<DeviceType>::init_one(int i, int j)
|
||||
{
|
||||
double cutone = PairZBL::init_one(i,j);
|
||||
|
||||
k_z.h_view(i) = z[i];
|
||||
k_d1a.h_view(i,j) = d1a[i][j];
|
||||
k_d2a.h_view(i,j) = d2a[i][j];
|
||||
k_d3a.h_view(i,j) = d3a[i][j];
|
||||
k_d4a.h_view(i,j) = d4a[i][j];
|
||||
k_zze.h_view(i,j) = zze[i][j];
|
||||
k_sw1.h_view(i,j) = sw1[i][j];
|
||||
k_sw2.h_view(i,j) = sw2[i][j];
|
||||
k_sw3.h_view(i,j) = sw3[i][j];
|
||||
k_sw4.h_view(i,j) = sw4[i][j];
|
||||
k_sw5.h_view(i,j) = sw5[i][j];
|
||||
|
||||
k_z.modify<LMPHostType>();
|
||||
k_d1a.modify<LMPHostType>();
|
||||
k_d2a.modify<LMPHostType>();
|
||||
k_d3a.modify<LMPHostType>();
|
||||
k_d4a.modify<LMPHostType>();
|
||||
k_zze.modify<LMPHostType>();
|
||||
k_sw1.modify<LMPHostType>();
|
||||
k_sw2.modify<LMPHostType>();
|
||||
k_sw3.modify<LMPHostType>();
|
||||
k_sw4.modify<LMPHostType>();
|
||||
k_sw5.modify<LMPHostType>();
|
||||
|
||||
if(i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
|
||||
m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone;
|
||||
}
|
||||
|
||||
return cutone;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute ZBL pair energy
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT PairZBLKokkos<DeviceType>::e_zbl(F_FLOAT r, int i, int j) const {
|
||||
|
||||
const F_FLOAT d1aij = d_d1a(i,j);
|
||||
const F_FLOAT d2aij = d_d2a(i,j);
|
||||
const F_FLOAT d3aij = d_d3a(i,j);
|
||||
const F_FLOAT d4aij = d_d4a(i,j);
|
||||
const F_FLOAT zzeij = d_zze(i,j);
|
||||
const F_FLOAT rinv = 1.0/r;
|
||||
|
||||
F_FLOAT sum = c1*exp(-d1aij*r);
|
||||
sum += c2*exp(-d2aij*r);
|
||||
sum += c3*exp(-d3aij*r);
|
||||
sum += c4*exp(-d4aij*r);
|
||||
|
||||
F_FLOAT result = zzeij*sum*rinv;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute ZBL first derivative
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT PairZBLKokkos<DeviceType>::dzbldr(F_FLOAT r, int i, int j) const {
|
||||
|
||||
const F_FLOAT d1aij = d_d1a(i,j);
|
||||
const F_FLOAT d2aij = d_d2a(i,j);
|
||||
const F_FLOAT d3aij = d_d3a(i,j);
|
||||
const F_FLOAT d4aij = d_d4a(i,j);
|
||||
const F_FLOAT zzeij = d_zze(i,j);
|
||||
const F_FLOAT rinv = 1.0/r;
|
||||
|
||||
const F_FLOAT e1 = exp(-d1aij*r);
|
||||
const F_FLOAT e2 = exp(-d2aij*r);
|
||||
const F_FLOAT e3 = exp(-d3aij*r);
|
||||
const F_FLOAT e4 = exp(-d4aij*r);
|
||||
|
||||
F_FLOAT sum = c1*e1;
|
||||
sum += c2*e2;
|
||||
sum += c3*e3;
|
||||
sum += c4*e4;
|
||||
|
||||
F_FLOAT sum_p = -c1*d1aij*e1;
|
||||
sum_p -= c2*d2aij*e2;
|
||||
sum_p -= c3*d3aij*e3;
|
||||
sum_p -= c4*d4aij*e4;
|
||||
|
||||
F_FLOAT result = zzeij*(sum_p - sum*rinv)*rinv;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute ZBL second derivative
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT PairZBLKokkos<DeviceType>::d2zbldr2(F_FLOAT r, int i, int j) const {
|
||||
|
||||
const F_FLOAT d1aij = d_d1a(i,j);
|
||||
const F_FLOAT d2aij = d_d2a(i,j);
|
||||
const F_FLOAT d3aij = d_d3a(i,j);
|
||||
const F_FLOAT d4aij = d_d4a(i,j);
|
||||
const F_FLOAT zzeij = d_zze(i,j);
|
||||
const F_FLOAT rinv = 1.0/r;
|
||||
|
||||
const F_FLOAT e1 = exp(-d1aij*r);
|
||||
const F_FLOAT e2 = exp(-d2aij*r);
|
||||
const F_FLOAT e3 = exp(-d3aij*r);
|
||||
const F_FLOAT e4 = exp(-d4aij*r);
|
||||
|
||||
F_FLOAT sum = c1*e1;
|
||||
sum += c2*e2;
|
||||
sum += c3*e3;
|
||||
sum += c4*e4;
|
||||
|
||||
F_FLOAT sum_p = c1*e1*d1aij;
|
||||
sum_p += c2*e2*d2aij;
|
||||
sum_p += c3*e3*d3aij;
|
||||
sum_p += c4*e4*d4aij;
|
||||
|
||||
F_FLOAT sum_pp = c1*e1*d1aij*d1aij;
|
||||
sum_pp += c2*e2*d2aij*d2aij;
|
||||
sum_pp += c3*e3*d3aij*d3aij;
|
||||
sum_pp += c4*e4*d4aij*d4aij;
|
||||
|
||||
F_FLOAT result = zzeij*(sum_pp + 2.0*sum_p*rinv +
|
||||
2.0*sum*rinv*rinv)*rinv;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairZBLKokkos<DeviceType>::cleanup_copy() {
|
||||
// WHY needed: this prevents parent copy from deallocating any arrays
|
||||
allocated = 0;
|
||||
cutsq = NULL;
|
||||
eatom = NULL;
|
||||
vatom = NULL;
|
||||
}
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
template class PairZBLKokkos<LMPDeviceType>;
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template class PairZBLKokkos<LMPHostType>;
|
||||
#endif
|
||||
}
|
|
@ -0,0 +1,113 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(zbl/kk,PairZBLKokkos<LMPDeviceType>)
|
||||
PairStyle(zbl/kk/device,PairZBLKokkos<LMPDeviceType>)
|
||||
PairStyle(zbl/kk/host,PairZBLKokkos<LMPHostType>)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_ZBL_KOKKOS_H
|
||||
#define LMP_PAIR_ZBL_KOKKOS_H
|
||||
|
||||
#include "pair_zbl.h"
|
||||
#include "pair_kokkos.h"
|
||||
#include "neigh_list_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<class DeviceType>
|
||||
class PairZBLKokkos : public PairZBL {
|
||||
public:
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF};
|
||||
enum {COUL_FLAG=0};
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
|
||||
PairZBLKokkos(class LAMMPS *);
|
||||
virtual ~PairZBLKokkos();
|
||||
void compute(int, int);
|
||||
void init_style();
|
||||
F_FLOAT init_one(int, int);
|
||||
|
||||
private:
|
||||
DAT::tdual_ffloat_1d k_z;
|
||||
DAT::tdual_ffloat_2d_dl k_d1a,k_d2a,k_d3a,k_d4a,k_zze,k_sw1,k_sw2,k_sw3,k_sw4,k_sw5;
|
||||
|
||||
typename AT::t_ffloat_1d d_z;
|
||||
typename AT::t_ffloat_2d_dl d_d1a,d_d2a,d_d3a,d_d4a,d_zze,d_sw1,d_sw2,d_sw3,d_sw4,d_sw5;
|
||||
|
||||
typename AT::t_x_array_randomread x;
|
||||
typename AT::t_f_array f;
|
||||
typename AT::t_int_1d_randomread type;
|
||||
|
||||
DAT::tdual_efloat_1d k_eatom;
|
||||
DAT::tdual_virial_array k_vatom;
|
||||
typename AT::t_efloat_1d d_eatom;
|
||||
typename AT::t_virial_array d_vatom;
|
||||
|
||||
F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
|
||||
typename AT::t_ffloat_2d_dl d_cutsq;
|
||||
|
||||
int newton_pair;
|
||||
int neighflag;
|
||||
int nlocal,nall,eflag,vflag;
|
||||
double special_lj[4];
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT e_zbl(F_FLOAT, int, int) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT dzbldr(F_FLOAT, int, int) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT d2zbldr2(F_FLOAT, int, int) const;
|
||||
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const;
|
||||
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const;
|
||||
|
||||
template<bool STACKPARAMS, class Specialisation>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
void cleanup_copy();
|
||||
void allocate();
|
||||
|
||||
friend class PairComputeFunctor<PairZBLKokkos,FULL,true>;
|
||||
friend class PairComputeFunctor<PairZBLKokkos,HALF,true>;
|
||||
friend class PairComputeFunctor<PairZBLKokkos,HALFTHREAD,true>;
|
||||
friend class PairComputeFunctor<PairZBLKokkos,FULL,false>;
|
||||
friend class PairComputeFunctor<PairZBLKokkos,HALF,false>;
|
||||
friend class PairComputeFunctor<PairZBLKokkos,HALFTHREAD,false>;
|
||||
friend EV_FLOAT pair_compute_neighlist<PairZBLKokkos,FULL,void>(PairZBLKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairZBLKokkos,HALF,void>(PairZBLKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairZBLKokkos,HALFTHREAD,void>(PairZBLKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute<PairZBLKokkos,void>(PairZBLKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend void pair_virial_fdotr_compute<PairZBLKokkos>(PairZBLKokkos*);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
|
@ -0,0 +1,209 @@
|
|||
/* -*- c++ -*- -------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Christian Trott (SNL), Stan Moore (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef LMP_SNA_KOKKOS_H
|
||||
#define LMP_SNA_KOKKOS_H
|
||||
|
||||
#include <complex>
|
||||
#include <ctime>
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include "kokkos_type.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// One entry of a precomputed index list: the three integer loop indices
// (j1, j2, j) stored together.
struct SNAKK_LOOPINDICES {
  int j1;
  int j2;
  int j;
};
|
||||
|
||||
template<class DeviceType>
|
||||
class SNAKokkos {
|
||||
|
||||
public:
|
||||
typedef Kokkos::View<int*, DeviceType> t_sna_1i;
|
||||
typedef Kokkos::View<double*, DeviceType> t_sna_1d;
|
||||
typedef Kokkos::View<double**, Kokkos::LayoutRight, DeviceType> t_sna_2d;
|
||||
typedef Kokkos::View<double***, Kokkos::LayoutRight, DeviceType> t_sna_3d;
|
||||
typedef Kokkos::View<double***, Kokkos::LayoutRight, DeviceType, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_3d_atomic;
|
||||
typedef Kokkos::View<double***[3], Kokkos::LayoutRight, DeviceType> t_sna_4d;
|
||||
typedef Kokkos::View<double**[3], Kokkos::LayoutRight, DeviceType> t_sna_3d3;
|
||||
typedef Kokkos::View<double*****, Kokkos::LayoutRight, DeviceType> t_sna_5d;
|
||||
|
||||
inline
|
||||
SNAKokkos() {};
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
SNAKokkos(const SNAKokkos<DeviceType>& sna, const typename Kokkos::TeamPolicy<DeviceType>::member_type& team);
|
||||
|
||||
inline
|
||||
SNAKokkos(double, int, int, int, double, int, int);
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
~SNAKokkos();
|
||||
|
||||
inline
|
||||
void build_indexlist(); // SNAKokkos()
|
||||
|
||||
inline
|
||||
void init(); //
|
||||
|
||||
inline
|
||||
T_INT size_team_scratch_arrays();
|
||||
|
||||
inline
|
||||
T_INT size_thread_scratch_arrays();
|
||||
|
||||
int ncoeff;
|
||||
|
||||
// functions for bispectrum coefficients
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_zi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_bi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void copy_bi2bvec(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); //ForceSNAP
|
||||
|
||||
// functions for derivatives
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_duidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, double*, double, double); //ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_dbidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); //ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void copy_dbi2dbvec(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); //ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double compute_sfac(double, double); // add_uarraytot, compute_duarray
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double compute_dsfac(double, double); // compute_duarray
|
||||
|
||||
#ifdef TIMING_INFO
|
||||
double* timers;
|
||||
timespec starttime, endtime;
|
||||
int print;
|
||||
int counter;
|
||||
#endif
|
||||
|
||||
//per sna class instance for OMP use
|
||||
|
||||
|
||||
// Per InFlight Particle
|
||||
t_sna_2d rij;
|
||||
t_sna_1i inside;
|
||||
t_sna_1d wj;
|
||||
t_sna_1d rcutij;
|
||||
int nmax;
|
||||
|
||||
void grow_rij(int);
|
||||
|
||||
int twojmax, diagonalstyle;
|
||||
// Per InFlight Particle
|
||||
t_sna_3d barray;
|
||||
t_sna_3d uarraytot_r, uarraytot_i;
|
||||
t_sna_3d_atomic uarraytot_r_a, uarraytot_i_a;
|
||||
t_sna_5d zarray_r, zarray_i;
|
||||
|
||||
// Per InFlight Interaction
|
||||
t_sna_3d uarray_r, uarray_i;
|
||||
|
||||
Kokkos::View<double*, Kokkos::LayoutRight, DeviceType> bvec;
|
||||
|
||||
// derivatives of data
|
||||
Kokkos::View<double*[3], Kokkos::LayoutRight, DeviceType> dbvec;
|
||||
t_sna_4d duarray_r, duarray_i;
|
||||
t_sna_4d dbarray;
|
||||
|
||||
private:
|
||||
double rmin0, rfac0;
|
||||
|
||||
//use indexlist instead of loops, constructor generates these
|
||||
// Same across all SNAKokkos
|
||||
Kokkos::View<SNAKK_LOOPINDICES*, DeviceType> idxj,idxj_full;
|
||||
int idxj_max,idxj_full_max;
|
||||
// data for bispectrum coefficients
|
||||
|
||||
// Same across all SNAKokkos
|
||||
t_sna_5d cgarray;
|
||||
t_sna_2d rootpqarray;
|
||||
|
||||
|
||||
static const int nmaxfactorial = 167;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double factorial(int);
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void create_team_scratch_arrays(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); // SNAKokkos()
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void create_thread_scratch_arrays(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); // SNAKokkos()
|
||||
|
||||
inline
|
||||
void init_clebsch_gordan(); // init()
|
||||
|
||||
inline
|
||||
void init_rootpqarray(); // init()
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void zero_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); // compute_ui
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void addself_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, double); // compute_ui
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, double, double, double); // compute_ui
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_uarray(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,
|
||||
double, double, double,
|
||||
double, double); // compute_ui
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double deltacg(int, int, int); // init_clebsch_gordan
|
||||
|
||||
inline
|
||||
int compute_ncoeff(); // SNAKokkos()
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_duarray(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,
|
||||
double, double, double, // compute_duidrj
|
||||
double, double, double, double, double);
|
||||
|
||||
// if the number of atoms is small, use per-atom arrays
|
||||
// for twojmax arrays, rij, inside, bvec
|
||||
// this will increase the memory footprint considerably,
|
||||
// but allows parallel filling and reuse of these arrays
|
||||
int use_shared_arrays;
|
||||
|
||||
// Sets the style for the switching function
|
||||
// 0 = none
|
||||
// 1 = cosine
|
||||
int switch_flag;
|
||||
|
||||
// Self-weight
|
||||
double wself;
|
||||
|
||||
int bzero_flag; // 1 if bzero subtracted from barray
|
||||
Kokkos::View<double*, Kokkos::LayoutRight, DeviceType> bzero; // array of B values for isolated atoms
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#include "sna_kokkos_impl.h"
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: Invalid argument to factorial %d
|
||||
|
||||
N must be >= 0 and <= 167, otherwise the factorial result is too
|
||||
large.
|
||||
|
||||
*/
|
File diff suppressed because it is too large
Load Diff
|
@ -107,6 +107,8 @@ PairSNAP::PairSNAP(LAMMPS *lmp) : Pair(lmp)
|
|||
|
||||
PairSNAP::~PairSNAP()
|
||||
{
|
||||
if (copymode) return;
|
||||
|
||||
if (nelements) {
|
||||
for (int i = 0; i < nelements; i++)
|
||||
delete[] elements[i];
|
||||
|
|
|
@ -28,14 +28,14 @@ class PairSNAP : public Pair {
|
|||
public:
|
||||
PairSNAP(class LAMMPS *);
|
||||
~PairSNAP();
|
||||
void compute(int, int);
|
||||
virtual void compute(int, int);
|
||||
void compute_regular(int, int);
|
||||
void compute_optimized(int, int);
|
||||
void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
void init_style();
|
||||
virtual void coeff(int, char **);
|
||||
virtual void init_style();
|
||||
double init_one(int, int);
|
||||
double memory_usage();
|
||||
virtual double memory_usage();
|
||||
|
||||
protected:
|
||||
int ncoeff, ncoeffq, ncoeffall;
|
||||
|
@ -43,7 +43,7 @@ protected:
|
|||
class SNA** sna;
|
||||
int nmax;
|
||||
int nthreads;
|
||||
void allocate();
|
||||
virtual void allocate();
|
||||
void read_files(char *, char *);
|
||||
inline int equal(double* x,double* y);
|
||||
inline double dist2(double* x,double* y);
|
||||
|
|
|
@ -48,6 +48,8 @@ PairZBL::PairZBL(LAMMPS *lmp) : Pair(lmp) {}
|
|||
|
||||
PairZBL::~PairZBL()
|
||||
{
|
||||
if (copymode) return;
|
||||
|
||||
if (allocated) {
|
||||
memory->destroy(setflag);
|
||||
memory->destroy(cutsq);
|
||||
|
|
|
@ -31,8 +31,8 @@ class PairZBL : public Pair {
|
|||
virtual void compute(int, int);
|
||||
void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
void init_style();
|
||||
double init_one(int, int);
|
||||
virtual void init_style();
|
||||
virtual double init_one(int, int);
|
||||
double single(int, int, int, int, double, double, double, double &);
|
||||
|
||||
protected:
|
||||
|
@ -42,7 +42,7 @@ class PairZBL : public Pair {
|
|||
double **d1a,**d2a,**d3a,**d4a,**zze;
|
||||
double **sw1,**sw2,**sw3,**sw4,**sw5;
|
||||
|
||||
void allocate();
|
||||
virtual void allocate();
|
||||
double e_zbl(double, int, int);
|
||||
double dzbldr(double, int, int);
|
||||
double d2zbldr2(double, int, int);
|
||||
|
|
Loading…
Reference in New Issue