Merge pull request #632 from timattox/USER-DPD_kokkos_merge

Add Kokkos version of the USER-DPD package
This commit is contained in:
Steve Plimpton 2017-09-06 08:50:43 -06:00 committed by GitHub
commit 99791ce01c
110 changed files with 18806 additions and 1581 deletions

View File

@ -7,6 +7,7 @@
:line
fix dpd/energy command :h3
fix dpd/energy/kk command :h3
[Syntax:]
@ -46,6 +47,29 @@ examples/USER/dpd directory.
:line
Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
functionally the same as the corresponding style without the suffix.
They have been optimized to run faster, depending on your available
hardware, as discussed in "Section 5"_Section_accelerate.html
of the manual. The accelerated styles take the same arguments and
should produce the same results, except for round-off and precision
issues.
These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
USER-OMP and OPT packages, respectively. They are only enabled if
LAMMPS was built with those packages. See the "Making
LAMMPS"_Section_start.html#start_3 section for more info.
You can specify the accelerated styles explicitly in your input script
by including their suffix, or you can use the "-suffix command-line
switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can
use the "suffix"_suffix.html command in your input script.
See "Section 5"_Section_accelerate.html of the manual for
more instructions on how to use the accelerated styles effectively.
:line
[Restrictions:]
This command is part of the USER-DPD package. It is only enabled if

View File

@ -7,6 +7,7 @@
:line
fix eos/table/rx command :h3
fix eos/table/rx/kk command :h3
[Syntax:]
@ -152,6 +153,29 @@ no 0.93 0.00 0.000 -1.76 :pre
:line
Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
functionally the same as the corresponding style without the suffix.
They have been optimized to run faster, depending on your available
hardware, as discussed in "Section 5"_Section_accelerate.html
of the manual. The accelerated styles take the same arguments and
should produce the same results, except for round-off and precision
issues.
These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
USER-OMP and OPT packages, respectively. They are only enabled if
LAMMPS was built with those packages. See the "Making
LAMMPS"_Section_start.html#start_3 section for more info.
You can specify the accelerated styles explicitly in your input script
by including their suffix, or you can use the "-suffix command-line
switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can
use the "suffix"_suffix.html command in your input script.
See "Section 5"_Section_accelerate.html of the manual for
more instructions on how to use the accelerated styles effectively.
:line
[Restrictions:]
This command is part of the USER-DPD package. It is only enabled if

View File

@ -7,6 +7,7 @@
:line
fix rx command :h3
fix rx/kk command :h3
[Syntax:]
@ -182,6 +183,29 @@ read_data data.dpd fix foo_SPECIES NULL Species
:line
Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
functionally the same as the corresponding style without the suffix.
They have been optimized to run faster, depending on your available
hardware, as discussed in "Section 5"_Section_accelerate.html
of the manual. The accelerated styles take the same arguments and
should produce the same results, except for round-off and precision
issues.
These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
USER-OMP and OPT packages, respectively. They are only enabled if
LAMMPS was built with those packages. See the "Making
LAMMPS"_Section_start.html#start_3 section for more info.
You can specify the accelerated styles explicitly in your input script
by including their suffix, or you can use the "-suffix command-line
switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can
use the "suffix"_suffix.html command in your input script.
See "Section 5"_Section_accelerate.html of the manual for
more instructions on how to use the accelerated styles effectively.
:line
[Restrictions:]
This command is part of the USER-DPD package. It is only enabled if

View File

@ -7,6 +7,7 @@
:line
fix shardlow command :h3
fix shardlow/kk command :h3
[Syntax:]
@ -52,6 +53,29 @@ examples/USER/dpd directory.
:line
Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
functionally the same as the corresponding style without the suffix.
They have been optimized to run faster, depending on your available
hardware, as discussed in "Section 5"_Section_accelerate.html
of the manual. The accelerated styles take the same arguments and
should produce the same results, except for round-off and precision
issues.
These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
USER-OMP and OPT packages, respectively. They are only enabled if
LAMMPS was built with those packages. See the "Making
LAMMPS"_Section_start.html#start_3 section for more info.
You can specify the accelerated styles explicitly in your input script
by including their suffix, or you can use the "-suffix command-line
switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can
use the "suffix"_suffix.html command in your input script.
See "Section 5"_Section_accelerate.html of the manual for
more instructions on how to use the accelerated styles effectively.
:line
[Restrictions:]
This command is part of the USER-DPD package. It is only enabled if

View File

@ -7,6 +7,7 @@
:line
fix wall/lj93 command :h3
fix wall/lj93/kk command :h3
fix wall/lj126 command :h3
fix wall/lj1043 command :h3
fix wall/colloid command :h3
@ -277,6 +278,31 @@ the total potential energy of the system (the quantity being
minimized), you MUST enable the "fix_modify"_fix_modify.html {energy}
option for this fix.
:line
Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
functionally the same as the corresponding style without the suffix.
They have been optimized to run faster, depending on your available
hardware, as discussed in "Section 5"_Section_accelerate.html
of the manual. The accelerated styles take the same arguments and
should produce the same results, except for round-off and precision
issues.
These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
USER-OMP and OPT packages, respectively. They are only enabled if
LAMMPS was built with those packages. See the "Making
LAMMPS"_Section_start.html#start_3 section for more info.
You can specify the accelerated styles explicitly in your input script
by including their suffix, or you can use the "-suffix command-line
switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can
use the "suffix"_suffix.html command in your input script.
See "Section 5"_Section_accelerate.html of the manual for
more instructions on how to use the accelerated styles effectively.
:line
[Restrictions:] none
[Related commands:]

View File

@ -8,6 +8,7 @@
pair_style dpd/fdt command :h3
pair_style dpd/fdt/energy command :h3
pair_style dpd/fdt/energy/kk command :h3
[Syntax:]
@ -125,6 +126,29 @@ significantly larger timesteps to be taken.
:line
Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
functionally the same as the corresponding style without the suffix.
They have been optimized to run faster, depending on your available
hardware, as discussed in "Section 5"_Section_accelerate.html
of the manual. The accelerated styles take the same arguments and
should produce the same results, except for round-off and precision
issues.
These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
USER-OMP and OPT packages, respectively. They are only enabled if
LAMMPS was built with those packages. See the "Making
LAMMPS"_Section_start.html#start_3 section for more info.
You can specify the accelerated styles explicitly in your input script
by including their suffix, or you can use the "-suffix command-line
switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can
use the "suffix"_suffix.html command in your input script.
See "Section 5"_Section_accelerate.html of the manual for
more instructions on how to use the accelerated styles effectively.
:line
[Restrictions:]
These commands are part of the USER-DPD package. They are only

View File

@ -7,6 +7,7 @@
:line
pair_style exp6/rx command :h3
pair_style exp6/rx/kk command :h3
[Syntax:]
@ -147,6 +148,31 @@ This style does not support the pair_modify tail option for adding long-range
tail corrections to energy and pressure for the A,C terms in the
pair interaction.
:line
Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
functionally the same as the corresponding style without the suffix.
They have been optimized to run faster, depending on your available
hardware, as discussed in "Section 5"_Section_accelerate.html
of the manual. The accelerated styles take the same arguments and
should produce the same results, except for round-off and precision
issues.
These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
USER-OMP and OPT packages, respectively. They are only enabled if
LAMMPS was built with those packages. See the "Making
LAMMPS"_Section_start.html#start_3 section for more info.
You can specify the accelerated styles explicitly in your input script
by including their suffix, or you can use the "-suffix command-line
switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can
use the "suffix"_suffix.html command in your input script.
See "Section 5"_Section_accelerate.html of the manual for
more instructions on how to use the accelerated styles effectively.
:line
[Restrictions:]
This command is part of the USER-DPD package. It is only enabled if

View File

@ -10,6 +10,7 @@ pair_style hybrid command :h3
pair_style hybrid/omp command :h3
pair_style hybrid/overlay command :h3
pair_style hybrid/overlay/omp command :h3
pair_style hybrid/overlay/kk command :h3
[Syntax:]

View File

@ -7,6 +7,7 @@
:line
pair_style multi/lucy/rx command :h3
pair_style multi/lucy/rx/kk command :h3
[Syntax:]
@ -200,6 +201,29 @@ This pair style can only be used via the {pair} keyword of the
:line
Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
functionally the same as the corresponding style without the suffix.
They have been optimized to run faster, depending on your available
hardware, as discussed in "Section 5"_Section_accelerate.html
of the manual. The accelerated styles take the same arguments and
should produce the same results, except for round-off and precision
issues.
These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
USER-OMP and OPT packages, respectively. They are only enabled if
LAMMPS was built with those packages. See the "Making
LAMMPS"_Section_start.html#start_3 section for more info.
You can specify the accelerated styles explicitly in your input script
by including their suffix, or you can use the "-suffix command-line
switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can
use the "suffix"_suffix.html command in your input script.
See "Section 5"_Section_accelerate.html of the manual for
more instructions on how to use the accelerated styles effectively.
:line
[Restrictions:]
This command is part of the USER-DPD package. It is only enabled if

View File

@ -7,6 +7,7 @@
:line
pair_style table/rx command :h3
pair_style table/rx/kk command :h3
[Syntax:]
@ -223,6 +224,29 @@ This pair style can only be used via the {pair} keyword of the
:line
Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
functionally the same as the corresponding style without the suffix.
They have been optimized to run faster, depending on your available
hardware, as discussed in "Section 5"_Section_accelerate.html
of the manual. The accelerated styles take the same arguments and
should produce the same results, except for round-off and precision
issues.
These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
USER-OMP and OPT packages, respectively. They are only enabled if
LAMMPS was built with those packages. See the "Making
LAMMPS"_Section_start.html#start_3 section for more info.
You can specify the accelerated styles explicitly in your input script
by including their suffix, or you can use the "-suffix command-line
switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can
use the "suffix"_suffix.html command in your input script.
See "Section 5"_Section_accelerate.html of the manual for
more instructions on how to use the accelerated styles effectively.
:line
[Restrictions:]
This command is part of the USER-DPD package. It is only enabled if

View File

@ -755,6 +755,12 @@ namespace Kokkos {
return Random_XorShift64<DeviceType>(state_(i),i);
}
// Hand out the generator stored in slot state_idx of this pool.
// The slot is chosen by the caller and indexed directly; nothing in this
// overload searches for or reserves a slot.
// NOTE: state_idx MUST be unique and less than num_states
KOKKOS_INLINE_FUNCTION
Random_XorShift64<DeviceType> get_state(const int state_idx) const {
  typedef Random_XorShift64<DeviceType> rng_type;
  // Seed the generator from the stored state and remember which slot it came from.
  return rng_type(state_(state_idx), state_idx);
}
KOKKOS_INLINE_FUNCTION
void free_state(const Random_XorShift64<DeviceType>& state) const {
state_(state.state_idx_) = state.state_;
@ -1010,6 +1016,12 @@ namespace Kokkos {
return Random_XorShift1024<DeviceType>(state_,p_(i),i);
};
// Hand out the generator stored in slot state_idx of this pool.
// The slot is chosen by the caller and indexed directly; nothing in this
// overload searches for or reserves a slot.
// NOTE: state_idx MUST be unique and less than num_states
KOKKOS_INLINE_FUNCTION
Random_XorShift1024<DeviceType> get_state(const int state_idx) const {
  typedef Random_XorShift1024<DeviceType> rng_type;
  // Pass the shared state view, this slot's p_ entry, and the slot index.
  return rng_type(state_, p_(state_idx), state_idx);
}
KOKKOS_INLINE_FUNCTION
void free_state(const Random_XorShift1024<DeviceType>& state) const {
for(int i = 0; i<16; i++)
@ -1208,8 +1220,8 @@ Random_XorShift64<Kokkos::Cuda> Random_XorShift64_Pool<Kokkos::Cuda>::get_state(
template<>
KOKKOS_INLINE_FUNCTION
void Random_XorShift64_Pool<Kokkos::Cuda>::free_state(const Random_XorShift64<Kokkos::Cuda> &state) const {
#ifdef __CUDA_ARCH__
state_(state.state_idx_) = state.state_;
#ifdef __CUDA_ARCH__
locks_(state.state_idx_) = 0;
return;
#endif
@ -1244,9 +1256,9 @@ Random_XorShift1024<Kokkos::Cuda> Random_XorShift1024_Pool<Kokkos::Cuda>::get_st
template<>
KOKKOS_INLINE_FUNCTION
void Random_XorShift1024_Pool<Kokkos::Cuda>::free_state(const Random_XorShift1024<Kokkos::Cuda> &state) const {
#ifdef __CUDA_ARCH__
for(int i=0; i<16; i++)
state_(state.state_idx_,i) = state.state_[i];
#ifdef __CUDA_ARCH__
locks_(state.state_idx_) = 0;
return;
#endif

View File

@ -115,6 +115,10 @@ if (test $1 = "USER-CGSDK") then
depend USER-OMP
fi
# when the package argument is USER-DPD, run the depend helper on KOKKOS
# NOTE(review): presumably needed because KOKKOS now carries *_kokkos
# variants of USER-DPD source files -- confirm against Install.sh
if (test $1 = "USER-DPD") then
depend KOKKOS
fi
if (test $1 = "USER-FEP") then
depend USER-OMP
fi

View File

@ -49,8 +49,12 @@ action atom_vec_bond_kokkos.cpp atom_vec_bond.cpp
action atom_vec_bond_kokkos.h atom_vec_bond.h
action atom_vec_charge_kokkos.cpp
action atom_vec_charge_kokkos.h
action atom_vec_dpd_kokkos.cpp atom_vec_dpd.cpp
action atom_vec_dpd_kokkos.h atom_vec_dpd.h
action atom_vec_full_kokkos.cpp atom_vec_full.cpp
action atom_vec_full_kokkos.h atom_vec_full.h
action atom_vec_hybrid_kokkos.cpp
action atom_vec_hybrid_kokkos.h
action atom_vec_kokkos.cpp
action atom_vec_kokkos.h
action atom_vec_molecular_kokkos.cpp atom_vec_molecular.cpp
@ -77,6 +81,8 @@ action domain_kokkos.cpp
action domain_kokkos.h
action fix_deform_kokkos.cpp
action fix_deform_kokkos.h
action fix_eos_table_rx_kokkos.cpp fix_eos_table_rx.cpp
action fix_eos_table_rx_kokkos.h fix_eos_table_rx.h
action fix_langevin_kokkos.cpp
action fix_langevin_kokkos.h
action fix_nh_kokkos.cpp
@ -89,6 +95,8 @@ action fix_nve_kokkos.cpp
action fix_nve_kokkos.h
action fix_nvt_kokkos.cpp
action fix_nvt_kokkos.h
action fix_property_atom_kokkos.cpp
action fix_property_atom_kokkos.h
action fix_qeq_reax_kokkos.cpp fix_qeq_reax.cpp
action fix_qeq_reax_kokkos.h fix_qeq_reax.h
action fix_reaxc_bonds_kokkos.cpp fix_reaxc_bonds.cpp
@ -97,10 +105,18 @@ action fix_reaxc_species_kokkos.cpp fix_reaxc_species.cpp
action fix_reaxc_species_kokkos.h fix_reaxc_species.h
action fix_setforce_kokkos.cpp
action fix_setforce_kokkos.h
action fix_shardlow_kokkos.cpp fix_shardlow.cpp
action fix_shardlow_kokkos.h fix_shardlow.h
action fix_momentum_kokkos.cpp
action fix_momentum_kokkos.h
action fix_wall_lj93_kokkos.cpp
action fix_wall_lj93_kokkos.h
action fix_wall_reflect_kokkos.cpp
action fix_wall_reflect_kokkos.h
action fix_dpd_energy_kokkos.cpp fix_dpd_energy.cpp
action fix_dpd_energy_kokkos.h fix_dpd_energy.h
action fix_rx_kokkos.cpp fix_rx.cpp
action fix_rx_kokkos.h fix_rx.h
action gridcomm_kokkos.cpp gridcomm.cpp
action gridcomm_kokkos.h gridcomm.h
action improper_class2_kokkos.cpp improper_class2.cpp
@ -124,8 +140,12 @@ action npair_copy_kokkos.cpp
action npair_copy_kokkos.h
action npair_kokkos.cpp
action npair_kokkos.h
action npair_ssa_kokkos.cpp npair_half_bin_newton_ssa.cpp
action npair_ssa_kokkos.h npair_half_bin_newton_ssa.h
action nbin_kokkos.cpp
action nbin_kokkos.h
action nbin_ssa_kokkos.cpp nbin_ssa.cpp
action nbin_ssa_kokkos.h nbin_ssa.h
action math_special_kokkos.cpp
action math_special_kokkos.h
action pair_buck_coul_cut_kokkos.cpp
@ -144,12 +164,20 @@ action pair_coul_long_kokkos.cpp pair_coul_long.cpp
action pair_coul_long_kokkos.h pair_coul_long.h
action pair_coul_wolf_kokkos.cpp
action pair_coul_wolf_kokkos.h
action pair_dpd_fdt_energy_kokkos.cpp pair_dpd_fdt_energy.cpp
action pair_dpd_fdt_energy_kokkos.h pair_dpd_fdt_energy.h
action pair_eam_kokkos.cpp pair_eam.cpp
action pair_eam_kokkos.h pair_eam.h
action pair_eam_alloy_kokkos.cpp pair_eam_alloy.cpp
action pair_eam_alloy_kokkos.h pair_eam_alloy.h
action pair_eam_fs_kokkos.cpp pair_eam_fs.cpp
action pair_eam_fs_kokkos.h pair_eam_fs.h
action pair_exp6_rx_kokkos.cpp pair_exp6_rx.cpp
action pair_exp6_rx_kokkos.h pair_exp6_rx.h
action pair_hybrid_kokkos.cpp
action pair_hybrid_kokkos.h
action pair_hybrid_overlay_kokkos.cpp
action pair_hybrid_overlay_kokkos.h
action pair_kokkos.h
action pair_lj_charmm_coul_charmm_implicit_kokkos.cpp pair_lj_charmm_coul_charmm_implicit.cpp
action pair_lj_charmm_coul_charmm_implicit_kokkos.h pair_lj_charmm_coul_charmm_implicit.h
@ -183,6 +211,8 @@ action pair_lj_sdk_kokkos.cpp pair_lj_sdk.cpp
action pair_lj_sdk_kokkos.h pair_lj_sdk.h
action pair_morse_kokkos.cpp
action pair_morse_kokkos.h
action pair_multi_lucy_rx_kokkos.cpp pair_multi_lucy_rx.cpp
action pair_multi_lucy_rx_kokkos.h pair_multi_lucy_rx.h
action pair_reaxc_kokkos.cpp pair_reaxc.cpp
action pair_reaxc_kokkos.h pair_reaxc.h
action pair_sw_kokkos.cpp pair_sw.cpp
@ -191,6 +221,8 @@ action pair_vashishta_kokkos.cpp pair_vashishta.cpp
action pair_vashishta_kokkos.h pair_vashishta.h
action pair_table_kokkos.cpp
action pair_table_kokkos.h
action pair_table_rx_kokkos.cpp pair_table_rx.cpp
action pair_table_rx_kokkos.h pair_table_rx.h
action pair_tersoff_kokkos.cpp pair_tersoff.cpp
action pair_tersoff_kokkos.h pair_tersoff.h
action pair_tersoff_mod_kokkos.cpp pair_tersoff_mod.cpp
@ -199,6 +231,8 @@ action pair_tersoff_zbl_kokkos.cpp pair_tersoff_zbl.cpp
action pair_tersoff_zbl_kokkos.h pair_tersoff_zbl.h
action pppm_kokkos.cpp pppm.cpp
action pppm_kokkos.h pppm.h
action rand_pool_wrap_kokkos.cpp
action rand_pool_wrap_kokkos.h
action region_block_kokkos.cpp
action region_block_kokkos.h
action verlet_kokkos.cpp

View File

@ -49,6 +49,7 @@ AtomKokkos::~AtomKokkos()
memory->destroy_kokkos(k_radius, radius);
memory->destroy_kokkos(k_rmass, rmass);
memory->destroy_kokkos(k_omega, omega);
memory->destroy_kokkos(k_angmom, angmom);
memory->destroy_kokkos(k_torque, torque);
memory->destroy_kokkos(k_nspecial, nspecial);
@ -73,6 +74,19 @@ AtomKokkos::~AtomKokkos()
memory->destroy_kokkos(k_improper_atom2, improper_atom2);
memory->destroy_kokkos(k_improper_atom3, improper_atom3);
memory->destroy_kokkos(k_improper_atom4, improper_atom4);
// USER-DPD package
memory->destroy_kokkos(k_uCond,uCond);
memory->destroy_kokkos(k_uMech,uMech);
memory->destroy_kokkos(k_uChem,uChem);
memory->destroy_kokkos(k_uCG,uCG);
memory->destroy_kokkos(k_uCGnew,uCGnew);
memory->destroy_kokkos(k_rho,rho);
memory->destroy_kokkos(k_dpdTheta,dpdTheta);
memory->destroy_kokkos(k_duChem,duChem);
memory->destroy_kokkos(k_dvector,dvector);
dvector = NULL;
}
/* ---------------------------------------------------------------------- */
@ -227,6 +241,63 @@ void AtomKokkos::grow(unsigned int mask){
}
}
/* ----------------------------------------------------------------------
   register a new custom per-atom vector called name
   flag = 0 selects an int vector (ivector), any other value a double
     vector (dvector)
   the caller must ensure name is not already in use
   returns the slot of the new vector within ivector or dvector
------------------------------------------------------------------------- */
int AtomKokkos::add_custom(const char *name, int flag)
{
  // storage needed for a private copy of the name, including the terminator
  const int namelen = strlen(name) + 1;

  if (flag != 0) {
    // double vector: take the next free dvector slot
    const int slot = ndvector++;

    dname = (char **) memory->srealloc(dname,ndvector*sizeof(char *),
                                       "atom:dname");
    dname[slot] = new char[namelen];
    strcpy(dname[slot],name);

    // dvector is grown together with k_dvector to ndvector x nmax
    memory->grow_kokkos(k_dvector,dvector,ndvector,nmax,
                        "atom:dvector");
    return slot;
  }

  // int vector: take the next free ivector slot
  const int slot = nivector++;

  iname = (char **) memory->srealloc(iname,nivector*sizeof(char *),
                                     "atom:iname");
  iname[slot] = new char[namelen];
  strcpy(iname[slot],name);

  // extend the list of vector pointers, then allocate nmax entries for the new one
  ivector = (int **) memory->srealloc(ivector,nivector*sizeof(int *),
                                      "atom:ivector");
  memory->create(ivector[slot],nmax,"atom:ivector");
  return slot;
}
/* ----------------------------------------------------------------------
   drop the custom per-atom vector stored at index
   flag = 0 selects the int list (ivector/iname), any other value the
     double list (dvector/dname)
   the name string is freed and both list entries are reset to NULL
   the ivector/dvector and iname/dname lists themselves never shrink
------------------------------------------------------------------------- */
void AtomKokkos::remove_custom(int flag, int index)
{
  if (flag != 0) {
    // double vector: only the row pointer is cleared here, the call
    //   memory->destroy_kokkos(dvector)
    // is left disabled, as in the original code
    // NOTE(review): the row data appears to stay owned by k_dvector - confirm
    delete [] dname[index];
    dname[index] = NULL;
    dvector[index] = NULL;
    return;
  }

  // int vector: release the per-atom storage, then the name
  memory->destroy(ivector[index]);
  ivector[index] = NULL;
  delete [] iname[index];
  iname[index] = NULL;
}
/* ---------------------------------------------------------------------- */
void AtomKokkos::deallocate_topology()

View File

@ -34,6 +34,7 @@ class AtomKokkos : public Atom {
DAT::tdual_float_1d k_radius;
DAT::tdual_float_1d k_rmass;
DAT::tdual_v_array k_omega;
DAT::tdual_v_array k_angmom;
DAT::tdual_f_array k_torque;
DAT::tdual_tagint_1d k_molecule;
DAT::tdual_int_2d k_nspecial;
@ -51,6 +52,14 @@ class AtomKokkos : public Atom {
DAT::tdual_int_2d k_improper_type;
DAT::tdual_tagint_2d k_improper_atom1, k_improper_atom2, k_improper_atom3, k_improper_atom4;
DAT::tdual_float_2d k_dvector;
// USER-DPD package
DAT::tdual_efloat_1d k_uCond, k_uMech, k_uChem, k_uCG, k_uCGnew,
k_rho,k_dpdTheta,k_duChem;
AtomKokkos(class LAMMPS *);
~AtomKokkos();
@ -60,6 +69,8 @@ class AtomKokkos : public Atom {
void sync_overlapping_device(const ExecutionSpace space, unsigned int mask);
virtual void sort();
virtual void grow(unsigned int mask);
int add_custom(const char *, int);
void remove_custom(int, int);
virtual void deallocate_topology();
void sync_modify(ExecutionSpace, unsigned int, unsigned int);
private:

View File

@ -308,7 +308,6 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n,
Kokkos::parallel_for(n,f);
}
}
LMPHostType::fence();
} else {
sync(Device,X_MASK);
if(pbc_flag) {
@ -336,7 +335,6 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n,
Kokkos::parallel_for(n,f);
}
}
LMPDeviceType::fence();
}
return n*size_forward;
@ -430,7 +428,6 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li
Kokkos::parallel_for(n,f);
}
}
LMPHostType::fence();
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
@ -463,7 +460,6 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li
Kokkos::parallel_for(n,f);
}
}
LMPDeviceType::fence();
}
return n*3;
}
@ -501,13 +497,11 @@ void AtomVecAngleKokkos::unpack_comm_kokkos(const int &n, const int &first,
modified(Host,X_MASK);
struct AtomVecAngleKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
struct AtomVecAngleKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
}
}
@ -753,13 +747,11 @@ int AtomVecAngleKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
Kokkos::parallel_for(n,f);
LMPHostType::fence();
} else {
AtomVecAngleKokkos_PackBorder<LMPDeviceType,1> f(
buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
}
} else {
@ -769,13 +761,11 @@ int AtomVecAngleKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
Kokkos::parallel_for(n,f);
LMPHostType::fence();
} else {
AtomVecAngleKokkos_PackBorder<LMPDeviceType,0> f(
buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
}
}
return n*size_border;
@ -977,12 +967,10 @@ void AtomVecAngleKokkos::unpack_border_kokkos(const int &n, const int &first,
struct AtomVecAngleKokkos_UnpackBorder<LMPHostType>
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
Kokkos::parallel_for(n,f);
LMPHostType::fence();
} else {
struct AtomVecAngleKokkos_UnpackBorder<LMPDeviceType>
f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,d_molecule,first);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
}
}
@ -1241,13 +1229,11 @@ int AtomVecAngleKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_
AtomVecAngleKokkos_PackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
Kokkos::parallel_for(nsend,f);
LMPHostType::fence();
return nsend*elements;
} else {
AtomVecAngleKokkos_PackExchangeFunctor<LMPDeviceType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
Kokkos::parallel_for(nsend,f);
LMPDeviceType::fence();
return nsend*elements;
}
}
@ -1405,7 +1391,6 @@ int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int n
AtomVecAngleKokkos_UnpackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/elements,f);
LMPHostType::fence();
return k_count.h_view(0);
} else {
k_count.h_view(0) = nlocal;
@ -1414,7 +1399,6 @@ int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int n
AtomVecAngleKokkos_UnpackExchangeFunctor<LMPDeviceType>
f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/elements,f);
LMPDeviceType::fence();
k_count.modify<LMPDeviceType>();
k_count.sync<LMPHostType>();

View File

@ -224,7 +224,6 @@ int AtomVecAtomicKokkos::pack_comm_kokkos(const int &n,
Kokkos::parallel_for(n,f);
}
}
LMPHostType::fence();
} else {
sync(Device,X_MASK);
if(pbc_flag) {
@ -252,7 +251,6 @@ int AtomVecAtomicKokkos::pack_comm_kokkos(const int &n,
Kokkos::parallel_for(n,f);
}
}
LMPDeviceType::fence();
}
return n*size_forward;
@ -340,7 +338,6 @@ int AtomVecAtomicKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &l
Kokkos::parallel_for(n,f);
}
}
LMPHostType::fence();
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
@ -369,7 +366,6 @@ int AtomVecAtomicKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &l
Kokkos::parallel_for(n,f);
}
}
LMPDeviceType::fence();
}
return n*3;
}
@ -407,13 +403,11 @@ void AtomVecAtomicKokkos::unpack_comm_kokkos(const int &n, const int &first,
modified(Host,X_MASK);
struct AtomVecAtomicKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
struct AtomVecAtomicKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
}
}
@ -655,13 +649,11 @@ int AtomVecAtomicKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,dx,dy,dz);
Kokkos::parallel_for(n,f);
LMPHostType::fence();
} else {
AtomVecAtomicKokkos_PackBorder<LMPDeviceType,1> f(
buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
iswap,d_x,d_tag,d_type,d_mask,dx,dy,dz);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
}
} else {
@ -671,13 +663,11 @@ int AtomVecAtomicKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,dx,dy,dz);
Kokkos::parallel_for(n,f);
LMPHostType::fence();
} else {
AtomVecAtomicKokkos_PackBorder<LMPDeviceType,0> f(
buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
iswap,d_x,d_tag,d_type,d_mask,dx,dy,dz);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
}
}
return n*6;
@ -853,11 +843,9 @@ void AtomVecAtomicKokkos::unpack_border_kokkos(const int &n, const int &first,
if(space==Host) {
struct AtomVecAtomicKokkos_UnpackBorder<LMPHostType> f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,first);
Kokkos::parallel_for(n,f);
LMPHostType::fence();
} else {
struct AtomVecAtomicKokkos_UnpackBorder<LMPDeviceType> f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,first);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
}
}
@ -1009,12 +997,10 @@ int AtomVecAtomicKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat
if(space == Host) {
AtomVecAtomicKokkos_PackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
Kokkos::parallel_for(nsend,f);
LMPHostType::fence();
return nsend*11;
} else {
AtomVecAtomicKokkos_PackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
Kokkos::parallel_for(nsend,f);
LMPDeviceType::fence();
return nsend*11;
}
}
@ -1106,7 +1092,6 @@ int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int
k_count.h_view(0) = nlocal;
AtomVecAtomicKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/11,f);
LMPHostType::fence();
return k_count.h_view(0);
} else {
k_count.h_view(0) = nlocal;
@ -1114,7 +1099,6 @@ int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int
k_count.sync<LMPDeviceType>();
AtomVecAtomicKokkos_UnpackExchangeFunctor<LMPDeviceType> f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/11,f);
LMPDeviceType::fence();
k_count.modify<LMPDeviceType>();
k_count.sync<LMPHostType>();

View File

@ -266,7 +266,6 @@ int AtomVecBondKokkos::pack_comm_kokkos(const int &n,
Kokkos::parallel_for(n,f);
}
}
LMPHostType::fence();
} else {
sync(Device,X_MASK);
if(pbc_flag) {
@ -294,7 +293,6 @@ int AtomVecBondKokkos::pack_comm_kokkos(const int &n,
Kokkos::parallel_for(n,f);
}
}
LMPDeviceType::fence();
}
return n*size_forward;
@ -382,7 +380,6 @@ int AtomVecBondKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &lis
Kokkos::parallel_for(n,f);
}
}
LMPHostType::fence();
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
@ -411,7 +408,6 @@ int AtomVecBondKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &lis
Kokkos::parallel_for(n,f);
}
}
LMPDeviceType::fence();
}
return n*3;
}
@ -449,13 +445,11 @@ void AtomVecBondKokkos::unpack_comm_kokkos(const int &n, const int &first,
modified(Host,X_MASK);
struct AtomVecBondKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
struct AtomVecBondKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
}
}
@ -701,13 +695,11 @@ int AtomVecBondKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
Kokkos::parallel_for(n,f);
LMPHostType::fence();
} else {
AtomVecBondKokkos_PackBorder<LMPDeviceType,1> f(
buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
}
} else {
@ -717,13 +709,11 @@ int AtomVecBondKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
Kokkos::parallel_for(n,f);
LMPHostType::fence();
} else {
AtomVecBondKokkos_PackBorder<LMPDeviceType,0> f(
buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
}
}
return n*size_border;
@ -925,12 +915,10 @@ void AtomVecBondKokkos::unpack_border_kokkos(const int &n, const int &first,
struct AtomVecBondKokkos_UnpackBorder<LMPHostType>
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
Kokkos::parallel_for(n,f);
LMPHostType::fence();
} else {
struct AtomVecBondKokkos_UnpackBorder<LMPDeviceType>
f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,d_molecule,first);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
}
}
@ -1157,13 +1145,11 @@ int AtomVecBondKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2
AtomVecBondKokkos_PackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
Kokkos::parallel_for(nsend,f);
LMPHostType::fence();
return nsend*elements;
} else {
AtomVecBondKokkos_PackExchangeFunctor<LMPDeviceType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
Kokkos::parallel_for(nsend,f);
LMPDeviceType::fence();
return nsend*elements;
}
}
@ -1299,7 +1285,6 @@ int AtomVecBondKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nr
AtomVecBondKokkos_UnpackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/elements,f);
LMPHostType::fence();
return k_count.h_view(0);
} else {
k_count.h_view(0) = nlocal;
@ -1308,7 +1293,6 @@ int AtomVecBondKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nr
AtomVecBondKokkos_UnpackExchangeFunctor<LMPDeviceType>
f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/elements,f);
LMPDeviceType::fence();
k_count.modify<LMPDeviceType>();
k_count.sync<LMPHostType>();

View File

@ -236,7 +236,6 @@ int AtomVecChargeKokkos::pack_comm_kokkos(const int &n,
Kokkos::parallel_for(n,f);
}
}
LMPHostType::fence();
} else {
sync(Device,X_MASK);
if(pbc_flag) {
@ -264,7 +263,6 @@ int AtomVecChargeKokkos::pack_comm_kokkos(const int &n,
Kokkos::parallel_for(n,f);
}
}
LMPDeviceType::fence();
}
return n*size_forward;
@ -352,7 +350,6 @@ int AtomVecChargeKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &l
Kokkos::parallel_for(n,f);
}
}
LMPHostType::fence();
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
@ -381,7 +378,6 @@ int AtomVecChargeKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &l
Kokkos::parallel_for(n,f);
}
}
LMPDeviceType::fence();
}
return n*3;
}
@ -419,13 +415,11 @@ void AtomVecChargeKokkos::unpack_comm_kokkos(const int &n, const int &first,
modified(Host,X_MASK);
struct AtomVecChargeKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
struct AtomVecChargeKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
}
}
@ -669,13 +663,11 @@ int AtomVecChargeKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,h_q,dx,dy,dz);
Kokkos::parallel_for(n,f);
LMPHostType::fence();
} else {
AtomVecChargeKokkos_PackBorder<LMPDeviceType,1> f(
buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
iswap,d_x,d_tag,d_type,d_mask,d_q,dx,dy,dz);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
}
} else {
@ -685,13 +677,11 @@ int AtomVecChargeKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,h_q,dx,dy,dz);
Kokkos::parallel_for(n,f);
LMPHostType::fence();
} else {
AtomVecChargeKokkos_PackBorder<LMPDeviceType,0> f(
buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
iswap,d_x,d_tag,d_type,d_mask,d_q,dx,dy,dz);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
}
}
return n*size_border;
@ -890,12 +880,10 @@ void AtomVecChargeKokkos::unpack_border_kokkos(const int &n, const int &first,
struct AtomVecChargeKokkos_UnpackBorder<LMPHostType>
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_q,first);
Kokkos::parallel_for(n,f);
LMPHostType::fence();
} else {
struct AtomVecChargeKokkos_UnpackBorder<LMPDeviceType>
f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,d_q,first);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
}
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK);
}
@ -1078,13 +1066,11 @@ int AtomVecChargeKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat
AtomVecChargeKokkos_PackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
Kokkos::parallel_for(nsend,f);
LMPHostType::fence();
return nsend*12;
} else {
AtomVecChargeKokkos_PackExchangeFunctor<LMPDeviceType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
Kokkos::parallel_for(nsend,f);
LMPDeviceType::fence();
return nsend*12;
}
}
@ -1181,7 +1167,6 @@ int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int
k_count.h_view(0) = nlocal;
AtomVecChargeKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/12,f);
LMPHostType::fence();
return k_count.h_view(0);
} else {
k_count.h_view(0) = nlocal;
@ -1190,7 +1175,6 @@ int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int
AtomVecChargeKokkos_UnpackExchangeFunctor<LMPDeviceType>
f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/12,f);
LMPDeviceType::fence();
k_count.modify<LMPDeviceType>();
k_count.sync<LMPHostType>();

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,137 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef ATOM_CLASS
AtomStyle(dpd/kk,AtomVecDPDKokkos)
AtomStyle(dpd/kk/device,AtomVecDPDKokkos)
AtomStyle(dpd/kk/host,AtomVecDPDKokkos)
#else
#ifndef LMP_ATOM_VEC_DPD_KOKKOS_H
#define LMP_ATOM_VEC_DPD_KOKKOS_H
#include "atom_vec_kokkos.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
class AtomVecDPDKokkos : public AtomVecKokkos {
public:
AtomVecDPDKokkos(class LAMMPS *);
virtual ~AtomVecDPDKokkos() {}
void grow(int);
void copy(int, int, int);
int pack_comm(int, int *, double *, int, int *);
int pack_comm_vel(int, int *, double *, int, int *);
int pack_comm_hybrid(int, int *, double *);
void unpack_comm(int, int, double *);
void unpack_comm_vel(int, int, double *);
int unpack_comm_hybrid(int, int, double *);
int pack_reverse(int, int, double *);
void unpack_reverse(int, int *, double *);
int pack_border(int, int *, double *, int, int *);
int pack_border_vel(int, int *, double *, int, int *);
int pack_border_hybrid(int, int *, double *);
void unpack_border(int, int, double *);
void unpack_border_vel(int, int, double *);
int unpack_border_hybrid(int, int, double *);
int pack_exchange(int, double *);
int unpack_exchange(double *);
int size_restart();
int pack_restart(int, double *);
int unpack_restart(double *);
void create_atom(int, double *);
void data_atom(double *, tagint, char **);
int data_atom_hybrid(int, char **);
void pack_data(double **);
int pack_data_hybrid(int, double *);
void write_data(FILE *, int, double **);
int write_data_hybrid(FILE *, double *);
bigint memory_usage();
void grow_reset();
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
const int & iswap,
const DAT::tdual_xfloat_2d &buf,
const int &pbc_flag, const int pbc[]);
void unpack_comm_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf);
int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
const int & iswap, const int nfirst,
const int &pbc_flag, const int pbc[]);
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
DAT::tdual_xfloat_2d buf,int iswap,
int pbc_flag, int *pbc, ExecutionSpace space);
void unpack_border_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf,
ExecutionSpace space);
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space, int dim,
X_FLOAT lo, X_FLOAT hi);
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
ExecutionSpace space);
void sync(ExecutionSpace space, unsigned int mask);
void modified(ExecutionSpace space, unsigned int mask);
void sync_overlapping_device(ExecutionSpace space, unsigned int mask);
double *uCond,*uMech,*uChem,*uCG,*uCGnew,*rho,*dpdTheta;
double *duChem;
protected:
DAT::t_efloat_1d d_uCond, d_uMech, d_uChem, d_uCG, d_uCGnew,d_rho,d_dpdTheta,d_duChem;
HAT::t_efloat_1d h_uCond, h_uMech, h_uChem, h_uCG, h_uCGnew,h_rho,h_dpdTheta,h_duChem;
tagint *tag;
imageint *image;
int *type,*mask;
double **x,**v,**f;
DAT::t_tagint_1d d_tag;
HAT::t_tagint_1d h_tag;
DAT::t_imageint_1d d_image;
HAT::t_imageint_1d h_image;
DAT::t_int_1d d_type, d_mask;
HAT::t_int_1d h_type, h_mask;
DAT::t_x_array d_x;
DAT::t_v_array d_v;
DAT::t_f_array d_f;
HAT::t_x_array h_x;
HAT::t_v_array h_v;
HAT::t_f_array h_f;
DAT::tdual_int_1d k_count;
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: Per-processor system is too big
The number of owned atoms plus ghost atoms on a single
processor must fit in 32-bit integer.
E: Invalid atom type in Atoms section of data file
Atom types must range from 1 to specified # of types.
*/

View File

@ -396,7 +396,6 @@ int AtomVecFullKokkos::pack_comm_kokkos(const int &n,
Kokkos::parallel_for(n,f);
}
}
LMPHostType::fence();
} else {
sync(Device,X_MASK);
if(pbc_flag) {
@ -424,7 +423,6 @@ int AtomVecFullKokkos::pack_comm_kokkos(const int &n,
Kokkos::parallel_for(n,f);
}
}
LMPDeviceType::fence();
}
return n*size_forward;
@ -515,7 +513,6 @@ int AtomVecFullKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &lis
Kokkos::parallel_for(n,f);
}
}
LMPHostType::fence();
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
@ -544,7 +541,6 @@ int AtomVecFullKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &lis
Kokkos::parallel_for(n,f);
}
}
LMPDeviceType::fence();
}
return n*3;
}
@ -582,13 +578,11 @@ void AtomVecFullKokkos::unpack_comm_kokkos(const int &n, const int &first,
modified(Host,X_MASK);
struct AtomVecFullKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
struct AtomVecFullKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
}
}
@ -838,13 +832,11 @@ int AtomVecFullKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz);
Kokkos::parallel_for(n,f);
LMPHostType::fence();
} else {
AtomVecFullKokkos_PackBorder<LMPDeviceType,1> f(
buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
iswap,d_x,d_tag,d_type,d_mask,d_q,d_molecule,dx,dy,dz);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
}
} else {
@ -854,13 +846,11 @@ int AtomVecFullKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz);
Kokkos::parallel_for(n,f);
LMPHostType::fence();
} else {
AtomVecFullKokkos_PackBorder<LMPDeviceType,0> f(
buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
iswap,d_x,d_tag,d_type,d_mask,d_q,d_molecule,dx,dy,dz);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
}
}
return n*size_border;
@ -1071,12 +1061,10 @@ void AtomVecFullKokkos::unpack_border_kokkos(const int &n, const int &first,
struct AtomVecFullKokkos_UnpackBorder<LMPHostType>
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_q,h_molecule,first);
Kokkos::parallel_for(n,f);
LMPHostType::fence();
} else {
struct AtomVecFullKokkos_UnpackBorder<LMPDeviceType>
f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,d_q,d_molecule,first);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
}
}
@ -1422,13 +1410,11 @@ int AtomVecFullKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2
AtomVecFullKokkos_PackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
Kokkos::parallel_for(nsend,f);
LMPHostType::fence();
return nsend*elements;
} else {
AtomVecFullKokkos_PackExchangeFunctor<LMPDeviceType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
Kokkos::parallel_for(nsend,f);
LMPDeviceType::fence();
return nsend*elements;
}
}
@ -1643,7 +1629,6 @@ int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nr
AtomVecFullKokkos_UnpackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/elements,f);
LMPHostType::fence();
return k_count.h_view(0);
} else {
k_count.h_view(0) = nlocal;
@ -1652,7 +1637,6 @@ int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nr
AtomVecFullKokkos_UnpackExchangeFunctor<LMPDeviceType>
f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/elements,f);
LMPDeviceType::fence();
k_count.modify<LMPDeviceType>();
k_count.sync<LMPHostType>();

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,161 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef ATOM_CLASS
AtomStyle(hybrid/kk,AtomVecHybridKokkos)
#else
#ifndef LMP_ATOM_VEC_HYBRID_KOKKOS_H
#define LMP_ATOM_VEC_HYBRID_KOKKOS_H
#include <stdio.h>
#include "atom_vec_kokkos.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
// Kokkos variant of the hybrid atom style (registered as hybrid/kk by the
// AtomStyle() line of this header).  It keeps an array of nstyles sub-style
// objects in `styles`.
// NOTE(review): `keywords` presumably holds one style name per entry of
// `styles` -- confirm in the implementation file.
class AtomVecHybridKokkos : public AtomVecKokkos {
public:
// Sub-style bookkeeping, publicly accessible.
int nstyles;
class AtomVec **styles;
char **keywords;
AtomVecHybridKokkos(class LAMMPS *);
~AtomVecHybridKokkos();
// ---- classic (raw-buffer) AtomVec interface ----
void process_args(int, char **);
void init();
void grow(int);
void grow_reset();
void copy(int, int, int);
void clear_bonus();
void force_clear(int, size_t);
int pack_comm(int, int *, double *, int, int *);
int pack_comm_vel(int, int *, double *, int, int *);
void unpack_comm(int, int, double *);
void unpack_comm_vel(int, int, double *);
int pack_reverse(int, int, double *);
void unpack_reverse(int, int *, double *);
int pack_border(int, int *, double *, int, int *);
int pack_border_vel(int, int *, double *, int, int *);
void unpack_border(int, int, double *);
void unpack_border_vel(int, int, double *);
int pack_exchange(int, double *);
int unpack_exchange(double *);
int size_restart();
int pack_restart(int, double *);
int unpack_restart(double *);
void create_atom(int, double *);
void data_atom(double *, imageint, char **);
// The hybrid style itself is never a sub-style, so this hook is an inline
// stub that always returns 0.
int data_atom_hybrid(int, char **) {return 0;}
void data_vel(int, char **);
void pack_data(double **);
void write_data(FILE *, int, double **);
void pack_vel(double **);
void write_vel(FILE *, int, double **);
int property_atom(char *);
void pack_property_atom(int, double *, int, int);
bigint memory_usage();
// ---- Kokkos (dual-view) interface ----
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
const int & iswap,
const DAT::tdual_xfloat_2d &buf,
const int &pbc_flag, const int pbc[]);
void unpack_comm_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf);
int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
const int & iswap, const int nfirst,
const int &pbc_flag, const int pbc[]);
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
DAT::tdual_xfloat_2d buf,int iswap,
int pbc_flag, int *pbc, ExecutionSpace space);
void unpack_border_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf,
ExecutionSpace space);
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space, int dim,
X_FLOAT lo, X_FLOAT hi);
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
ExecutionSpace space);
// Data-movement hooks, selected by execution space and a field bit mask.
void sync(ExecutionSpace space, unsigned int mask);
void modified(ExecutionSpace space, unsigned int mask);
void sync_overlapping_device(ExecutionSpace space, unsigned int mask);
private:
// Raw pointers to the common per-atom arrays.
tagint *tag;
int *type,*mask;
imageint *image;
double **x,**v,**f;
double **omega,**angmom;
// DAT:: (d_*) and HAT:: (h_*) views of the same fields.
DAT::t_tagint_1d d_tag;
DAT::t_int_1d d_type, d_mask;
HAT::t_tagint_1d h_tag;
HAT::t_int_1d h_type, h_mask;
DAT::t_imageint_1d d_image;
HAT::t_imageint_1d h_image;
DAT::t_x_array d_x;
DAT::t_v_array d_v;
DAT::t_f_array d_f;
HAT::t_x_array h_x;
HAT::t_v_array h_v;
HAT::t_f_array h_f;
DAT::t_v_array d_omega, d_angmom;
HAT::t_v_array h_omega, h_angmom;
DAT::tdual_int_1d k_count;
// NOTE(review): presumably the table of all atom style names that
// build_styles() fills and known_style() searches -- confirm in the
// implementation file.
int nallstyles;
char **allstyles;
void build_styles();
int known_style(char *);
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: Atom style hybrid cannot have hybrid as an argument
Self-explanatory.
E: Atom style hybrid cannot use same atom style twice
Self-explanatory.
E: Cannot mix molecular and molecule template atom styles
Self-explanatory.
E: Per-processor system is too big
The number of owned atoms plus ghost atoms on a single
processor must fit in 32-bit integer.
E: Invalid atom type in Atoms section of data file
Atom types must range from 1 to specified # of types.
*/

View File

@ -387,7 +387,6 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n,
Kokkos::parallel_for(n,f);
}
}
LMPHostType::fence();
} else {
sync(Device,X_MASK);
if(pbc_flag) {
@ -415,7 +414,6 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n,
Kokkos::parallel_for(n,f);
}
}
LMPDeviceType::fence();
}
return n*size_forward;
@ -506,7 +504,6 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d
Kokkos::parallel_for(n,f);
}
}
LMPHostType::fence();
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
@ -535,7 +532,6 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d
Kokkos::parallel_for(n,f);
}
}
LMPDeviceType::fence();
}
return n*3;
}
@ -573,13 +569,11 @@ void AtomVecMolecularKokkos::unpack_comm_kokkos(const int &n, const int &first,
modified(Host,X_MASK);
struct AtomVecMolecularKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
struct AtomVecMolecularKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
}
}
@ -825,13 +819,11 @@ int AtomVecMolecularKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendli
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
Kokkos::parallel_for(n,f);
LMPHostType::fence();
} else {
AtomVecMolecularKokkos_PackBorder<LMPDeviceType,1> f(
buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
}
} else {
@ -841,13 +833,11 @@ int AtomVecMolecularKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendli
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
Kokkos::parallel_for(n,f);
LMPHostType::fence();
} else {
AtomVecMolecularKokkos_PackBorder<LMPDeviceType,0> f(
buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
}
}
return n*size_border;
@ -1049,12 +1039,10 @@ void AtomVecMolecularKokkos::unpack_border_kokkos(const int &n, const int &first
struct AtomVecMolecularKokkos_UnpackBorder<LMPHostType>
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
Kokkos::parallel_for(n,f);
LMPHostType::fence();
} else {
struct AtomVecMolecularKokkos_UnpackBorder<LMPDeviceType>
f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,d_molecule,first);
Kokkos::parallel_for(n,f);
LMPDeviceType::fence();
}
}
@ -1389,13 +1377,11 @@ int AtomVecMolecularKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfl
AtomVecMolecularKokkos_PackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
Kokkos::parallel_for(nsend,f);
LMPHostType::fence();
return nsend*elements;
} else {
AtomVecMolecularKokkos_PackExchangeFunctor<LMPDeviceType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
Kokkos::parallel_for(nsend,f);
LMPDeviceType::fence();
return nsend*elements;
}
}
@ -1608,7 +1594,6 @@ int AtomVecMolecularKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,i
AtomVecMolecularKokkos_UnpackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/elements,f);
LMPHostType::fence();
return k_count.h_view(0);
} else {
k_count.h_view(0) = nlocal;
@ -1617,7 +1602,6 @@ int AtomVecMolecularKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,i
AtomVecMolecularKokkos_UnpackExchangeFunctor<LMPDeviceType>
f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/elements,f);
LMPDeviceType::fence();
k_count.modify<LMPDeviceType>();
k_count.sync<LMPHostType>();

View File

@ -499,7 +499,6 @@ void CommKokkos::exchange_device()
f(atomKK->k_x,k_exchange_sendlist,k_count,k_sendflag,
nlocal,dim,lo,hi);
Kokkos::parallel_for(nlocal,f);
DeviceType::fence();
k_exchange_sendlist.modify<DeviceType>();
k_sendflag.modify<DeviceType>();
k_count.modify<DeviceType>();
@ -535,7 +534,6 @@ void CommKokkos::exchange_device()
k_exchange_sendlist,k_exchange_copylist,
ExecutionSpaceFromDevice<DeviceType>::
space,dim,lo,hi);
DeviceType::fence();
} else {
while (i < nlocal) {
@ -560,7 +558,6 @@ void CommKokkos::exchange_device()
atom->nlocal=avec->
unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi,
ExecutionSpaceFromDevice<DeviceType>::space);
DeviceType::fence();
}
} else {
MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0,
@ -593,7 +590,6 @@ void CommKokkos::exchange_device()
atom->nlocal = avec->
unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi,
ExecutionSpaceFromDevice<DeviceType>::space);
DeviceType::fence();
}
}
@ -765,7 +761,6 @@ void CommKokkos::borders_device() {
total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
Kokkos::TeamPolicy<DeviceType> config((nlast-nfirst+127)/128,128);
Kokkos::parallel_for(config,f);
DeviceType::fence();
total_send.template modify<DeviceType>();
total_send.template sync<LMPHostType>();
@ -782,7 +777,6 @@ void CommKokkos::borders_device() {
total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
Kokkos::TeamPolicy<DeviceType> config((nlast-nfirst+127)/128,128);
Kokkos::parallel_for(config,f);
DeviceType::fence();
total_send.template modify<DeviceType>();
total_send.template sync<LMPHostType>();
}
@ -911,7 +905,6 @@ void CommKokkos::borders_device() {
if (exec_space == Host) k_sendlist.sync<LMPDeviceType>();
atomKK->modified(exec_space,ALL_MASK);
DeviceType::fence();
atomKK->sync(Host,TAG_MASK);
if (map_style) atom->map_set();
}

View File

@ -99,7 +99,6 @@ void DomainKokkos::reset_box()
DomainResetBoxFunctor<LMPDeviceType>
f(atomKK->k_x);
Kokkos::parallel_reduce(nlocal,f,result);
LMPDeviceType::fence();
double (*extent)[2] = result.value;
double all[3][2];
@ -384,7 +383,6 @@ void DomainKokkos::pbc()
Kokkos::parallel_for(nlocal,f);
}
}
LMPDeviceType::fence();
atomKK->modified(Device,X_MASK|V_MASK|IMAGE_MASK);
}
@ -424,7 +422,6 @@ void DomainKokkos::remap_all()
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagDomain_remap_all>(0,nlocal),*this);
LMPDeviceType::fence();
copymode = 0;
atomKK->modified(Device,X_MASK | IMAGE_MASK);
@ -528,7 +525,6 @@ void DomainKokkos::image_flip(int m_in, int n_in, int p_in)
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagDomain_image_flip>(0,nlocal),*this);
LMPDeviceType::fence();
copymode = 0;
atomKK->modified(Device,IMAGE_MASK);
@ -561,7 +557,6 @@ void DomainKokkos::lamda2x(int n)
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagDomain_lamda2x>(0,n),*this);
LMPDeviceType::fence();
copymode = 0;
atomKK->modified(Device,X_MASK);
@ -587,7 +582,6 @@ void DomainKokkos::x2lamda(int n)
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagDomain_x2lamda>(0,n),*this);
LMPDeviceType::fence();
copymode = 0;
atomKK->modified(Device,X_MASK);

View File

@ -0,0 +1,96 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include <stdio.h>
#include <string.h>
#include "fix_dpd_energy_kokkos.h"
#include "atom_masks.h"
#include "atom_kokkos.h"
#include "force.h"
#include "update.h"
#include "respa.h"
#include "modify.h"
#include "error.h"
using namespace LAMMPS_NS;
using namespace FixConst;
/* ---------------------------------------------------------------------- */
/* Construct the Kokkos variant of fix dpd/energy.

   All argument parsing is delegated to FixDPDenergy.  This constructor only
   sets the Kokkos bookkeeping fields and checks that the pair style found
   by the base class is the matching Kokkos pair style. */
template <typename DeviceType>
FixDPDenergyKokkos<DeviceType>::FixDPDenergyKokkos(LAMMPS *lmp, int narg, char **arg) :
  FixDPDenergy(lmp, narg, arg)
{
  // Kokkos bookkeeping
  atomKK = (AtomKokkos *) atom;
  kokkosable = 1;
  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;

  // no automatic data movement is requested through the data masks;
  // take_half_step() issues its own sync/modified calls
  datamask_modify = EMPTY_MASK;
  datamask_read = EMPTY_MASK;

  // the base-class pair pointer must really be the Kokkos pair style
  // instantiated for the same DeviceType
  using PairKK = PairDPDfdtEnergyKokkos<DeviceType>;
  pairDPDEKK = dynamic_cast<PairKK *>(pairDPDE);
  if (pairDPDEKK == nullptr)
    error->all(FLERR,"Must use pair_style dpd/fdt/energy/kk with fix dpd/energy/kk");
}
/* ---------------------------------------------------------------------- */
/* Advance uCond and uMech by half a timestep:
     uCond(i) += 0.5*dt*duCond(i),  uMech(i) += 0.5*dt*duMech(i)
   for the first nlocal atoms (or the first nfirst atoms when this fix's
   group is the atom "first" group).  The rates duCond/duMech are read from
   the Kokkos pair style. */
template <typename DeviceType>
void FixDPDenergyKokkos<DeviceType>::take_half_step()
{
  using AT = ArrayTypes<DeviceType>;

  const int nupdate = (igroup == atom->firstgroup) ? atom->nfirst : atom->nlocal;

  // make the per-atom energies current in this fix's execution space
  atomKK->sync(execution_space, UCOND_MASK);
  atomKK->sync(execution_space, UMECH_MASK);
  typename AT::t_efloat_1d uCondView = atomKK->k_uCond.view<DeviceType>();
  typename AT::t_efloat_1d uMechView = atomKK->k_uMech.view<DeviceType>();

  // rates accumulated by the pair style, read-only here
  pairDPDEKK->k_duCond.template sync<DeviceType>();
  pairDPDEKK->k_duMech.template sync<DeviceType>();
  typename AT::t_efloat_1d_const duCondView = pairDPDEKK->k_duCond.template view<DeviceType>();
  typename AT::t_efloat_1d_const duMechView = pairDPDEKK->k_duMech.template view<DeviceType>();

  // 0.5*dt is loop invariant; the product is evaluated in the same order
  // as 0.5*dt*du(i)
  const auto half_dt = 0.5*update->dt;

  // NOTE(review): no execution policy is passed, so Kokkos picks the
  // execution space for this lambda itself rather than being told to use
  // DeviceType -- confirm this is what the kk/host instantiation needs.
  Kokkos::parallel_for(nupdate, LAMMPS_LAMBDA(int ii) {
    uCondView(ii) += half_dt*duCondView(ii);
    uMechView(ii) += half_dt*duMechView(ii);
  });

  atomKK->modified(execution_space, UCOND_MASK);
  atomKK->modified(execution_space, UMECH_MASK);
}
/* ---------------------------------------------------------------------- */
// First half of the timestep: forwards to take_half_step().
// The int argument is unnamed and unused.
template <typename DeviceType>
void FixDPDenergyKokkos<DeviceType>::initial_integrate(int)
{
take_half_step();
}
/* ---------------------------------------------------------------------- */
// Second half of the timestep: forwards to the same take_half_step()
// update that initial_integrate() uses.
template <typename DeviceType>
void FixDPDenergyKokkos<DeviceType>::final_integrate()
{
take_half_step();
}
// Explicit instantiations: the member templates above are defined in this
// translation unit, so the variants that are used must be emitted here.
namespace LAMMPS_NS {
template class FixDPDenergyKokkos<LMPDeviceType>;
// The host variant is instantiated separately only when KOKKOS_HAVE_CUDA is
// defined.
// NOTE(review): presumably LMPHostType and LMPDeviceType are the same type
// in non-CUDA builds, which would make a second instantiation a duplicate --
// confirm in kokkos_type.h.
#ifdef KOKKOS_HAVE_CUDA
template class FixDPDenergyKokkos<LMPHostType>;
#endif
}

View File

@ -0,0 +1,54 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
FixStyle(dpd/energy/kk,FixDPDenergyKokkos<LMPDeviceType>)
FixStyle(dpd/energy/kk/device,FixDPDenergyKokkos<LMPDeviceType>)
FixStyle(dpd/energy/kk/host,FixDPDenergyKokkos<LMPHostType>)
#else
#ifndef LMP_FIX_DPDE_KOKKOS_H
#define LMP_FIX_DPDE_KOKKOS_H
#include "fix_dpd_energy.h"
#include "pair_dpd_fdt_energy_kokkos.h"
namespace LAMMPS_NS {
// Kokkos variant of fix dpd/energy, templated on the Kokkos device type
// (LMPDeviceType for the kk and kk/device styles, LMPHostType for kk/host,
// per the FixStyle() lines of this header).
template <typename DeviceType>
class FixDPDenergyKokkos : public FixDPDenergy {
public:
FixDPDenergyKokkos(class LAMMPS *, int, char **);
virtual ~FixDPDenergyKokkos() {}
// Both integration hooks forward to take_half_step().
virtual void initial_integrate(int);
virtual void final_integrate();
// Half-timestep update of uCond/uMech.  Public rather than protected.
// NOTE(review): presumably public because it launches a device lambda,
// which some compilers do not allow from a non-public member -- confirm.
void take_half_step();
protected:
// Kokkos pair style matching DeviceType; the constructor errors out if the
// base-class pair pointer cannot be cast to this type.
PairDPDfdtEnergyKokkos<DeviceType>* pairDPDEKK;
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: Must use pair_style dpd/fdt/energy/kk with fix dpd/energy/kk
Self-explanatory.
*/

View File

@ -0,0 +1,569 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Stan Moore (Sandia)
------------------------------------------------------------------------- */
#include <stdlib.h>
#include <string.h>
#include "fix_eos_table_rx_kokkos.h"
#include "atom_kokkos.h"
#include "error.h"
#include "force.h"
#include "memory.h"
#include "comm.h"
#include <math.h>
#include "modify.h"
#include "atom_masks.h"
// NOTE(review): MAXLINE is not referenced in the visible part of this file;
// presumably a line-buffer size kept from the non-Kokkos fix -- confirm it
// is still needed.
#define MAXLINE 1024
// MY_EPSILON is ten times the double machine epsilon.  DBL_EPSILON is used
// when a previously included header has defined it; otherwise the IEEE-754
// double value 2^-52 is spelled out.
// NOTE(review): <float.h> is not among the includes above, so which branch
// is taken depends on transitive includes.
#ifdef DBL_EPSILON
#define MY_EPSILON (10.0*DBL_EPSILON)
#else
#define MY_EPSILON (10.0*2.220446049250313e-16)
#endif
using namespace LAMMPS_NS;
using namespace FixConst;
/* ---------------------------------------------------------------------- */
/* Construct the Kokkos variant of fix eos/table/rx.

   Argument parsing is delegated to FixEOStableRX.  This constructor then
   sets the Kokkos bookkeeping fields, allocates the host and device table
   holders, creates the error/warning flag views, and mirrors the four
   per-species coefficient arrays of the base class (dHf, energyCorr,
   tempCorrCoeff, moleculeCorrCoeff) into dual views synced to DeviceType. */
template<class DeviceType>
FixEOStableRXKokkos<DeviceType>::FixEOStableRXKokkos(LAMMPS *lmp, int narg, char **arg) :
  FixEOStableRX(lmp, narg, arg)
{
  // Kokkos bookkeeping
  atomKK = (AtomKokkos *) atom;
  kokkosable = 1;
  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
  datamask_read = EMPTY_MASK;
  datamask_modify = EMPTY_MASK;

  // table holders; update_table makes setup() call create_kokkos_tables()
  update_table = 1;
  h_table = new TableHost();
  d_table = new TableDevice();

  k_error_flag = DAT::tdual_int_scalar("fix:error_flag");
  k_warning_flag = DAT::tdual_int_scalar("fix:warning_flag");

  // per-species coefficients: allocate the dual views ...
  k_dHf = DAT::tdual_float_1d("fix:dHf",nspecies);
  k_energyCorr = DAT::tdual_float_1d("fix:energyCorr",nspecies);
  k_tempCorrCoeff = DAT::tdual_float_1d("fix:tempCorrCoeff",nspecies);
  k_moleculeCorrCoeff = DAT::tdual_float_1d("fix:moleculeCorrCoeff",nspecies);

  // ... fill their host sides from the base-class arrays ...
  for (int ispecies = 0; ispecies < nspecies; ++ispecies) {
    k_dHf.h_view(ispecies) = dHf[ispecies];
    k_energyCorr.h_view(ispecies) = energyCorr[ispecies];
    k_tempCorrCoeff.h_view(ispecies) = tempCorrCoeff[ispecies];
    k_moleculeCorrCoeff.h_view(ispecies) = moleculeCorrCoeff[ispecies];
  }

  // ... flag the host copies as modified ...
  k_dHf.modify<LMPHostType>();
  k_energyCorr.modify<LMPHostType>();
  k_tempCorrCoeff.modify<LMPHostType>();
  k_moleculeCorrCoeff.modify<LMPHostType>();

  // ... bring the DeviceType side up to date ...
  k_dHf.sync<DeviceType>();
  k_energyCorr.sync<DeviceType>();
  k_tempCorrCoeff.sync<DeviceType>();
  k_moleculeCorrCoeff.sync<DeviceType>();

  // ... and keep the DeviceType views for use in the kernels
  d_dHf = k_dHf.view<DeviceType>();
  d_energyCorr = k_energyCorr.view<DeviceType>();
  d_tempCorrCoeff = k_tempCorrCoeff.view<DeviceType>();
  d_moleculeCorrCoeff = k_moleculeCorrCoeff.view<DeviceType>();
}
/* ---------------------------------------------------------------------- */
// Release the table holders allocated in the constructor.
// While copymode is set (it is raised around the parallel_for calls that are
// handed *this), destruction must leave h_table/d_table alone.
template<class DeviceType>
FixEOStableRXKokkos<DeviceType>::~FixEOStableRXKokkos()
{
  if (!copymode) {
    delete h_table;
    delete d_table;
  }
}
/* ---------------------------------------------------------------------- */
// Setup hook: unless restart_reset is set, folds (uCG - uCGnew) into uChem and
// zeroes both; then forward-communicates uChem/uCG/uCGnew and recomputes
// dpdTheta from uCond+uMech+uChem for atoms 0..nlocal-1 in the fix group.
// vflag is not used here.
template<class DeviceType>
void FixEOStableRXKokkos<DeviceType>::setup(int vflag)
{
// (Re)build the Kokkos copies of the tables when flagged stale
if (update_table)
create_kokkos_tables();
// *this is handed to parallel_for below; while copymode is set the
// destructor returns early and leaves h_table/d_table alone
copymode = 1;
int nlocal = atom->nlocal;
boltz = force->boltz;
// Cache DeviceType views of the per-atom arrays for use inside the functors
mask = atomKK->k_mask.view<DeviceType>();
uCond = atomKK->k_uCond.view<DeviceType>();
uMech = atomKK->k_uMech.view<DeviceType>();
uChem = atomKK->k_uChem.view<DeviceType>();
dpdTheta= atomKK->k_dpdTheta.view<DeviceType>();
uCG = atomKK->k_uCG.view<DeviceType>();
uCGnew = atomKK->k_uCGnew.view<DeviceType>();
dvector = atomKK->k_dvector.view<DeviceType>();
// uChem += uCG - uCGnew; uCG = uCGnew = 0 (TagFixEOStableRXSetup kernel)
if (!this->restart_reset) {
atomKK->sync(execution_space,MASK_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK);
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixEOStableRXSetup>(0,nlocal),*this);
atomKK->modified(execution_space,UCHEM_MASK | UCG_MASK | UCGNEW_MASK);
}
// Forward-communicate uChem, uCG and uCGnew; the data is synced to the host
// first and marked host-modified afterwards
// (presumably packed/unpacked by pack/unpack_forward_comm -- confirm in Comm)
atomKK->sync(Host,UCHEM_MASK | UCG_MASK | UCGNEW_MASK);
comm->forward_comm_fix(this);
atomKK->modified(Host,UCHEM_MASK | UCG_MASK | UCGNEW_MASK);
// Recompute dpdTheta from the total internal energy
atomKK->sync(execution_space,MASK_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK | DVECTOR_MASK);
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixEOStableRXTemperatureLookup>(0,nlocal),*this);
atomKK->modified(execution_space,DPDTHETA_MASK);
// Report any error/warning flag raised inside the kernels
error_check();
copymode = 0;
}
// Setup kernel for atom i: add (uCG - uCGnew) to uChem, then clear uCG and
// uCGnew. Atoms outside the fix group are left untouched.
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixEOStableRXKokkos<DeviceType>::operator()(TagFixEOStableRXSetup, const int &i) const {
  if (!(mask[i] & groupbit)) return;

  const double uCGchange = uCG[i] - uCGnew[i];
  uChem[i] += uCGchange;
  uCG[i] = 0.0;
  uCGnew[i] = 0.0;
}
// Temperature kernel for atom i: solve for dpdTheta[i] from the summed
// energies uCond+uMech+uChem. Atoms outside the fix group are skipped.
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixEOStableRXKokkos<DeviceType>::operator()(TagFixEOStableRXTemperatureLookup, const int &i) const {
  if (!(mask[i] & groupbit)) return;

  const double uTotal = uCond[i]+uMech[i]+uChem[i];
  temperature_lookup(i,uTotal,dpdTheta[i]);
}
/* ---------------------------------------------------------------------- */
// Initialization hook.
// With restart_reset set, dpdTheta is recomputed from the stored energies;
// otherwise the energies are initialized from the current dpdTheta
// (uMech from the table lookup, uCond = uChem = 0).
template<class DeviceType>
void FixEOStableRXKokkos<DeviceType>::init()
{
  if (update_table) create_kokkos_tables();

  // *this is handed to parallel_for below; see the destructor for copymode
  copymode = 1;

  const int nlocal = atom->nlocal;
  boltz = force->boltz;

  // DeviceType views of the per-atom arrays used by the kernels
  mask     = atomKK->k_mask.view<DeviceType>();
  uCond    = atomKK->k_uCond.view<DeviceType>();
  uMech    = atomKK->k_uMech.view<DeviceType>();
  uChem    = atomKK->k_uChem.view<DeviceType>();
  dpdTheta = atomKK->k_dpdTheta.view<DeviceType>();
  dvector  = atomKK->k_dvector.view<DeviceType>();

  // Both paths read the same set of per-atom arrays
  atomKK->sync(execution_space,MASK_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK | DVECTOR_MASK);

  if (this->restart_reset) {
    Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixEOStableRXTemperatureLookup>(0,nlocal),*this);
    atomKK->modified(execution_space,DPDTHETA_MASK);
  } else {
    Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixEOStableRXInit>(0,nlocal),*this);
    atomKK->modified(execution_space,UCOND_MASK | UMECH_MASK | UCHEM_MASK);
  }

  error_check();

  copymode = 0;
}
// Init kernel for atom i: flag a non-positive dpdTheta (error code 1), then
// set uMech to the table energy at dpdTheta[i] and zero uCond and uChem.
// Atoms outside the fix group are skipped.
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixEOStableRXKokkos<DeviceType>::operator()(TagFixEOStableRXInit, const int &i) const {
  if (!(mask[i] & groupbit)) return;

  if (dpdTheta[i] <= 0.0)
    k_error_flag.template view<DeviceType>()() = 1;

  double uLookup;
  energy_lookup(i,dpdTheta[i],uLookup);

  uCond[i] = 0.0;
  uMech[i] = uLookup;
  uChem[i] = 0.0;
}
/* ---------------------------------------------------------------------- */
// Post-integrate hook: recompute dpdTheta from uCond+uMech+uChem for atoms
// 0..nlocal-1 in the fix group and flag any non-positive result.
template<class DeviceType>
void FixEOStableRXKokkos<DeviceType>::post_integrate()
{
  if (update_table) create_kokkos_tables();

  // *this is handed to parallel_for below; see the destructor for copymode
  copymode = 1;

  boltz = force->boltz;
  const int nlocal = atom->nlocal;

  // DeviceType views of the per-atom arrays used by the kernel
  mask     = atomKK->k_mask.view<DeviceType>();
  dpdTheta = atomKK->k_dpdTheta.view<DeviceType>();
  uCond    = atomKK->k_uCond.view<DeviceType>();
  uMech    = atomKK->k_uMech.view<DeviceType>();
  uChem    = atomKK->k_uChem.view<DeviceType>();
  dvector  = atomKK->k_dvector.view<DeviceType>();

  atomKK->sync(execution_space,MASK_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK | DVECTOR_MASK);
  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixEOStableRXTemperatureLookup2>(0,nlocal),*this);
  atomKK->modified(execution_space,DPDTHETA_MASK);

  error_check();

  copymode = 0;
}
// Temperature kernel with validation for atom i: solve for dpdTheta[i] from
// uCond+uMech+uChem, then raise error code 1 if the result is not positive.
// Atoms outside the fix group are skipped.
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixEOStableRXKokkos<DeviceType>::operator()(TagFixEOStableRXTemperatureLookup2, const int &i) const {
  if (!(mask[i] & groupbit)) return;

  const double uTotal = uCond[i]+uMech[i]+uChem[i];
  temperature_lookup(i,uTotal,dpdTheta[i]);

  if (dpdTheta[i] <= 0.0)
    k_error_flag.template view<DeviceType>()() = 1;
}
/* ---------------------------------------------------------------------- */
// End-of-step hook: reverse-communicates uCG/uCGnew, folds (uCG - uCGnew) into
// uChem and zeroes both, forward-communicates uChem/uCG/uCGnew, then recomputes
// dpdTheta from uCond+uMech+uChem and flags non-positive results.
template<class DeviceType>
void FixEOStableRXKokkos<DeviceType>::end_of_step()
{
// (Re)build the Kokkos copies of the tables when flagged stale
if (update_table)
create_kokkos_tables();
// *this is handed to parallel_for below; while copymode is set the
// destructor returns early and leaves h_table/d_table alone
copymode = 1;
int nlocal = atom->nlocal;
boltz = force->boltz;
// Cache DeviceType views of the per-atom arrays for use inside the functors
mask = atomKK->k_mask.view<DeviceType>();
uCond = atomKK->k_uCond.view<DeviceType>();
uMech = atomKK->k_uMech.view<DeviceType>();
uChem = atomKK->k_uChem.view<DeviceType>();
dpdTheta= atomKK->k_dpdTheta.view<DeviceType>();
uCG = atomKK->k_uCG.view<DeviceType>();
uCGnew = atomKK->k_uCGnew.view<DeviceType>();
dvector = atomKK->k_dvector.view<DeviceType>();
// Reverse-communicate uCG and uCGnew; unpack_reverse_comm accumulates (+=)
// the received values into the host views
atomKK->sync(Host,UCG_MASK | UCGNEW_MASK);
comm->reverse_comm_fix(this);
atomKK->modified(Host,UCG_MASK | UCGNEW_MASK);
// uChem += uCG - uCGnew; uCG = uCGnew = 0 (TagFixEOStableRXSetup kernel)
atomKK->sync(execution_space,MASK_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK);
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixEOStableRXSetup>(0,nlocal),*this);
atomKK->modified(execution_space,UCHEM_MASK | UCG_MASK | UCGNEW_MASK);
// Forward-communicate the updated uChem, uCG and uCGnew via the host views
atomKK->sync(Host,UCHEM_MASK | UCG_MASK | UCGNEW_MASK);
comm->forward_comm_fix(this);
atomKK->modified(Host,UCHEM_MASK | UCG_MASK | UCGNEW_MASK);
// Recompute dpdTheta; this kernel variant also flags dpdTheta <= 0
atomKK->sync(execution_space,MASK_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DPDTHETA_MASK | DVECTOR_MASK);
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixEOStableRXTemperatureLookup2>(0,nlocal),*this);
atomKK->modified(execution_space,DPDTHETA_MASK);
// Report any error/warning flag raised inside the kernels
error_check();
copymode = 0;
}
/* ----------------------------------------------------------------------
calculate potential ui at temperature thetai

id     = atom index (second index into dvector)
thetai = temperature; taken by value and clamped in turn to every
species table's [lo,hi] range, so the caller's value is unchanged
ui     = output: sum over species of nMolecules*uTmp plus the molecule
correction terms, minus (nTotal+1.5)*boltz*thetai

Only tabstyle == LINEAR adds table contributions; for any other tabstyle
ui receives just the final -(nTotal+1.5)*boltz*thetai term.
------------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixEOStableRXKokkos<DeviceType>::energy_lookup(int id, double thetai, double &ui) const
{
int itable, nPG;
double fraction, uTmp, nMolecules, nTotal, nTotalPG;
// Species whose |moleculeCorrCoeff| is at or below this are not counted in nPG
double tolerance = 1.0e-10;
ui = 0.0;
nTotal = 0.0;
nTotalPG = 0.0;
nPG = 0;
// With rx_flag set: nTotal = sum of dvector(:,id); nPG / nTotalPG count and sum
// the species that carry a non-negligible molecule correction coefficient.
// Without rx_flag: nTotal is fixed at 1 and nPG stays 0.
if (rx_flag) {
for (int ispecies = 0; ispecies < nspecies; ispecies++ ) {
nTotal += dvector(ispecies,id);
if (fabs(d_moleculeCorrCoeff[ispecies]) > tolerance) {
nPG++;
nTotalPG += dvector(ispecies,id);
}
}
} else {
nTotal = 1.0;
}
for(int ispecies=0;ispecies<nspecies;ispecies++){
//Table *tb = &tables[ispecies];
//thetai = MAX(thetai,tb->lo);
// The clamp is cumulative: the clamped thetai carries over to later species
// and to the final term below
thetai = MAX(thetai,d_table_const.lo(ispecies));
//thetai = MIN(thetai,tb->hi);
thetai = MIN(thetai,d_table_const.hi(ispecies));
if (tabstyle == LINEAR) {
// Linear interpolation: e(itable) + fraction*de(itable)
// NOTE(review): create_kokkos_tables() gives de only tablength-1 columns while
// r and e get tablength; if thetai == hi maps to itable == tablength-1 the de
// read would be one past the end -- confirm the invdelta/hi relationship.
//itable = static_cast<int> ((thetai - tb->lo) * tb->invdelta);
itable = static_cast<int> ((thetai - d_table_const.lo(ispecies)) * d_table_const.invdelta(ispecies));
//fraction = (thetai - tb->r[itable]) * tb->invdelta;
fraction = (thetai - d_table_const.r(ispecies,itable)) * d_table_const.invdelta(ispecies);
//uTmp = tb->e[itable] + fraction*tb->de[itable];
uTmp = d_table_const.e(ispecies,itable) + fraction*d_table_const.de(ispecies,itable);
uTmp += d_dHf[ispecies];
uTmp += d_tempCorrCoeff[ispecies]*thetai; // temperature correction
uTmp += d_energyCorr[ispecies]; // energy correction
// The molecule correction goes straight into ui and is not scaled by nMolecules
if (nPG > 0) ui += d_moleculeCorrCoeff[ispecies]*nTotalPG/double(nPG); // molecule correction
if (rx_flag) nMolecules = dvector(ispecies,id);
else nMolecules = 1.0;
ui += nMolecules*uTmp;
}
}
// Uses the thetai left over from the last species' clamp
ui = ui - double(nTotal+1.5)*boltz*thetai;
}
/* ----------------------------------------------------------------------
calculate temperature thetai at energy ui

Secant iteration on f(T) = energy_lookup(id,T) - ui.
id     = atom index, forwarded to energy_lookup
ui     = target energy
thetai = in: starting guess; out: computed temperature

Flags written to the DeviceType views (reported later by error_check()):
k_error_flag = 2 if a NaN is detected, 3 if maxit iterations are used up;
k_warning_flag = 1 if the iteration stalls (|f2-f1| < MY_EPSILON).
------------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixEOStableRXKokkos<DeviceType>::temperature_lookup(int id, double ui, double &thetai) const
{
//Table *tb = &tables[0];
int it;
double t1,t2,u1,u2,f1,f2;
double maxit = 100;
double temp;
// Relative offset used to generate the second secant point
double delta = 0.001;
// Convergence threshold on the change in temperature between iterations
double tolerance = 1.0e-10;
// Bounds come from table 0 only and are held as int here
int lo = d_table_const.lo(0);
int hi = d_table_const.hi(0);
// Store the current thetai in t1
t1 = MAX(thetai,lo);
t1 = MIN(t1,hi);
// When starting at the upper bound, offset the second point downward
if(t1==hi) delta = -delta;
// Compute u1 at thetai
energy_lookup(id,t1,u1);
// Compute f1
f1 = u1 - ui;
// Compute guess of t2
t2 = (1.0 + delta)*t1;
// Compute u2 at t2
energy_lookup(id,t2,u2);
// Compute f2
f2 = u2 - ui;
// Apply the Secant Method
for(it=0; it<maxit; it++){
// Stalled: f1 and f2 are (nearly) equal, so the secant step is ill-defined.
// Return t1 clamped to [lo,hi] and raise the warning flag.
if(fabs(f2-f1) < MY_EPSILON){
// NOTE(review): a NaN in f1 or f2 makes the comparison above false, so this
// isnan test looks unreachable -- confirm.
if(isnan(f1) || isnan(f2)) k_error_flag.template view<DeviceType>()() = 2;
temp = t1;
temp = MAX(temp,lo);
temp = MIN(temp,hi);
k_warning_flag.template view<DeviceType>()() = 1;
break;
}
// Secant update; converged once the step is smaller than tolerance
temp = t2 - f2*(t2-t1)/(f2-f1);
if(fabs(temp-t2) < tolerance) break;
f1 = f2;
t1 = t2;
t2 = temp;
energy_lookup(id,t2,u2);
f2 = u2 - ui;
}
// Loop ran to completion without a break: distinguish NaN from non-convergence
if(it==maxit){
if(isnan(f1) || isnan(f2) || isnan(ui) || isnan(thetai) || isnan(t1) || isnan(t2))
k_error_flag.template view<DeviceType>()() = 2;
else
k_error_flag.template view<DeviceType>()() = 3;
}
thetai = temp;
}
/* ---------------------------------------------------------------------- */
// Pack uChem, uCG and uCGnew (in that order) for the n atoms indexed by list
// into buf, reading from the host views. Returns the number of doubles
// written. pbc_flag and pbc are not used.
template<class DeviceType>
int FixEOStableRXKokkos<DeviceType>::pack_forward_comm(int n, int *list, double *buf, int pbc_flag, int *pbc)
{
  HAT::t_efloat_1d h_uChem = atomKK->k_uChem.h_view;
  HAT::t_efloat_1d h_uCG = atomKK->k_uCG.h_view;
  HAT::t_efloat_1d h_uCGnew = atomKK->k_uCGnew.h_view;

  int nbuf = 0;
  for (int k = 0; k < n; ++k) {
    const int iatom = list[k];
    buf[nbuf]   = h_uChem[iatom];
    buf[nbuf+1] = h_uCG[iatom];
    buf[nbuf+2] = h_uCGnew[iatom];
    nbuf += 3;
  }
  return nbuf;
}
/* ---------------------------------------------------------------------- */
// Unpack uChem, uCG and uCGnew (in that order) from buf into the host views
// for the n consecutive atoms starting at index first, overwriting them.
template<class DeviceType>
void FixEOStableRXKokkos<DeviceType>::unpack_forward_comm(int n, int first, double *buf)
{
  HAT::t_efloat_1d h_uChem = atomKK->k_uChem.h_view;
  HAT::t_efloat_1d h_uCG = atomKK->k_uCG.h_view;
  HAT::t_efloat_1d h_uCGnew = atomKK->k_uCGnew.h_view;

  const int end = first + n;
  int nbuf = 0;
  for (int iatom = first; iatom < end; ++iatom) {
    h_uChem[iatom]  = buf[nbuf];
    h_uCG[iatom]    = buf[nbuf+1];
    h_uCGnew[iatom] = buf[nbuf+2];
    nbuf += 3;
  }
}
/* ---------------------------------------------------------------------- */
// Pack uCG and uCGnew (in that order) from the host views for the n
// consecutive atoms starting at index first. Returns the number of doubles
// written to buf.
template<class DeviceType>
int FixEOStableRXKokkos<DeviceType>::pack_reverse_comm(int n, int first, double *buf)
{
  HAT::t_efloat_1d h_uCG = atomKK->k_uCG.h_view;
  HAT::t_efloat_1d h_uCGnew = atomKK->k_uCGnew.h_view;

  const int end = first + n;
  int nbuf = 0;
  for (int iatom = first; iatom < end; ++iatom) {
    buf[nbuf]   = h_uCG[iatom];
    buf[nbuf+1] = h_uCGnew[iatom];
    nbuf += 2;
  }
  return nbuf;
}
/* ---------------------------------------------------------------------- */
// Accumulate (+=) the uCG and uCGnew values in buf into the host views for the
// n atoms indexed by list.
template<class DeviceType>
void FixEOStableRXKokkos<DeviceType>::unpack_reverse_comm(int n, int *list, double *buf)
{
  HAT::t_efloat_1d h_uCG = atomKK->k_uCG.h_view;
  HAT::t_efloat_1d h_uCGnew = atomKK->k_uCGnew.h_view;

  int nbuf = 0;
  for (int k = 0; k < n; ++k) {
    const int iatom = list[k];
    h_uCG[iatom]    += buf[nbuf];
    h_uCGnew[iatom] += buf[nbuf+1];
    nbuf += 2;
  }
}
/* ---------------------------------------------------------------------- */
// Pull the error and warning flags written by the kernels back to the host and
// act on them: error codes 1/2/3 call error->one() with the matching message;
// a set warning flag issues a warning and is then cleared on host and device.
template<class DeviceType>
void FixEOStableRXKokkos<DeviceType>::error_check()
{
  k_error_flag.template modify<DeviceType>();
  k_error_flag.template sync<LMPHostType>();

  switch (k_error_flag.h_view()) {
  case 1:
    error->one(FLERR,"Internal temperature <= zero");
    break;
  case 2:
    error->one(FLERR,"NaN detected in secant solver.");
    break;
  case 3:
    error->one(FLERR,"Maxit exceeded in secant solver.");
    break;
  default:
    break;
  }

  k_warning_flag.template modify<DeviceType>();
  k_warning_flag.template sync<LMPHostType>();

  if (!k_warning_flag.h_view()) return;

  error->warning(FLERR,"Secant solver did not converge because table bounds were exceeded.");

  // Reset the flag and push the cleared value back to the device copy
  k_warning_flag.h_view() = 0;
  k_warning_flag.template modify<LMPHostType>();
  k_warning_flag.template sync<DeviceType>();
}
/* ---------------------------------------------------------------------- */
// Build the Kokkos copies of the base-class tables: allocate host/device
// views, fill the host side from tables[], deep-copy to the device, and point
// d_table_const at the device views. Clears update_table when done.
template<class DeviceType>
void FixEOStableRXKokkos<DeviceType>::create_kokkos_tables()
{
  const int tlm1 = tablength-1;

  // Per-table scalars
  memory->create_kokkos(d_table->lo,h_table->lo,ntables,"Table::lo");
  memory->create_kokkos(d_table->hi,h_table->hi,ntables,"Table::hi");
  memory->create_kokkos(d_table->invdelta,h_table->invdelta,ntables,"Table::invdelta");

  // Per-table arrays exist only for the LINEAR style; de has one column fewer
  if (tabstyle == LINEAR) {
    memory->create_kokkos(d_table->r,h_table->r,ntables,tablength,"Table::r");
    memory->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e");
    memory->create_kokkos(d_table->de,h_table->de,ntables,tlm1,"Table::de");
  }

  // Column counts are taken from the views, so they are zero when the
  // arrays above were not allocated
  const int ncol_r = h_table->r.dimension_1();
  const int ncol_e = h_table->e.dimension_1();
  const int ncol_de = h_table->de.dimension_1();

  for (int itable = 0; itable < ntables; ++itable) {
    Table *src = &tables[itable];

    h_table->lo[itable] = src->lo;
    h_table->hi[itable] = src->hi;
    h_table->invdelta[itable] = src->invdelta;

    for (int col = 0; col < ncol_r; ++col)
      h_table->r(itable,col) = src->r[col];
    for (int col = 0; col < ncol_e; ++col)
      h_table->e(itable,col) = src->e[col];
    for (int col = 0; col < ncol_de; ++col)
      h_table->de(itable,col) = src->de[col];
  }

  // Host -> device
  Kokkos::deep_copy(d_table->lo,h_table->lo);
  Kokkos::deep_copy(d_table->hi,h_table->hi);
  Kokkos::deep_copy(d_table->invdelta,h_table->invdelta);
  Kokkos::deep_copy(d_table->r,h_table->r);
  Kokkos::deep_copy(d_table->e,h_table->e);
  Kokkos::deep_copy(d_table->de,h_table->de);

  // Views the kernels actually read
  d_table_const.lo = d_table->lo;
  d_table_const.hi = d_table->hi;
  d_table_const.invdelta = d_table->invdelta;
  d_table_const.r = d_table->r;
  d_table_const.e = d_table->e;
  d_table_const.de = d_table->de;

  update_table = 0;
}
/* ---------------------------------------------------------------------- */
// Explicit instantiations: the member templates are defined in this .cpp file,
// so the specializations used elsewhere have to be instantiated here.
namespace LAMMPS_NS {
template class FixEOStableRXKokkos<LMPDeviceType>;
// The host specialization is instantiated separately only in CUDA builds
// (presumably LMPDeviceType and LMPHostType coincide otherwise -- confirm in kokkos_type.h)
#ifdef KOKKOS_HAVE_CUDA
template class FixEOStableRXKokkos<LMPHostType>;
#endif
}

View File

@ -0,0 +1,212 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
FixStyle(eos/table/rx/kk,FixEOStableRXKokkos<LMPDeviceType>)
FixStyle(eos/table/rx/kk/device,FixEOStableRXKokkos<LMPDeviceType>)
FixStyle(eos/table/rx/kk/host,FixEOStableRXKokkos<LMPHostType>)
#else
#ifndef LMP_FIX_EOS_TABLE_RX_KOKKOS_H
#define LMP_FIX_EOS_TABLE_RX_KOKKOS_H
#include "fix_eos_table_rx.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
// Empty tag types that select which FixEOStableRXKokkos::operator() overload a
// Kokkos::RangePolicy dispatches to.
struct TagFixEOStableRXInit{};
struct TagFixEOStableRXSetup{};
struct TagFixEOStableRXTemperatureLookup{};
struct TagFixEOStableRXTemperatureLookup2{};
// Kokkos variant of FixEOStableRX, templated on the Kokkos device type.
// The object itself is the functor passed to Kokkos::parallel_for; the tagged
// operator() overloads below are the per-atom kernels.
template<class DeviceType>
class FixEOStableRXKokkos : public FixEOStableRX {
public:
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
typedef EV_FLOAT value_type;
FixEOStableRXKokkos(class LAMMPS *, int, char **);
virtual ~FixEOStableRXKokkos();
// Fix hooks; each one rebuilds the Kokkos tables first if update_table is set
void setup(int);
void init();
void post_integrate();
void end_of_step();
// Init kernel: uMech from the table lookup at dpdTheta, uCond = uChem = 0
KOKKOS_INLINE_FUNCTION
void operator()(TagFixEOStableRXInit, const int&) const;
// Setup kernel: uChem += uCG - uCGnew, then uCG = uCGnew = 0
KOKKOS_INLINE_FUNCTION
void operator()(TagFixEOStableRXSetup, const int&) const;
// Temperature kernel: dpdTheta from uCond+uMech+uChem
KOKKOS_INLINE_FUNCTION
void operator()(TagFixEOStableRXTemperatureLookup, const int&) const;
// Same as above, and additionally flags dpdTheta <= 0
KOKKOS_INLINE_FUNCTION
void operator()(TagFixEOStableRXTemperatureLookup2, const int&) const;
// (atom index, temperature, out: energy)
KOKKOS_INLINE_FUNCTION
void energy_lookup(int, double, double &) const;
// (atom index, energy, in/out: temperature)
KOKKOS_INLINE_FUNCTION
void temperature_lookup(int, double, double &) const;
protected:
//struct Table {
// int ninput;
// double lo,hi;
// double *rfile,*efile;
// double *e2file;
// double delta,invdelta,deltasq6;
// double *r,*e,*de,*e2;
//};
//Table *tables, *tables2;
/*struct TableDeviceConst {
typename ArrayTypes<DeviceType>::t_int_1d_randomread lo,hi;
typename ArrayTypes<DeviceType>::t_ffloat_1d_randomread invdelta;
typename ArrayTypes<DeviceType>::t_ffloat_2d_randomread r,e,de;
};*/
// It's faster not to use texture fetch if the number of tables is less than 32!
// Views read by the kernels. NOTE(review): lo/hi are integer views, so the
// table bounds copied into them are stored as int -- confirm this is intended.
struct TableDeviceConst {
typename ArrayTypes<DeviceType>::t_int_1d lo,hi;
typename ArrayTypes<DeviceType>::t_ffloat_1d invdelta;
typename ArrayTypes<DeviceType>::t_ffloat_2d_randomread r,e,de;
};
// Device-side table storage, filled by create_kokkos_tables()
struct TableDevice {
typename ArrayTypes<DeviceType>::t_int_1d lo,hi;
typename ArrayTypes<DeviceType>::t_ffloat_1d invdelta;
typename ArrayTypes<DeviceType>::t_ffloat_2d r,e,de;
};
// Host-side staging copy of the tables
struct TableHost {
typename ArrayTypes<LMPHostType>::t_int_1d lo,hi;
typename ArrayTypes<LMPHostType>::t_ffloat_1d invdelta;
typename ArrayTypes<LMPHostType>::t_ffloat_2d r,e,de;
};
TableDeviceConst d_table_const;
// Owned: allocated in the constructor, deleted in the destructor
TableDevice* d_table;
TableHost* h_table;
int **tabindex;
// Copy of force->boltz, refreshed at the start of each hook
double boltz;
void allocate();
// Syncs the kernel-written flags to the host and raises errors/warnings
void error_check();
// Nonzero while the Kokkos tables still need to be (re)built
int update_table;
void create_kokkos_tables();
// Per-species coefficients mirrored from the base class in the constructor
DAT::tdual_float_1d k_dHf,k_energyCorr,k_tempCorrCoeff,k_moleculeCorrCoeff;
typename AT::t_float_1d d_dHf,d_energyCorr,d_tempCorrCoeff,d_moleculeCorrCoeff;
// Per-atom views, re-fetched from atomKK at the start of each hook
typename AT::t_int_1d mask;
typename AT::t_efloat_1d uCond,uMech,uChem,uCG,uCGnew,rho,dpdTheta,duChem;
typename AT::t_float_2d dvector;
// 1 = temperature <= 0, 2 = NaN in secant solver, 3 = iteration limit reached
DAT::tdual_int_scalar k_error_flag;
// Nonzero when the secant solver stalled
DAT::tdual_int_scalar k_warning_flag;
// Communication callbacks; these operate on the host views
int pack_reverse_comm(int, int, double *);
void unpack_reverse_comm(int, int *, double *);
int pack_forward_comm(int , int *, double *, int, int *);
void unpack_forward_comm(int , int , double *);
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: Illegal ... command
Self-explanatory. Check the input script syntax and compare to the
documentation for the command. You can use -echo screen as a
command-line option when running LAMMPS to see the offending line.
E: FixEOStableRXKokkos requires a fix rx command.
The fix rx command must come before the pair style command in the input file
E: There are no rx species specified
There must be at least one species specified through the fix rx command
E: Invalid eos/table/rx length
The eos/table/rx table must have more than one entry.
E: eos/table/rx values are not increasing
The equation-of-state must be an increasing function
E: FixEOStableRX requires atom_style with internal temperature and energies (e.g. dpd)
Self-explanatory.
E: Internal temperature <= zero.
Self-explanatory.
E: Cannot open eos table/rx potential file %s
Self-explanatory.
E: Incorrect format in eos table/rx file
Self-explanatory.
E: Cannot open file %s
Self-explanatory.
E: Did not find keyword in table file
Self-explanatory.
E: Illegal fix eos/table/rx command
Incorrect number of arguments specified for the fix eos/table/rx command.
E: Invalid keyword in fix eos/table/rx parameters
Self-explanatory.
E: The number of columns in fix eos/table/rx does not match the number of species.
Self-explanatory. Check format for fix eos/table/rx file.
E: fix eos/table/rx parameters did not set N
The number of table entries was not set in the eos/table/rx file
W: Secant solver did not converge because table bounds were exceeded
The secant solver failed to converge, resulting in the lower or upper table bound temperature being returned
E: NaN detected in secant solver.
Self-explanatory.
E: Maxit exceeded in secant solver
The maximum number of iterations was exceeded in the secant solver
*/

View File

@ -0,0 +1,72 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include <stdlib.h>
#include <string.h>
#include "fix_property_atom_kokkos.h"
#include "atom_kokkos.h"
#include "comm.h"
#include "memory.h"
#include "error.h"
#include "update.h"
using namespace LAMMPS_NS;
using namespace FixConst;
enum{MOLECULE,CHARGE,RMASS,INTEGER,DOUBLE};
/* ---------------------------------------------------------------------- */
// Construct the Kokkos variant of fix property/atom. The base-class constructor
// receives the arguments unchanged; this one only caches the Kokkos atom pointer.
FixPropertyAtomKokkos::FixPropertyAtomKokkos(LAMMPS *lmp, int narg, char **arg) :
FixPropertyAtom(lmp, narg, arg)
{
// grow_arrays() needs the AtomKokkos interface (k_dvector)
atomKK = (AtomKokkos *) atom;
}
/* ----------------------------------------------------------------------
allocate atom-based arrays
initialize new values to 0,
since AtomVec class won't do it as atoms are added,
e.g. in create_atom() or data_atom()
------------------------------------------------------------------------- */
void FixPropertyAtomKokkos::grow_arrays(int nmax)
{
for (int m = 0; m < nvalue; m++) {
if (style[m] == MOLECULE) {
memory->grow(atom->molecule,nmax,"atom:molecule");
size_t nbytes = (nmax-nmax_old) * sizeof(tagint);
memset(&atom->molecule[nmax_old],0,nbytes);
} else if (style[m] == CHARGE) {
memory->grow(atom->q,nmax,"atom:q");
size_t nbytes = (nmax-nmax_old) * sizeof(double);
memset(&atom->q[nmax_old],0,nbytes);
} else if (style[m] == RMASS) {
memory->grow(atom->rmass,nmax,"atom:rmass");
size_t nbytes = (nmax-nmax_old) * sizeof(double);
memset(&atom->rmass[nmax_old],0,nbytes);
} else if (style[m] == INTEGER) {
memory->grow(atom->ivector[index[m]],nmax,"atom:ivector");
size_t nbytes = (nmax-nmax_old) * sizeof(int);
memset(&atom->ivector[index[m]][nmax_old],0,nbytes);
} else if (style[m] == DOUBLE) {
memory->grow_kokkos(atomKK->k_dvector,atomKK->dvector,atomKK->k_dvector.dimension_0(),nmax,
"atom:dvector");
//memory->grow(atom->dvector[index[m]],nmax,"atom:dvector");
//size_t nbytes = (nmax-nmax_old) * sizeof(double);
//memset(&atom->dvector[index[m]][nmax_old],0,nbytes);
}
}
nmax_old = nmax;
}

View File

@ -0,0 +1,90 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
FixStyle(property/atom/kk,FixPropertyAtomKokkos)
#else
#ifndef LMP_FIX_PROPERTY_ATOM_KOKKOS_H
#define LMP_FIX_PROPERTY_ATOM_KOKKOS_H
#include "fix_property_atom.h"
namespace LAMMPS_NS {
// Kokkos variant of FixPropertyAtom; it differs from the base class only in
// how grow_arrays() grows the per-atom storage.
class FixPropertyAtomKokkos : public FixPropertyAtom {
public:
FixPropertyAtomKokkos(class LAMMPS *, int, char **);
virtual ~FixPropertyAtomKokkos() {}
// Grow the per-atom arrays managed by this fix to the given length
void grow_arrays(int);
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: Illegal ... command
Self-explanatory. Check the input script syntax and compare to the
documentation for the command. You can use -echo screen as a
command-line option when running LAMMPS to see the offending line.
E: Fix property/atom mol when atom_style already has molecule attribute
Self-explanatory.
E: Fix property/atom cannot specify mol twice
Self-explanatory.
E: Fix property/atom q when atom_style already has charge attribute
Self-explanatory.
E: Fix property/atom cannot specify q twice
Self-explanatory.
E: Fix property/atom vector name already exists
The name for an integer or floating-point vector must be unique.
W: Fix property/atom mol or charge w/out ghost communication
A model typically needs these properties defined for ghost atoms.
E: Atom style was redefined after using fix property/atom
This is not allowed.
E: Incorrect %s format in data file
A section of the data file being read by fix property/atom does
not have the correct number of values per line.
E: Too few lines in %s section of data file
Self-explanatory.
E: Invalid atom ID in %s section of data file
An atom in a section of the data file being read by fix property/atom
has an invalid atom ID that is <= 0 or > the maximum existing atom ID.
*/

2279
src/KOKKOS/fix_rx_kokkos.cpp Normal file

File diff suppressed because it is too large Load Diff

282
src/KOKKOS/fix_rx_kokkos.h Normal file
View File

@ -0,0 +1,282 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
FixStyle(rx/kk,FixRxKokkos<LMPDeviceType>)
FixStyle(rx/kk/device,FixRxKokkos<LMPDeviceType>)
FixStyle(rx/kk/host,FixRxKokkos<LMPHostType>)
#else
#ifndef LMP_FIX_RX_KOKKOS_H
#define LMP_FIX_RX_KOKKOS_H
#include "fix_rx.h"
#include "pair_dpd_fdt_energy_kokkos.h"
#include "kokkos_type.h"
#include "neigh_list.h"
#include "neigh_list_kokkos.h"
namespace LAMMPS_NS {
// Empty tag types used to select FixRxKokkos::operator() overloads.
// The templated tags carry compile-time flags as template arguments.
struct Tag_FixRxKokkos_zeroTemperatureViews {};
struct Tag_FixRxKokkos_zeroCounterViews {};
template <int WT_FLAG, bool NEWTON_PAIR, int NEIGHFLAG>
struct Tag_FixRxKokkos_firstPairOperator {};
template <int WT_FLAG, int LOCAL_TEMP_FLAG>
struct Tag_FixRxKokkos_2ndPairOperator {};
template <bool ZERO_RATES>
struct Tag_FixRxKokkos_solveSystems {};
// Four integer counters (steps, iterations, function evaluations, failures)
// that start at zero and can be summed field by field with operator+=.
// A volatile overload of operator+= is provided alongside the plain one.
struct s_CounterType
{
  int nSteps, nIters, nFuncs, nFails;

  // All counters start at zero.
  KOKKOS_INLINE_FUNCTION
  s_CounterType() : nSteps(0), nIters(0), nFuncs(0), nFails(0) {}

  // Field-wise accumulation.
  KOKKOS_INLINE_FUNCTION
  s_CounterType& operator+=(const s_CounterType &other)
  {
    nSteps = nSteps + other.nSteps;
    nIters = nIters + other.nIters;
    nFuncs = nFuncs + other.nFuncs;
    nFails = nFails + other.nFails;
    return *this;
  }

  // Field-wise accumulation for volatile-qualified operands.
  KOKKOS_INLINE_FUNCTION
  volatile s_CounterType& operator+=(const volatile s_CounterType &other) volatile
  {
    nSteps = nSteps + other.nSteps;
    nIters = nIters + other.nIters;
    nFuncs = nFuncs + other.nFuncs;
    nFails = nFails + other.nFails;
    return *this;
  }
};
typedef struct s_CounterType CounterType;
template <typename DeviceType>
class FixRxKokkos : public FixRX {
public:
typedef ArrayTypes<DeviceType> AT;
FixRxKokkos(class LAMMPS *, int, char **);
virtual ~FixRxKokkos();
virtual void init();
void init_list(int, class NeighList *);
void post_constructor();
virtual void setup_pre_force(int);
virtual void pre_force(int);
// Define a value_type here for the reduction operator on CounterType.
typedef CounterType value_type;
KOKKOS_INLINE_FUNCTION
void operator()(Tag_FixRxKokkos_zeroCounterViews, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(Tag_FixRxKokkos_zeroTemperatureViews, const int&) const;
template <int WT_FLAG, bool NEWTON_PAIR, int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void operator()(Tag_FixRxKokkos_firstPairOperator<WT_FLAG,NEWTON_PAIR,NEIGHFLAG>, const int&) const;
template <int WT_FLAG, int LOCAL_TEMP_FLAG>
KOKKOS_INLINE_FUNCTION
void operator()(Tag_FixRxKokkos_2ndPairOperator<WT_FLAG,LOCAL_TEMP_FLAG>, const int&) const;
template <bool ZERO_RATES>
KOKKOS_INLINE_FUNCTION
void operator()(Tag_FixRxKokkos_solveSystems<ZERO_RATES>, const int&, CounterType&) const;
//protected:
PairDPDfdtEnergyKokkos<DeviceType>* pairDPDEKK;
double VDPD;
double boltz;
double t_stop;
template <typename T, int stride = 1>
struct StridedArrayType
{
typedef T value_type;
enum { Stride = stride };
value_type *m_data;
KOKKOS_INLINE_FUNCTION
StridedArrayType() : m_data(NULL) {}
KOKKOS_INLINE_FUNCTION
StridedArrayType(value_type *ptr) : m_data(ptr) {}
KOKKOS_INLINE_FUNCTION value_type& operator()(const int idx) { return m_data[Stride*idx]; }
KOKKOS_INLINE_FUNCTION const value_type& operator()(const int idx) const { return m_data[Stride*idx]; }
KOKKOS_INLINE_FUNCTION value_type& operator[](const int idx) { return m_data[Stride*idx]; }
KOKKOS_INLINE_FUNCTION const value_type& operator[](const int idx) const { return m_data[Stride*idx]; }
};
template <int stride = 1>
struct UserRHSDataKokkos
{
StridedArrayType<double,1> kFor;
StridedArrayType<double,1> rxnRateLaw;
};
void solve_reactions(const int vflag, const bool isPreForce);
int rhs (double, const double *, double *, void *) const;
int rhs_dense (double, const double *, double *, void *) const;
int rhs_sparse(double, const double *, double *, void *) const;
template <typename VectorType, typename UserDataType>
KOKKOS_INLINE_FUNCTION
int k_rhs (double, const VectorType&, VectorType&, UserDataType& ) const;
template <typename VectorType, typename UserDataType>
KOKKOS_INLINE_FUNCTION
int k_rhs_dense (double, const VectorType&, VectorType&, UserDataType& ) const;
template <typename VectorType, typename UserDataType>
KOKKOS_INLINE_FUNCTION
int k_rhs_sparse(double, const VectorType&, VectorType&, UserDataType& ) const;
//!< Classic Runge-Kutta 4th-order stepper.
void rk4(const double t_stop, double *y, double *rwork, void *v_params) const;
//!< Runge-Kutta-Fehlberg ODE Solver.
void rkf45(const int neq, const double t_stop, double *y, double *rwork, void *v_params, CounterType& counter) const;
//!< Runge-Kutta-Fehlberg ODE stepper function.
void rkf45_step (const int neq, const double h, double y[], double y_out[],
double rwk[], void *) const;
//!< Initial step size estimation for the Runge-Kutta-Fehlberg ODE solver.
int rkf45_h0 (const int neq, const double t, const double t_stop,
const double hmin, const double hmax,
double& h0, double y[], double rwk[], void *v_params) const;
//!< Classic Runge-Kutta 4th-order stepper.
template <typename VectorType, typename UserDataType>
KOKKOS_INLINE_FUNCTION
void k_rk4(const double t_stop, VectorType& y, VectorType& rwork, UserDataType& userData) const;
//!< Runge-Kutta-Fehlberg ODE Solver.
template <typename VectorType, typename UserDataType>
KOKKOS_INLINE_FUNCTION
void k_rkf45(const int neq, const double t_stop, VectorType& y, VectorType& rwork, UserDataType& userData, CounterType& counter) const;
//!< Runge-Kutta-Fehlberg ODE stepper function.
template <typename VectorType, typename UserDataType>
KOKKOS_INLINE_FUNCTION
void k_rkf45_step (const int neq, const double h, VectorType& y, VectorType& y_out,
VectorType& rwk, UserDataType& userData) const;
//!< Initial step size estimation for the Runge-Kutta-Fehlberg ODE solver.
template <typename VectorType, typename UserDataType>
KOKKOS_INLINE_FUNCTION
int k_rkf45_h0 (const int neq, const double t, const double t_stop,
const double hmin, const double hmax,
double& h0, VectorType& y, VectorType& rwk, UserDataType& userData) const;
//!< ODE Solver diagnostics.
void odeDiagnostics(void);
//!< Special counters per-ode.
int *diagnosticCounterPerODEnSteps;
int *diagnosticCounterPerODEnFuncs;
DAT::tdual_int_1d k_diagnosticCounterPerODEnSteps;
DAT::tdual_int_1d k_diagnosticCounterPerODEnFuncs;
//typename ArrayTypes<DeviceType>::t_int_1d d_diagnosticCounterPerODEnSteps;
//typename ArrayTypes<DeviceType>::t_int_1d d_diagnosticCounterPerODEnFuncs;
typename AT::t_int_1d d_diagnosticCounterPerODEnSteps;
typename AT::t_int_1d d_diagnosticCounterPerODEnFuncs;
HAT::t_int_1d h_diagnosticCounterPerODEnSteps;
HAT::t_int_1d h_diagnosticCounterPerODEnFuncs;
template <typename KokkosDeviceType>
struct KineticsType
{
// Arrhenius rate coefficients.
typename ArrayTypes<KokkosDeviceType>::t_float_1d Arr, nArr, Ea;
// Dense versions.
typename ArrayTypes<KokkosDeviceType>::t_float_2d stoich, stoichReactants, stoichProducts;
// Sparse versions.
typename ArrayTypes<KokkosDeviceType>::t_int_2d nuk, inu;
typename ArrayTypes<KokkosDeviceType>::t_float_2d nu;
typename ArrayTypes<KokkosDeviceType>::t_int_1d isIntegral;
};
//! Kokkos versions of the kinetics data.
// One copy instantiated for the host space and one for this fix's DeviceType.
KineticsType<LMPHostType> h_kineticsData;
KineticsType<DeviceType> d_kineticsData;
// NOTE(review): presumably set when the Kokkos copies above must be rebuilt
// by create_kinetics_data() -- confirm against the implementation.
bool update_kinetics_data;
void create_kinetics_data(void);
// Need a dual-view and device-view for dpdThetaLocal and sumWeights since they're used in several callbacks.
DAT::tdual_efloat_1d k_dpdThetaLocal, k_sumWeights;
//typename ArrayTypes<DeviceType>::t_efloat_1d d_dpdThetaLocal, d_sumWeights;
typename AT::t_efloat_1d d_dpdThetaLocal, d_sumWeights;
HAT::t_efloat_1d h_dpdThetaLocal, h_sumWeights;
// Views for per-atom data, the cutoff table and the neighbor list
// (d_ prefix = view in DeviceType's space, k_ prefix = dual view).
typename ArrayTypes<DeviceType>::t_x_array_randomread d_x ;
typename ArrayTypes<DeviceType>::t_int_1d_randomread d_type ;
typename ArrayTypes<DeviceType>::t_efloat_1d d_dpdTheta;
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq;
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
//double **h_cutsq;
typename ArrayTypes<DeviceType>::t_neighbors_2d d_neighbors;
typename ArrayTypes<DeviceType>::t_int_1d d_ilist ;
typename ArrayTypes<DeviceType>::t_int_1d d_numneigh ;
typename ArrayTypes<DeviceType>::t_float_2d d_dvector;
typename ArrayTypes<DeviceType>::t_int_1d d_mask ;
// Scratch storage view and its recorded size
// (NOTE(review): units of scratchSpaceSize, bytes vs. elements, not visible here).
typename ArrayTypes<DeviceType>::t_double_1d d_scratchSpace;
size_t scratchSpaceSize;
// Error flag for any failures.
DAT::tdual_int_scalar k_error_flag;
// Local-temperature computation, specialized at compile time on the
// four integer/boolean flags in the template parameter list.
template <int WT_FLAG, int LOCAL_TEMP_FLAG, bool NEWTON_PAIR, int NEIGHFLAG>
void computeLocalTemperature();
// Communication pack/unpack callbacks.
int pack_reverse_comm(int, int, double *);
void unpack_reverse_comm(int, int *, double *);
int pack_forward_comm(int , int *, double *, int, int *);
void unpack_forward_comm(int , int , double *);
//private: // replicate a few from FixRX
int my_restartFlag;
int nlocal;
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -0,0 +1,856 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors:
James Larentzos (U.S. Army Research Laboratory)
and Timothy I. Mattox (Engility Corporation)
Martin Lisal (Institute of Chemical Process Fundamentals
of the Czech Academy of Sciences and J. E. Purkinje University)
John Brennan, Joshua Moore and William Mattson (Army Research Lab)
Please cite the related publications:
J. P. Larentzos, J. K. Brennan, J. D. Moore, M. Lisal, W. D. Mattson,
"Parallel implementation of isothermal and isoenergetic Dissipative
Particle Dynamics using Shardlow-like splitting algorithms",
Computer Physics Communications, 2014, 185, pp 1987--1998.
M. Lisal, J. K. Brennan, J. Bonet Avalos, "Dissipative particle dynamics
at isothermal, isobaric, isoenergetic, and isoenthalpic conditions using
Shardlow-like splitting algorithms", Journal of Chemical Physics, 2011,
135, 204105.
------------------------------------------------------------------------- */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "fix_shardlow_kokkos.h"
#include "atom.h"
#include "atom_masks.h"
#include "atom_kokkos.h"
#include "force.h"
#include "update.h"
#include "respa.h"
#include "error.h"
#include <math.h>
#include "atom_vec.h"
#include "comm.h"
#include "neighbor.h"
#include "neigh_list_kokkos.h"
#include "neigh_request.h"
#include "memory.h"
#include "domain.h"
#include "modify.h"
// #include "pair_dpd_fdt.h"
#include "pair_dpd_fdt_energy_kokkos.h"
#include "pair.h"
#include "npair_ssa_kokkos.h"
#include "citeme.h"
using namespace LAMMPS_NS;
using namespace FixConst;
#define EPSILON 1.0e-10
#define EPSILON_SQUARED ((EPSILON) * (EPSILON))
/* ---------------------------------------------------------------------- */
template<class DeviceType>
FixShardlowKokkos<DeviceType>::FixShardlowKokkos(LAMMPS *lmp, int narg, char **arg) :
  FixShardlow(lmp, narg, arg), k_pairDPDE(NULL), ghostmax(0), nlocal(0), nghost(0)
{
  // Standard Kokkos fix bookkeeping: mark the fix as Kokkos-enabled, bind it
  // to the execution space implied by DeviceType, and start with empty
  // read/modify data masks.
  kokkosable = 1;
  atomKK = (AtomKokkos *) atom;
  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
  datamask_read = EMPTY_MASK;
  datamask_modify = EMPTY_MASK;

  if (narg != 3)
    error->all(FLERR,"Illegal fix shardlow command");

  // Only the constant-energy (dpd/fdt/energy) Kokkos pair style is supported.
  // pair_match() is called with exact == 0; the dynamic_cast yields NULL when
  // the matched pair style is not the Kokkos class for this DeviceType.
  k_pairDPDE = dynamic_cast<PairDPDfdtEnergyKokkos<DeviceType> *>(
    force->pair_match("dpd/fdt/energy",0));

  // Forward comm carries 3 velocity components; reverse comm carries
  // 3 velocity deltas plus the uCond and uMech deltas.
  comm_forward = 3;
  comm_reverse = 5;
  maxRNG = 0;
#ifdef DPD_USE_RAN_MARS
  pp_random = NULL;
#endif

  if (!k_pairDPDE)
    error->all(FLERR,"Must use pair_style dpd/fdt/energy/kk with fix shardlow/kk");

#ifdef DEBUG_SSA_PAIR_CT
  // Debug-only pair counters and histogram, with host mirrors.
  d_counters = typename AT::t_int_2d("FixShardlowKokkos::d_counters", 2, 3);
  d_hist = typename AT::t_int_1d("FixShardlowKokkos::d_hist", 32);
#ifndef KOKKOS_USE_CUDA_UVM
  h_counters = Kokkos::create_mirror_view(d_counters);
  h_hist = Kokkos::create_mirror_view(d_hist);
#else
  h_counters = d_counters;
  h_hist = d_hist;
#endif
#endif
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
FixShardlowKokkos<DeviceType>::~FixShardlowKokkos()
{
  ghostmax = 0;
#ifdef DPD_USE_RAN_MARS
  if (pp_random != NULL) {
    // Slot 0 is skipped: it aliases the pair style's generator
    // (see initial_integrate) and is not owned by this fix.
    int slot = 1;
    while (slot < maxRNG) delete pp_random[slot++];
    delete[] pp_random;
    pp_random = NULL;
  }
#endif
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
int FixShardlowKokkos<DeviceType>::setmask()
{
  // This fix is invoked at the initial-integrate and pre-neighbor stages.
  return INITIAL_INTEGRATE | PRE_NEIGHBOR;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixShardlowKokkos<DeviceType>::init()
{
  FixShardlow::init();

  // Flag the last entry of the neighbor request table as a Kokkos request for
  // the proper memory space (presumably the request just made by
  // FixShardlow::init() -- confirm against the base class).
  auto request = neighbor->requests[neighbor->nrequest - 1];
  const bool is_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
  const bool is_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value;
  request->kokkos_host = is_host && !is_device;
  request->kokkos_device = is_device;

  // (Re)allocate the per-type-pair parameter table, indexed 1..ntypes.
  const int ntypes = atom->ntypes;
  k_params = Kokkos::DualView<params_ssa**,Kokkos::LayoutRight,DeviceType>
    ("FixShardlowKokkos::params",ntypes+1,ntypes+1);
  params = k_params.template view<DeviceType>();

  // Borrow the squared-cutoff table from the pair style.
  k_pairDPDE->k_cutsq.template sync<DeviceType>();
  d_cutsq = k_pairDPDE->k_cutsq.template view<DeviceType>();

  const double boltz2 = 2.0*force->boltz;

  // Fill the upper triangle on the host and mirror it into the lower one.
  for (int itype = 1; itype <= ntypes; ++itype) {
    for (int jtype = itype; jtype <= ntypes; ++jtype) {
      auto &entry = k_params.h_view(itype,jtype);

      const F_FLOAT cutone = k_pairDPDE->cut[itype][jtype];
      // A (near-)zero cutoff gets a huge inverse instead of dividing by zero.
      entry.cutinv = (cutone > EPSILON) ? 1.0/cutone : FLT_MAX;
      entry.halfsigma = 0.5*k_pairDPDE->sigma[itype][jtype];
      entry.kappa = k_pairDPDE->kappa[itype][jtype];
      entry.alpha = sqrt(boltz2*k_pairDPDE->kappa[itype][jtype]);

      k_params.h_view(jtype,itype) = entry;

      // Small type counts are also cached in fixed-size member arrays, which
      // the STACKPARAMS=true kernels read instead of the views.
      if (itype <= MAX_TYPES_STACKPARAMS && jtype <= MAX_TYPES_STACKPARAMS) {
        m_params[itype][jtype] = m_params[jtype][itype] = entry;
        m_cutsq[jtype][itype] = m_cutsq[itype][jtype] = k_pairDPDE->k_cutsq.h_view(itype,jtype);
      }
    }
  }

  // The host copy is now the up-to-date one.
  k_params.template modify<LMPHostType>();
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixShardlowKokkos<DeviceType>::init_list(int id, NeighList *ptr)
{
  // Let the base class record the list, then keep a typed handle to the same
  // object so the Kokkos views of the list are reachable.
  FixShardlow::init_list(id, ptr);

  typedef NeighListKokkos<DeviceType> KokkosList;
  k_list = static_cast<KokkosList *>(ptr);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixShardlowKokkos<DeviceType>::pre_neighbor()
{
  // NOTE: this logic is specific to orthogonal boxes, not triclinic

  // Enforce the constraint that ghosts must be contained in the nearest sub-domains
  const double bbx = domain->subhi[0] - domain->sublo[0];
  const double bby = domain->subhi[1] - domain->sublo[1];
  const double bbz = domain->subhi[2] - domain->sublo[2];

  const double rcut = 2.0*neighbor->cutneighmax;

  if (domain->triclinic)
    error->all(FLERR,"Fix shardlow does not yet support triclinic geometries");

  if (rcut >= bbx || rcut >= bby || rcut >= bbz) {
    const char fmt[] = "Shardlow algorithm requires sub-domain length > 2*(rcut+skin). Either reduce the number of processors requested, or change the cutoff/skin: rcut= %e bbx= %e bby= %e bbz= %e\n";
    // Build the message in a bounded stack buffer: no unchecked malloc(),
    // no unbounded sprintf(), and nothing to leak if error->one() throws.
    // Each %e expands to far fewer than 32 characters.
    char msg[sizeof(fmt) + 4*32];
    snprintf(msg, sizeof(msg), fmt, rcut, bbx, bby, bbz);
    error->one(FLERR, msg);
  }

  nlocal = atomKK->nlocal;
  nghost = atomKK->nghost;

  // Allocate memory for h_v_t0 to hold the initial velocities for the ghosts.
  // The buffer only grows; it is reallocated when the ghost count exceeds
  // the largest count seen so far.
  if (nghost > ghostmax) {
    ghostmax = nghost;
    k_v_t0 = DAT::tdual_v_array("FixShardlowKokkos:v_t0", ghostmax);
    // v_t0 is only used in the comm routines, which run on the host
    h_v_t0 = k_v_t0.h_view;
  }

  // Setup views of relevant data.  They are refreshed on every call
  // rather than cached across reneighborings.
  x = atomKK->k_x.template view<DeviceType>();
  v = atomKK->k_v.template view<DeviceType>();
  h_v = atomKK->k_v.h_view;
  uCond = atomKK->k_uCond.template view<DeviceType>();
  h_uCond = atomKK->k_uCond.h_view;
  uMech = atomKK->k_uMech.template view<DeviceType>();
  h_uMech = atomKK->k_uMech.h_view;
  type = atomKK->k_type.template view<DeviceType>();

  // Masses are per-atom when rmass is present, otherwise per-type; the
  // kernels index `masses` by atom or by type according to massPerI.
  if (atomKK->rmass) {
    massPerI = true;
    masses = atomKK->k_rmass.template view<DeviceType>();
  } else {
    massPerI = false;
    masses = atomKK->k_mass.template view<DeviceType>();
  }

  dpdTheta = atomKK->k_dpdTheta.template view<DeviceType>();
}
template<class DeviceType>
void FixShardlowKokkos<DeviceType>::setup_pre_neighbor()
{
  // The setup-phase hook does exactly the same work as the per-step hook.
  this->pre_neighbor();
}
/* ---------------------------------------------------------------------- */
#ifdef ENABLE_KOKKOS_DPD_CONSTANT_TEMPERATURE
#error "FixShardlowKokkos::ssa_update_dpd() is not functional yet - TIM 20170830"
/* ----------------------------------------------------------------------
   Perform the stochastic integration and Shardlow update for constant temperature
   Allow for both per-type and per-atom mass

   start_ii = first row of the SSA neighbor list to process
   count    = number of consecutive rows to process
   id       = index of the random number generator state to use

   NOTE: only implemented for orthogonal boxes, not triclinic
   NOTE(review): theta_ij_inv is a class member that nothing in this file
   assigns (its assignment in initial_integrate() is commented out); it must
   be set before this routine can be enabled.
------------------------------------------------------------------------- */
template<class DeviceType>
template<bool STACKPARAMS>
KOKKOS_INLINE_FUNCTION
void FixShardlowKokkos<DeviceType>::ssa_update_dpd(
  int start_ii, int count, int id
) const
{
#ifdef DPD_USE_RAN_MARS
  class RanMars *pRNG = pp_random[id];
#else
  rand_type rand_gen = rand_pool.get_state(id);
#endif

  int ct = count;
  int ii = start_ii;

  while (ct-- > 0) {
    const int i = d_ilist(ii);
    const int jlen = d_numneigh(ii);

    const double xtmp = x(i, 0);
    const double ytmp = x(i, 1);
    const double ztmp = x(i, 2);

    // load velocity for i from memory
    double vxi = v(i, 0);
    double vyi = v(i, 1);
    double vzi = v(i, 2);

    const int itype = type(i);

    const double mass_i = masses(massPerI ? i : itype);
    const double massinv_i = 1.0 / mass_i;

    // Loop over Directional Neighbors only
    for (int jj = 0; jj < jlen; jj++) {
      const int j = d_neighbors(ii,jj) & NEIGHMASK;
      const int jtype = type(j);

      const X_FLOAT delx = xtmp - x(j, 0);
      const X_FLOAT dely = ytmp - x(j, 1);
      const X_FLOAT delz = ztmp - x(j, 2);
      const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
#ifdef DEBUG_SSA_PAIR_CT
      if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(0, 0)));
      else Kokkos::atomic_increment(&(d_counters(0, 1)));
      Kokkos::atomic_increment(&(d_counters(0, 2)));
      int rsqi = rsq / 8;
      if (rsqi < 0) rsqi = 0;
      else if (rsqi > 31) rsqi = 31;
      Kokkos::atomic_increment(&(d_hist(rsqi)));
#endif

      // NOTE: r can be 0.0 in DPD systems, so do EPSILON_SQUARED test
      if ((rsq < (STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype)))
        && (rsq >= EPSILON_SQUARED)) {
#ifdef DEBUG_SSA_PAIR_CT
        if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(1, 0)));
        else Kokkos::atomic_increment(&(d_counters(1, 1)));
        Kokkos::atomic_increment(&(d_counters(1, 2)));
#endif
        double r = sqrt(rsq);
        double rinv = 1.0/r;
        double delx_rinv = delx*rinv;
        double dely_rinv = dely*rinv;
        double delz_rinv = delz*rinv;

        double wr = 1.0 - r*(STACKPARAMS?m_params[itype][jtype].cutinv:params(itype,jtype).cutinv);
        double wdt = wr*wr*dt;

        double halfsigma_ij = STACKPARAMS?m_params[itype][jtype].halfsigma:params(itype,jtype).halfsigma;
        double halfgamma_ij = halfsigma_ij*halfsigma_ij*boltz_inv*theta_ij_inv;

        double sigmaRand = halfsigma_ij*wr*dtsqrt*ftm2v *
#ifdef DPD_USE_RAN_MARS
          pRNG->gaussian();
#else
          rand_gen.normal();
#endif

        const double mass_j = masses(massPerI ? j : jtype);
        double massinv_j = 1.0 / mass_j;

        double gammaFactor = halfgamma_ij*wdt*ftm2v;
        double inv_1p_mu_gammaFactor = 1.0/(1.0 + (massinv_i + massinv_j)*gammaFactor);

        double vxj = v(j, 0);
        double vyj = v(j, 1);
        double vzj = v(j, 2);

        // Compute the initial velocity difference between atom i and atom j
        double delvx = vxi - vxj;
        double delvy = vyi - vyj;
        double delvz = vzi - vzj;
        double dot_rinv = (delx_rinv*delvx + dely_rinv*delvy + delz_rinv*delvz);

        // Compute momentum change between t and t+dt
        double factorA = sigmaRand - gammaFactor*dot_rinv;

        // Update the velocity on i
        vxi += delx_rinv*factorA*massinv_i;
        vyi += dely_rinv*factorA*massinv_i;
        vzi += delz_rinv*factorA*massinv_i;

        // Update the velocity on j
        vxj -= delx_rinv*factorA*massinv_j;
        vyj -= dely_rinv*factorA*massinv_j;
        vzj -= delz_rinv*factorA*massinv_j;

        //ii.   Compute the new velocity diff
        delvx = vxi - vxj;
        delvy = vyi - vyj;
        delvz = vzi - vzj;
        dot_rinv = delx_rinv*delvx + dely_rinv*delvy + delz_rinv*delvz;

        // Compute the new momentum change between t and t+dt
        double factorB = (sigmaRand - gammaFactor*dot_rinv)*inv_1p_mu_gammaFactor;

        // Update the velocity on i
        vxi += delx_rinv*factorB*massinv_i;
        vyi += dely_rinv*factorB*massinv_i;
        vzi += delz_rinv*factorB*massinv_i;

        // Update the velocity on j
        vxj -= delx_rinv*factorB*massinv_j;
        vyj -= dely_rinv*factorB*massinv_j;
        vzj -= delz_rinv*factorB*massinv_j;

        // Store updated velocity for j
        v(j, 0) = vxj;
        v(j, 1) = vyj;
        v(j, 2) = vzj;
      }
    }

    // store updated velocity for i
    v(i, 0) = vxi;
    v(i, 1) = vyi;
    v(i, 2) = vzi;

    // advance to the next row of the neighbor list; without this every
    // iteration would re-process row start_ii
    ii++;
  }

#ifndef DPD_USE_RAN_MARS
  rand_pool.free_state(rand_gen);
#endif
}
#endif
/* ----------------------------------------------------------------------
Perform the stochastic integration and Shardlow update for constant energy
Allow for both per-type and per-atom mass
start_ii = first row of the neighbor list to process
count = number of consecutive rows to process
id = index of the random number generator state used for this call
STACKPARAMS selects the fixed-size member arrays (m_params, m_cutsq)
instead of the Kokkos views (params, d_cutsq) for per-type-pair data.
The order of the random draws and of the velocity updates below is
significant; do not reorder statements.
NOTE: only implemented for orthogonal boxes, not triclinic
------------------------------------------------------------------------- */
template<class DeviceType>
template<bool STACKPARAMS>
KOKKOS_INLINE_FUNCTION
void FixShardlowKokkos<DeviceType>::ssa_update_dpde(
int start_ii, int count, int id
) const
{
// Obtain the random number generator for this work item
#ifdef DPD_USE_RAN_MARS
class RanMars *pRNG = pp_random[id];
#else
rand_type rand_gen = rand_pool.get_state(id);
#endif
int ct = count;
int ii = start_ii;
while (ct-- > 0) {
const int i = d_ilist(ii);
const int jlen = d_numneigh(ii);
const double xtmp = x(i, 0);
const double ytmp = x(i, 1);
const double ztmp = x(i, 2);
// load velocity for i from memory
double vxi = v(i, 0);
double vyi = v(i, 1);
double vzi = v(i, 2);
// uMech and uCond for i are accumulated in locals and stored once after the j loop
double uMech_i = uMech(i);
double uCond_i = uCond(i);
const int itype = type(i);
const double theta_i_inv = 1.0/dpdTheta(i);
const double mass_i = masses(massPerI ? i : itype);
const double massinv_i = 1.0 / mass_i;
// precomputed factor -mass_i/(4*ftm2v); multiplied by mass_j below for the uMech update
const double mass_i_div_neg4_ftm2v = mass_i*(-0.25)/ftm2v;
// Loop over Directional Neighbors only
for (int jj = 0; jj < jlen; jj++) {
const int j = d_neighbors(ii,jj) & NEIGHMASK;
const int jtype = type(j);
const X_FLOAT delx = xtmp - x(j, 0);
const X_FLOAT dely = ytmp - x(j, 1);
const X_FLOAT delz = ztmp - x(j, 2);
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
#ifdef DEBUG_SSA_PAIR_CT
// debug statistics: count every candidate pair and histogram rsq in bins of width 8
if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(0, 0)));
else Kokkos::atomic_increment(&(d_counters(0, 1)));
Kokkos::atomic_increment(&(d_counters(0, 2)));
int rsqi = rsq / 8;
if (rsqi < 0) rsqi = 0;
else if (rsqi > 31) rsqi = 31;
Kokkos::atomic_increment(&(d_hist(rsqi)));
#endif
// NOTE: r can be 0.0 in DPD systems, so do EPSILON_SQUARED test
if ((rsq < (STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype)))
&& (rsq >= EPSILON_SQUARED)) {
#ifdef DEBUG_SSA_PAIR_CT
// debug statistics: count the pairs that pass the cutoff test
if ((i < nlocal) && (j < nlocal)) Kokkos::atomic_increment(&(d_counters(1, 0)));
else Kokkos::atomic_increment(&(d_counters(1, 1)));
Kokkos::atomic_increment(&(d_counters(1, 2)));
#endif
double r = sqrt(rsq);
double rinv = 1.0/r;
double delx_rinv = delx*rinv;
double dely_rinv = dely*rinv;
double delz_rinv = delz*rinv;
// weight function wr = 1 - r/cut (cutinv holds 1/cut)
double wr = 1.0 - r*(STACKPARAMS?m_params[itype][jtype].cutinv:params(itype,jtype).cutinv);
double wdt = wr*wr*dt;
// Compute the current temperature
double theta_j_inv = 1.0/dpdTheta(j);
double theta_ij_inv = 0.5*(theta_i_inv + theta_j_inv);
double halfsigma_ij = STACKPARAMS?m_params[itype][jtype].halfsigma:params(itype,jtype).halfsigma;
double halfgamma_ij = halfsigma_ij*halfsigma_ij*boltz_inv*theta_ij_inv;
// first random draw of this pair: the momentum term
double sigmaRand = halfsigma_ij*wr*dtsqrt*ftm2v *
#ifdef DPD_USE_RAN_MARS
pRNG->gaussian();
#else
rand_gen.normal();
#endif
const double mass_j = masses(massPerI ? j : jtype);
double mass_ij_div_neg4_ftm2v = mass_j*mass_i_div_neg4_ftm2v;
double massinv_j = 1.0 / mass_j;
// Compute uCond
double kappa_ij = STACKPARAMS?m_params[itype][jtype].kappa:params(itype,jtype).kappa;
double alpha_ij = STACKPARAMS?m_params[itype][jtype].alpha:params(itype,jtype).alpha;
// second random draw of this pair: the uCond term
double del_uCond = alpha_ij*wr*dtsqrt *
#ifdef DPD_USE_RAN_MARS
pRNG->gaussian();
#else
rand_gen.normal();
#endif
del_uCond += kappa_ij*(theta_i_inv - theta_j_inv)*wdt;
// equal and opposite change: j is written immediately, i is accumulated locally
uCond[j] -= del_uCond;
uCond_i += del_uCond;
double gammaFactor = halfgamma_ij*wdt*ftm2v;
double inv_1p_mu_gammaFactor = 1.0/(1.0 + (massinv_i + massinv_j)*gammaFactor);
double vxj = v(j, 0);
double vyj = v(j, 1);
double vzj = v(j, 2);
// squared speeds of j (dot4) and i (dot3) before the update, used for uMech below
double dot4 = vxj*vxj + vyj*vyj + vzj*vzj;
double dot3 = vxi*vxi + vyi*vyi + vzi*vzi;
// Compute the initial velocity difference between atom i and atom j
double delvx = vxi - vxj;
double delvy = vyi - vyj;
double delvz = vzi - vzj;
double dot_rinv = (delx_rinv*delvx + dely_rinv*delvy + delz_rinv*delvz);
// Compute momentum change between t and t+dt
double factorA = sigmaRand - gammaFactor*dot_rinv;
// Update the velocity on i
vxi += delx_rinv*factorA*massinv_i;
vyi += dely_rinv*factorA*massinv_i;
vzi += delz_rinv*factorA*massinv_i;
// Update the velocity on j
vxj -= delx_rinv*factorA*massinv_j;
vyj -= dely_rinv*factorA*massinv_j;
vzj -= delz_rinv*factorA*massinv_j;
//ii. Compute the new velocity diff
delvx = vxi - vxj;
delvy = vyi - vyj;
delvz = vzi - vzj;
dot_rinv = delx_rinv*delvx + dely_rinv*delvy + delz_rinv*delvz;
// Compute the new momentum change between t and t+dt
double factorB = (sigmaRand - gammaFactor*dot_rinv)*inv_1p_mu_gammaFactor;
// Update the velocity on i
vxi += delx_rinv*factorB*massinv_i;
vyi += dely_rinv*factorB*massinv_i;
vzi += delz_rinv*factorB*massinv_i;
// change in |v_i|^2, scaled by 1/m_j so that the m_i*m_j factor below leaves m_i
double partial_uMech = (vxi*vxi + vyi*vyi + vzi*vzi - dot3)*massinv_j;
// Update the velocity on j
vxj -= delx_rinv*factorB*massinv_j;
vyj -= dely_rinv*factorB*massinv_j;
vzj -= delz_rinv*factorB*massinv_j;
// change in |v_j|^2, scaled by 1/m_i for the same reason
partial_uMech += (vxj*vxj + vyj*vyj + vzj*vzj - dot4)*massinv_i;
// Store updated velocity for j
v(j, 0) = vxj;
v(j, 1) = vyj;
v(j, 2) = vzj;
// Compute uMech
// the same delta is added to both atoms of the pair
double del_uMech = partial_uMech*mass_ij_div_neg4_ftm2v;
uMech_i += del_uMech;
uMech(j) += del_uMech;
}
}
// store updated velocity for i
v(i, 0) = vxi;
v(i, 1) = vyi;
v(i, 2) = vzi;
// store updated uMech and uCond for i
uMech(i) = uMech_i;
uCond(i) = uCond_i;
// advance to the next row of the neighbor list
ii++;
}
// Return the generator state to the pool
#ifndef DPD_USE_RAN_MARS
rand_pool.free_state(rand_gen);
#endif
}
// Driver for the Shardlow splitting update.  It first runs the update
// kernel over the local phases of the SSA neighbor list, then, for each
// ghost phase, forward-communicates velocities, zeroes the ghosts'
// uCond/uMech, runs the kernel on that phase and reverse-communicates
// the accumulated ghost deltas back to the owning atoms.
// The vflag argument is not used.
template<class DeviceType>
void FixShardlowKokkos<DeviceType>::initial_integrate(int vflag)
{
// Cache the neighbor list views so the kernels can read them as members
d_numneigh = k_list->d_numneigh;
d_neighbors = k_list->d_neighbors;
d_ilist = k_list->d_ilist;
// *this is passed by value to parallel_for below; copymode is set for the
// duration of the kernel launches and cleared again at the end
copymode = 1;
dtsqrt = sqrt(update->dt);
// The work decomposition (phases and work items) lives in the SSA NPair object
NPairSSAKokkos<DeviceType> *np_ssa = dynamic_cast<NPairSSAKokkos<DeviceType>*>(list->np);
if (!np_ssa) error->one(FLERR, "NPair wasn't a NPairSSAKokkos object");
ssa_phaseCt = np_ssa->ssa_phaseCt;
ssa_phaseLen = np_ssa->ssa_phaseLen;
ssa_itemLoc = np_ssa->ssa_itemLoc;
ssa_itemLen = np_ssa->ssa_itemLen;
ssa_gphaseCt = np_ssa->ssa_gphaseCt;
ssa_gphaseLen = np_ssa->ssa_gphaseLen;
ssa_gitemLoc = np_ssa->ssa_gitemLoc;
ssa_gitemLen = np_ssa->ssa_gitemLen;
// Item tables are read inside the kernels; the phase lengths are read on the host
np_ssa->k_ssa_itemLoc.template sync<DeviceType>();
np_ssa->k_ssa_itemLen.template sync<DeviceType>();
np_ssa->k_ssa_gitemLoc.template sync<DeviceType>();
np_ssa->k_ssa_gitemLen.template sync<DeviceType>();
np_ssa->k_ssa_phaseLen.template sync<LMPHostType>();
np_ssa->k_ssa_gphaseLen.template sync<LMPHostType>();
auto h_ssa_phaseLen = np_ssa->k_ssa_phaseLen.h_view;
auto h_ssa_gphaseLen = np_ssa->k_ssa_gphaseLen.h_view;
// One RNG state per work item: size the pool by the larger of the two item tables
int maxWorkItemCt = (int) ssa_itemLoc.dimension_1();
if (maxWorkItemCt < (int) ssa_gitemLoc.dimension_1()) {
maxWorkItemCt = (int) ssa_gitemLoc.dimension_1();
}
// The RNG pool only grows; it is (re)built when more work items are needed
if (maxWorkItemCt > maxRNG) {
#ifdef DPD_USE_RAN_MARS
if (pp_random) {
// slot 0 is not owned by this fix (see below), so deletion starts at 1
for (int i = 1; i < maxRNG; ++i) delete pp_random[i];
delete[] pp_random;
pp_random = NULL;
}
pp_random = new RanMars*[maxWorkItemCt];
for (int i = 1; i < maxWorkItemCt; ++i) {
pp_random[i] = new RanMars(lmp, k_pairDPDE->seed + comm->me + comm->nprocs*i);
}
// slot 0 reuses the pair style's own generator
pp_random[0] = k_pairDPDE->random;
#else
rand_pool.init(k_pairDPDE->seed + comm->me, maxWorkItemCt);
#endif
maxRNG = maxWorkItemCt;
}
#ifdef DEBUG_SSA_PAIR_CT
// reset the debug counters and histogram on host, then push them to the device
for (int i = 0; i < 2; ++i)
for (int j = 0; j < 3; ++j)
h_counters(i,j) = 0;
for (int i = 0; i < 32; ++i) h_hist[i] = 0;
deep_copy(d_counters, h_counters);
deep_copy(d_hist, h_hist);
#endif
//theta_ij_inv = 1.0/k_pairDPD->temperature; // independent of i,j
// Scalars read by the kernels through the functor copy
boltz_inv = 1.0/force->boltz;
ftm2v = force->ftm2v;
dt = update->dt;
// params were filled on the host in init(); make them current for the kernels
k_params.template sync<DeviceType>();
// process neighbors in the local AIR
atomKK->sync(execution_space,X_MASK | V_MASK | TYPE_MASK | RMASS_MASK | UCOND_MASK | UMECH_MASK | DPDTHETA_MASK);
// workPhase is a class member: the kernels read it to select the phase's item tables
for (workPhase = 0; workPhase < ssa_phaseCt; ++workPhase) {
int workItemCt = h_ssa_phaseLen[workPhase];
// STACKPARAMS is only usable when the type count fits the fixed-size member arrays
if(atom->ntypes > MAX_TYPES_STACKPARAMS)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDE<false> >(0,workItemCt),*this);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDE<true> >(0,workItemCt),*this);
}
atomKK->modified(execution_space,V_MASK | UCOND_MASK | UMECH_MASK);
//Loop over all 13 outward directions (7 stages)
for (workPhase = 0; workPhase < ssa_gphaseCt; ++workPhase) {
// int airnum = workPhase + 1;
int workItemCt = h_ssa_gphaseLen[workPhase];
// Communicate the updated velocities to all nodes
// (the pack/unpack callbacks operate on the host views, hence the Host sync/modified pair)
atomKK->sync(Host,V_MASK);
comm->forward_comm_fix(this);
atomKK->modified(Host,V_MASK);
if(k_pairDPDE){
// Zero out the ghosts' uCond & uMech to be used as delta accumulators
// memset(&(atom->uCond[nlocal]), 0, sizeof(double)*nghost);
// memset(&(atom->uMech[nlocal]), 0, sizeof(double)*nghost);
// must capture local variables, not class variables
atomKK->sync(execution_space,UCOND_MASK | UMECH_MASK);
auto l_uCond = uCond;
auto l_uMech = uMech;
// NOTE(review): this launch uses LMPDeviceType while the views belong to
// DeviceType; confirm this is intended for the LMPHostType instantiation.
Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType>(nlocal,nlocal+nghost), LAMMPS_LAMBDA (const int i) {
l_uCond(i) = 0.0;
l_uMech(i) = 0.0;
});
atomKK->modified(execution_space,UCOND_MASK | UMECH_MASK);
}
// process neighbors in this AIR
atomKK->sync(execution_space,X_MASK | V_MASK | TYPE_MASK | RMASS_MASK | UCOND_MASK | UMECH_MASK | DPDTHETA_MASK);
if(atom->ntypes > MAX_TYPES_STACKPARAMS)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDEGhost<false> >(0,workItemCt),*this);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDEGhost<true> >(0,workItemCt),*this);
atomKK->modified(execution_space,V_MASK | UCOND_MASK | UMECH_MASK);
// Communicate the ghost deltas to the atom owners
atomKK->sync(Host,V_MASK | UCOND_MASK | UMECH_MASK);
comm->reverse_comm_fix(this);
atomKK->modified(Host,V_MASK | UCOND_MASK | UMECH_MASK);
} //End Loop over all directions For airnum = Top, Top-Right, Right, Bottom-Right, Back
#ifdef DEBUG_SSA_PAIR_CT
// pull the debug statistics back to the host and print them
deep_copy(h_counters, d_counters);
deep_copy(h_hist, d_hist);
for (int i = 0; i < 32; ++i) fprintf(stdout, "%8d", h_hist[i]);
fprintf(stdout, "\n%6d %6d,%6d %6d: "
,h_counters(0, 2)
,h_counters(1, 2)
,h_counters(0, 1)
,h_counters(1, 1)
);
#endif
copymode = 0;
}
template<class DeviceType>
template<bool STACKPARAMS>
KOKKOS_INLINE_FUNCTION
void FixShardlowKokkos<DeviceType>::operator()(TagFixShardlowSSAUpdateDPDE<STACKPARAMS>, const int &workItem) const {
  // Kernel body for the local phases: fetch this work item's starting row and
  // row count for the current phase and hand them to the update routine.
  // The work item index doubles as the RNG state id.
  ssa_update_dpde<STACKPARAMS>(ssa_itemLoc(workPhase, workItem),
                               ssa_itemLen(workPhase, workItem),
                               workItem);
}
template<class DeviceType>
template<bool STACKPARAMS>
KOKKOS_INLINE_FUNCTION
void FixShardlowKokkos<DeviceType>::operator()(TagFixShardlowSSAUpdateDPDEGhost<STACKPARAMS>, const int &workItem) const {
  // Kernel body for the ghost phases: same as the local variant, but the
  // starting row and row count come from the ghost item tables.
  // The work item index doubles as the RNG state id.
  ssa_update_dpde<STACKPARAMS>(ssa_gitemLoc(workPhase, workItem),
                               ssa_gitemLen(workPhase, workItem),
                               workItem);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
int FixShardlowKokkos<DeviceType>::pack_forward_comm(int n, int *list, double *buf, int pbc_flag, int *pbc)
{
  // Pack the three host-side velocity components of each listed atom.
  // pbc_flag and pbc are not used.  Returns the number of values written.
  double *out = buf;
  for (int k = 0; k < n; ++k) {
    const int atom_idx = list[k];
    for (int dim = 0; dim < 3; ++dim)
      *out++ = h_v(atom_idx, dim);
  }
  return static_cast<int>(out - buf);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixShardlowKokkos<DeviceType>::unpack_forward_comm(int n, int first, double *buf)
{
  // Store the received velocities for atoms first..first+n-1 and remember
  // them in h_v_t0 (indexed relative to nlocal) so that pack_reverse_comm
  // can later send only the change.
  const double *in = buf;
  const int last = first + n;
  for (int ghost = first; ghost < last; ++ghost) {
    const int slot = ghost - nlocal;
    for (int dim = 0; dim < 3; ++dim) {
      h_v(ghost, dim) = *in++;
      h_v_t0(slot, dim) = h_v(ghost, dim);
    }
  }
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
int FixShardlowKokkos<DeviceType>::pack_reverse_comm(int n, int first, double *buf)
{
  // For atoms first..first+n-1, pack the velocity change since the last
  // forward communication followed by uCond and uMech.
  // Returns the number of values written.
  int m = 0;
  const int last = first + n;
  for (int ghost = first; ghost < last; ++ghost) {
    const int slot = ghost - nlocal;
    for (int dim = 0; dim < 3; ++dim)
      buf[m++] = h_v(ghost, dim) - h_v_t0(slot, dim);

    if (!k_pairDPDE) continue;
    buf[m++] = h_uCond(ghost); // for ghosts, this is an accumulated delta
    buf[m++] = h_uMech(ghost); // for ghosts, this is an accumulated delta
  }
  return m;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixShardlowKokkos<DeviceType>::unpack_reverse_comm(int n, int *list, double *buf)
{
  // Add the received deltas onto the listed atoms; the buffer layout
  // matches what pack_reverse_comm wrote.
  const double *in = buf;
  for (int k = 0; k < n; ++k) {
    const int owner = list[k];
    for (int dim = 0; dim < 3; ++dim)
      h_v(owner, dim) += *in++;

    if (!k_pairDPDE) continue;
    h_uCond(owner) += *in++; // add in the accumulated delta
    h_uMech(owner) += *in++; // add in the accumulated delta
  }
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
double FixShardlowKokkos<DeviceType>::memory_usage()
{
  // Only the v_t0 buffer is reported: three doubles per slot, ghostmax slots.
  const double v_t0_bytes = sizeof(double)*3*ghostmax;
  return 0.0 + v_t0_bytes;
}
// Explicit instantiations: the member templates above are defined in this
// translation unit, so the supported specializations are generated here.
// The LMPHostType instantiation is only emitted when KOKKOS_HAVE_CUDA is
// defined (presumably because host and device types coincide otherwise,
// which would make it a duplicate -- confirm in kokkos_type.h).
namespace LAMMPS_NS {
template class FixShardlowKokkos<LMPDeviceType>;
#ifdef KOKKOS_HAVE_CUDA
template class FixShardlowKokkos<LMPHostType>;
#endif
}

View File

@ -0,0 +1,196 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
// Style registration: "shardlow/kk" and "shardlow/kk/device" both map to the
// LMPDeviceType instantiation, "shardlow/kk/host" to the LMPHostType one.
FixStyle(shardlow/kk,FixShardlowKokkos<LMPDeviceType>)
FixStyle(shardlow/kk/device,FixShardlowKokkos<LMPDeviceType>)
FixStyle(shardlow/kk/host,FixShardlowKokkos<LMPHostType>)
#else
#ifndef LMP_FIX_SHARDLOW_KOKKOS_H
#define LMP_FIX_SHARDLOW_KOKKOS_H
#include "float.h"
#include "fix_shardlow.h"
#include "kokkos_type.h"
#include "neigh_list_kokkos.h"
#ifdef ENABLE_KOKKOS_DPD_CONSTANT_TEMPERATURE
#include "pair_dpd_fdt_kokkos.h"
#endif
#include "pair_dpd_fdt_energy_kokkos.h"
namespace LAMMPS_NS {
// Empty tag types used to select which FixShardlowKokkos::operator()
// overload a Kokkos::RangePolicy dispatches to.  STACKPARAMS is forwarded
// to the kernel as a compile-time flag.
// Tag for the kernel that reads the ssa_itemLoc/ssa_itemLen tables.
template<bool STACKPARAMS>
struct TagFixShardlowSSAUpdateDPDE{};
// Tag for the kernel that reads the ssa_gitemLoc/ssa_gitemLen tables.
template<bool STACKPARAMS>
struct TagFixShardlowSSAUpdateDPDEGhost{};
// Kokkos implementation of fix shardlow, templated on the Kokkos device
// type it executes on.  The object is itself the functor handed to
// Kokkos::parallel_for, which is why the kernel entry points below are
// const call operators and why the kernels read their inputs from members.
template<class DeviceType>
class FixShardlowKokkos : public FixShardlow {
public:
typedef ArrayTypes<DeviceType> AT;
NeighListKokkos<DeviceType> *k_list; // The SSA specific neighbor list
FixShardlowKokkos(class LAMMPS *, int, char **);
~FixShardlowKokkos();
int setmask();
virtual void init();
virtual void init_list(int, class NeighList *);
virtual void initial_integrate(int);
void setup_pre_neighbor();
void pre_neighbor();
double memory_usage();
// Communication callbacks; they operate on the host-side views (h_v, h_v_t0, h_uCond, h_uMech)
int pack_reverse_comm(int, int, double *);
void unpack_reverse_comm(int, int *, double *);
int pack_forward_comm(int , int *, double *, int, int *);
void unpack_forward_comm(int , int , double *);
// Per-type-pair parameters used by the update kernels.
// Both constructors produce the same defaults; the int argument is ignored.
struct params_ssa {
KOKKOS_INLINE_FUNCTION
params_ssa(){cutinv=FLT_MAX;halfsigma=0;kappa=0;alpha=0;};
KOKKOS_INLINE_FUNCTION
params_ssa(int i){cutinv=FLT_MAX;halfsigma=0;kappa=0;alpha=0;};
F_FLOAT cutinv,halfsigma,kappa,alpha;
};
// Kernel entry points, selected by the tag type of the RangePolicy.
// The int argument is the work item index within the current workPhase.
template<bool STACKPARAMS>
KOKKOS_INLINE_FUNCTION
void operator()(TagFixShardlowSSAUpdateDPDE<STACKPARAMS>, const int&) const;
template<bool STACKPARAMS>
KOKKOS_INLINE_FUNCTION
void operator()(TagFixShardlowSSAUpdateDPDEGhost<STACKPARAMS>, const int&) const;
#ifdef DEBUG_SSA_PAIR_CT
// Debug-only pair counters (2x3) and histogram (32 bins), device and host copies
typename AT::t_int_2d d_counters;
typename HAT::t_int_2d h_counters;
typename AT::t_int_1d d_hist;
typename HAT::t_int_1d h_hist;
#endif
protected:
// Phase currently being processed; set by the host loop, read by the kernels
int workPhase;
// Scalars cached for the kernels.  NOTE(review): theta_ij_inv is only used by
// the disabled constant-temperature path and is not assigned in the .cpp.
double theta_ij_inv,boltz_inv,ftm2v,dt;
#ifdef ENABLE_KOKKOS_DPD_CONSTANT_TEMPERATURE
// class PairDPDfdt *pairDPD; FIXME as per k_pairDPDE below
#endif
// The Kokkos dpd/fdt/energy pair style this fix takes its parameters from
PairDPDfdtEnergyKokkos<DeviceType> *k_pairDPDE;
// Number of random number generator states currently allocated
int maxRNG;
// Random number source, selected at compile time
#ifdef DPD_USE_RAN_MARS
class RanMars **pp_random;
#elif defined(DPD_USE_Random_XorShift1024)
Kokkos::Random_XorShift1024_Pool<DeviceType> rand_pool;
typedef typename Kokkos::Random_XorShift1024_Pool<DeviceType>::generator_type rand_type;
#else
Kokkos::Random_XorShift64_Pool<DeviceType> rand_pool;
typedef typename Kokkos::Random_XorShift64_Pool<DeviceType>::generator_type rand_type;
#endif
// Per-type-pair parameter table: dual view plus the device view read by the kernels
Kokkos::DualView<params_ssa**,Kokkos::LayoutRight,DeviceType> k_params;
typename Kokkos::DualView<params_ssa**,
Kokkos::LayoutRight,DeviceType>::t_dev_const_um params;
// hardwired to space for MAX_TYPES_STACKPARAMS (12) atom types
params_ssa m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
// Ghost velocities as received by the last forward communication
typename DAT::tdual_v_array k_v_t0;
// typename AT::t_v_array d_v_t0; v_t0 only used in comm routines (on host)
typename HAT::t_v_array h_v_t0;
// Views of per-atom data, refreshed in pre_neighbor()
typename AT::t_x_array x;
typename AT::t_v_array v;
typename HAT::t_v_array h_v;
typename AT::t_efloat_1d uCond, uMech;
typename HAT::t_efloat_1d h_uCond, h_uMech;
typename AT::t_int_1d type;
// true: masses is indexed by atom (rmass); false: indexed by atom type
bool massPerI;
typename AT::t_float_1d_randomread masses;
typename AT::t_efloat_1d dpdTheta;
double dtsqrt; // = sqrt(update->dt);
// Capacity of the v_t0 buffer (largest ghost count seen so far)
int ghostmax;
int nlocal, nghost;
// Neighbor list views, cached in initial_integrate()
typename AT::t_neighbors_2d d_neighbors;
typename AT::t_int_1d_randomread d_ilist, d_numneigh;
// Work decomposition copied from the SSA NPair object: local phases ...
int ssa_phaseCt;
typename AT::t_int_1d ssa_phaseLen;
typename AT::t_int_2d ssa_itemLoc, ssa_itemLen;
// ... and ghost phases
int ssa_gphaseCt;
typename AT::t_int_1d ssa_gphaseLen;
typename AT::t_int_2d ssa_gitemLoc, ssa_gitemLen;
// Update routines; arguments are (first list row, row count, RNG state id)
#ifdef ENABLE_KOKKOS_DPD_CONSTANT_TEMPERATURE
template<bool STACKPARAMS>
KOKKOS_INLINE_FUNCTION
void ssa_update_dpd(int, int, int) const; // Constant Temperature
#endif
template<bool STACKPARAMS>
KOKKOS_INLINE_FUNCTION
void ssa_update_dpde(int, int, int) const; // Constant Energy
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: Illegal ... command
Self-explanatory. Check the input script syntax and compare to the
documentation for the command. You can use -echo screen as a
command-line option when running LAMMPS to see the offending line.
E: Must use dpd/fdt pair_style with fix shardlow
Self-explanatory.
E: Must use pair_style dpd/fdt or dpd/fdt/energy with fix shardlow
E: A deterministic integrator must be specified after fix shardlow in input
file (e.g. fix nve or fix nph).
Self-explanatory.
E: Cannot use constant temperature integration routines with DPD
Self-explanatory. Must use deterministic integrators such as nve or nph
E: Fix shardlow does not yet support triclinic geometries
Self-explanatory.
E: Shardlow algorithm requires sub-domain length > 2*(rcut+skin). Either
reduce the number of processors requested, or change the cutoff/skin
The Shardlow splitting algorithm requires the sub-domain lengths
to be larger than twice the cutoff+skin. Generally, the domain decomposition
is dependent on the number of processors requested.
*/

View File

@ -0,0 +1,103 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include <math.h>
#include "fix_wall_lj93_kokkos.h"
#include "atom_kokkos.h"
#include "error.h"
#include "atom_masks.h"
using namespace LAMMPS_NS;
using namespace FixConst;
/* ---------------------------------------------------------------------- */
// Build the Kokkos flavour of fix wall/lj93; every command argument is
// handed straight to the FixWallLJ93 base-class constructor.
template <class DeviceType>
FixWallLJ93Kokkos<DeviceType>::FixWallLJ93Kokkos(LAMMPS *lmp, int narg, char **arg)
  : FixWallLJ93(lmp, narg, arg)
{
  // empty data masks: wall_particle() issues its own sync()/modified() calls
  datamask_modify = EMPTY_MASK;
  datamask_read = EMPTY_MASK;

  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
  atomKK = static_cast<AtomKokkos *>(atom);
  kokkosable = 1;
}
/* ----------------------------------------------------------------------
interaction of all particles in group with a wall
m = index of wall coeffs
which = xlo,xhi,ylo,yhi,zlo,zhi
error if any particle is on or behind wall
------------------------------------------------------------------------- */
template <class DeviceType>
void FixWallLJ93Kokkos<DeviceType>::wall_particle(int m_in, int which, double coord_in)
{
  // stash the per-call wall description in members read by the kernel
  coord = coord_in;
  m = m_in;

  // which = 2*dim + (0 for the lo face, 1 for the hi face);
  // side becomes -1 for a lo face and +1 for a hi face
  side = which % 2;
  if (!side) side = -1;
  dim = which / 2;

  // make positions, forces and group masks current on the execution space
  atomKK->sync(execution_space, X_MASK|F_MASK|MASK_MASK);
  mask = atomKK->k_mask.view<DeviceType>();
  f = atomKK->k_f.view<DeviceType>();
  x = atomKK->k_x.view<DeviceType>();

  // freshly allocated flag; the kernel raises it when an atom is on or
  // behind the wall
  DAT::tdual_int_scalar k_oneflag("fix:oneflag");
  d_oneflag = k_oneflag.view<DeviceType>();

  const int nlocal = atom->nlocal;

  // the functor stores a by-value copy of *this; copymode is raised while
  // that copy is made (presumably so the copy does not release shared
  // data when it is destroyed -- confirm against the base classes)
  copymode = 1;
  FixWallLJ93KokkosFunctor<DeviceType> wp_functor(this);
  Kokkos::parallel_reduce(nlocal,wp_functor,ewall);
  copymode = 0;

  // only forces were written by the kernel
  atomKK->modified(execution_space, F_MASK);

  // pull the error flag back to the host and abort if it was raised
  k_oneflag.template modify<DeviceType>();
  k_oneflag.template sync<LMPHostType>();
  if (k_oneflag.h_view()) error->one(FLERR,"Particle on or inside fix wall surface");
}
/* ----------------------------------------------------------------------
   per-atom kernel body: apply the 9/3 wall selected by wall_particle()
   to atom i, accumulating the wall energy in ewall[0] and the wall force
   in ewall[m+1]
------------------------------------------------------------------------- */
template <class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixWallLJ93Kokkos<DeviceType>::wall_particle_item(int i, value_type ewall) const {
  // atoms outside the fix group are untouched
  if (!(mask(i) & groupbit)) return;

  // distance from the wall, measured so that legal positions are positive
  const double delta = (side < 0) ? (x(i,dim) - coord) : (coord - x(i,dim));

  // beyond the cutoff: no interaction
  if (delta >= cutoff[m]) return;

  // on or behind the wall: flag the error, the host reports it afterwards
  if (delta <= 0.0) {
    d_oneflag() = 1;
    return;
  }

  const double rinv = 1.0/delta;
  const double r2inv = rinv*rinv;
  const double r4inv = r2inv*r2inv;
  const double r10inv = r4inv*r4inv*r2inv;

  const double fwall = side * (coeff1[m]*r10inv - coeff2[m]*r4inv);
  f(i,dim) -= fwall;

  ewall[0] += coeff3[m]*r4inv*r4inv*rinv -
    coeff4[m]*r2inv*rinv - offset[m];
  ewall[m+1] += fwall;
}
// Explicit template instantiations: the member definitions above live in
// this translation unit, so the supported device types are instantiated here.
namespace LAMMPS_NS {
template class FixWallLJ93Kokkos<LMPDeviceType>;
// the host variant is only instantiated separately in CUDA builds
// (presumably LMPHostType and LMPDeviceType coincide otherwise -- confirm
// against kokkos_type.h)
#ifdef KOKKOS_HAVE_CUDA
template class FixWallLJ93Kokkos<LMPHostType>;
#endif
}

View File

@ -0,0 +1,83 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
FixStyle(wall/lj93/kk,FixWallLJ93Kokkos<LMPDeviceType>)
FixStyle(wall/lj93/kk/device,FixWallLJ93Kokkos<LMPDeviceType>)
FixStyle(wall/lj93/kk/host,FixWallLJ93Kokkos<LMPHostType>)
#else
#ifndef LMP_FIX_WALL_LJ93_KOKKOS_H
#define LMP_FIX_WALL_LJ93_KOKKOS_H
#include "fix_wall_lj93.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
// Kokkos-enabled variant of fix wall/lj93: same wall model as FixWallLJ93,
// with the per-atom loop executed as a Kokkos parallel reduction.
template <class DeviceType>
class FixWallLJ93Kokkos : public FixWallLJ93 {
 public:
  using device_type = DeviceType;
  using AT = ArrayTypes<DeviceType>;
  using value_type = double[];     // array-valued reduction result

  FixWallLJ93Kokkos(class LAMMPS *, int, char **);

  // host entry point: (wall index, face id, wall coordinate)
  void wall_particle(int, int, double);

  int m;                           // wall index of the current wall_particle() call

  // kernel body for one atom: (atom index, reduction array)
  KOKKOS_INLINE_FUNCTION
  void wall_particle_item(int, value_type) const;

 private:
  int dim,side;                    // wall dimension and face sign (-1 or +1)
  double coord;                    // wall position along dim

  typename AT::t_x_array x;        // atom positions
  typename AT::t_f_array f;        // atom forces
  typename AT::t_int_1d mask;      // atom group masks

  typename AT::t_int_scalar d_oneflag;   // raised when an atom is on/behind the wall
};
/* ----------------------------------------------------------------------
   Reduction functor used by FixWallLJ93Kokkos::wall_particle().
   Holds a by-value copy of the fix and forwards every atom index to
   wall_particle_item().  The reduction value is an array of value_count
   doubles: element 0 receives the wall energy and element m+1 receives
   the force on wall m.
------------------------------------------------------------------------- */
template <class DeviceType>
struct FixWallLJ93KokkosFunctor {
  typedef DeviceType device_type ;
  typedef double value_type[];

  // Number of doubles in the reduction array.  wall_particle_item() writes
  // ewall[0] and ewall[m+1], so indices 0..m+1 must exist, i.e. m+2 entries.
  // (The previous value m+1 left ewall[m+1] one element past the end.)
  // NOTE(review): the host-side result buffer passed to parallel_reduce must
  // hold at least m+2 doubles -- confirm against FixWall::ewall.
  const int value_count;

  FixWallLJ93Kokkos<DeviceType> c;   // copy of the fix captured for the kernel

  // c_ptr: fix whose current wall (c_ptr->m) is being evaluated.
  // Initializers are listed in declaration order (value_count, then c).
  FixWallLJ93KokkosFunctor(FixWallLJ93Kokkos<DeviceType>* c_ptr):
    value_count(c_ptr->m+2),
    c(*c_ptr) {}

  // i: atom index; ewall: this thread's partial reduction array
  KOKKOS_INLINE_FUNCTION
  void operator()(const int i, value_type ewall) const {
    c.wall_particle_item(i,ewall);
  }
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: Particle on or inside fix wall surface
Particles must be "exterior" to the wall in order for energy/force to
be calculated.
*/

View File

@ -95,7 +95,6 @@ void NBinKokkos<DeviceType>::bin_atoms()
MemsetZeroFunctor<DeviceType> f_zero;
f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
Kokkos::parallel_for(mbins, f_zero);
DeviceType::fence();
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
x = atomKK->k_x.view<DeviceType>();
@ -106,7 +105,6 @@ void NBinKokkos<DeviceType>::bin_atoms()
NPairKokkosBinAtomsFunctor<DeviceType> f(*this);
Kokkos::parallel_for(atom->nlocal+atom->nghost, f);
DeviceType::fence();
deep_copy(h_resize, d_resize);
if(h_resize()) {

View File

@ -0,0 +1,307 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors:
James Larentzos (ARL) and Timothy I. Mattox (Engility Corporation)
------------------------------------------------------------------------- */
#include "nbin_ssa_kokkos.h"
#include "neighbor.h"
#include "atom_kokkos.h"
#include "group.h"
#include "domain.h"
#include "comm.h"
#include "update.h"
#include "error.h"
#include "atom_masks.h"
// #include "memory.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// Allocate the device-side scalars (resize flag and local-bin bounding box),
// their host counterparts, and the 8-entry ghost bin counter.
template<class DeviceType>
NBinSSAKokkos<DeviceType>::NBinSSAKokkos(LAMMPS *lmp) : NBinStandard(lmp)
{
  // starting capacities for local bins and ghost bins
  ghosts_per_gbin = 16;
  atoms_per_bin = 16;

  typedef typename AT::t_int_scalar dev_scalar;
  d_resize  = dev_scalar("NBinSSAKokkos::d_resize");
  d_lbinxlo = dev_scalar("NBinSSAKokkos::d_lbinxlo");
  d_lbinylo = dev_scalar("NBinSSAKokkos::d_lbinylo");
  d_lbinzlo = dev_scalar("NBinSSAKokkos::d_lbinzlo");
  d_lbinxhi = dev_scalar("NBinSSAKokkos::d_lbinxhi");
  d_lbinyhi = dev_scalar("NBinSSAKokkos::d_lbinyhi");
  d_lbinzhi = dev_scalar("NBinSSAKokkos::d_lbinzhi");

#ifdef KOKKOS_USE_CUDA_UVM
  // with UVM the host handles simply alias the device views
  h_resize  = d_resize;
  h_lbinxlo = d_lbinxlo;
  h_lbinylo = d_lbinylo;
  h_lbinzlo = d_lbinzlo;
  h_lbinxhi = d_lbinxhi;
  h_lbinyhi = d_lbinyhi;
  h_lbinzhi = d_lbinzhi;
#else
  // otherwise each device scalar gets a host mirror
  h_resize  = Kokkos::create_mirror_view(d_resize);
  h_lbinxlo = Kokkos::create_mirror_view(d_lbinxlo);
  h_lbinylo = Kokkos::create_mirror_view(d_lbinylo);
  h_lbinzlo = Kokkos::create_mirror_view(d_lbinzlo);
  h_lbinxhi = Kokkos::create_mirror_view(d_lbinxhi);
  h_lbinyhi = Kokkos::create_mirror_view(d_lbinyhi);
  h_lbinzhi = Kokkos::create_mirror_view(d_lbinzhi);
#endif
  h_resize() = 1;

  // one counter per ghost bin index 0..7
  k_gbincount = DAT::tdual_int_1d("NBinSSAKokkos::gbincount",8);
  gbincount = k_gbincount.view<DeviceType>();
}
/* ---------------------------------------------------------------------- */
// Grow the bin storage if required and reset the bounding box of the
// locally occupied bins, on the host and on the device.
template<class DeviceType>
void NBinSSAKokkos<DeviceType>::bin_atoms_setup(int nall)
{
  // local bins: reallocate when there are more bins than rows available
  const int binRows = static_cast<int>(k_bins.h_view.dimension_0());
  if (mbins > binRows) {
    k_bins = DAT::tdual_int_2d("NBinSSAKokkos::bins",mbins,atoms_per_bin);
    bins = k_bins.view<DeviceType>();

    k_bincount = DAT::tdual_int_1d("NBinSSAKokkos::bincount",mbins);
    bincount = k_bincount.view<DeviceType>();
  }

  // ghost bins: estimate needed size
  ghosts_per_gbin = atom->nghost / 7;
  const int gbinCols = static_cast<int>(k_gbins.h_view.dimension_1());
  if (ghosts_per_gbin > gbinCols) {
    k_gbins = DAT::tdual_int_2d("NBinSSAKokkos::gbins",8,ghosts_per_gbin);
    gbins = k_gbins.view<DeviceType>();
  }

  // Clear the local bin extent bounding box: the "lo" bounds start at the
  // largest bin index and the "hi" bounds at zero, then each is mirrored
  // to the device.
  h_lbinxlo() = mbinx - 1; // Safe to = stencil->sx + 1
  deep_copy(d_lbinxlo, h_lbinxlo);
  h_lbinylo() = mbiny - 1; // Safe to = stencil->sy + 1
  deep_copy(d_lbinylo, h_lbinylo);
  h_lbinzlo() = mbinz - 1; // Safe to = stencil->sz + 1
  deep_copy(d_lbinzlo, h_lbinzlo);

  h_lbinxhi() = 0; // Safe to = mbinx - stencil->sx - 1
  deep_copy(d_lbinxhi, h_lbinxhi);
  h_lbinyhi() = 0; // Safe to = mbiny - stencil->sy - 1
  deep_copy(d_lbinyhi, h_lbinyhi);
  h_lbinzhi() = 0; // Safe to = mbinz - stencil->sz - 1
  deep_copy(d_lbinzhi, h_lbinzhi);
}
/* ----------------------------------------------------------------------
bin owned and ghost atoms for the Shardlow Splitting Algorithm (SSA)
local atoms are in distinct bins (binhead[]) from the ghosts
ghost atoms are "binned" in gairhead_ssa[] instead
ghosts which are not in an Active Interaction Region (AIR) are skipped
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Bin all atoms in four stages:
     1. compute each local atom's bin ID, the local bounding box and the
        largest per-bin population (atoms_per_bin)
     2. compute each ghost atom's AIR number and the largest per-AIR
        population (ghosts_per_gbin)
     3. fill and sort the ghost bins
     4. fill and sort the local bins
   The counting stages (1, 2) and the filling stages (3, 4) both increment
   the same counters, so the counters are zeroed before each of them.
------------------------------------------------------------------------- */
template<class DeviceType>
void NBinSSAKokkos<DeviceType>::bin_atoms()
{
  last_bin = update->ntimestep;

  int nlocal = atom->nlocal;
  int nghost = atom->nghost;
  int nall = nlocal + nghost;

  atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
  x = atomKK->k_x.view<DeviceType>();

  // copy the sub-domain and bounding-box limits into members that are
  // captured by value in the kernels
  sublo_[0] = domain->sublo[0];
  sublo_[1] = domain->sublo[1];
  sublo_[2] = domain->sublo[2];
  subhi_[0] = domain->subhi[0];
  subhi_[1] = domain->subhi[1];
  subhi_[2] = domain->subhi[2];

  bboxlo_[0] = bboxlo[0]; bboxlo_[1] = bboxlo[1]; bboxlo_[2] = bboxlo[2];
  bboxhi_[0] = bboxhi[0]; bboxhi_[1] = bboxhi[1]; bboxhi_[2] = bboxhi[2];

  // scratch array: bin ID (local atoms) or AIR number (ghosts) per atom
  k_binID = DAT::tdual_int_1d("NBinSSAKokkos::binID",nall);
  binID = k_binID.view<DeviceType>();

  // find each local atom's binID
  {
    // bincount still holds the per-bin totals left by the previous call;
    // zero it first so atoms_per_bin reflects only the current atoms
    // (the ghost pass below does the same for gbincount)
    MemsetZeroFunctor<DeviceType> f_zero;
    f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
    Kokkos::parallel_for(mbins, f_zero);

    atoms_per_bin = 0;
    NPairSSAKokkosBinIDAtomsFunctor<DeviceType> f(*this);
    Kokkos::parallel_reduce(nlocal, f, atoms_per_bin);
  }
  // fetch the bounding box of occupied local bins computed by the kernel
  deep_copy(h_lbinxlo, d_lbinxlo);
  deep_copy(h_lbinylo, d_lbinylo);
  deep_copy(h_lbinzlo, d_lbinzlo);
  deep_copy(h_lbinxhi, d_lbinxhi);
  deep_copy(h_lbinyhi, d_lbinyhi);
  deep_copy(h_lbinzhi, d_lbinzhi);

  // find each ghost's binID (AIR number)
  {
    for (int i = 0; i < 8; i++) k_gbincount.h_view(i) = 0;
    k_gbincount.modify<LMPHostType>();
    k_gbincount.sync<DeviceType>();
    ghosts_per_gbin = 0;
    NPairSSAKokkosBinIDGhostsFunctor<DeviceType> f(*this);
    Kokkos::parallel_reduce(Kokkos::RangePolicy<LMPDeviceType>(nlocal,nall), f, ghosts_per_gbin);
  }

  // actually bin the ghost atoms
  {
    if(ghosts_per_gbin > (int) gbins.dimension_1()) {
      k_gbins = DAT::tdual_int_2d("gbins", 8, ghosts_per_gbin);
      gbins = k_gbins.view<DeviceType>();
    }
    // the counting pass above used gbincount, so reset it before filling
    for (int i = 0; i < 8; i++) k_gbincount.h_view(i) = 0;
    k_gbincount.modify<LMPHostType>();
    k_gbincount.sync<DeviceType>();

    // local copies of the views so the lambdas do not capture "this"
    auto binID_ = binID;
    auto gbincount_ = gbincount;
    auto gbins_ = gbins;

    Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType>(nlocal,nall),
      LAMMPS_LAMBDA (const int i) {
        const int iAIR = binID_(i);
        if (iAIR > 0) { // include only ghost atoms in an AIR
          const int ac = Kokkos::atomic_fetch_add(&gbincount_[iAIR], (int)1);
          gbins_(iAIR, ac) = i;
        }
      });
    // atomics fill the bins in arbitrary order; sort AIR bins 1..7
    Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType>(1,8),
      LAMMPS_LAMBDA (const int i) {
        sortBin(gbincount_, gbins_, i);
      });
  }
  c_gbins = gbins; // gbins won't change until the next bin_atoms

  // actually bin the local atoms
  {
    if ((mbins > (int) bins.dimension_0()) ||
        (atoms_per_bin > (int) bins.dimension_1())) {
      k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
      bins = k_bins.view<DeviceType>();
    }
    // the counting pass above used bincount, so reset it before filling
    MemsetZeroFunctor<DeviceType> f_zero;
    f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
    Kokkos::parallel_for(mbins, f_zero);

    auto bincount_ = bincount;
    auto bins_ = bins;

    NPairSSAKokkosBinAtomsFunctor<DeviceType> f(*this);
    Kokkos::parallel_for(nlocal, f);

    // atomics fill the bins in arbitrary order; sort every local bin
    Kokkos::parallel_for(mbins,
      LAMMPS_LAMBDA (const int i) {
        sortBin(bincount_, bins_, i);
      });
  }
  k_bins.modify<DeviceType>();
  k_bincount.modify<DeviceType>();
  c_bins = bins; // bins won't change until the next bin_atoms

  k_gbins.modify<DeviceType>();
  k_gbincount.modify<DeviceType>();

  //now dispose of the k_binID array
  k_binID = DAT::tdual_int_1d("NBinSSAKokkos::binID",0);
  binID = k_binID.view<DeviceType>();
}
/* ---------------------------------------------------------------------- */
// Append local atom i to the bin recorded for it in binID; the slot is
// claimed with an atomic increment of that bin's counter.
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void NBinSSAKokkos<DeviceType>::binAtomsItem(const int &i) const
{
  const int target = binID(i);
  const int slot = Kokkos::atomic_fetch_add(&(bincount[target]), (int)1);
  bins(target, slot) = i;
}
// Counting pass for local atom i: record its bin ID, widen the bounding
// box of occupied local bins, and raise "update" to the largest per-bin
// population seen by this thread.
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void NBinSSAKokkos<DeviceType>::binIDAtomsItem(const int &i, int &update) const
{
  int cell[3];   // per-dimension bin indices filled in by coord2bin
  const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2), cell);
  binID(i) = ibin;

  // Find the bounding box of the local atoms in the bins
  // (lo bounds are inclusive, hi bounds are one past the last bin)
  const int cx = cell[0];
  if (cx < d_lbinxlo()) Kokkos::atomic_fetch_min(&d_lbinxlo(),cx);
  if (cx >= d_lbinxhi()) Kokkos::atomic_fetch_max(&d_lbinxhi(),cx + 1);

  const int cy = cell[1];
  if (cy < d_lbinylo()) Kokkos::atomic_fetch_min(&d_lbinylo(),cy);
  if (cy >= d_lbinyhi()) Kokkos::atomic_fetch_max(&d_lbinyhi(),cy + 1);

  const int cz = cell[2];
  if (cz < d_lbinzlo()) Kokkos::atomic_fetch_min(&d_lbinzlo(),cz);
  if (cz >= d_lbinzhi()) Kokkos::atomic_fetch_max(&d_lbinzhi(),cz + 1);

  // count this atom in its bin and track the running maximum
  const int before = Kokkos::atomic_fetch_add(&(bincount[ibin]), (int)1);
  if (before >= update) update = before + 1;
}
// Counting pass for ghost atom i: record its AIR number and, when the
// ghost lies in an AIR, raise "update" to the largest per-AIR population
// seen by this thread.
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void NBinSSAKokkos<DeviceType>::binIDGhostsItem(const int &i, int &update) const
{
  const int iAIR = coord2ssaAIR(x(i, 0), x(i, 1), x(i, 2));
  binID(i) = iAIR;

  // include only ghost atoms in an AIR
  if (iAIR <= 0) return;

  const int before = Kokkos::atomic_fetch_add(&gbincount[iAIR], (int)1);
  if (before >= update) update = before + 1;
}
// An implementation of heapsort without recursion
// In-place, non-recursive heapsort of the first gbincount(ibin) entries of
// row ibin of gbins, into ascending order.
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void NBinSSAKokkos<DeviceType>::sortBin(
  typename AT::t_int_1d gbincount,
  typename AT::t_int_2d gbins,
  const int &ibin)
{
  int heapEnd = gbincount(ibin);  // entries [0, heapEnd) form the heap
  int buildPos = heapEnd/2;       // internal nodes still to be heapified

  for (;;) {
    int held;                     // value being sifted down
    if (buildPos > 0) {
      // phase 1: build the max-heap, one internal node at a time
      --buildPos;
      held = gbins(ibin, buildPos);
    } else {
      // phase 2: move the current maximum behind the shrinking heap
      --heapEnd;
      if (heapEnd <= 0) return;   // heap exhausted: row is sorted
      held = gbins(ibin, heapEnd);
      gbins(ibin, heapEnd) = gbins(ibin, 0);
    }

    // sift "held" down from buildPos (which is 0 throughout phase 2)
    int hole = buildPos;
    for (int kid = 2*hole + 1; kid < heapEnd; kid = 2*hole + 1) {
      // pick the larger of the two children
      if ((kid + 1 < heapEnd) && (gbins(ibin, kid + 1) > gbins(ibin, kid))) ++kid;
      if (gbins(ibin, kid) <= held) break;   // held belongs in the hole
      gbins(ibin, hole) = gbins(ibin, kid);  // pull the larger child up
      hole = kid;
    }
    gbins(ibin, hole) = held;
  }
}
// Explicit template instantiations: the member definitions above live in
// this translation unit, so the supported device types are instantiated here.
namespace LAMMPS_NS {
template class NBinSSAKokkos<LMPDeviceType>;
// the host variant is only instantiated separately in CUDA builds
// (presumably LMPHostType and LMPDeviceType coincide otherwise -- confirm
// against kokkos_type.h)
#ifdef KOKKOS_HAVE_CUDA
template class NBinSSAKokkos<LMPHostType>;
#endif
}

View File

@ -0,0 +1,246 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NBIN_CLASS
NBinStyle(ssa/kk/host,
NBinSSAKokkos<LMPHostType>,
NB_SSA | NB_KOKKOS_HOST)
NBinStyle(ssa/kk/device,
NBinSSAKokkos<LMPDeviceType>,
NB_SSA | NB_KOKKOS_DEVICE)
#else
#ifndef LMP_NBIN_SSA_KOKKOS_H
#define LMP_NBIN_SSA_KOKKOS_H
#include "nbin_standard.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
// Kokkos binning class for the Shardlow Splitting Algorithm (SSA).
// Local atoms are binned into "bins"; ghost atoms are binned by their
// active interaction region (AIR) number into "gbins".
template<class DeviceType>
class NBinSSAKokkos : public NBinStandard {
public:
typedef ArrayTypes<DeviceType> AT;
NBinSSAKokkos(class LAMMPS *);
~NBinSSAKokkos() {}
// resize bin storage and reset the local-bin bounding box
void bin_atoms_setup(int);
// bin and sort all local and ghost atoms
void bin_atoms();
// temporary array to hold the binID for each atom
DAT::tdual_int_1d k_binID;
typename AT::t_int_1d binID;
typename AT::t_int_1d_const c_binID;
// local atoms: largest per-bin population, per-bin counters and contents
int atoms_per_bin;
DAT::tdual_int_1d k_bincount;
DAT::tdual_int_2d k_bins;
typename AT::t_int_1d bincount;
typename AT::t_int_2d bins;
// read-only handle to bins
typename AT::t_int_2d_const c_bins;
// ghost atoms: largest per-AIR population, per-AIR counters and contents
int ghosts_per_gbin;
DAT::tdual_int_1d k_gbincount;
DAT::tdual_int_2d k_gbins;
typename AT::t_int_1d gbincount;
typename AT::t_int_2d gbins;
// read-only handle to gbins
typename AT::t_int_2d_const c_gbins;
// resize flag, device view and host counterpart
typename AT::t_int_scalar d_resize;
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
// atom coordinates read by the binning kernels
typename AT::t_x_array_randomread x;
// Bounds of the local atoms in the bins array
typename AT::t_int_scalar d_lbinxlo; // lowest local bin x-dim coordinate
typename AT::t_int_scalar d_lbinylo; // lowest local bin y-dim coordinate
typename AT::t_int_scalar d_lbinzlo; // lowest local bin z-dim coordinate
typename AT::t_int_scalar d_lbinxhi; // highest local bin x-dim coordinate
typename AT::t_int_scalar d_lbinyhi; // highest local bin y-dim coordinate
typename AT::t_int_scalar d_lbinzhi; // highest local bin z-dim coordinate
// host counterparts of the six bounds above
typename ArrayTypes<LMPHostType>::t_int_scalar h_lbinxlo;
typename ArrayTypes<LMPHostType>::t_int_scalar h_lbinylo;
typename ArrayTypes<LMPHostType>::t_int_scalar h_lbinzlo;
typename ArrayTypes<LMPHostType>::t_int_scalar h_lbinxhi;
typename ArrayTypes<LMPHostType>::t_int_scalar h_lbinyhi;
typename ArrayTypes<LMPHostType>::t_int_scalar h_lbinzhi;
// per-atom kernel bodies, invoked through the NPairSSAKokkos*Functor structs
KOKKOS_INLINE_FUNCTION
void binAtomsItem(const int &i) const;
KOKKOS_INLINE_FUNCTION
void binIDAtomsItem(const int &i, int &update) const;
KOKKOS_INLINE_FUNCTION
void binIDGhostsItem(const int &i, int &update) const;
// heapsort of the first gbincount(ibin) entries of row ibin of gbins
static KOKKOS_INLINE_FUNCTION
void sortBin(
typename AT::t_int_1d gbincount,
typename AT::t_int_2d gbins,
const int &ibin);
/* ----------------------------------------------------------------------
convert atom coords into the ssa active interaction region number
returns 0 for a position inside the sub-domain, 1-7 for a position in
an AIR, and -1 for a position outside the sub-domain that is in no AIR
------------------------------------------------------------------------- */
KOKKOS_INLINE_FUNCTION
int coord2ssaAIR(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const
{
int ix, iy, iz;
ix = iy = iz = 0;
// classify each coordinate: -1 below, 0 inside, +1 at/above the sub-domain
if (z < sublo_[2]) iz = -1;
if (z >= subhi_[2]) iz = 1;
if (y < sublo_[1]) iy = -1;
if (y >= subhi_[1]) iy = 1;
if (x < sublo_[0]) ix = -1;
if (x >= subhi_[0]) ix = 1;
if(iz < 0){
return -1;
} else if(iz == 0){
if( iy<0 ) return -1; // bottom left/middle/right
if( (iy==0) && (ix<0) ) return -1; // left atoms
if( (iy==0) && (ix==0) ) return 0; // Locally owned atoms
if( (iy==0) && (ix>0) ) return 2; // Right atoms
if( (iy>0) && (ix==0) ) return 1; // Top-middle atoms
if( (iy>0) && (ix!=0) ) return 3; // Top-right and top-left atoms
} else { // iz > 0
if((ix==0) && (iy==0)) return 4; // Back atoms
if((ix==0) && (iy!=0)) return 5; // Top-back and bottom-back atoms
if((ix!=0) && (iy==0)) return 6; // Left-back and right-back atoms
if((ix!=0) && (iy!=0)) return 7; // Back corner atoms
}
// every (ix,iy,iz) combination returns above; fallback value only
return -2;
}
/* ----------------------------------------------------------------------
convert atom coords into a flattened bin index (the return value)
the per-dimension bin indices are also stored in i[0], i[1], i[2]
------------------------------------------------------------------------- */
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const
{
int ix,iy,iz;
// each dimension has three cases: at/above the bounding box, inside it
// (clamped to the last interior bin), or below it
if (x >= bboxhi_[0])
ix = static_cast<int> ((x-bboxhi_[0])*bininvx) + nbinx;
else if (x >= bboxlo_[0]) {
ix = static_cast<int> ((x-bboxlo_[0])*bininvx);
ix = MIN(ix,nbinx-1);
} else
ix = static_cast<int> ((x-bboxlo_[0])*bininvx) - 1;
if (y >= bboxhi_[1])
iy = static_cast<int> ((y-bboxhi_[1])*bininvy) + nbiny;
else if (y >= bboxlo_[1]) {
iy = static_cast<int> ((y-bboxlo_[1])*bininvy);
iy = MIN(iy,nbiny-1);
} else
iy = static_cast<int> ((y-bboxlo_[1])*bininvy) - 1;
if (z >= bboxhi_[2])
iz = static_cast<int> ((z-bboxhi_[2])*bininvz) + nbinz;
else if (z >= bboxlo_[2]) {
iz = static_cast<int> ((z-bboxlo_[2])*bininvz);
iz = MIN(iz,nbinz-1);
} else
iz = static_cast<int> ((z-bboxlo_[2])*bininvz) - 1;
// shift by the lowest bin index in each dimension so indices start at 0
i[0] = ix - mbinxlo;
i[1] = iy - mbinylo;
i[2] = iz - mbinzlo;
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
private:
// copies of the bounding-box and sub-domain limits, filled in by bin_atoms()
double bboxlo_[3],bboxhi_[3];
double sublo_[3], subhi_[3];
};
template<class DeviceType>
struct NPairSSAKokkosBinAtomsFunctor {
typedef DeviceType device_type;
const NBinSSAKokkos<DeviceType> c;
NPairSSAKokkosBinAtomsFunctor(const NBinSSAKokkos<DeviceType> &_c):
c(_c) {};
~NPairSSAKokkosBinAtomsFunctor() {}
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.binAtomsItem(i);
}
};
template<class DeviceType>
struct NPairSSAKokkosBinIDAtomsFunctor {
typedef DeviceType device_type;
typedef int value_type;
const NBinSSAKokkos<DeviceType> c;
NPairSSAKokkosBinIDAtomsFunctor(const NBinSSAKokkos<DeviceType> &_c):
c(_c) {};
~NPairSSAKokkosBinIDAtomsFunctor() {}
KOKKOS_INLINE_FUNCTION
void operator() (const int & i, value_type& update) const {
c.binIDAtomsItem(i, update);
}
KOKKOS_INLINE_FUNCTION
void join (volatile value_type& dst,
const volatile value_type& src) const {
if (dst < src) dst = src;
}
KOKKOS_INLINE_FUNCTION
void init (value_type& dst) const {
dst = INT_MIN;
}
};
template<class DeviceType>
struct NPairSSAKokkosBinIDGhostsFunctor {
typedef DeviceType device_type;
typedef int value_type;
const NBinSSAKokkos<DeviceType> c;
NPairSSAKokkosBinIDGhostsFunctor(const NBinSSAKokkos<DeviceType> &_c):
c(_c) {};
~NPairSSAKokkosBinIDGhostsFunctor() {}
KOKKOS_INLINE_FUNCTION
void operator() (const int & i, value_type& update) const {
c.binIDGhostsItem(i, update);
}
KOKKOS_INLINE_FUNCTION
void join (volatile value_type& dst,
const volatile value_type& src) const {
if (dst < src) dst = src;
}
KOKKOS_INLINE_FUNCTION
void init (value_type& dst) const {
dst = INT_MIN;
}
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -274,7 +274,6 @@ void NeighBondKokkos<DeviceType>::bond_all()
k_fail_flag.template sync<DeviceType>();
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondBondAll>(0,nlocal),*this,nmissing);
DeviceType::fence();
k_nlist.template modify<DeviceType>();
k_nlist.template sync<LMPHostType>();
@ -370,7 +369,6 @@ void NeighBondKokkos<DeviceType>::bond_partial()
k_fail_flag.template sync<DeviceType>();
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondBondPartial>(0,nlocal),*this,nmissing);
DeviceType::fence();
k_nlist.template modify<DeviceType>();
k_nlist.template sync<LMPHostType>();
@ -443,7 +441,6 @@ void NeighBondKokkos<DeviceType>::bond_check()
k_bondlist.sync<DeviceType>();
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondBondCheck>(0,neighbor->nbondlist),*this,flag);
DeviceType::fence();
int flag_all;
MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
@ -494,7 +491,6 @@ void NeighBondKokkos<DeviceType>::angle_all()
k_fail_flag.template sync<DeviceType>();
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondAngleAll>(0,nlocal),*this,nmissing);
DeviceType::fence();
k_nlist.template modify<DeviceType>();
k_nlist.template sync<LMPHostType>();
@ -597,7 +593,6 @@ void NeighBondKokkos<DeviceType>::angle_partial()
k_fail_flag.template sync<DeviceType>();
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondAnglePartial>(0,nlocal),*this,nmissing);
DeviceType::fence();
k_nlist.template modify<DeviceType>();
k_nlist.template sync<LMPHostType>();
@ -678,7 +673,6 @@ void NeighBondKokkos<DeviceType>::angle_check()
k_anglelist.sync<DeviceType>();
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondAngleCheck>(0,neighbor->nanglelist),*this,flag);
DeviceType::fence();
int flag_all;
MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
@ -741,7 +735,6 @@ void NeighBondKokkos<DeviceType>::dihedral_all()
k_fail_flag.template sync<DeviceType>();
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondDihedralAll>(0,nlocal),*this,nmissing);
DeviceType::fence();
k_nlist.template modify<DeviceType>();
k_nlist.template sync<LMPHostType>();
@ -849,7 +842,6 @@ void NeighBondKokkos<DeviceType>::dihedral_partial()
k_fail_flag.template sync<DeviceType>();
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondDihedralPartial>(0,nlocal),*this,nmissing);
DeviceType::fence();
k_nlist.template modify<DeviceType>();
k_nlist.template sync<LMPHostType>();
@ -935,7 +927,6 @@ void NeighBondKokkos<DeviceType>::dihedral_check(int nlist, typename AT::t_int_2
k_dihedrallist.sync<DeviceType>();
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondDihedralCheck>(0,nlist),*this,flag);
DeviceType::fence();
int flag_all;
MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
@ -1015,7 +1006,6 @@ void NeighBondKokkos<DeviceType>::improper_all()
k_fail_flag.template sync<DeviceType>();
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondImproperAll>(0,nlocal),*this,nmissing);
DeviceType::fence();
k_nlist.template modify<DeviceType>();
k_nlist.template sync<LMPHostType>();
@ -1123,7 +1113,6 @@ void NeighBondKokkos<DeviceType>::improper_partial()
k_fail_flag.template sync<DeviceType>();
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondImproperPartial>(0,nlocal),*this,nmissing);
DeviceType::fence();
k_nlist.template modify<DeviceType>();
k_nlist.template sync<LMPHostType>();

View File

@ -48,7 +48,7 @@ class AtomNeighborsConst
const int num_neighs;
KOKKOS_INLINE_FUNCTION
AtomNeighborsConst(int* const & firstneigh, const int & _num_neighs,
AtomNeighborsConst(const int* const & firstneigh, const int & _num_neighs,
const int & stride):
_firstneigh(firstneigh), num_neighs(_num_neighs), _stride(stride) {};
KOKKOS_INLINE_FUNCTION
@ -82,6 +82,14 @@ public:
&d_neighbors(i,1)-&d_neighbors(i,0));
}
KOKKOS_INLINE_FUNCTION
static AtomNeighborsConst static_neighbors_const(int i,
typename ArrayTypes<Device>::t_neighbors_2d_const const& d_neighbors,
typename ArrayTypes<Device>::t_int_1d_const const& d_numneigh) {
return AtomNeighborsConst(&d_neighbors(i,0),d_numneigh(i),
&d_neighbors(i,1)-&d_neighbors(i,0));
}
KOKKOS_INLINE_FUNCTION
AtomNeighborsConst get_neighbors_const(const int &i) const {
return AtomNeighborsConst(&d_neighbors(i,0),d_numneigh(i),

View File

@ -206,7 +206,6 @@ int NeighborKokkos::check_distance_kokkos()
int flag = 0;
copymode = 1;
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighborCheckDistance<DeviceType> >(0,nlocal),*this,flag);
DeviceType::fence();
copymode = 0;
int flagall;
@ -273,7 +272,6 @@ void NeighborKokkos::build_kokkos(int topoflag)
}
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagNeighborXhold<DeviceType> >(0,nlocal),*this);
DeviceType::fence();
copymode = 0;
xhold.modify<DeviceType>();
if (boxcheck) {

View File

@ -173,12 +173,6 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::build(NeighList *list_)
data.special_flag[2] = special_flag[2];
data.special_flag[3] = special_flag[3];
if(list->d_neighbors.dimension_0()<nall) {
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", nall*1.1, list->maxneighs);
list->d_numneigh = typename ArrayTypes<DeviceType>::t_int_1d("numneigh", nall*1.1);
data.neigh_list.d_neighbors = list->d_neighbors;
data.neigh_list.d_numneigh = list->d_numneigh;
}
data.h_resize()=1;
while(data.h_resize()) {
data.h_new_maxneighs() = list->maxneighs;
@ -220,7 +214,6 @@ if (GHOST) {
#endif
}
}
DeviceType::fence();
deep_copy(data.h_resize, data.resize);
if(data.h_resize()) {
@ -435,10 +428,10 @@ void NeighborKokkosExecute<DeviceType>::
neigh_list.d_numneigh(i) = n;
if(n >= neigh_list.maxneighs) {
if(n > neigh_list.maxneighs) {
resize() = 1;
if(n >= new_maxneighs()) new_maxneighs() = n;
if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n);
}
neigh_list.d_ilist(i) = i;
@ -645,10 +638,10 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
neigh_list.d_ilist(i) = i;
}
if(n >= neigh_list.maxneighs) {
if(n > neigh_list.maxneighs) {
resize() = 1;
if(n >= new_maxneighs()) new_maxneighs() = n;
if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n);
}
}
}
@ -737,9 +730,9 @@ void NeighborKokkosExecute<DeviceType>::
const int ybin = binxyz[1];
const int zbin = binxyz[2];
for (int k = 0; k < nstencil; k++) {
const X_FLOAT xbin2 = xbin + stencilxyz(k,0);
const X_FLOAT ybin2 = ybin + stencilxyz(k,1);
const X_FLOAT zbin2 = zbin + stencilxyz(k,2);
const int xbin2 = xbin + stencilxyz(k,0);
const int ybin2 = ybin + stencilxyz(k,1);
const int zbin2 = zbin + stencilxyz(k,2);
if (xbin2 < 0 || xbin2 >= mbinx ||
ybin2 < 0 || ybin2 >= mbiny ||
zbin2 < 0 || zbin2 >= mbinz) continue;
@ -768,10 +761,10 @@ void NeighborKokkosExecute<DeviceType>::
neigh_list.d_numneigh(i) = n;
if(n >= neigh_list.maxneighs) {
if(n > neigh_list.maxneighs) {
resize() = 1;
if(n >= new_maxneighs()) new_maxneighs() = n;
if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n);
}
neigh_list.d_ilist(i) = i;
}

View File

@ -281,9 +281,6 @@ class NeighborKokkosExecute
void build_ItemCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const;
#endif
KOKKOS_INLINE_FUNCTION
void binatomsItem(const int &i) const;
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const
{

View File

@ -0,0 +1,750 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors:
James Larentzos and Timothy I. Mattox (Engility Corporation)
------------------------------------------------------------------------- */
#include "npair_ssa_kokkos.h"
#include "neigh_list.h"
#include "atom_kokkos.h"
#include "atom_masks.h"
#include "domain_kokkos.h"
#include "neighbor_kokkos.h"
#include "nbin_ssa_kokkos.h"
#include "nstencil_ssa.h"
#include "error.h"
#include "comm.h"
namespace LAMMPS_NS {
/* ---------------------------------------------------------------------- */
template<class DeviceType>
NPairSSAKokkos<DeviceType>::NPairSSAKokkos(LAMMPS *lmp)
  : NPair(lmp), ssa_phaseCt(27), ssa_gphaseCt(7)
{
  // Number of work items reserved per ghost phase in the ghost work plan.
  const int ghostItemsPerPhase = 1; //FIXME make this 4 eventually

  // Allocate the dual (host + device) storage for the ghost work plan ...
  k_ssa_gphaseLen = DAT::tdual_int_1d("NPairSSAKokkos:ssa_gphaseLen",ssa_gphaseCt);
  k_ssa_gitemLoc = DAT::tdual_int_2d("NPairSSAKokkos::ssa_gitemLoc",ssa_gphaseCt,ghostItemsPerPhase);
  k_ssa_gitemLen = DAT::tdual_int_2d("NPairSSAKokkos::ssa_gitemLen",ssa_gphaseCt,ghostItemsPerPhase);

  // ... and cache the DeviceType-side views of each of them.
  ssa_gphaseLen = k_ssa_gphaseLen.view<DeviceType>();
  ssa_gitemLoc = k_ssa_gitemLoc.view<DeviceType>();
  ssa_gitemLen = k_ssa_gitemLen.view<DeviceType>();
}
/* ----------------------------------------------------------------------
copy needed info from Neighbor class to this build class
------------------------------------------------------------------------- */
template<class DeviceType>
void NPairSSAKokkos<DeviceType>::copy_neighbor_info()
{
  // Let the base class pick up its own settings first.
  NPair::copy_neighbor_info();

  // The Kokkos-specific dual views live on the NeighborKokkos subclass.
  NeighborKokkos *kkNeighbor = static_cast<NeighborKokkos*>(neighbor);

  // pairwise cutoffs (squared)
  k_cutneighsq = kkNeighbor->k_cutneighsq;

  // exclusions by atom type
  k_ex1_type = kkNeighbor->k_ex1_type;
  k_ex2_type = kkNeighbor->k_ex2_type;
  k_ex_type = kkNeighbor->k_ex_type;

  // exclusions by group
  k_ex1_group = kkNeighbor->k_ex1_group;
  k_ex2_group = kkNeighbor->k_ex2_group;
  k_ex1_bit = kkNeighbor->k_ex1_bit;
  k_ex2_bit = kkNeighbor->k_ex2_bit;

  // exclusions by molecule
  k_ex_mol_group = kkNeighbor->k_ex_mol_group;
  k_ex_mol_bit = kkNeighbor->k_ex_mol_bit;
}
/* ----------------------------------------------------------------------
copy per-atom and per-bin vectors from NBinSSAKokkos class to this build class
------------------------------------------------------------------------- */
template<class DeviceType>
void NPairSSAKokkos<DeviceType>::copy_bin_info()
{
  NPair::copy_bin_info();

  // This build class only works with the SSA-aware Kokkos binning class.
  NBinSSAKokkos<DeviceType> *ssaBins =
    dynamic_cast<NBinSSAKokkos<DeviceType>*>(nb);
  if (!ssaBins) error->one(FLERR, "NBin wasn't a NBinSSAKokkos object");

  // bins holding local atoms
  atoms_per_bin = ssaBins->atoms_per_bin;
  k_bincount = ssaBins->k_bincount;
  k_bins = ssaBins->k_bins;

  // bins holding ghost atoms
  ghosts_per_gbin = ssaBins->ghosts_per_gbin;
  k_gbincount = ssaBins->k_gbincount;
  k_gbins = ssaBins->k_gbins;

  // extent of the bins that contain local atoms, read from the host-side scalars
  lbinxlo = ssaBins->h_lbinxlo();
  lbinxhi = ssaBins->h_lbinxhi();
  lbinylo = ssaBins->h_lbinylo();
  lbinyhi = ssaBins->h_lbinyhi();
  lbinzlo = ssaBins->h_lbinzlo();
  lbinzhi = ssaBins->h_lbinzhi();
}
/* ----------------------------------------------------------------------
copy needed info from NStencil class to this build class
------------------------------------------------------------------------- */
template<class DeviceType>
void NPairSSAKokkos<DeviceType>::copy_stencil_info()
{
  NPair::copy_stencil_info();
  nstencil = ns->nstencil;

  // Mirror the stencil (flat bin offsets and per-axis offsets) into dual
  // views, filling the host side and then pushing it to DeviceType.
  const int stencilCapacity = ns->get_maxstencil();
  k_stencil = DAT::tdual_int_1d("NPairSSAKokkos:stencil",stencilCapacity);
  k_stencilxyz = DAT::tdual_int_1d_3("NPairSSAKokkos:stencilxyz",stencilCapacity);
  for (int s = 0; s < stencilCapacity; ++s) {
    k_stencil.h_view(s) = ns->stencil[s];
    for (int axis = 0; axis < 3; ++axis)
      k_stencilxyz.h_view(s,axis) = ns->stencilxyz[s][axis];
  }
  k_stencil.modify<LMPHostType>();
  k_stencil.sync<DeviceType>();
  k_stencilxyz.modify<LMPHostType>();
  k_stencilxyz.sync<DeviceType>();

  // The SSA stencil additionally provides the boundaries of its sub-stencils.
  NStencilSSA *ssaStencil = dynamic_cast<NStencilSSA*>(ns);
  if (!ssaStencil) error->one(FLERR, "NStencil wasn't a NStencilSSA object");

  const int subStencilBounds = 5;
  k_nstencil_ssa = DAT::tdual_int_1d("NPairSSAKokkos:nstencil_ssa",subStencilBounds);
  for (int b = 0; b < subStencilBounds; ++b)
    k_nstencil_ssa.h_view(b) = ssaStencil->nstencil_ssa[b];
  k_nstencil_ssa.modify<LMPHostType>();
  k_nstencil_ssa.sync<DeviceType>();

  sx1 = ssaStencil->sx + 1;
  sy1 = ssaStencil->sy + 1;
  sz1 = ssaStencil->sz + 1;

  // Setup the phases of the workplan for locals: one phase per (x,y,z) offset.
  ssa_phaseCt = sz1*sy1*sx1;
  const int phaseCapacity = (int) k_ssa_phaseLen.dimension_0();
  if (phaseCapacity < ssa_phaseCt) {
    k_ssa_phaseLen = DAT::tdual_int_1d("NPairSSAKokkos:ssa_phaseLen",ssa_phaseCt);
    ssa_phaseLen = k_ssa_phaseLen.view<DeviceType>();
    k_ssa_phaseOff = DAT::tdual_int_1d_3("NPairSSAKokkos:ssa_phaseOff",ssa_phaseCt);
    ssa_phaseOff = k_ssa_phaseOff.view<DeviceType>();
  }

  auto h_ssa_phaseOff = k_ssa_phaseOff.h_view;
  k_ssa_phaseOff.sync<LMPHostType>();

  // Enumerate the offsets from the highest to the lowest in every dimension,
  // with x varying fastest and z slowest.
  int phase = 0;
  for (int iz = 0; iz < sz1; ++iz) {
    const int zoff = sz1 - 1 - iz;
    for (int iy = 0; iy < sy1; ++iy) {
      const int yoff = sy1 - 1 - iy;
      for (int ix = 0; ix < sx1; ++ix) {
        const int xoff = sx1 - 1 - ix;
        h_ssa_phaseOff(phase, 0) = xoff;
        h_ssa_phaseOff(phase, 1) = yoff;
        h_ssa_phaseOff(phase, 2) = zoff;
        ++phase;
      }
    }
  }
  k_ssa_phaseOff.modify<LMPHostType>();
  k_ssa_phaseOff.sync<DeviceType>();
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   look up atom j in the special-neighbor list of atom i
   the list of i holds n1 entries of the first kind, entries [n1,n2) of the
   second kind and entries [n2,n3) of the third kind
   if j is found, special_flag[kind] decides the result:
     flag 0 -> return -1
     flag 1 -> return 0
     else   -> return kind (1, 2 or 3)
   return 0 if j is not in the list
------------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
int NPairSSAKokkosExecute<DeviceType>::find_special(const int &i, const int &j) const
{
  const int n1 = nspecial(i,0);
  const int n2 = nspecial(i,1);
  const int n3 = nspecial(i,2);

  for (int k = 0; k < n3; k++) {
    if (special(i,k) != tag(j)) continue;

    // position in the list selects which kind of special neighbor j is
    const int kind = (k < n1) ? 1 : ((k < n2) ? 2 : 3);
    const int flag = special_flag[kind];
    if (flag == 0) return -1;
    if (flag == 1) return 0;
    return kind;
  }

  return 0;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   test whether the pair i,j (of types itype,jtype) is excluded
   return 1 if any type, group or molecule exclusion matches, else 0
------------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
int NPairSSAKokkosExecute<DeviceType>::exclusion(const int &i,const int &j,
                                                 const int &itype,const int &jtype) const
{
  // exclusion by pair of atom types
  if (nex_type && ex_type(itype,jtype)) return 1;

  // exclusion by pair of groups, tested in both orders
  for (int g = 0; g < nex_group; ++g) {
    if (((mask(i) & ex1_bit(g)) && (mask(j) & ex2_bit(g))) ||
        ((mask(i) & ex2_bit(g)) && (mask(j) & ex1_bit(g)))) return 1;
  }

  // exclusion of atoms in the same molecule, when both are in the group
  for (int g = 0; g < nex_mol; ++g) {
    const bool bothInGroup = (mask(i) & ex_mol_bit(g)) && (mask(j) & ex_mol_bit(g));
    if (bothInGroup && molecule(i) == molecule(j)) return 1;
  }

  return 0;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
binned neighbor list construction with full Newton's 3rd law
for use by Shardlow Splitting Algorithm
each owned atom i checks its own bin and other bins in Newton stencil
every pair stored exactly once by some processor
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   build the SSA neighbor list and its work plan
   steps:
     1. size the local work plan (phases x work items) and "preflight" it on
        the host, estimating where each work item will start in ilist
     2. append one work item per ghost phase after the local entries
     3. run the local and ghost kernels, one parallel iteration per phase,
        repeating with a larger neighbor array whenever a kernel reports
        that some atom had more than maxneighs neighbors
   list_ must point to a NeighListKokkos<DeviceType>
------------------------------------------------------------------------- */
template<class DeviceType>
void NPairSSAKokkos<DeviceType>::build(NeighList *list_)
{
  NeighListKokkos<DeviceType>* list = (NeighListKokkos<DeviceType>*) list_;
  const int nlocal = includegroup?atom->nfirst:atom->nlocal;
  // Copy the rank id into a local so the kernels below capture a plain int
  // by value instead of reaching through this->comm from inside the lambda.
  const int me = comm->me;
  int nl_size;

  // Upper bound on the number of work items in any one phase.
  int xbinCt = (lbinxhi - lbinxlo + sx1 - 1) / sx1 + 1;
  int ybinCt = (lbinyhi - lbinylo + sy1 - 1) / sy1 + 1;
  int zbinCt = (lbinzhi - lbinzlo + sz1 - 1) / sz1 + 1;
  int phaseLenEstimate = xbinCt*ybinCt*zbinCt;

  // (Re)allocate the per-item tables if either dimension is too small.
  if ((ssa_phaseCt > (int) k_ssa_itemLoc.dimension_0()) ||
      (phaseLenEstimate > (int) k_ssa_itemLoc.dimension_1())) {
    k_ssa_itemLoc = DAT::tdual_int_2d("NPairSSAKokkos::ssa_itemLoc",ssa_phaseCt,phaseLenEstimate);
    ssa_itemLoc = k_ssa_itemLoc.view<DeviceType>();
    k_ssa_itemLen = DAT::tdual_int_2d("NPairSSAKokkos::ssa_itemLen",ssa_phaseCt,phaseLenEstimate);
    ssa_itemLen = k_ssa_itemLen.view<DeviceType>();
  }

  k_ssa_itemLoc.sync<LMPHostType>();
  k_ssa_itemLen.sync<LMPHostType>();
  k_ssa_gitemLoc.sync<LMPHostType>();
  k_ssa_gitemLen.sync<LMPHostType>();
  k_ssa_phaseOff.sync<LMPHostType>();
  k_ssa_phaseLen.sync<LMPHostType>();
  auto h_ssa_itemLoc = k_ssa_itemLoc.h_view;
  auto h_ssa_itemLen = k_ssa_itemLen.h_view;
  auto h_ssa_gitemLoc = k_ssa_gitemLoc.h_view;
  auto h_ssa_gitemLen = k_ssa_gitemLen.h_view;
  auto h_ssa_phaseOff = k_ssa_phaseOff.h_view;
  auto h_ssa_phaseLen = k_ssa_phaseLen.h_view;

  { // Preflight the neighbor list workplan
    k_bincount.sync<LMPHostType>();
    auto h_bincount = k_bincount.h_view;
    k_stencil.sync<LMPHostType>();
    auto h_stencil = k_stencil.h_view;
    k_nstencil_ssa.sync<LMPHostType>();
    auto h_nstencil_ssa = k_nstencil_ssa.h_view;
    int inum = 0;

    // loop over bins with local atoms, counting half of the neighbors
    for (int workPhase = 0; workPhase < ssa_phaseCt; ++workPhase) {
      int zoff = h_ssa_phaseOff(workPhase, 2);
      int yoff = h_ssa_phaseOff(workPhase, 1);
      int xoff = h_ssa_phaseOff(workPhase, 0);
      int workItem = 0;
      for (int zbin = lbinzlo + zoff; zbin < lbinzhi; zbin += sz1) {
        for (int ybin = lbinylo + yoff - sy1 + 1; ybin < lbinyhi; ybin += sy1) {
          for (int xbin = lbinxlo + xoff - sx1 + 1; xbin < lbinxhi; xbin += sx1) {
            int inum_start = inum;
            // if (workItem >= phaseLenEstimate) error->one(FLERR,"phaseLenEstimate was too small");

            // every work item visits up to four bins, one per "subphase"
            for (int subphase = 0; subphase < 4; subphase++) {
              int s_ybin = ybin + ((subphase & 0x2) ? sy1 - 1 : 0);
              int s_xbin = xbin + ((subphase & 0x1) ? sx1 - 1 : 0);
              if ((s_ybin < lbinylo) || (s_ybin >= lbinyhi)) continue;
              if ((s_xbin < lbinxlo) || (s_xbin >= lbinxhi)) continue;

              const int ibin = zbin*mbiny*mbinx + s_ybin*mbinx + s_xbin;
              const int ibinCt = h_bincount(ibin);
              if (ibinCt > 0) {
                int base_n = 0;
                bool include_same = false;
                // count all local atoms in the current stencil "subphase" as potential neighbors
                for (int k = h_nstencil_ssa(subphase); k < h_nstencil_ssa(subphase+1); k++) {
                  const int jbin = ibin+h_stencil(k);
                  if (jbin != ibin) base_n += h_bincount(jbin);
                  else include_same = true;
                }
                // Calculate how many ibin particles would have had some neighbors
                if (base_n > 0) inum += ibinCt;
                else if (include_same) inum += ibinCt - 1;
              }
            }
            h_ssa_itemLoc(workPhase,workItem) = inum_start; // record where workItem starts in ilist
            h_ssa_itemLen(workPhase,workItem) = inum - inum_start; // record workItem length
#ifdef DEBUG_SSA_BUILD_LOCALS
if (h_ssa_itemLen(workPhase,workItem) < 0) fprintf(stdout, "undr%03d phase (%3d,%3d) inum %d - inum_start %d UNDERFLOW\n"
  ,comm->me
  ,workPhase
  ,workItem
  ,inum
  ,inum_start
);
#endif
            workItem++;
          }
        }
      }

#ifdef DEBUG_SSA_BUILD_LOCALS
fprintf(stdout, "phas%03d phase %3d could use %6d inums, expected %6d inums. maxworkItems = %3d, inums/workItems = %g\n"
  ,comm->me
  ,workPhase
  ,inum - h_ssa_itemLoc(workPhase, 0)
  ,(nlocal*4 + ssa_phaseCt - 1) / ssa_phaseCt
  ,workItem
  ,(inum - h_ssa_itemLoc(workPhase, 0)) / (double) workItem
);
#endif
      // record where workPhase ends
      h_ssa_phaseLen(workPhase) = workItem;
    }
#ifdef DEBUG_SSA_BUILD_LOCALS
// The phase loop counter is out of scope here; after the loop it would have
// equalled ssa_phaseCt, so report that value.
fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inums/phase = %g\n"
  ,comm->me
  ,ssa_phaseCt
  ,inum
  ,nlocal*4
  ,inum / (double) ssa_phaseCt
);
#endif
    nl_size = inum; // record how much space is needed for the local work plan
  }

  // count how many ghosts might have neighbors, and increase the work plan storage
  // (ghost phase p takes its count from entry p+1 of the ghost bin counts)
  k_gbincount.sync<LMPHostType>();
  for (int workPhase = 0; workPhase < ssa_gphaseCt; workPhase++) {
    int len = k_gbincount.h_view(workPhase + 1);
    h_ssa_gitemLoc(workPhase,0) = nl_size; // record where workItem starts in ilist
    h_ssa_gitemLen(workPhase,0) = len;
    nl_size += len;
  }
  list->grow(nl_size); // Make special larger SSA neighbor list

  // Publish the host-built work plan to the device.
  k_ssa_itemLoc.modify<LMPHostType>();
  k_ssa_itemLen.modify<LMPHostType>();
  k_ssa_gitemLoc.modify<LMPHostType>();
  k_ssa_gitemLen.modify<LMPHostType>();
  k_ssa_phaseLen.modify<LMPHostType>();
  k_ssa_itemLoc.sync<DeviceType>();
  k_ssa_itemLen.sync<DeviceType>();
  k_ssa_gitemLen.sync<DeviceType>();
  k_ssa_gitemLoc.sync<DeviceType>();
  k_ssa_phaseOff.sync<DeviceType>();
  k_ssa_phaseLen.sync<DeviceType>();
  k_ssa_gphaseLen.sync<DeviceType>();

  // Bundle every view and scalar the kernels need into one copyable object.
  NPairSSAKokkosExecute<DeviceType>
    data(*list,
         k_cutneighsq.view<DeviceType>(),
         k_bincount.view<DeviceType>(),
         k_bins.view<DeviceType>(),
         k_gbincount.view<DeviceType>(),
         k_gbins.view<DeviceType>(),
         lbinxlo, lbinxhi, lbinylo, lbinyhi, lbinzlo, lbinzhi,
         nstencil, sx1, sy1, sz1,
         k_stencil.view<DeviceType>(),
         k_stencilxyz.view<DeviceType>(),
         k_nstencil_ssa.view<DeviceType>(),
         ssa_phaseCt,
         k_ssa_phaseLen.view<DeviceType>(),
         k_ssa_phaseOff.view<DeviceType>(),
         k_ssa_itemLoc.view<DeviceType>(),
         k_ssa_itemLen.view<DeviceType>(),
         ssa_gphaseCt,
         k_ssa_gphaseLen.view<DeviceType>(),
         k_ssa_gitemLoc.view<DeviceType>(),
         k_ssa_gitemLen.view<DeviceType>(),
         nlocal,
         atomKK->k_x.view<DeviceType>(),
         atomKK->k_type.view<DeviceType>(),
         atomKK->k_mask.view<DeviceType>(),
         atomKK->k_molecule.view<DeviceType>(),
         atomKK->k_tag.view<DeviceType>(),
         atomKK->k_special.view<DeviceType>(),
         atomKK->k_nspecial.view<DeviceType>(),
         atomKK->molecular,
         nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
         bininvx,bininvy,bininvz,
         exclude, nex_type,
         k_ex1_type.view<DeviceType>(),
         k_ex2_type.view<DeviceType>(),
         k_ex_type.view<DeviceType>(),
         nex_group,
         k_ex1_group.view<DeviceType>(),
         k_ex2_group.view<DeviceType>(),
         k_ex1_bit.view<DeviceType>(),
         k_ex2_bit.view<DeviceType>(),
         nex_mol,
         k_ex_mol_group.view<DeviceType>(),
         k_ex_mol_bit.view<DeviceType>(),
         bboxhi,bboxlo,
         domain->xperiodic,domain->yperiodic,domain->zperiodic,
         domain->xprd_half,domain->yprd_half,domain->zprd_half);

  // Make sure the device copies of all kernel inputs are current.
  k_cutneighsq.sync<DeviceType>();
  k_ex1_type.sync<DeviceType>();
  k_ex2_type.sync<DeviceType>();
  k_ex_type.sync<DeviceType>();
  k_ex1_group.sync<DeviceType>();
  k_ex2_group.sync<DeviceType>();
  k_ex1_bit.sync<DeviceType>();
  k_ex2_bit.sync<DeviceType>();
  k_ex_mol_group.sync<DeviceType>();
  k_ex_mol_bit.sync<DeviceType>();
  k_bincount.sync<DeviceType>();
  k_bins.sync<DeviceType>();
  k_gbincount.sync<DeviceType>();
  k_gbins.sync<DeviceType>();
  atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);

  data.special_flag[0] = special_flag[0];
  data.special_flag[1] = special_flag[1];
  data.special_flag[2] = special_flag[2];
  data.special_flag[3] = special_flag[3];

  // Build, and rebuild with more room per atom for as long as a kernel
  // raises the resize flag.
  bool firstTry = true;
  data.h_resize()=1;
  while(data.h_resize()) {
    data.h_new_maxneighs() = list->maxneighs;
    data.h_resize() = 0;

    Kokkos::deep_copy(data.resize, data.h_resize);
    Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs);

    // loop over bins with local atoms, storing half of the neighbors
    Kokkos::parallel_for(ssa_phaseCt, LAMMPS_LAMBDA (const int workPhase) {
      data.build_locals_onePhase(firstTry, me, workPhase);
    });
    k_ssa_itemLoc.modify<DeviceType>();
    k_ssa_itemLen.modify<DeviceType>();
    k_ssa_phaseLen.modify<DeviceType>();
    k_ssa_itemLoc.sync<LMPHostType>();
    k_ssa_itemLen.sync<LMPHostType>();
    k_ssa_phaseLen.sync<LMPHostType>();

    // The local part of ilist ends where the last work item of the last
    // non-empty phase ends.  A phase may end up with zero work items, so
    // search backwards instead of indexing item (phaseLen - 1) blindly,
    // which would read index -1 for an empty final phase.
    int lastPhase = ssa_phaseCt - 1;
    while (lastPhase >= 0 && h_ssa_phaseLen(lastPhase) <= 0) --lastPhase;
    if (lastPhase >= 0) {
      const int lastItem = h_ssa_phaseLen(lastPhase) - 1;
      data.neigh_list.inum = h_ssa_itemLoc(lastPhase,lastItem) +
        h_ssa_itemLen(lastPhase,lastItem);
    } else {
      data.neigh_list.inum = 0;
    }

    // loop over AIR ghost atoms, storing their local neighbors
    Kokkos::parallel_for(ssa_gphaseCt, LAMMPS_LAMBDA (const int workPhase) {
      data.build_ghosts_onePhase(workPhase);
    });
    k_ssa_gitemLoc.modify<DeviceType>();
    k_ssa_gitemLen.modify<DeviceType>();
    k_ssa_gphaseLen.modify<DeviceType>();
    k_ssa_gitemLoc.sync<LMPHostType>();
    k_ssa_gitemLen.sync<LMPHostType>();
    k_ssa_gphaseLen.sync<LMPHostType>();
    auto h_ssa_gphaseLen = k_ssa_gphaseLen.h_view;
    // ghost entries run from inum to the end of the last ghost work item
    data.neigh_list.gnum = h_ssa_gitemLoc(ssa_gphaseCt-1,h_ssa_gphaseLen(ssa_gphaseCt-1)-1) +
      h_ssa_gitemLen(ssa_gphaseCt-1,h_ssa_gphaseLen(ssa_gphaseCt-1)-1) - data.neigh_list.inum;
    firstTry = false;

    deep_copy(data.h_resize, data.resize);

    if(data.h_resize()) {
      // some atom overflowed: reallocate with 20% headroom over the largest count seen
      deep_copy(data.h_new_maxneighs, data.new_maxneighs);
      list->maxneighs = data.h_new_maxneighs() * 1.2;
      list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", list->d_neighbors.dimension_0(), list->maxneighs);
      data.neigh_list.d_neighbors = list->d_neighbors;
      data.neigh_list.maxneighs = list->maxneighs;
    }
  }

  //k_ssa_phaseLen.modify<DeviceType>();
  //k_ssa_itemLoc.modify<DeviceType>();
  //k_ssa_itemLen.modify<DeviceType>();
  //k_ssa_gphaseLen.modify<DeviceType>();
  //k_ssa_gitemLoc.modify<DeviceType>();
  //k_ssa_gitemLen.modify<DeviceType>();

  list->inum = data.neigh_list.inum; //FIXME once the above is in a parallel_for
  list->gnum = data.neigh_list.gnum; // it will need a deep_copy or something

#ifdef DEBUG_SSA_BUILD_LOCALS
fprintf(stdout, "Fina%03d %6d inum %6d gnum, total used %6d, allocated %6d\n"
  ,comm->me
  ,list->inum
  ,list->gnum
  ,list->inum + list->gnum
  ,nl_size
);
#endif

  list->k_ilist.template modify<DeviceType>();
}
/* ----------------------------------------------------------------------
   build the neighbor lists of the local atoms belonging to one work phase
   firstTry  = true on the first pass: empty work items are dropped and the
               remaining ones are compacted to the front of row workPhase of
               d_ssa_itemLoc / d_ssa_itemLen, and d_ssa_phaseLen is recorded
   me        = rank id, only used by the DEBUG_SSA_BUILD_LOCALS output
   workPhase = index of the phase (row of the work plan) to process
   atoms without any neighbor are not added to ilist
   when an atom has more than maxneighs neighbors, the surplus is counted
   but not stored, and resize / new_maxneighs are updated for the caller
------------------------------------------------------------------------- */
template<class DeviceType>
void NPairSSAKokkosExecute<DeviceType>::build_locals_onePhase(const bool firstTry, int me, int workPhase) const
{
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil = d_stencil;
// NOTE(review): "which" is only reassigned when !moltemplate, so for
// moltemplate systems it keeps its previous value -- confirm this is intended.
int which = 0;
// bin offsets that select the bins visited by this phase
int zoff = d_ssa_phaseOff(workPhase, 2);
int yoff = d_ssa_phaseOff(workPhase, 1);
int xoff = d_ssa_phaseOff(workPhase, 0);
// workItem indexes the compacted (output) entry; workItem + skippedItems
// indexes the entry that is being read
int workItem = 0;
int skippedItems = 0;
for (int zbin = lbinzlo + zoff; zbin < lbinzhi; zbin += sz1) {
for (int ybin = lbinylo + yoff - sy1 + 1; ybin < lbinyhi; ybin += sy1) {
for (int xbin = lbinxlo + xoff - sx1 + 1; xbin < lbinxhi; xbin += sx1) {
// an entry of length zero has nothing to build
if (d_ssa_itemLen(workPhase, workItem + skippedItems) == 0) {
if (firstTry) ++skippedItems;
else ++workItem; // phase is done,should break out of three loops here if we could...
continue;
}
// position in ilist where this work item starts, and the running position
int inum_start = d_ssa_itemLoc(workPhase, workItem + skippedItems);
int inum = inum_start;
// each work item visits up to four bins; bit 0 / bit 1 of subphase shift
// the bin by sx1-1 in x / sy1-1 in y
for (int subphase = 0; subphase < 4; subphase++) {
int s_ybin = ybin + ((subphase & 0x2) ? sy1 - 1 : 0);
int s_xbin = xbin + ((subphase & 0x1) ? sx1 - 1 : 0);
// skip bins that lie outside the range of bins with local atoms
if ((s_ybin < lbinylo) || (s_ybin >= lbinyhi)) continue;
if ((s_xbin < lbinxlo) || (s_xbin >= lbinxhi)) continue;
int ibin = zbin*mbiny*mbinx + s_ybin*mbinx + s_xbin;
for (int il = 0; il < c_bincount(ibin); ++il) {
const int i = c_bins(ibin, il);
// n counts every neighbor found, including those that did not fit
int n = 0;
const AtomNeighbors neighbors_i = neigh_list.get_neighbors(inum);
const X_FLOAT xtmp = x(i, 0);
const X_FLOAT ytmp = x(i, 1);
const X_FLOAT ztmp = x(i, 2);
const int itype = type(i);
// loop over all local atoms in the current stencil "subphase"
for (int k = d_nstencil_ssa(subphase); k < d_nstencil_ssa(subphase+1); k++) {
const int jbin = ibin+stencil(k);
int jl;
if (jbin != ibin) jl = 0;
else jl = il + 1; // same bin as i, so start just past i in the bin
for (; jl < c_bincount(jbin); ++jl) {
const int j = c_bins(jbin, jl);
const int jtype = type(j);
if(exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - x(j, 0);
const X_FLOAT dely = ytmp - x(j, 1);
const X_FLOAT delz = ztmp - x(j, 2);
const X_FLOAT rsq = delx*delx + dely*dely + delz*delz;
if(rsq <= cutneighsq(itype,jtype)) {
if (molecular) {
if (!moltemplate)
which = find_special(i,j);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
/* tag[j]-tagprev); */
/* else which = 0; */
// which == 0: store j as is; otherwise store j as is when the minimum
// image check fires; otherwise, for which > 0, store j with "which"
// encoded in the bits above SBBITS; for which < 0 the pair is dropped
if (which == 0){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}else if (minimum_image_check(delx,dely,delz)){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
else if (which > 0) {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
else n++;
}
} else {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
}
}
}
// only atoms that found at least one neighbor get an ilist entry
if (n > 0) {
neigh_list.d_numneigh(inum) = n;
neigh_list.d_ilist(inum++) = i;
// overflow: ask the caller to resize to at least n neighbors per atom
if(n > neigh_list.maxneighs) {
resize() = 1;
if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n);
}
}
}
}
int len = inum - inum_start;
#ifdef DEBUG_SSA_BUILD_LOCALS
if (len != d_ssa_itemLen(workPhase, workItem + skippedItems)) {
fprintf(stdout, "Leng%03d workphase (%2d,%3d,%3d): len = %4d, but ssa_itemLen = %4d%s\n"
,me
,workPhase
,workItem
,workItem + skippedItems
,len
,d_ssa_itemLen(workPhase, workItem + skippedItems)
,(len > d_ssa_itemLen(workPhase, workItem + skippedItems)) ? " OVERFLOW" : ""
);
}
#endif
// keep the work item only if it produced ilist entries; it is written at
// the compacted index workItem
if (inum > inum_start) {
d_ssa_itemLoc(workPhase,workItem) = inum_start; // record where workItem starts in ilist
d_ssa_itemLen(workPhase,workItem) = inum - inum_start; // record actual workItem length
workItem++;
} else if (firstTry) ++skippedItems;
}
}
}
// NOTE(review): the debug output below uses "inum", which is declared inside
// the xbin loop above and looks out of scope here -- confirm that this still
// compiles with DEBUG_SSA_BUILD_LOCALS defined.
#ifdef DEBUG_SSA_BUILD_LOCALS
fprintf(stdout, "Phas%03d phase %3d used %6d inums, workItems = %3d, skipped = %3d, inums/workItems = %g\n"
,me
,workPhase
,inum - d_ssa_itemLoc(workPhase, 0)
,workItem
,skippedItems
,(inum - d_ssa_itemLoc(workPhase, 0)) / (double) workItem
);
#endif
// record where workPhase actually ends
if (firstTry) {
d_ssa_phaseLen(workPhase) = workItem;
// zero the lengths of the unused tail so later passes see them as empty
while (workItem < (int) d_ssa_itemLen.dimension_1()) {
d_ssa_itemLen(workPhase,workItem++) = 0;
}
}
}
/* ----------------------------------------------------------------------
   build the neighbor lists of the ghost atoms belonging to one ghost phase
   workPhase = index of the ghost phase; its atoms are read from ghost bin
               workPhase + 1
   each ghost atom is paired with local atoms in the stencil bins that lie
   inside the range of bins holding local atoms
   ghosts without any neighbor are not added to ilist
   writes the actual length of the work item to d_ssa_gitemLen and the
   number of work items to d_ssa_gphaseLen
------------------------------------------------------------------------- */
template<class DeviceType>
void NPairSSAKokkosExecute<DeviceType>::build_ghosts_onePhase(int workPhase) const
{
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil = d_stencil;
// NOTE(review): "which" is only reassigned when !moltemplate, so for
// moltemplate systems it keeps its previous value -- confirm this is intended.
int which = 0;
// since these are ghosts, must check if stencil bin is out of bounds
int airnum = workPhase + 1;
//FIXME for now, there is only 1 workItem for each ghost AIR
int workItem;
for (workItem = 0; workItem < 1; ++workItem) {
int gNdx = d_ssa_gitemLoc(workPhase, workItem); // record where workItem starts in ilist
for (int il = 0; il < c_gbincount(airnum); ++il) {
const int i = c_gbins(airnum, il);
// n counts every neighbor found, including those that did not fit
int n = 0;
const AtomNeighbors neighbors_i = neigh_list.get_neighbors(gNdx);
const X_FLOAT xtmp = x(i, 0);
const X_FLOAT ytmp = x(i, 1);
const X_FLOAT ztmp = x(i, 2);
const int itype = type(i);
// loc receives the per-axis bin indices of atom i, ibin its flat bin index
int loc[3];
const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2), &(loc[0]));
// loop over AIR ghost atoms in all bins in "full" stencil
// Note: the non-AIR ghost atoms have already been filtered out
for (int k = 0; k < nstencil; k++) {
int xbin2 = loc[0] + d_stencilxyz(k,0);
int ybin2 = loc[1] + d_stencilxyz(k,1);
int zbin2 = loc[2] + d_stencilxyz(k,2);
// Skip it if this bin is outside the extent of local bins
if (xbin2 < lbinxlo || xbin2 >= lbinxhi ||
ybin2 < lbinylo || ybin2 >= lbinyhi ||
zbin2 < lbinzlo || zbin2 >= lbinzhi) continue;
const int jbin = ibin+stencil(k);
for (int jl = 0; jl < c_bincount(jbin); ++jl) {
const int j = c_bins(jbin, jl);
const int jtype = type(j);
if(exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - x(j, 0);
const X_FLOAT dely = ytmp - x(j, 1);
const X_FLOAT delz = ztmp - x(j, 2);
const X_FLOAT rsq = delx*delx + dely*dely + delz*delz;
if(rsq <= cutneighsq(itype,jtype)) {
if (molecular) {
// the special list of the local atom j is searched for the ghost atom i
if (!moltemplate)
which = find_special(j,i);
/* else if (jmol >= 0) */
/* which = find_special(onemols[jmol]->special[jatom], */
/* onemols[jmol]->nspecial[jatom], */
/* tag[i]-jtagprev); */
/* else which = 0; */
// which == 0: store j as is; otherwise store j as is when the minimum
// image check fires; otherwise, for which > 0, store j with "which"
// encoded in the bits above SBBITS; for which < 0 the pair is dropped
if (which == 0){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}else if (minimum_image_check(delx,dely,delz)){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
else if (which > 0) {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
else n++;
}
} else {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
}
}
}
// only ghosts that found at least one neighbor get an ilist entry
if (n > 0) {
neigh_list.d_numneigh(gNdx) = n;
neigh_list.d_ilist(gNdx++) = i;
// overflow: ask the caller to resize to at least n neighbors per atom
if(n > neigh_list.maxneighs) {
resize() = 1;
if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n);
}
}
}
// record where workItem ends in ilist
d_ssa_gitemLen(workPhase,workItem) = gNdx - d_ssa_gitemLoc(workPhase,workItem);
// if (d_ssa_gitemLen(workPhase,workItem) > 0) workItem++;
}
// the loop above always leaves workItem at 1
d_ssa_gphaseLen(workPhase) = workItem;
}
}
// Explicit instantiations of the class template for the execution spaces in
// use, so the member definitions in this file are compiled here.
namespace LAMMPS_NS {
template class NPairSSAKokkos<LMPDeviceType>;
// The host variant is only instantiated separately in CUDA builds
// (presumably LMPDeviceType and LMPHostType are the same type otherwise --
// TODO confirm).
#ifdef KOKKOS_HAVE_CUDA
template class NPairSSAKokkos<LMPHostType>;
#endif
}

View File

@ -0,0 +1,362 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
// This section is only compiled when NPAIR_CLASS is defined.  It names the
// host and device variants of NPairSSAKokkos and passes each to the
// NPairStyle macro together with its style name and requirement flags
// (presumably this registers the style with the neighbor code -- the macro
// is defined elsewhere).

// host variant
typedef NPairSSAKokkos<LMPHostType> NPairSSAKokkosHost;
NPairStyle(half/bin/newton/ssa/kk/host,
NPairSSAKokkosHost,
NP_HALF | NP_BIN | NP_NEWTON | NP_ORTHO | NP_SSA | NP_GHOST | NP_KOKKOS_HOST)
// device variant
typedef NPairSSAKokkos<LMPDeviceType> NPairSSAKokkosDevice;
NPairStyle(half/bin/newton/ssa/kk/device,
NPairSSAKokkosDevice,
NP_HALF | NP_BIN | NP_NEWTON | NP_ORTHO | NP_SSA | NP_GHOST | NP_KOKKOS_DEVICE)
#else
#ifndef LMP_NPAIR_SSA_KOKKOS_H
#define LMP_NPAIR_SSA_KOKKOS_H
#include "npair.h"
#include "neigh_list_kokkos.h"
namespace LAMMPS_NS {
/* ----------------------------------------------------------------------
   Kokkos neighbor list build class for the SSA neighbor style
   besides the neighbor list it owns the SSA "work plan": for every phase,
   a list of work items, each given by a start location and a length
   k_ prefixed members are dual (host + device) views, the members of the
   same name without the prefix are the DeviceType side of them
------------------------------------------------------------------------- */
template<class DeviceType>
class NPairSSAKokkos : public NPair {
 public:
  using AT = ArrayTypes<DeviceType>;

  // SSA Work plan data structures

  // work plan for the local atoms
  int ssa_phaseCt;                       // number of phases
  DAT::tdual_int_1d k_ssa_phaseLen;      // work items per phase
  DAT::tdual_int_1d_3 k_ssa_phaseOff;    // x,y,z bin offset of each phase
  DAT::tdual_int_2d k_ssa_itemLoc;       // start of each work item
  DAT::tdual_int_2d k_ssa_itemLen;       // length of each work item
  typename AT::t_int_1d ssa_phaseLen;
  typename AT::t_int_1d_3 ssa_phaseOff;
  typename AT::t_int_2d ssa_itemLoc;
  typename AT::t_int_2d ssa_itemLen;

  // work plan for the ghost atoms
  const int ssa_gphaseCt;                // number of ghost phases
  DAT::tdual_int_1d k_ssa_gphaseLen;     // work items per ghost phase
  DAT::tdual_int_2d k_ssa_gitemLoc;      // start of each ghost work item
  DAT::tdual_int_2d k_ssa_gitemLen;      // length of each ghost work item
  typename AT::t_int_1d ssa_gphaseLen;
  typename AT::t_int_2d ssa_gitemLoc;
  typename AT::t_int_2d ssa_gitemLen;

  NPairSSAKokkos(class LAMMPS *);
  ~NPairSSAKokkos() {}

  void copy_neighbor_info();
  void copy_bin_info();
  void copy_stencil_info();
  void build(class NeighList *);

 private:
  // data from Neighbor class
  DAT::tdual_xfloat_2d k_cutneighsq;

  // exclusion data from Neighbor class
  DAT::tdual_int_1d k_ex1_type;
  DAT::tdual_int_1d k_ex2_type;
  DAT::tdual_int_2d k_ex_type;
  DAT::tdual_int_1d k_ex1_group;
  DAT::tdual_int_1d k_ex2_group;
  DAT::tdual_int_1d k_ex1_bit;
  DAT::tdual_int_1d k_ex2_bit;
  DAT::tdual_int_1d k_ex_mol_group;
  DAT::tdual_int_1d k_ex_mol_bit;

  // data from NBinSSA class
  int atoms_per_bin;
  DAT::tdual_int_1d k_bincount;
  DAT::tdual_int_2d k_bins;
  int ghosts_per_gbin;
  DAT::tdual_int_1d k_gbincount;
  DAT::tdual_int_2d k_gbins;
  int lbinxlo;
  int lbinxhi;
  int lbinylo;
  int lbinyhi;
  int lbinzlo;
  int lbinzhi;

  // data from NStencilSSA class
  int nstencil;
  DAT::tdual_int_1d k_stencil;  // # of J neighs for each I
  DAT::tdual_int_1d_3 k_stencilxyz;
  DAT::tdual_int_1d k_nstencil_ssa;
  int sx1;
  int sy1;
  int sz1;
};
template<class DeviceType>
class NPairSSAKokkosExecute
{
typedef ArrayTypes<DeviceType> AT;
public:
NeighListKokkos<DeviceType> neigh_list;
// data from Neighbor class
const typename AT::t_xfloat_2d_randomread cutneighsq;
// exclusion data from Neighbor class
const int exclude;
const int nex_type;
const typename AT::t_int_1d_const ex1_type,ex2_type;
const typename AT::t_int_2d_const ex_type;
const int nex_group;
const typename AT::t_int_1d_const ex1_group,ex2_group;
const typename AT::t_int_1d_const ex1_bit,ex2_bit;
const int nex_mol;
const typename AT::t_int_1d_const ex_mol_group;
const typename AT::t_int_1d_const ex_mol_bit;
// data from NBinSSA class
const typename AT::t_int_1d bincount;
const typename AT::t_int_1d_const c_bincount;
typename AT::t_int_2d bins;
typename AT::t_int_2d_const c_bins;
const typename AT::t_int_1d gbincount;
const typename AT::t_int_1d_const c_gbincount;
typename AT::t_int_2d gbins;
typename AT::t_int_2d_const c_gbins;
const int lbinxlo, lbinxhi, lbinylo, lbinyhi, lbinzlo, lbinzhi;
// data from NStencil class
const int nstencil;
const int sx1, sy1, sz1;
typename AT::t_int_1d d_stencil; // # of J neighs for each I
typename AT::t_int_1d_3 d_stencilxyz;
typename AT::t_int_1d d_nstencil_ssa;
// data from Atom class
const typename AT::t_x_array_randomread x;
const typename AT::t_int_1d_const type,mask;
const typename AT::t_tagint_1d_const molecule;
const typename AT::t_tagint_1d_const tag;
const typename AT::t_tagint_2d_const special;
const typename AT::t_int_2d_const nspecial;
const int molecular;
int moltemplate;
int special_flag[4];
const int nbinx,nbiny,nbinz;
const int mbinx,mbiny,mbinz;
const int mbinxlo,mbinylo,mbinzlo;
const X_FLOAT bininvx,bininvy,bininvz;
X_FLOAT bboxhi[3],bboxlo[3];
const int nlocal;
typename AT::t_int_scalar resize;
typename AT::t_int_scalar new_maxneighs;
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
typename ArrayTypes<LMPHostType>::t_int_scalar h_new_maxneighs;
const int xperiodic, yperiodic, zperiodic;
const int xprd_half, yprd_half, zprd_half;
// SSA Work plan data structures
int ssa_phaseCt;
typename AT::t_int_1d d_ssa_phaseLen;
typename AT::t_int_1d_3_const d_ssa_phaseOff;
typename AT::t_int_2d d_ssa_itemLoc;
typename AT::t_int_2d d_ssa_itemLen;
int ssa_gphaseCt;
typename AT::t_int_1d d_ssa_gphaseLen;
typename AT::t_int_2d d_ssa_gitemLoc;
typename AT::t_int_2d d_ssa_gitemLen;
NPairSSAKokkosExecute(
const NeighListKokkos<DeviceType> &_neigh_list,
const typename AT::t_xfloat_2d_randomread &_cutneighsq,
const typename AT::t_int_1d &_bincount,
const typename AT::t_int_2d &_bins,
const typename AT::t_int_1d &_gbincount,
const typename AT::t_int_2d &_gbins,
const int _lbinxlo, const int _lbinxhi,
const int _lbinylo, const int _lbinyhi,
const int _lbinzlo, const int _lbinzhi,
const int _nstencil, const int _sx1, const int _sy1, const int _sz1,
const typename AT::t_int_1d &_d_stencil,
const typename AT::t_int_1d_3 &_d_stencilxyz,
const typename AT::t_int_1d &_d_nstencil_ssa,
const int _ssa_phaseCt,
const typename AT::t_int_1d &_d_ssa_phaseLen,
const typename AT::t_int_1d_3 &_d_ssa_phaseOff,
const typename AT::t_int_2d &_d_ssa_itemLoc,
const typename AT::t_int_2d &_d_ssa_itemLen,
const int _ssa_gphaseCt,
const typename AT::t_int_1d &_d_ssa_gphaseLen,
const typename AT::t_int_2d &_d_ssa_gitemLoc,
const typename AT::t_int_2d &_d_ssa_gitemLen,
const int _nlocal,
const typename AT::t_x_array_randomread &_x,
const typename AT::t_int_1d_const &_type,
const typename AT::t_int_1d_const &_mask,
const typename AT::t_tagint_1d_const &_molecule,
const typename AT::t_tagint_1d_const &_tag,
const typename AT::t_tagint_2d_const &_special,
const typename AT::t_int_2d_const &_nspecial,
const int &_molecular,
const int & _nbinx,const int & _nbiny,const int & _nbinz,
const int & _mbinx,const int & _mbiny,const int & _mbinz,
const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo,
const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz,
const int & _exclude,const int & _nex_type,
const typename AT::t_int_1d_const & _ex1_type,
const typename AT::t_int_1d_const & _ex2_type,
const typename AT::t_int_2d_const & _ex_type,
const int & _nex_group,
const typename AT::t_int_1d_const & _ex1_group,
const typename AT::t_int_1d_const & _ex2_group,
const typename AT::t_int_1d_const & _ex1_bit,
const typename AT::t_int_1d_const & _ex2_bit,
const int & _nex_mol,
const typename AT::t_int_1d_const & _ex_mol_group,
const typename AT::t_int_1d_const & _ex_mol_bit,
const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo,
const int & _xperiodic, const int & _yperiodic, const int & _zperiodic,
const int & _xprd_half, const int & _yprd_half, const int & _zprd_half):
neigh_list(_neigh_list), cutneighsq(_cutneighsq),
bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins),
gbincount(_gbincount),c_gbincount(_gbincount),gbins(_gbins),c_gbins(_gbins),
lbinxlo(_lbinxlo),lbinxhi(_lbinxhi),
lbinylo(_lbinylo),lbinyhi(_lbinyhi),
lbinzlo(_lbinzlo),lbinzhi(_lbinzhi),
nstencil(_nstencil),sx1(_sx1),sy1(_sy1),sz1(_sz1),
d_stencil(_d_stencil),d_stencilxyz(_d_stencilxyz),d_nstencil_ssa(_d_nstencil_ssa),
ssa_phaseCt(_ssa_phaseCt),
d_ssa_phaseLen(_d_ssa_phaseLen),
d_ssa_phaseOff(_d_ssa_phaseOff),
d_ssa_itemLoc(_d_ssa_itemLoc),
d_ssa_itemLen(_d_ssa_itemLen),
ssa_gphaseCt(_ssa_gphaseCt),
d_ssa_gphaseLen(_d_ssa_gphaseLen),
d_ssa_gitemLoc(_d_ssa_gitemLoc),
d_ssa_gitemLen(_d_ssa_gitemLen),
nlocal(_nlocal),
x(_x),type(_type),mask(_mask),molecule(_molecule),
tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular),
nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz),
mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz),
mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo),
bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz),
exclude(_exclude),nex_type(_nex_type),
ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type),
nex_group(_nex_group),
ex1_group(_ex1_group),ex2_group(_ex2_group),
ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol),
ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit),
xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic),
xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half) {
if (molecular == 2) moltemplate = 1;
else moltemplate = 0;
bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2];
bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2];
resize = typename AT::t_int_scalar("NPairSSAKokkosExecute::resize");
#ifndef KOKKOS_USE_CUDA_UVM
h_resize = Kokkos::create_mirror_view(resize);
#else
h_resize = resize;
#endif
h_resize() = 1;
new_maxneighs = typename AT::
t_int_scalar("NPairSSAKokkosExecute::new_maxneighs");
#ifndef KOKKOS_USE_CUDA_UVM
h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs);
#else
h_new_maxneighs = new_maxneighs;
#endif
h_new_maxneighs() = neigh_list.maxneighs;
};
// Destructor: marks the contained neigh_list as being in copy mode.
// NOTE(review): presumably this keeps the list's own destructor from
// releasing storage that is shared with the original list -- confirm.
~NPairSSAKokkosExecute()
{
  neigh_list.copymode = 1;
}
KOKKOS_FUNCTION
void build_locals_onePhase(const bool firstTry, int me, int workPhase) const;
KOKKOS_FUNCTION
void build_ghosts_onePhase(int workPhase) const;
// Map the coordinate (x,y,z) to a bin.
//   i[0..2] receive the per-dimension bin indices, shifted by
//           mbinxlo/mbinylo/mbinzlo
//   return  the flattened index built from those shifted indices with
//           strides 1, mbinx and mbinx*mbiny
// In each dimension a coordinate at or above bboxhi is binned relative to
// bboxhi and offset by nbin, a coordinate inside [bboxlo,bboxhi) is clamped
// to at most nbin-1, and a coordinate below bboxlo is binned relative to
// bboxlo and shifted down by one.
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const
{
  int binx,biny,binz;

  // x dimension
  if (x >= bboxhi[0]) {
    binx = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
  } else if (x >= bboxlo[0]) {
    binx = MIN(static_cast<int> ((x-bboxlo[0])*bininvx),nbinx-1);
  } else {
    binx = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
  }

  // y dimension
  if (y >= bboxhi[1]) {
    biny = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
  } else if (y >= bboxlo[1]) {
    biny = MIN(static_cast<int> ((y-bboxlo[1])*bininvy),nbiny-1);
  } else {
    biny = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
  }

  // z dimension
  if (z >= bboxhi[2]) {
    binz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
  } else if (z >= bboxlo[2]) {
    binz = MIN(static_cast<int> ((z-bboxlo[2])*bininvz),nbinz-1);
  } else {
    binz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
  }

  // hand the shifted per-dimension indices back to the caller
  i[0] = binx - mbinxlo;
  i[1] = biny - mbinylo;
  i[2] = binz - mbinzlo;

  return (binz-mbinzlo)*mbiny*mbinx + (biny-mbinylo)*mbinx + (binx-mbinxlo);
}
KOKKOS_INLINE_FUNCTION
int exclusion(const int &i,const int &j, const int &itype,const int &jtype) const;
KOKKOS_INLINE_FUNCTION
int find_special(const int &i, const int &j) const;
// Returns 1 if, in any periodic dimension, the magnitude of the given
// displacement component exceeds the matching xprd_half/yprd_half/zprd_half
// threshold; returns 0 otherwise.
// NOTE(review): the constructor receives the three thresholds as
// "const int &" -- confirm that integer values are really intended there.
KOKKOS_INLINE_FUNCTION
int minimum_image_check(double dx, double dy, double dz) const {
  const bool beyond_x = xperiodic && fabs(dx) > xprd_half;
  const bool beyond_y = yperiodic && fabs(dy) > yprd_half;
  const bool beyond_z = zperiodic && fabs(dz) > zprd_half;
  return (beyond_x || beyond_y || beyond_z) ? 1 : 0;
}
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -0,0 +1,796 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Stan Moore (Sandia)
------------------------------------------------------------------------- */
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "atom_kokkos.h"
#include "atom_vec.h"
#include "comm.h"
#include "update.h"
#include "fix.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "memory.h"
#include "modify.h"
#include "pair_dpd_fdt_energy_kokkos.h"
#include "error.h"
#include "atom_masks.h"
using namespace LAMMPS_NS;
#define EPSILON 1.0e-10
/* ---------------------------------------------------------------------- */
// Constructor: builds the base pair style and the random number pool, then
// records the Kokkos execution space for DeviceType and clears both data
// masks.  The pool only takes constructor arguments in the
// DPD_USE_RAN_MARS configuration.
template<class DeviceType>
PairDPDfdtEnergyKokkos<DeviceType>::PairDPDfdtEnergyKokkos(LAMMPS *lmp) :
  PairDPDfdtEnergy(lmp),
#ifdef DPD_USE_RAN_MARS
  rand_pool(0 /* unused */, lmp)
#else
  rand_pool()
#endif
{
  datamask_read = EMPTY_MASK;
  datamask_modify = EMPTY_MASK;

  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;

  // Kokkos-aware view of the atom class
  atomKK = (AtomKokkos *) atom;
}
/* ---------------------------------------------------------------------- */
// Destructor: releases the Kokkos-backed arrays owned by this pair style.
// Nothing is released while copymode is set; compute() sets that flag for
// the duration of its kernel launches, where the object is handed to Kokkos
// as a functor.
template<class DeviceType>
PairDPDfdtEnergyKokkos<DeviceType>::~PairDPDfdtEnergyKokkos()
{
  if (!copymode) {
    // per-atom energy/virial accumulators
    memory->destroy_kokkos(k_eatom,eatom);
    memory->destroy_kokkos(k_vatom,vatom);

    // energy increments exist only once allocate() has run
    if (allocated) {
      memory->destroy_kokkos(k_duCond,duCond);
      memory->destroy_kokkos(k_duMech,duMech);
    }

    memory->destroy_kokkos(k_cutsq,cutsq);

#ifdef DPD_USE_RAN_MARS
    rand_pool.destroy();
#endif
  }
}
/* ----------------------------------------------------------------------
init specific to this pair style
------------------------------------------------------------------------- */
// Runs the parent init_style(), then adjusts the most recently added
// neighbor request (the one the parent made) for Kokkos: host vs. device
// list and full vs. half list according to lmp->kokkos->neighflag.  Any
// other neighflag value raises an error.  Finally the random number pool
// is (re)seeded.
template<class DeviceType>
void PairDPDfdtEnergyKokkos<DeviceType>::init_style()
{
  PairDPDfdtEnergy::init_style();

  neighflag = lmp->kokkos->neighflag;

  // irequest = neigh request made by parent class
  const int irequest = neighbor->nrequest - 1;

  const bool on_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
  const bool on_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value &&
    !on_device;

  neighbor->requests[irequest]->kokkos_host = on_host;
  neighbor->requests[irequest]->kokkos_device = on_device;

  const bool want_full = (neighflag == FULL);
  const bool want_half = (neighflag == HALF || neighflag == HALFTHREAD);

  if (want_full || want_half) {
    neighbor->requests[irequest]->full = want_full ? 1 : 0;
    neighbor->requests[irequest]->half = want_full ? 0 : 1;
  } else {
    error->all(FLERR,"Cannot use chosen neighbor list style with dpd/fdt/energy/kk");
  }

#ifdef DPD_USE_RAN_MARS
  rand_pool.init(random,seed);
#else
  // one distinct seed per MPI rank
  rand_pool.init(seed + comm->me,DeviceType::max_hardware_threads());
#endif
}
#if defined(KOKKOS_ENABLE_CUDA) && defined(__CUDACC__)
// CUDA specialization of init_style to properly call rand_pool.init().
// Performs the same neighbor request setup as the generic version, but
// sizes the Kokkos random pool with a fixed count (4*32768) instead of
// max_hardware_threads().
template<>
void PairDPDfdtEnergyKokkos<Kokkos::Cuda>::init_style()
{
  PairDPDfdtEnergy::init_style();

  neighflag = lmp->kokkos->neighflag;

  // irequest = neigh request made by parent class
  const int irequest = neighbor->nrequest - 1;

  const bool on_device = Kokkos::Impl::is_same<Kokkos::Cuda,LMPDeviceType>::value;
  const bool on_host = Kokkos::Impl::is_same<Kokkos::Cuda,LMPHostType>::value &&
    !on_device;

  neighbor->requests[irequest]->kokkos_host = on_host;
  neighbor->requests[irequest]->kokkos_device = on_device;

  const bool want_full = (neighflag == FULL);
  const bool want_half = (neighflag == HALF || neighflag == HALFTHREAD);

  if (want_full || want_half) {
    neighbor->requests[irequest]->full = want_full ? 1 : 0;
    neighbor->requests[irequest]->half = want_full ? 0 : 1;
  } else {
    error->all(FLERR,"Cannot use chosen neighbor list style with dpd/fdt/energy/kk");
  }

#ifdef DPD_USE_RAN_MARS
  rand_pool.init(random,seed);
#else
  rand_pool.init(seed + comm->me,4*32768 /*fake max_hardware_threads()*/);
#endif
}
#endif
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Launch the pair kernels for one timestep.

   eflag_in/vflag_in are the usual energy/virial request flags; they are
   stored in eflag/vflag and passed to ev_setup().

   Two modes, selected by splitFDT_flag:
     - split:    only the conservative force kernel
                 (TagPairDPDfdtEnergyComputeSplit) is run, and only when
                 a0_is_zero is not set
     - no split: duCond/duMech are (re)allocated and zeroed, the full
                 kernel (TagPairDPDfdtEnergyComputeNoSplit) is run, and the
                 energy increments are synced to the host and
                 reverse-communicated

   Each kernel is instantiated per neighbor list style, newton_pair
   setting and evflag.  The last template argument (STACKPARAMS) selects
   the fixed-size m_params/m_cutsq tables; it is used whenever
   atom->ntypes <= MAX_TYPES_STACKPARAMS, in both modes.

   copymode is set for the whole call because *this is passed to Kokkos as
   the functor.
------------------------------------------------------------------------- */
template<class DeviceType>
void PairDPDfdtEnergyKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
{
  copymode = 1;

  eflag = eflag_in;
  vflag = vflag_in;

  if (neighflag == FULL) no_virial_fdotr_compute = 1;

  if (eflag || vflag) ev_setup(eflag,vflag,0);
  else evflag = vflag_fdotr = 0;

  // reallocate per-atom arrays if necessary

  if (eflag_atom) {
    memory->destroy_kokkos(k_eatom,eatom);
    memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
    d_eatom = k_eatom.template view<DeviceType>();
  }
  if (vflag_atom) {
    memory->destroy_kokkos(k_vatom,vatom);
    memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom");
    d_vatom = k_vatom.template view<DeviceType>();
  }

  // grab the views of the per-atom data used by the kernels

  x = atomKK->k_x.view<DeviceType>();
  v = atomKK->k_v.view<DeviceType>();
  f = atomKK->k_f.view<DeviceType>();
  type = atomKK->k_type.view<DeviceType>();
  mass = atomKK->k_mass.view<DeviceType>();
  rmass = atomKK->rmass;
  dpdTheta = atomKK->k_dpdTheta.view<DeviceType>();

  k_cutsq.template sync<DeviceType>();
  k_params.template sync<DeviceType>();
  atomKK->sync(execution_space,X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK);
  if (evflag) atomKK->modified(execution_space,F_MASK | ENERGY_MASK | VIRIAL_MASK);
  else atomKK->modified(execution_space,F_MASK);

  // local copies of scalars read inside the kernels

  special_lj[0] = force->special_lj[0];
  special_lj[1] = force->special_lj[1];
  special_lj[2] = force->special_lj[2];
  special_lj[3] = force->special_lj[3];

  nlocal = atom->nlocal;
  int nghost = atom->nghost;
  int newton_pair = force->newton_pair;
  dtinvsqrt = 1.0/sqrt(update->dt);

  int inum = list->inum;
  NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
  d_numneigh = k_list->d_numneigh;
  d_neighbors = k_list->d_neighbors;
  d_ilist = k_list->d_ilist;

  boltz = force->boltz;
  ftm2v = force->ftm2v;

  // loop over neighbors of my atoms

  EV_FLOAT ev;

  if (splitFDT_flag) {
    // the split kernel only applies the a0 conservative force,
    // so no kernel is launched when a0_is_zero is set
    if (!a0_is_zero) {
      if(atom->ntypes > MAX_TYPES_STACKPARAMS) {
        if (neighflag == HALF) {
          if (newton_pair) {
            if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALF,1,1,false> >(0,inum),*this,ev);
            else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALF,1,0,false> >(0,inum),*this);
          } else {
            if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALF,0,1,false> >(0,inum),*this,ev);
            else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALF,0,0,false> >(0,inum),*this);
          }
        } else if (neighflag == HALFTHREAD) {
          if (newton_pair) {
            if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALFTHREAD,1,1,false> >(0,inum),*this,ev);
            else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALFTHREAD,1,0,false> >(0,inum),*this);
          } else {
            if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALFTHREAD,0,1,false> >(0,inum),*this,ev);
            else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALFTHREAD,0,0,false> >(0,inum),*this);
          }
        } else if (neighflag == FULL) {
          if (newton_pair) {
            if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<FULL,1,1,false> >(0,inum),*this,ev);
            else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<FULL,1,0,false> >(0,inum),*this);
          } else {
            if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<FULL,0,1,false> >(0,inum),*this,ev);
            else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<FULL,0,0,false> >(0,inum),*this);
          }
        }
      } else {
        if (neighflag == HALF) {
          if (newton_pair) {
            if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALF,1,1,true> >(0,inum),*this,ev);
            else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALF,1,0,true> >(0,inum),*this);
          } else {
            if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALF,0,1,true> >(0,inum),*this,ev);
            else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALF,0,0,true> >(0,inum),*this);
          }
        } else if (neighflag == HALFTHREAD) {
          if (newton_pair) {
            if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALFTHREAD,1,1,true> >(0,inum),*this,ev);
            else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALFTHREAD,1,0,true> >(0,inum),*this);
          } else {
            if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALFTHREAD,0,1,true> >(0,inum),*this,ev);
            else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<HALFTHREAD,0,0,true> >(0,inum),*this);
          }
        } else if (neighflag == FULL) {
          if (newton_pair) {
            if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<FULL,1,1,true> >(0,inum),*this,ev);
            else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<FULL,1,0,true> >(0,inum),*this);
          } else {
            if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<FULL,0,1,true> >(0,inum),*this,ev);
            else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeSplit<FULL,0,0,true> >(0,inum),*this);
          }
        }
      }
    }
  } else {

    // Allocate memory for duCond and duMech
    if (allocated) {
      memory->destroy_kokkos(k_duCond,duCond);
      memory->destroy_kokkos(k_duMech,duMech);
    }
    memory->create_kokkos(k_duCond,duCond,nlocal+nghost,"pair:duCond");
    memory->create_kokkos(k_duMech,duMech,nlocal+nghost,"pair:duMech");
    d_duCond = k_duCond.view<DeviceType>();
    d_duMech = k_duMech.view<DeviceType>();
    h_duCond = k_duCond.h_view;
    h_duMech = k_duMech.h_view;

    // zero the increments for owned and ghost atoms
    Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyZero>(0,nlocal+nghost),*this);

    // the full kernel additionally reads velocities, temperatures and masses
    atomKK->sync(execution_space,V_MASK | DPDTHETA_MASK | RMASS_MASK);
    atomKK->k_mass.sync<DeviceType>();

    // loop over neighbors of my atoms

    if(atom->ntypes > MAX_TYPES_STACKPARAMS) {
      if (neighflag == HALF) {
        if (newton_pair) {
          if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALF,1,1,false> >(0,inum),*this,ev);
          else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALF,1,0,false> >(0,inum),*this);
        } else {
          if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALF,0,1,false> >(0,inum),*this,ev);
          else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALF,0,0,false> >(0,inum),*this);
        }
      } else if (neighflag == HALFTHREAD) {
        if (newton_pair) {
          if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALFTHREAD,1,1,false> >(0,inum),*this,ev);
          else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALFTHREAD,1,0,false> >(0,inum),*this);
        } else {
          if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALFTHREAD,0,1,false> >(0,inum),*this,ev);
          else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALFTHREAD,0,0,false> >(0,inum),*this);
        }
      } else if (neighflag == FULL) {
        if (newton_pair) {
          if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<FULL,1,1,false> >(0,inum),*this,ev);
          else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<FULL,1,0,false> >(0,inum),*this);
        } else {
          if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<FULL,0,1,false> >(0,inum),*this,ev);
          else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<FULL,0,0,false> >(0,inum),*this);
        }
      }
    } else {
      // few enough atom types: use the m_params/m_cutsq tables
      // (STACKPARAMS = true), matching the split kernels above
      if (neighflag == HALF) {
        if (newton_pair) {
          if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALF,1,1,true> >(0,inum),*this,ev);
          else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALF,1,0,true> >(0,inum),*this);
        } else {
          if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALF,0,1,true> >(0,inum),*this,ev);
          else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALF,0,0,true> >(0,inum),*this);
        }
      } else if (neighflag == HALFTHREAD) {
        if (newton_pair) {
          if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALFTHREAD,1,1,true> >(0,inum),*this,ev);
          else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALFTHREAD,1,0,true> >(0,inum),*this);
        } else {
          if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALFTHREAD,0,1,true> >(0,inum),*this,ev);
          else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<HALFTHREAD,0,0,true> >(0,inum),*this);
        }
      } else if (neighflag == FULL) {
        if (newton_pair) {
          if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<FULL,1,1,true> >(0,inum),*this,ev);
          else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<FULL,1,0,true> >(0,inum),*this);
        } else {
          if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<FULL,0,1,true> >(0,inum),*this,ev);
          else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairDPDfdtEnergyComputeNoSplit<FULL,0,0,true> >(0,inum),*this);
        }
      }
    }

    // Communicate the ghost delta energies to the locally owned atoms

    // this memory transfer can be removed when fix_dpd_fdt_energy_kokkos is added
    k_duCond.template modify<DeviceType>();
    k_duCond.template sync<LMPHostType>();
    k_duMech.template modify<DeviceType>();
    k_duMech.template sync<LMPHostType>();
    comm->reverse_comm_pair(this);
  }

  // fold the reduction result into the global accumulators

  if (eflag_global) eng_vdwl += ev.evdwl;
  if (vflag_global) {
    virial[0] += ev.v[0];
    virial[1] += ev.v[1];
    virial[2] += ev.v[2];
    virial[3] += ev.v[3];
    virial[4] += ev.v[4];
    virial[5] += ev.v[5];
  }

  if (vflag_fdotr) pair_virial_fdotr_compute(this);

  // make the per-atom tallies visible on the host

  if (eflag_atom) {
    k_eatom.template modify<DeviceType>();
    k_eatom.template sync<LMPHostType>();
  }

  if (vflag_atom) {
    k_vatom.template modify<DeviceType>();
    k_vatom.template sync<LMPHostType>();
  }

  copymode = 0;
}
// Kernel: reset both energy increments (mechanical and conductive) of
// entry ii to zero.
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairDPDfdtEnergyKokkos<DeviceType>::operator()(TagPairDPDfdtEnergyZero, const int &ii) const {
  d_duMech[ii] = 0.0;
  d_duCond[ii] = 0.0;
}
// Kernel for the split-FDT case: processes entry ii of the neighbor list.
// Only the conservative force a0*wr (scaled by factor_dpd/r) is applied.
//   NEIGHFLAG    neighbor list style (HALF, HALFTHREAD or FULL)
//   NEWTON_PAIR  if set, forces are also applied to ghost neighbors
//   EVFLAG       if set, energy/virial are tallied into ev
//   STACKPARAMS  if true, coefficients come from m_params/m_cutsq instead
//                of the params/d_cutsq views
// For half lists the force on neighbor j is applied when NEWTON_PAIR is set
// or j is an owned atom (j < nlocal).
template<class DeviceType>
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool STACKPARAMS>
KOKKOS_INLINE_FUNCTION
void PairDPDfdtEnergyKokkos<DeviceType>::operator()(TagPairDPDfdtEnergyComputeSplit<NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS>, const int &ii, EV_FLOAT& ev) const {

  // The f array is atomic for Half/Thread neighbor style
  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;

  int i,j,jj,jnum,itype,jtype;
  double xtmp,ytmp,ztmp,delx,dely,delz,fpair;
  double rsq,r,rinv,wd,wr,factor_dpd;

  // start at zero so that ev_tally never receives an indeterminate energy
  // when only the virial is requested (EVFLAG set, eflag == 0)
  double evdwl = 0.0;

  i = d_ilist[ii];
  xtmp = x(i,0);
  ytmp = x(i,1);
  ztmp = x(i,2);
  itype = type[i];
  jnum = d_numneigh[i];

  // force on atom i is accumulated locally and written once at the end
  double fx_i = 0.0;
  double fy_i = 0.0;
  double fz_i = 0.0;

  for (jj = 0; jj < jnum; jj++) {
    j = d_neighbors(i,jj);
    // special-bond weight is encoded in the high bits of the neighbor index
    factor_dpd = special_lj[sbmask(j)];
    j &= NEIGHMASK;

    delx = xtmp - x(j,0);
    dely = ytmp - x(j,1);
    delz = ztmp - x(j,2);
    rsq = delx*delx + dely*dely + delz*delz;
    jtype = type[j];

    double cutsq_ij = STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype);
    if (rsq < cutsq_ij) {
      r = sqrt(rsq);
      if (r < EPSILON) continue;     // r can be 0.0 in DPD systems
      rinv = 1.0/r;
      double cut_ij = STACKPARAMS?m_params[itype][jtype].cut:params(itype,jtype).cut;
      wr = 1.0 - r/cut_ij;
      wd = wr*wr;

      // conservative force = a0 * wr
      double a0_ij = STACKPARAMS?m_params[itype][jtype].a0:params(itype,jtype).a0;
      fpair = a0_ij*wr;
      fpair *= factor_dpd*rinv;

      fx_i += delx*fpair;
      fy_i += dely*fpair;
      fz_i += delz*fpair;
      if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) {
        a_f(j,0) -= delx*fpair;
        a_f(j,1) -= dely*fpair;
        a_f(j,2) -= delz*fpair;
      }

      if (eflag) {
        // unshifted eng of conservative term:
        // evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/d_cut(itype,jtype));
        // eng shifted to 0.0 at cutoff
        evdwl = 0.5*a0_ij*cut_ij * wd;
        evdwl *= factor_dpd;
        // full weight when the pair is visited once, half weight otherwise
        if (EVFLAG)
          ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR||(j<nlocal)))?1.0:0.5)*evdwl;
      }

      if (EVFLAG) this->template ev_tally<NEIGHFLAG,NEWTON_PAIR>(ev,i,j,evdwl,fpair,delx,dely,delz);
    }
  }

  a_f(i,0) += fx_i;
  a_f(i,1) += fy_i;
  a_f(i,2) += fz_i;
}
// parallel_for entry point of the split kernel: forwards to the reducing
// overload with a throw-away accumulator.
template<class DeviceType>
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool STACKPARAMS>
KOKKOS_INLINE_FUNCTION
void PairDPDfdtEnergyKokkos<DeviceType>::operator()(TagPairDPDfdtEnergyComputeSplit<NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS>, const int &ii) const {
  typedef TagPairDPDfdtEnergyComputeSplit<NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS> tag_type;
  EV_FLOAT ev_discarded;
  this->template operator()<NEIGHFLAG,NEWTON_PAIR,EVFLAG>(tag_type(), ii, ev_discarded);
}
// Kernel for the non-split case: processes entry ii of the neighbor list.
// Applies conservative, drag and random forces and accumulates the
// mechanical (duMech) and conductive (duCond) energy increments.
//   NEIGHFLAG    neighbor list style (HALF, HALFTHREAD or FULL)
//   NEWTON_PAIR  if set, contributions are also applied to ghost neighbors
//   EVFLAG       if set, energy/virial are tallied into ev
//   STACKPARAMS  if true, coefficients come from m_params/m_cutsq instead
//                of the params/d_cutsq views
// Two normal random numbers are drawn per interacting pair: one for the
// random force and one for the conductive energy exchange.
// NOTE(review): rmass is a raw pointer copied from atomKK->rmass in
// compute(); confirm it is dereferenceable in the execution space this
// kernel runs in.
template<class DeviceType>
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool STACKPARAMS>
KOKKOS_INLINE_FUNCTION
void PairDPDfdtEnergyKokkos<DeviceType>::operator()(TagPairDPDfdtEnergyComputeNoSplit<NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS>, const int &ii, EV_FLOAT& ev) const {

  // These array are atomic for Half/Thread neighbor style
  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_duCond = d_duCond;
  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_duMech = d_duMech;

  int i,j,jj,jnum,itype,jtype;
  double xtmp,ytmp,ztmp,delx,dely,delz,fpair;
  double vxtmp,vytmp,vztmp,delvx,delvy,delvz;
  double rsq,r,rinv,wd,wr,factor_dpd,uTmp;
  double dot,randnum;
  double kappa_ij, alpha_ij, theta_ij, gamma_ij;
  double mass_i, mass_j;
  double massinv_i, massinv_j;
  double randPair, mu_ij;

  // start at zero so that ev_tally never receives an indeterminate energy
  // when only the virial is requested (EVFLAG set, eflag == 0)
  double evdwl = 0.0;

  // generator state is borrowed from the pool and returned at the end
  rand_type rand_gen = rand_pool.get_state();

  i = d_ilist[ii];
  xtmp = x(i,0);
  ytmp = x(i,1);
  ztmp = x(i,2);
  vxtmp = v(i,0);
  vytmp = v(i,1);
  vztmp = v(i,2);
  itype = type[i];
  jnum = d_numneigh[i];

  // force on atom i is accumulated locally and written once at the end
  double fx_i = 0.0;
  double fy_i = 0.0;
  double fz_i = 0.0;

  for (jj = 0; jj < jnum; jj++) {
    j = d_neighbors(i,jj);
    // special-bond weight is encoded in the high bits of the neighbor index
    factor_dpd = special_lj[sbmask(j)];
    j &= NEIGHMASK;

    delx = xtmp - x(j,0);
    dely = ytmp - x(j,1);
    delz = ztmp - x(j,2);
    rsq = delx*delx + dely*dely + delz*delz;
    jtype = type[j];

    double cutsq_ij = STACKPARAMS?m_cutsq[itype][jtype]:d_cutsq(itype,jtype);
    if (rsq < cutsq_ij) {
      r = sqrt(rsq);
      if (r < EPSILON) continue;     // r can be 0.0 in DPD systems
      rinv = 1.0/r;
      double cut_ij = STACKPARAMS?m_params[itype][jtype].cut:params(itype,jtype).cut;
      wr = 1.0 - r/cut_ij;
      wd = wr*wr;

      delvx = vxtmp - v(j,0);
      delvy = vytmp - v(j,1);
      delvz = vztmp - v(j,2);
      dot = delx*delvx + dely*delvy + delz*delvz;
      randnum = rand_gen.normal();

      // Compute the current temperature
      // (inverse of the mean of the two inverse temperatures)
      theta_ij = 0.5*(1.0/dpdTheta[i] + 1.0/dpdTheta[j]);
      theta_ij = 1.0/theta_ij;

      double sigma_ij = STACKPARAMS?m_params[itype][jtype].sigma:params(itype,jtype).sigma;
      gamma_ij = sigma_ij*sigma_ij
                 / (2.0*boltz*theta_ij);

      // conservative force = a0 * wr
      // drag force = -gamma * wr^2 * (delx dot delv) / r
      // random force = sigma * wr * rnd * dtinvsqrt;

      double a0_ij = STACKPARAMS?m_params[itype][jtype].a0:params(itype,jtype).a0;
      fpair = a0_ij*wr;
      fpair -= gamma_ij*wd*dot*rinv;
      fpair += sigma_ij*wr*randnum*dtinvsqrt;
      fpair *= factor_dpd*rinv;

      fx_i += delx*fpair;
      fy_i += dely*fpair;
      fz_i += delz*fpair;
      if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) {
        a_f(j,0) -= delx*fpair;
        a_f(j,1) -= dely*fpair;
        a_f(j,2) -= delz*fpair;
      }

      // per-atom masses take precedence over per-type masses
      if (rmass) {
        mass_i = rmass[i];
        mass_j = rmass[j];
      } else {
        mass_i = mass[itype];
        mass_j = mass[jtype];
      }
      massinv_i = 1.0 / mass_i;
      massinv_j = 1.0 / mass_j;

      // Compute the mechanical and conductive energy, uMech and uCond
      mu_ij = massinv_i + massinv_j;
      mu_ij *= ftm2v;

      uTmp = gamma_ij*wd*rinv*rinv*dot*dot
             - 0.5*sigma_ij*sigma_ij*mu_ij*wd;
      uTmp -= sigma_ij*wr*rinv*dot*randnum*dtinvsqrt;
      uTmp *= 0.5;

      // both partners receive the same mechanical increment
      a_duMech[i] += uTmp;
      if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) {
        a_duMech[j] += uTmp;
      }

      // Compute uCond
      randnum = rand_gen.normal();
      kappa_ij = STACKPARAMS?m_params[itype][jtype].kappa:params(itype,jtype).kappa;
      alpha_ij = sqrt(2.0*boltz*kappa_ij);
      randPair = alpha_ij*wr*randnum*dtinvsqrt;

      uTmp = kappa_ij*(1.0/dpdTheta[i] - 1.0/dpdTheta[j])*wd;
      uTmp += randPair;

      // conductive increment is antisymmetric: +uTmp on i, -uTmp on j
      a_duCond[i] += uTmp;
      if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) {
        a_duCond[j] -= uTmp;
      }

      if (eflag) {
        // unshifted eng of conservative term:
        // evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/d_cut(itype,jtype));
        // eng shifted to 0.0 at cutoff
        evdwl = 0.5*a0_ij*cut_ij * wd;
        evdwl *= factor_dpd;
        // full weight when the pair is visited once, half weight otherwise
        if (EVFLAG)
          ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR||(j<nlocal)))?1.0:0.5)*evdwl;
      }

      if (EVFLAG) this->template ev_tally<NEIGHFLAG,NEWTON_PAIR>(ev,i,j,evdwl,fpair,delx,dely,delz);
    }
  }

  a_f(i,0) += fx_i;
  a_f(i,1) += fy_i;
  a_f(i,2) += fz_i;

  rand_pool.free_state(rand_gen);
}
// parallel_for entry point of the non-split kernel: forwards to the
// reducing overload with a throw-away accumulator.
template<class DeviceType>
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool STACKPARAMS>
KOKKOS_INLINE_FUNCTION
void PairDPDfdtEnergyKokkos<DeviceType>::operator()(TagPairDPDfdtEnergyComputeNoSplit<NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS>, const int &ii) const {
  typedef TagPairDPDfdtEnergyComputeNoSplit<NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS> tag_type;
  EV_FLOAT ev_discarded;
  this->template operator()<NEIGHFLAG,NEWTON_PAIR,EVFLAG>(tag_type(), ii, ev_discarded);
}
/* ----------------------------------------------------------------------
allocate all arrays
------------------------------------------------------------------------- */
// Runs the parent allocate(), then swaps the parent's cutsq for a
// DualView-backed (ntypes+1)x(ntypes+1) array and creates the params
// DualView of the same shape.  Unless splitFDT_flag is set, duCond and
// duMech are likewise replaced by DualView-backed arrays of length
// nlocal+nghost+1.
template<class DeviceType>
void PairDPDfdtEnergyKokkos<DeviceType>::allocate()
{
  PairDPDfdtEnergy::allocate();

  const int ntypes1 = atom->ntypes + 1;

  memory->destroy(cutsq);
  memory->create_kokkos(k_cutsq,cutsq,ntypes1,ntypes1,"pair:cutsq");
  d_cutsq = k_cutsq.template view<DeviceType>();

  k_params = Kokkos::DualView<params_dpd**,Kokkos::LayoutRight,DeviceType>("PairDPDfdtEnergy::params",ntypes1,ntypes1);
  params = k_params.template view<DeviceType>();

  // the energy increments are not needed in split mode
  if (splitFDT_flag) return;

  const int nall1 = atom->nlocal + atom->nghost + 1;

  memory->destroy(duCond);
  memory->destroy(duMech);
  memory->create_kokkos(k_duCond,duCond,nall1,"pair:duCond");
  memory->create_kokkos(k_duMech,duMech,nall1,"pair:duMech");

  d_duCond = k_duCond.view<DeviceType>();
  d_duMech = k_duMech.view<DeviceType>();
  h_duCond = k_duCond.h_view;
  h_duMech = k_duMech.h_view;
}
/* ----------------------------------------------------------------------
init for one type pair i,j and corresponding j,i
------------------------------------------------------------------------- */
// Runs the parent init_one() for the type pair (i,j) and mirrors the
// resulting coefficients into the host side of k_params and k_cutsq,
// symmetrically for (i,j) and (j,i).  When both types fit, the fixed-size
// m_params/m_cutsq tables are filled too.  Both DualViews are flagged as
// modified on the host.  Returns the cutoff reported by the parent.
template<class DeviceType>
double PairDPDfdtEnergyKokkos<DeviceType>::init_one(int i, int j)
{
  const double cutone = PairDPDfdtEnergy::init_one(i,j);
  const double cutone_sq = cutone*cutone;

  // pair coefficients, stored symmetrically
  k_params.h_view(i,j).cut = cut[i][j];
  k_params.h_view(i,j).a0 = a0[i][j];
  k_params.h_view(i,j).sigma = sigma[i][j];
  k_params.h_view(i,j).kappa = kappa[i][j];
  k_params.h_view(j,i) = k_params.h_view(i,j);

  // fixed-size tables only cover types up to MAX_TYPES_STACKPARAMS
  if (i <= MAX_TYPES_STACKPARAMS && j <= MAX_TYPES_STACKPARAMS) {
    m_params[i][j] = m_params[j][i] = k_params.h_view(i,j);
    m_cutsq[j][i] = m_cutsq[i][j] = cutone_sq;
  }

  // squared cutoff, stored symmetrically
  k_cutsq.h_view(i,j) = cutone_sq;
  k_cutsq.h_view(j,i) = k_cutsq.h_view(i,j);

  k_cutsq.template modify<LMPHostType>();
  k_params.template modify<LMPHostType>();

  return cutone;
}
/* ---------------------------------------------------------------------- */
// Tally the energy and virial contribution of one pair (i,j).
//   ev              reduction accumulator; receives the global virial
//   epair           pair energy, split half/half onto the per-atom energies
//   fpair           force magnitude divided by r
//   delx,dely,delz  separation vector of the pair
// For half lists each partner is tallied if NEWTON_PAIR is set or the atom
// is owned (index < nlocal); for a full list only atom i is tallied.
// Every tallied partner contributes half of the pair virial.
template<class DeviceType>
template<int NEIGHFLAG, int NEWTON_PAIR>
KOKKOS_INLINE_FUNCTION
void PairDPDfdtEnergyKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &j,
      const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
                const F_FLOAT &dely, const F_FLOAT &delz) const
{
  // The eatom and vatom arrays are atomic for Half/Thread neighbor style
  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_eatom = k_eatom.view<DeviceType>();
  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>();

  // which partners of the pair receive a contribution
  const bool tally_i = (NEIGHFLAG == FULL) || NEWTON_PAIR || i < nlocal;
  const bool tally_j = (NEIGHFLAG != FULL) && (NEWTON_PAIR || j < nlocal);

  // per-atom energy
  if (eflag && eflag_atom) {
    const E_FLOAT half_epair = 0.5 * epair;
    if (tally_i) v_eatom[i] += half_epair;
    if (tally_j) v_eatom[j] += half_epair;
  }

  if (!vflag_either) return;

  // pair virial components in the order xx, yy, zz, xy, xz, yz
  E_FLOAT vir[6];
  vir[0] = delx*delx*fpair;
  vir[1] = dely*dely*fpair;
  vir[2] = delz*delz*fpair;
  vir[3] = delx*dely*fpair;
  vir[4] = delx*delz*fpair;
  vir[5] = dely*delz*fpair;

  // global virial
  if (vflag_global) {
    if (tally_i) {
      for (int k = 0; k < 6; k++) ev.v[k] += 0.5*vir[k];
    }
    if (tally_j) {
      for (int k = 0; k < 6; k++) ev.v[k] += 0.5*vir[k];
    }
  }

  // per-atom virial
  if (vflag_atom) {
    if (tally_i) {
      for (int k = 0; k < 6; k++) v_vatom(i,k) += 0.5*vir[k];
    }
    if (tally_j) {
      for (int k = 0; k < 6; k++) v_vatom(j,k) += 0.5*vir[k];
    }
  }
}
/* ---------------------------------------------------------------------- */
// Extract the two-bit field located SBBITS bits up in the neighbor index j.
// The kernels use the result (0..3) as an index into special_lj.
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
int PairDPDfdtEnergyKokkos<DeviceType>::sbmask(const int& j) const {
  const int shifted = j >> SBBITS;
  return shifted & 3;
}
// Explicit instantiations of the pair style for the execution spaces in use.
// The device instantiation is always emitted; the host instantiation is
// only emitted when KOKKOS_HAVE_CUDA is defined.
// NOTE(review): presumably LMPHostType and LMPDeviceType name the same type
// in non-CUDA builds, which would make a second instantiation a duplicate --
// confirm in kokkos_type.h.
// NOTE(review): the CUDA init_style() specialization earlier in this file is
// guarded by KOKKOS_ENABLE_CUDA, not KOKKOS_HAVE_CUDA -- confirm both macros
// are always defined together.
namespace LAMMPS_NS {
template class PairDPDfdtEnergyKokkos<LMPDeviceType>;
#ifdef KOKKOS_HAVE_CUDA
template class PairDPDfdtEnergyKokkos<LMPHostType>;
#endif
}

View File

@ -0,0 +1,182 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(dpd/fdt/energy/kk,PairDPDfdtEnergyKokkos<LMPDeviceType>)
PairStyle(dpd/fdt/energy/kk/device,PairDPDfdtEnergyKokkos<LMPDeviceType>)
PairStyle(dpd/fdt/energy/kk/host,PairDPDfdtEnergyKokkos<LMPHostType>)
#else
#ifndef LMP_PAIR_DPD_FDT_ENERGY_KOKKOS_H
#define LMP_PAIR_DPD_FDT_ENERGY_KOKKOS_H
// Select the random number generator used by this pair style.
// Exactly one of DPD_USE_RAN_MARS, DPD_USE_Random_XorShift64 or
// DPD_USE_Random_XorShift1024 is expected; when none of them is defined,
// fall back to DPD_USE_Random_XorShift64.  All three names must carry the
// DPD_USE_ prefix here, otherwise a request for XorShift1024 would also
// enable XorShift64, which the #elif chain in the class below tests first.
#if !defined(DPD_USE_RAN_MARS) && !defined(DPD_USE_Random_XorShift64) && !defined(DPD_USE_Random_XorShift1024)
#define DPD_USE_Random_XorShift64
#endif
#include "pair_dpd_fdt_energy.h"
#include "pair_kokkos.h"
#include "kokkos_type.h"
#ifdef DPD_USE_RAN_MARS
#include "rand_pool_wrap_kokkos.h"
#else
#include "Kokkos_Random.hpp"
#endif
namespace LAMMPS_NS {
// Empty tag types used to select which operator() overload a Kokkos
// RangePolicy dispatches to.

// kernel that zeroes the duCond/duMech increments
struct TagPairDPDfdtEnergyZero {};

// force kernel for the split-FDT case
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool STACKPARAMS>
struct TagPairDPDfdtEnergyComputeSplit {};

// force and energy-increment kernel for the non-split case
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool STACKPARAMS>
struct TagPairDPDfdtEnergyComputeNoSplit {};
// Kokkos variant of PairDPDfdtEnergy, templated on the Kokkos device type.
// Declares the tagged operator() overloads, the random number pool selected
// by the DPD_USE_* macros, and the views and dual views the class works on.
template<class DeviceType>
class PairDPDfdtEnergyKokkos : public PairDPDfdtEnergy {
public:
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
typedef EV_FLOAT value_type;
PairDPDfdtEnergyKokkos(class LAMMPS *);
virtual ~PairDPDfdtEnergyKokkos();
virtual void compute(int, int);
void init_style();
double init_one(int, int);
// Tagged functor entry points.  The first (unnamed) parameter is a tag type;
// each compute tag has one overload that takes an EV_FLOAT& accumulator and
// one overload that does not.
KOKKOS_INLINE_FUNCTION
void operator()(TagPairDPDfdtEnergyZero, const int&) const;
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool STACKPARAMS>
KOKKOS_INLINE_FUNCTION
void operator()(TagPairDPDfdtEnergyComputeSplit<NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS>, const int&, EV_FLOAT&) const;
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool STACKPARAMS>
KOKKOS_INLINE_FUNCTION
void operator()(TagPairDPDfdtEnergyComputeSplit<NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS>, const int&) const;
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool STACKPARAMS>
KOKKOS_INLINE_FUNCTION
void operator()(TagPairDPDfdtEnergyComputeNoSplit<NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS>, const int&, EV_FLOAT&) const;
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool STACKPARAMS>
KOKKOS_INLINE_FUNCTION
void operator()(TagPairDPDfdtEnergyComputeNoSplit<NEIGHFLAG,NEWTON_PAIR,EVFLAG,STACKPARAMS>, const int&) const;
// Tally helper taking the accumulator, the atom pair (i,j), the pair energy,
// the pair force factor and the separation components.
template<int NEIGHFLAG, int NEWTON_PAIR>
KOKKOS_INLINE_FUNCTION
void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
const F_FLOAT &dely, const F_FLOAT &delz) const;
KOKKOS_INLINE_FUNCTION
int sbmask(const int& j) const;
// Coefficient record (cut, a0, sigma, kappa).  Both constructors zero every
// field; the int argument of the second constructor is not used.
struct params_dpd {
KOKKOS_INLINE_FUNCTION
params_dpd(){cut=0;a0=0;sigma=0;kappa=0;};
KOKKOS_INLINE_FUNCTION
params_dpd(int i){cut=0;a0=0;sigma=0;kappa=0;};
F_FLOAT cut,a0,sigma,kappa;
};
DAT::tdual_efloat_1d k_duCond,k_duMech;
// Random number pool and generator type, chosen at compile time by the
// DPD_USE_RAN_MARS / DPD_USE_Random_XorShift64 / DPD_USE_Random_XorShift1024
// macros.
#ifdef DPD_USE_RAN_MARS
RandPoolWrap rand_pool;
typedef RandWrap rand_type;
#elif defined(DPD_USE_Random_XorShift64)
Kokkos::Random_XorShift64_Pool<DeviceType> rand_pool;
typedef typename Kokkos::Random_XorShift64_Pool<DeviceType>::generator_type rand_type;
#elif defined(DPD_USE_Random_XorShift1024)
Kokkos::Random_XorShift1024_Pool<DeviceType> rand_pool;
typedef typename Kokkos::Random_XorShift1024_Pool<DeviceType>::generator_type rand_type;
#endif
// Dual view and device-side view for the cutsq table.
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq;
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
protected:
int eflag,vflag;
int nlocal,neighflag;
double dtinvsqrt;
double boltz,ftm2v;
double special_lj[4];
virtual void allocate();
// Coefficients as a dual view plus a const unmanaged device view of it.
Kokkos::DualView<params_dpd**,Kokkos::LayoutRight,DeviceType> k_params;
typename Kokkos::DualView<params_dpd**,
Kokkos::LayoutRight,DeviceType>::t_dev_const_um params;
// hardwired to space for MAX_TYPES_STACKPARAMS (12) atom types
params_dpd m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
// Views of per-atom data used by the kernels.
typename ArrayTypes<DeviceType>::t_x_array_randomread x;
typename ArrayTypes<DeviceType>::t_x_array c_x;
typename ArrayTypes<DeviceType>::t_v_array_randomread v;
typename ArrayTypes<DeviceType>::t_f_array f;
typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
typename ArrayTypes<DeviceType>::t_float_1d_randomread mass;
double *rmass;
typename AT::t_efloat_1d dpdTheta;
typename AT::t_efloat_1d d_duCond,d_duMech;
HAT::t_efloat_1d h_duCond,h_duMech;
// Per-atom energy / virial storage: dual views and their device-side views.
DAT::tdual_efloat_1d k_eatom;
DAT::tdual_virial_array k_vatom;
typename AT::t_efloat_1d d_eatom;
typename AT::t_virial_array d_vatom;
// Neighbor list views.
typename AT::t_neighbors_2d d_neighbors;
typename AT::t_int_1d_randomread d_ilist;
typename AT::t_int_1d_randomread d_numneigh;
// pair_virial_fdotr_compute() needs access to the protected members above.
friend void pair_virial_fdotr_compute<PairDPDfdtEnergyKokkos>(PairDPDfdtEnergyKokkos*);
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: Illegal ... command
Self-explanatory. Check the input script syntax and compare to the
documentation for the command. You can use -echo screen as a
command-line option when running LAMMPS to see the offending line.
E: Incorrect args for pair coefficients
Self-explanatory. Check the input script or data file.
E: Pair dpd/fdt/energy requires ghost atoms store velocity
Use the comm_modify vel yes command to enable this.
E: Pair dpd/fdt/energy requires newton pair on
Self-explanatory.
E: All pair coeffs are not set
All pair coefficients must be set in the data file or by the
pair_coeff command before running a simulation.
*/

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,280 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
// Style registrations, seen only when PAIR_CLASS is defined.  PairStyle is
// supplied by the including file (not shown here); the arguments pair a style
// name with PairExp6rxKokkos instantiated for the device or host type.
PairStyle(exp6/rx/kk,PairExp6rxKokkos<LMPDeviceType>)
PairStyle(exp6/rx/kk/device,PairExp6rxKokkos<LMPDeviceType>)
PairStyle(exp6/rx/kk/host,PairExp6rxKokkos<LMPHostType>)
#else
#ifndef LMP_PAIR_EXP6_RX_KOKKOS_H
#define LMP_PAIR_EXP6_RX_KOKKOS_H
#include "pair_exp6_rx.h"
#include "kokkos_type.h"
#include "pair_kokkos.h"
namespace LAMMPS_NS {
// Create a structure to hold the parameter data for all
// local and neighbor particles. Pack inside this struct
// to avoid any name clashes.
template<class DeviceType>
struct PairExp6ParamDataTypeKokkos
{
  typedef ArrayTypes<DeviceType> AT;

  // NOTE(review): presumably the number of entries the views below are sized
  // for -- confirm against the code that fills this struct.
  int n;

  // One 1-d view per parameter, for site 1 and site 2, each in a current and
  // an "old" variant.
  typename AT::t_float_1d epsilon1, alpha1, rm1, mixWtSite1,
                          epsilon2, alpha2, rm2, mixWtSite2,
                          epsilonOld1, alphaOld1, rmOld1, mixWtSite1old,
                          epsilonOld2, alphaOld2, rmOld2, mixWtSite2old;

  // Default constructor -- nullify everything.
  // Declared with the injected class name (no <DeviceType>): a template-id
  // is not a valid constructor name in C++20.
  PairExp6ParamDataTypeKokkos(void)
    : n(0), epsilon1(NULL), alpha1(NULL), rm1(NULL), mixWtSite1(NULL),
      epsilon2(NULL), alpha2(NULL), rm2(NULL), mixWtSite2(NULL),
      epsilonOld1(NULL), alphaOld1(NULL), rmOld1(NULL), mixWtSite1old(NULL),
      epsilonOld2(NULL), alphaOld2(NULL), rmOld2(NULL), mixWtSite2old(NULL)
  {}
};
template<class DeviceType>
struct PairExp6ParamDataTypeKokkosVect
{
  typedef ArrayTypes<DeviceType> AT;

  // 1-d work views, each in a current and an "old" variant.
  typename AT::t_float_1d epsilon, rm3, alpha, xMolei, epsilon_old, rm3_old,
                          alpha_old, xMolei_old, fractionOFA, fraction1,
                          fraction2, nMoleculesOFA, nMolecules1, nMolecules2,
                          nTotal, fractionOFAold, fractionOld1, fractionOld2,
                          nMoleculesOFAold, nMoleculesOld1, nMoleculesOld2,
                          nTotalold;

  // Default constructor -- nullify everything.
  // Declared with the injected class name (no <DeviceType>): a template-id
  // is not a valid constructor name in C++20.
  PairExp6ParamDataTypeKokkosVect(void)
    : epsilon(NULL), rm3(NULL), alpha(NULL), xMolei(NULL), epsilon_old(NULL), rm3_old(NULL),
      alpha_old(NULL), xMolei_old(NULL), fractionOFA(NULL), fraction1(NULL),
      fraction2(NULL), nMoleculesOFA(NULL), nMolecules1(NULL), nMolecules2(NULL),
      nTotal(NULL), fractionOFAold(NULL), fractionOld1(NULL), fractionOld2(NULL),
      nMoleculesOFAold(NULL), nMoleculesOld1(NULL), nMoleculesOld2(NULL),
      nTotalold(NULL)
  {}
};
// Empty tag types: each one appears as the first parameter of an operator()
// overload declared in PairExp6rxKokkos, so the tag selects the overload.
struct TagPairExp6rxZeroMixingWeights{};
struct TagPairExp6rxgetMixingWeights{};
// The two compute tags carry their kernel options as template arguments.
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
struct TagPairExp6rxCompute{};
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
struct TagPairExp6rxComputeNoAtomics{};
struct TagPairExp6rxCollapseDupViews{};
struct TagPairExp6rxZeroDupViews{};
// Kokkos variant of PairExp6rx, templated on the Kokkos device type.
// Declares the tagged operator() overloads, the views they operate on and the
// helper functions they call.
template<class DeviceType>
class PairExp6rxKokkos : public PairExp6rx {
public:
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
typedef EV_FLOAT value_type;
PairExp6rxKokkos(class LAMMPS *);
virtual ~PairExp6rxKokkos();
void compute(int, int);
void coeff(int, char **);
void init_style();
// Tagged functor entry points.  The first (unnamed) parameter is a tag type;
// each compute tag has one overload that takes an EV_FLOAT& accumulator and
// one overload that does not.
KOKKOS_INLINE_FUNCTION
void operator()(TagPairExp6rxZeroMixingWeights, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPairExp6rxgetMixingWeights, const int&) const;
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void operator()(TagPairExp6rxCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int&, EV_FLOAT&) const;
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void operator()(TagPairExp6rxCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int&) const;
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void operator()(TagPairExp6rxComputeNoAtomics<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int&, EV_FLOAT&) const;
// Not a tagged operator(): a separate kernel body with three extra
// compile-time switches (Site1EqSite2, UseAtomics, OneType).
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, bool Site1EqSite2, bool UseAtomics, bool OneType>
KOKKOS_INLINE_FUNCTION
void vectorized_operator(const int&, EV_FLOAT&) const;
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void operator()(TagPairExp6rxComputeNoAtomics<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPairExp6rxCollapseDupViews, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPairExp6rxZeroDupViews, const int&) const;
// Tally helper taking the accumulator, the atom pair (i,j), the pair energy,
// the pair force factor and the separation components.
template<int NEIGHFLAG, int NEWTON_PAIR>
KOKKOS_INLINE_FUNCTION
void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
const F_FLOAT &dely, const F_FLOAT &delz) const;
KOKKOS_INLINE_FUNCTION
int sbmask(const int& j) const;
protected:
int eflag,vflag;
int nlocal,newton_pair,neighflag;
double special_lj[4];
int num_threads,ntypes;
// Views of per-atom data used by the kernels.
typename AT::t_x_array_randomread x;
typename AT::t_f_array f;
typename AT::t_int_1d_randomread type;
typename AT::t_efloat_1d uCG, uCGnew;
typename AT::t_float_2d dvector;
// Two-index variants of the force and energy views.
// NOTE(review): the "_thread" suffix and num_threads suggest one slice per
// thread -- confirm against the .cpp.
typedef Kokkos::View<F_FLOAT**[3],Kokkos::LayoutRight,DeviceType> t_f_array_thread;
typedef Kokkos::View<E_FLOAT**,Kokkos::LayoutRight,DeviceType> t_efloat_1d_thread;
t_f_array_thread t_f;
t_efloat_1d_thread t_uCG, t_uCGnew;
// Per-atom energy / virial storage: dual views and their device-side views.
DAT::tdual_efloat_1d k_eatom;
DAT::tdual_virial_array k_vatom;
typename AT::t_efloat_1d d_eatom;
typename AT::t_virial_array d_vatom;
DAT::tdual_int_scalar k_error_flag;
// Neighbor list views.
typename AT::t_neighbors_2d d_neighbors;
typename AT::t_int_1d_randomread d_ilist;
typename AT::t_int_1d_randomread d_numneigh;
PairExp6ParamDataTypeKokkos<DeviceType> PairExp6ParamData;
PairExp6ParamDataTypeKokkosVect<DeviceType> PairExp6ParamDataVect;
void allocate();
DAT::tdual_int_1d k_mol2param; // mapping from molecule to parameters
typename AT::t_int_1d_randomread d_mol2param;
typedef Kokkos::DualView<Param*,Kokkos::LayoutRight,DeviceType> tdual_param_1d;
typedef typename tdual_param_1d::t_dev_const_randomread t_param_1d_randomread;
tdual_param_1d k_params; // parameter set for an I-J-K interaction
t_param_1d_randomread d_params; // parameter set for an I-J-K interaction
// Dual view and device-side view for the cutsq table.
typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_cutsq;
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
void read_file(char *);
void setup();
// Scalar form: takes an int and writes sixteen double outputs by reference.
KOKKOS_INLINE_FUNCTION
void getMixingWeights(int, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &) const;
// Array form: same sixteen outputs, passed as ArrayT references.  Unlike the
// scalar form it is not marked KOKKOS_INLINE_FUNCTION.
template <class ArrayT>
void getMixingWeightsVect(const int, int, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &, ArrayT &) const;
KOKKOS_INLINE_FUNCTION
void exponentScaling(double, double &, double &) const;
KOKKOS_INLINE_FUNCTION
void polynomialScaling(double, double &, double &, double &) const;
double s_coeffAlpha[6],s_coeffEps[6],s_coeffRm[6];
KOKKOS_INLINE_FUNCTION
double func_rin(const double &) const;
KOKKOS_INLINE_FUNCTION
double expValue(const double) const;
// pair_virial_fdotr_compute() needs access to the protected members above.
friend void pair_virial_fdotr_compute<PairExp6rxKokkos>(PairExp6rxKokkos*);
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: alpha_ij is 6.0 in pair exp6
Self-explanatory
E: Illegal ... command
Self-explanatory. Check the input script syntax and compare to the
documentation for the command. You can use -echo screen as a
command-line option when running LAMMPS to see the offending line.
E: Incorrect args for pair coefficients
Self-explanatory. Check the input script or data file.
E: PairExp6rxKokkos requires a fix rx command
The fix rx command must come before the pair style command in the input file
E: There are no rx species specified
There must be at least one species specified through the fix rx command
E: Site1 name not recognized in pair coefficients
The site1 keyword does not match the species keywords specified through the fix rx command
E: All pair coeffs are not set
All pair coefficients must be set in the data file or by the
pair_coeff command before running a simulation.
E: Cannot open exp6/rx potential file %s
Self-explanatory
E: Incorrect format in exp6/rx potential file
Self-explanatory
E: Illegal exp6/rx parameters. Rm and Epsilon must be greater than zero. Alpha cannot be negative.
Self-explanatory
E: Illegal exp6/rx parameters. Interaction potential does not exist.
Self-explanatory
E: Potential file has duplicate entry.
Self-explanatory
E: The number of molecules in CG particle is less than 10*DBL_EPSILON.
Self-explanatory. Check that the species concentrations have been properly set
and check that the reaction kinetic solver parameters in fix rx provide
sufficient accuracy.
*/

View File

@ -0,0 +1,159 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "pair_hybrid_kokkos.h"
#include "atom_kokkos.h"
#include "force.h"
#include "pair.h"
#include "neighbor.h"
#include "neigh_request.h"
#include "update.h"
#include "comm.h"
#include "memory.h"
#include "error.h"
#include "respa.h"
#include "atom_masks.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
PairHybridKokkos::PairHybridKokkos(LAMMPS *lmp) : PairHybrid(lmp)
{
  // the hybrid wrapper declares no atom data as read or modified

  datamask_read = EMPTY_MASK;
  datamask_modify = EMPTY_MASK;

  // pair_hybrid_kokkos does not yet support overlapping host/device
  // computation, so the execution space is pinned to Device

  execution_space = Device;

  atomKK = static_cast<AtomKokkos *>(atom);
}
/* ---------------------------------------------------------------------- */
// empty body: this class adds no cleanup of its own
PairHybridKokkos::~PairHybridKokkos() {}
/* ----------------------------------------------------------------------
call each sub-style's compute() or compute_outer() function
accumulate sub-style global/peratom energy/virial in hybrid
for global vflag = 1:
each sub-style computes own virial[6]
sum sub-style virial[6] to hybrid's virial[6]
for global vflag = 2:
call sub-style with adjusted vflag to prevent it calling
virial_fdotr_compute()
hybrid calls virial_fdotr_compute() on final accumulated f
------------------------------------------------------------------------- */
void PairHybridKokkos::compute(int eflag, int vflag)
{
int i,j,m,n;
// if no_virial_fdotr_compute is set and global component of
// incoming vflag = 2, then
// reset vflag as if global component were 1
// necessary since one or more sub-styles cannot compute virial as F dot r
int neighflag = lmp->kokkos->neighflag;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (no_virial_fdotr_compute && vflag % 4 == 2) vflag = 1 + vflag/4 * 4;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = eflag_global = vflag_global =
eflag_atom = vflag_atom = 0;
// check if global component of incoming vflag = 2
// if so, reset vflag passed to substyle as if it were 0
// necessary so substyle will not invoke virial_fdotr_compute()
int vflag_substyle;
if (vflag % 4 == 2) vflag_substyle = vflag/4 * 4;
else vflag_substyle = vflag;
double *saved_special = save_special();
// check if we are running with r-RESPA using the hybrid keyword
Respa *respa = NULL;
respaflag = 0;
if (strstr(update->integrate_style,"respa")) {
respa = (Respa *) update->integrate;
if (respa->nhybrid_styles > 0) respaflag = 1;
}
for (m = 0; m < nstyles; m++) {
set_special(m);
if (!respaflag || (respaflag && respa->hybrid_compute[m])) {
// invoke compute() unless compute flag is turned off or
// outerflag is set and sub-style has a compute_outer() method
if (styles[m]->compute_flag == 0) continue;
atomKK->sync(styles[m]->execution_space,styles[m]->datamask_read);
if (outerflag && styles[m]->respa_enable)
styles[m]->compute_outer(eflag,vflag_substyle);
else styles[m]->compute(eflag,vflag_substyle);
atomKK->modified(styles[m]->execution_space,styles[m]->datamask_modify);
}
restore_special(saved_special);
// jump to next sub-style if r-RESPA does not want global accumulated data
if (respaflag && !respa->tally_global) continue;
if (eflag_global) {
eng_vdwl += styles[m]->eng_vdwl;
eng_coul += styles[m]->eng_coul;
}
if (vflag_global) {
for (n = 0; n < 6; n++) virial[n] += styles[m]->virial[n];
}
if (eflag_atom) {
n = atom->nlocal;
if (force->newton_pair) n += atom->nghost;
double *eatom_substyle = styles[m]->eatom;
for (i = 0; i < n; i++) eatom[i] += eatom_substyle[i];
}
if (vflag_atom) {
n = atom->nlocal;
if (force->newton_pair) n += atom->nghost;
double **vatom_substyle = styles[m]->vatom;
for (i = 0; i < n; i++)
for (j = 0; j < 6; j++)
vatom[i][j] += vatom_substyle[i][j];
}
}
delete [] saved_special;
// perform virial_fdotr on device
atomKK->sync(Device,X_MASK|F_MASK);
x = atomKK->k_x.view<LMPDeviceType>();
f = atomKK->k_f.view<LMPDeviceType>();
if (vflag_fdotr)
pair_virial_fdotr_compute(this);
}

View File

@ -0,0 +1,118 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
// Style registration, seen only when PAIR_CLASS is defined.  PairStyle is
// supplied by the including file (not shown here).
PairStyle(hybrid/kk,PairHybridKokkos)
#else
#ifndef LMP_PAIR_HYBRID_KOKKOS_H
#define LMP_PAIR_HYBRID_KOKKOS_H
#include <stdio.h>
#include "pair_hybrid.h"
#include "pair_kokkos.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
// Kokkos-aware variant of PairHybrid.  It overrides compute() and holds
// device views of atom positions and forces, which are handed to
// pair_virial_fdotr_compute().
class PairHybridKokkos : public PairHybrid {
friend class FixGPU;
friend class FixIntel;
friend class FixOMP;
friend class Force;
friend class Respa;
friend class Info;
public:
typedef LMPDeviceType device_type;
PairHybridKokkos(class LAMMPS *);
virtual ~PairHybridKokkos();
void compute(int, int);
private:
// device views of positions and forces, set at the end of compute()
DAT::t_x_array_randomread x;
DAT::t_f_array f;
// pair_virial_fdotr_compute() needs access to the private views above
friend void pair_virial_fdotr_compute<PairHybridKokkos>(PairHybridKokkos*);
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: Illegal ... command
Self-explanatory. Check the input script syntax and compare to the
documentation for the command. You can use -echo screen as a
command-line option when running LAMMPS to see the offending line.
E: Pair style hybrid cannot have hybrid as an argument
Self-explanatory.
E: Pair style hybrid cannot have none as an argument
Self-explanatory.
E: Incorrect args for pair coefficients
Self-explanatory. Check the input script or data file.
E: Pair coeff for hybrid has invalid style
Style in pair coeff must have been listed in pair_style command.
E: Pair hybrid sub-style is not used
No pair_coeff command used a sub-style specified in the pair_style
command.
E: Pair_modify special setting for pair hybrid incompatible with global special_bonds setting
Cannot override a setting of 0.0 or 1.0 or change a setting between
0.0 and 1.0.
E: All pair coeffs are not set
All pair coefficients must be set in the data file or by the
pair_coeff command before running a simulation.
E: Invoked pair single on pair style none
A command (e.g. a dump) attempted to invoke the single() function on a
pair style none, which is illegal. You are probably attempting to
compute per-atom quantities with an undefined pair style.
E: Pair hybrid sub-style does not support single call
You are attempting to invoke a single() call on a pair style
that doesn't support it.
E: Pair hybrid single calls do not support per sub-style special bond values
Self-explanatory.
E: Unknown pair_modify hybrid sub-style
The choice of sub-style is unknown.
E: Coulomb cutoffs of pair hybrid sub-styles do not match
If using a Kspace solver, all Coulomb cutoffs of long pair styles must
be the same.
*/

View File

@ -0,0 +1,107 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "pair_hybrid_overlay_kokkos.h"
#include "atom.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_request.h"
#include "error.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// all construction work is delegated to PairHybridKokkos
PairHybridOverlayKokkos::PairHybridOverlayKokkos(LAMMPS *lmp) :
  PairHybridKokkos(lmp)
{
}
/* ----------------------------------------------------------------------
set coeffs for one or more type pairs
------------------------------------------------------------------------- */
void PairHybridOverlayKokkos::coeff(int narg, char **arg)
{
  if (narg < 3) error->all(FLERR,"Incorrect args for pair coefficients");
  if (!allocated) allocate();

  int ilo,ihi,jlo,jhi;
  force->bounds(FLERR,arg[0],atom->ntypes,ilo,ihi);
  force->bounds(FLERR,arg[1],atom->ntypes,jlo,jhi);

  // 3rd arg = pair sub-style name
  // 4th arg = pair sub-style index if name used multiple times
  // allow for "none" as valid sub-style name

  // initialized so it has a defined value below even if the loop never runs
  int multflag = 0;
  int m;

  for (m = 0; m < nstyles; m++) {
    multflag = 0;
    if (strcmp(arg[2],keywords[m]) == 0) {
      if (multiple[m]) {
        multflag = 1;
        if (narg < 4) error->all(FLERR,"Incorrect args for pair coefficients");
        // isdigit() is undefined for negative char values, so convert to
        // unsigned char first
        if (!isdigit(static_cast<unsigned char>(arg[3][0])))
          error->all(FLERR,"Incorrect args for pair coefficients");
        int index = force->inumeric(FLERR,arg[3]);
        if (index == multiple[m]) break;
        else continue;
      } else break;
    }
  }

  int none = 0;
  if (m == nstyles) {
    if (strcmp(arg[2],"none") == 0) none = 1;
    else error->all(FLERR,"Pair coeff for hybrid has invalid style");
  }

  // move 1st/2nd args to 2nd/3rd args
  // if multflag: move 1st/2nd args to 3rd/4th args
  // just copy ptrs, since arg[] points into original input line

  arg[2+multflag] = arg[1];
  arg[1+multflag] = arg[0];

  // invoke sub-style coeff() starting with 1st remaining arg

  if (!none) styles[m]->coeff(narg-1-multflag,&arg[1+multflag]);

  // set setflag and which type pairs map to which sub-style
  // if sub-style is none: set hybrid subflag, wipe out map
  // else: set hybrid setflag & map only if substyle setflag is set
  //       if sub-style is new for type pair, add as multiple mapping
  //       if sub-style exists for type pair, don't add, just update coeffs

  int count = 0;
  for (int i = ilo; i <= ihi; i++) {
    for (int j = MAX(jlo,i); j <= jhi; j++) {
      if (none) {
        setflag[i][j] = 1;
        nmap[i][j] = 0;
        count++;
      } else if (styles[m]->setflag[i][j]) {
        int k;
        for (k = 0; k < nmap[i][j]; k++)
          if (map[i][j][k] == m) break;
        if (k == nmap[i][j]) map[i][j][nmap[i][j]++] = m;
        setflag[i][j] = 1;
        count++;
      }
    }
  }

  // no type pair was set at all

  if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
}

View File

@ -11,27 +11,24 @@
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
#ifdef PAIR_CLASS
NPairStyle(halffull/newton/ssa,
NPairHalffullNewtonSSA,
NP_HALF_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_NEWTON |
NP_ORTHO | NP_TRI | NP_SSA)
PairStyle(hybrid/overlay/kk,PairHybridOverlayKokkos)
#else
#ifndef LMP_NPAIR_HALFFULL_NEWTON_SSA_H
#define LMP_NPAIR_HALFFULL_NEWTON_SSA_H
#ifndef LMP_PAIR_HYBRID_OVERLAY_KOKKOS_H
#define LMP_PAIR_HYBRID_OVERLAY_KOKKOS_H
#include "npair.h"
#include "pair_hybrid_kokkos.h"
namespace LAMMPS_NS {
class NPairHalffullNewtonSSA : public NPair {
class PairHybridOverlayKokkos : public PairHybridKokkos {
public:
NPairHalffullNewtonSSA(class LAMMPS *);
~NPairHalffullNewtonSSA() {}
void build(class NeighList *);
PairHybridOverlayKokkos(class LAMMPS *);
virtual ~PairHybridOverlayKokkos() {}
void coeff(int, char **);
};
}
@ -41,4 +38,12 @@ class NPairHalffullNewtonSSA : public NPair {
/* ERROR/WARNING messages:
E: Incorrect args for pair coefficients
Self-explanatory. Check the input script or data file.
E: Pair coeff for hybrid has invalid style
Style in pair coeff must have been listed in pair_style command.
*/

View File

@ -0,0 +1,998 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------------------------
Contributing authors:
Stan Moore (Sandia)
Please cite the related publications:
J.D. Moore, B.C. Barnes, S. Izvekov, M. Lisal, M.S. Sellers, D.E. Taylor & J.K. Brennan
"A coarse-grain force field for RDX: Density dependent and energy conserving"
The Journal of Chemical Physics, 2016, 144, 104501.
------------------------------------------------------------------------------------------- */
#include <mpi.h>
#include <math.h>
#include "math_const.h"
#include <stdlib.h>
#include <string.h>
#include "pair_multi_lucy_rx_kokkos.h"
#include "atom_kokkos.h"
#include "force.h"
#include "comm.h"
#include "neigh_list.h"
#include "memory.h"
#include "error.h"
#include "citeme.h"
#include "modify.h"
#include "fix.h"
#include "atom_masks.h"
#include "neigh_request.h"
using namespace LAMMPS_NS;
// NOTE(review): NONE/RLINEAR/RSQ and MAXLINE are not referenced in the part
// of this file visible here -- confirm they are still needed.
enum{NONE,RLINEAR,RSQ};
#define MAXLINE 1024
// MY_EPSILON is ten times DBL_EPSILON; the literal fallback is used when
// DBL_EPSILON is not defined at this point.
#ifdef DBL_EPSILON
#define MY_EPSILON (10.0*DBL_EPSILON)
#else
#define MY_EPSILON (10.0*2.220446049250313e-16)
#endif
// oneFluidParameter is a sentinel site value; isOneFluid() compares a site
// value against it.
#define oneFluidParameter (-1)
#define isOneFluid(_site) ( (_site) == oneFluidParameter )
/* ---------------------------------------------------------------------- */
template<class DeviceType>
PairMultiLucyRXKokkos<DeviceType>::PairMultiLucyRXKokkos(LAMMPS *lmp) : PairMultiLucyRX(lmp)
{
  respa_enable = 0;

  // Kokkos bookkeeping: atom container, execution space, data masks

  atomKK = static_cast<AtomKokkos *>(atom);
  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
  datamask_modify = EMPTY_MASK;
  datamask_read = EMPTY_MASK;

  // host and device table containers; update_table = 1 makes the first
  // compute() call create_kokkos_tables()

  h_table = new TableHost();
  d_table = new TableDevice();
  update_table = 1;

  // scalar dual view that the kernels use to report errors

  k_error_flag = DAT::tdual_int_scalar("pair:error_flag");
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
PairMultiLucyRXKokkos<DeviceType>::~PairMultiLucyRXKokkos()
{
  // nothing is released while copymode is set

  if (!copymode) {
    memory->destroy_kokkos(k_eatom,eatom);
    memory->destroy_kokkos(k_vatom,vatom);
    memory->destroy_kokkos(k_cutsq,cutsq);

    delete h_table;
    delete d_table;

    tabindex = NULL;
  }
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void PairMultiLucyRXKokkos<DeviceType>::init_style()
{
  PairMultiLucyRX::init_style();

  neighflag = lmp->kokkos->neighflag;

  // the parent class made the most recent neighbor request; adjust that one

  NeighRequest *request = neighbor->requests[neighbor->nrequest - 1];

  const bool on_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
  const bool on_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value;

  request->kokkos_host = on_host && !on_device;
  request->kokkos_device = on_device;

  // only full, half and half/thread lists are accepted

  const bool want_full = (neighflag == FULL);
  if (!want_full && neighflag != HALF && neighflag != HALFTHREAD)
    error->all(FLERR,"Cannot use chosen neighbor list style with multi/lucy/rx/kk");

  request->full = want_full ? 1 : 0;
  request->half = want_full ? 0 : 1;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void PairMultiLucyRXKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
{
  copymode = 1;

  if (update_table) {
    create_kokkos_tables();
  }

  // dispatch on the table style; any other style does no work here

  switch (tabstyle) {
  case LOOKUP:
    compute_style<LOOKUP>(eflag_in,vflag_in);
    break;
  case LINEAR:
    compute_style<LINEAR>(eflag_in,vflag_in);
    break;
  default:
    break;
  }

  copymode = 0;
}
/* ---------------------------------------------------------------------- */
// One force evaluation for the table style TABSTYLE.
// Steps, in order: set up the energy/virial flags, (re)allocate per-atom
// energy/virial storage, fetch and sync the atom views, compute mixing weights
// for all local + ghost atoms, call computeLocalDensity(), launch the force
// kernel matching neighflag / newton_pair / evflag, check the error flag on
// the host, and tally the global energy and virial.
template<class DeviceType>
template<int TABSTYLE>
void PairMultiLucyRXKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
{
eflag = eflag_in;
vflag = vflag_in;
// a FULL neighbor list switches off the F dot r virial
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag,0);
else evflag = vflag_fdotr = 0;
// reallocate per-atom arrays if necessary
if (eflag_atom) {
memory->destroy_kokkos(k_eatom,eatom);
memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
d_eatom = k_eatom.template view<DeviceType>();
}
if (vflag_atom) {
memory->destroy_kokkos(k_vatom,vatom);
memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom");
d_vatom = k_vatom.template view<DeviceType>();
}
// grab the atom views for this device type, then sync them to it
x = atomKK->k_x.view<DeviceType>();
f = atomKK->k_f.view<DeviceType>();
type = atomKK->k_type.view<DeviceType>();
rho = atomKK->k_rho.view<DeviceType>();
uCG = atomKK->k_uCG.view<DeviceType>();
uCGnew = atomKK->k_uCGnew.view<DeviceType>();
dvector = atomKK->k_dvector.view<DeviceType>();
atomKK->sync(execution_space,X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK | DPDRHO_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK);
k_cutsq.template sync<DeviceType>();
nlocal = atom->nlocal;
int nghost = atom->nghost;
int newton_pair = force->newton_pair;
{
// grow the four mixing-weight views when local + ghost atoms exceed their
// current length, then fill them for every atom
const int ntotal = nlocal + nghost;
if (ntotal > d_mixWtSite1.dimension_0()) {
d_mixWtSite1old = typename AT::t_float_1d("PairMultiLucyRX::mixWtSite1old",ntotal);
d_mixWtSite2old = typename AT::t_float_1d("PairMultiLucyRX::mixWtSite2old",ntotal);
d_mixWtSite1 = typename AT::t_float_1d("PairMultiLucyRX::mixWtSite1",ntotal);
d_mixWtSite2 = typename AT::t_float_1d("PairMultiLucyRX::mixWtSite2",ntotal);
}
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXgetMixingWeights>(0,ntotal),*this);
}
const int inum = list->inum;
NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
d_numneigh = k_list->d_numneigh;
d_neighbors = k_list->d_neighbors;
d_ilist = k_list->d_ilist;
computeLocalDensity();
// loop over neighbors of my atoms
// the tag template arguments are <neighflag, newton_pair, evflag, TABSTYLE>;
// parallel_reduce (accumulating into ev) is used when evflag is set,
// parallel_for otherwise
EV_FLOAT ev;
if (neighflag == HALF) {
if (newton_pair) {
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALF,1,1,TABSTYLE> >(0,inum),*this,ev);
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALF,1,0,TABSTYLE> >(0,inum),*this);
} else {
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALF,0,1,TABSTYLE> >(0,inum),*this,ev);
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALF,0,0,TABSTYLE> >(0,inum),*this);
}
} else if (neighflag == HALFTHREAD) {
if (newton_pair) {
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALFTHREAD,1,1,TABSTYLE> >(0,inum),*this,ev);
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALFTHREAD,1,0,TABSTYLE> >(0,inum),*this);
} else {
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALFTHREAD,0,1,TABSTYLE> >(0,inum),*this,ev);
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<HALFTHREAD,0,0,TABSTYLE> >(0,inum),*this);
}
} else if (neighflag == FULL) {
if (newton_pair) {
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<FULL,1,1,TABSTYLE> >(0,inum),*this,ev);
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<FULL,1,0,TABSTYLE> >(0,inum),*this);
} else {
if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<FULL,0,1,TABSTYLE> >(0,inum),*this,ev);
else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXCompute<FULL,0,0,TABSTYLE> >(0,inum),*this);
}
}
// mark the data written by the kernel as modified in this execution space
if (evflag) atomKK->modified(execution_space,F_MASK | ENERGY_MASK | VIRIAL_MASK | UCG_MASK | UCGNEW_MASK);
else atomKK->modified(execution_space,F_MASK | UCG_MASK | UCGNEW_MASK);
// copy the error flag set by the kernels to the host and act on it
k_error_flag.template modify<DeviceType>();
k_error_flag.template sync<LMPHostType>();
if (k_error_flag.h_view() == 1)
error->one(FLERR,"Density < table inner cutoff");
else if (k_error_flag.h_view() == 2)
error->one(FLERR,"Density > table outer cutoff");
else if (k_error_flag.h_view() == 3)
error->one(FLERR,"Only LOOKUP and LINEAR table styles have been implemented for pair multi/lucy/rx");
// add the reduced energy and virial to the global tallies
if (eflag_global) eng_vdwl += ev.evdwl;
if (vflag_global) {
virial[0] += ev.v[0];
virial[1] += ev.v[1];
virial[2] += ev.v[2];
virial[3] += ev.v[3];
virial[4] += ev.v[4];
virial[5] += ev.v[5];
}
if (vflag_fdotr) pair_virial_fdotr_compute(this);
// make the per-atom results available on the host
if (eflag_atom) {
k_eatom.template modify<DeviceType>();
k_eatom.template sync<LMPHostType>();
}
if (vflag_atom) {
k_vatom.template modify<DeviceType>();
k_vatom.template sync<LMPHostType>();
}
}
// Kernel body for TagPairMultiLucyRXgetMixingWeights: recompute the four
// per-atom mixing weights of atom i and store them in the member views.
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXgetMixingWeights, const int &i) const {
  double &site1Old = d_mixWtSite1old[i];
  double &site2Old = d_mixWtSite2old[i];
  double &site1New = d_mixWtSite1[i];
  double &site2New = d_mixWtSite2[i];
  getMixingWeights(i, site1Old, site2Old, site1New, site2New);
}
// Force/energy kernel for one entry ii of the neighbor-list index array d_ilist.
//
// Template flags:
//   NEIGHFLAG   - neighbor list flavor; for HALF/HALFTHREAD the partner atom j
//                 is updated as well, and the f view is made atomic through
//                 AtomicF<NEIGHFLAG>
//   NEWTON_PAIR - when non-zero, j is updated for every neighbor; otherwise
//                 only when j < nlocal
//   EVFLAG      - when non-zero, virial terms are tallied via ev_tally and the
//                 density-dependent self energy is accumulated into ev.evdwl
//   TABSTYLE    - LOOKUP or LINEAR table evaluation
//
// Problems are reported through k_error_flag, which the host inspects after
// the kernel: 1 = density below the table's inner cutoff, 2 = table index at
// or beyond the last entry, 3 = unsupported table style.
template<class DeviceType>
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int TABSTYLE>
KOKKOS_INLINE_FUNCTION
void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,TABSTYLE>, const int &ii, EV_FLOAT& ev) const {
// The f array is atomic for Half/Thread neighbor style
Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
int i,jj,jnum,itype,jtype,itable;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,evdwlOld,fpair;
double rsq;
double mixWtSite1old_i,mixWtSite1old_j;
double mixWtSite2old_i,mixWtSite2old_j;
double mixWtSite1_i;
double pi = MathConst::MY_PI;
double A_i, A_j;
double fraction_i,fraction_j;
int jtable;
// index of the last table entry
int tlm1 = tablength - 1;
i = d_ilist[ii];
xtmp = x(i,0);
ytmp = x(i,1);
ztmp = x(i,2);
itype = type[i];
jnum = d_numneigh[i];
// force on atom i is accumulated locally and written once after the loop
double fx_i = 0.0;
double fy_i = 0.0;
double fz_i = 0.0;
mixWtSite1old_i = d_mixWtSite1old[i];
mixWtSite2old_i = d_mixWtSite2old[i];
mixWtSite1_i = d_mixWtSite1[i];
// loop over the neighbors of atom i
for (jj = 0; jj < jnum; jj++) {
int j = d_neighbors(i,jj);
// the stored neighbor index is masked with NEIGHMASK before use
j &= NEIGHMASK;
delx = xtmp - x(j,0);
dely = ytmp - x(j,1);
delz = ztmp - x(j,2);
rsq = delx*delx + dely*dely + delz*delz;
jtype = type[j];
if (rsq < d_cutsq(itype,jtype)) { // optimize
fpair = 0.0;
mixWtSite1old_j = d_mixWtSite1old[j];
mixWtSite2old_j = d_mixWtSite2old[j];
//tb = &tables[tabindex[itype][jtype]];
const int tidx = d_table_const.tabindex(itype,jtype);
// the tables are indexed by the squared local density rho*rho
//if (rho[i]*rho[i] < tb->innersq || rho[j]*rho[j] < tb->innersq){
if (rho[i]*rho[i] < d_table_const.innersq(tidx) || rho[j]*rho[j] < d_table_const.innersq(tidx)){
k_error_flag.template view<DeviceType>()() = 1;
}
if (TABSTYLE == LOOKUP) {
// LOOKUP: take the tabulated f value of the bin directly
//itable = static_cast<int> (((rho[i]*rho[i]) - tb->innersq) * tb->invdelta);
itable = static_cast<int> (((rho[i]*rho[i]) - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
//jtable = static_cast<int> (((rho[j]*rho[j]) - tb->innersq) * tb->invdelta);
jtable = static_cast<int> (((rho[j]*rho[j]) - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
if (itable >= tlm1 || jtable >= tlm1){
k_error_flag.template view<DeviceType>()() = 2;
}
// NOTE(review): unlike the LINEAR branch, itable/jtable are not clamped
// here, so after an error flag has been set the reads below still use the
// out-of-range index - confirm this cannot fault on the device.
//A_i = tb->f[itable];
A_i = d_table_const.f(tidx,itable);
//A_j = tb->f[jtable];
A_j = d_table_const.f(tidx,jtable);
const double rfactor = 1.0-sqrt(rsq/d_cutsq(itype,jtype));
fpair = 0.5*(A_i + A_j)*(4.0-3.0*rfactor)*rfactor*rfactor*rfactor;
fpair /= sqrt(rsq);
} else if (TABSTYLE == LINEAR) {
// LINEAR: interpolate f inside the bin using the tabulated slope df
//itable = static_cast<int> ((rho[i]*rho[i] - tb->innersq) * tb->invdelta);
itable = static_cast<int> ((rho[i]*rho[i] - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
//jtable = static_cast<int> (((rho[j]*rho[j]) - tb->innersq) * tb->invdelta);
jtable = static_cast<int> ((rho[j]*rho[j] - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
if (itable >= tlm1 || jtable >= tlm1){
k_error_flag.template view<DeviceType>()() = 2;
}
// clamp both indices to [0, tlm1]
// NOTE(review): create_kokkos_tables sizes de/df with tlm1 columns, so an
// index clamped to tlm1 is one past the last df column - verify.
if(itable<0) itable=0;
if(itable>=tlm1) itable=tlm1;
if(jtable<0) jtable=0;
if(jtable>=tlm1)jtable=tlm1;
//fraction_i = (((rho[i]*rho[i]) - tb->rsq[itable]) * tb->invdelta);
fraction_i = (((rho[i]*rho[i]) - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx));
//fraction_j = (((rho[j]*rho[j]) - tb->rsq[jtable]) * tb->invdelta);
fraction_j = (((rho[j]*rho[j]) - d_table_const.rsq(tidx,jtable)) * d_table_const.invdelta(tidx));
// no interpolation at either end of the table
if(itable==0) fraction_i=0.0;
if(itable==tlm1) fraction_i=0.0;
if(jtable==0) fraction_j=0.0;
if(jtable==tlm1) fraction_j=0.0;
//A_i = tb->f[itable] + fraction_i*tb->df[itable];
A_i = d_table_const.f(tidx,itable) + fraction_i*d_table_const.df(tidx,itable);
//A_j = tb->f[jtable] + fraction_j*tb->df[jtable];
A_j = d_table_const.f(tidx,jtable) + fraction_j*d_table_const.df(tidx,jtable);
const double rfactor = 1.0-sqrt(rsq/d_cutsq(itype,jtype));
fpair = 0.5*(A_i + A_j)*(4.0-3.0*rfactor)*rfactor*rfactor*rfactor;
fpair /= sqrt(rsq);
} else k_error_flag.template view<DeviceType>()() = 3;
// scale by the old mixing weights: a single term when both site indices
// coincide, the symmetrized sum otherwise
if (isite1 == isite2) fpair = sqrt(mixWtSite1old_i*mixWtSite2old_j)*fpair;
else fpair = (sqrt(mixWtSite1old_i*mixWtSite2old_j) + sqrt(mixWtSite2old_i*mixWtSite1old_j))*fpair;
fx_i += delx*fpair;
fy_i += dely*fpair;
fz_i += delz*fpair;
// half lists store each pair once, so apply the opposite force to j here
if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) {
a_f(j,0) -= delx*fpair;
a_f(j,1) -= dely*fpair;
a_f(j,2) -= delz*fpair;
}
//if (evflag) ev_tally(i,j,nlocal,newton_pair,0.0,0.0,fpair,delx,dely,delz);
if (EVFLAG) this->template ev_tally<NEIGHFLAG,NEWTON_PAIR>(ev,i,j,0.0,fpair,delx,dely,delz);
}
}
a_f(i,0) += fx_i;
a_f(i,1) += fy_i;
a_f(i,2) += fz_i;
// density-dependent self energy of atom i, read from the (itype,itype) table
//tb = &tables[tabindex[itype][itype]];
const int tidx = d_table_const.tabindex(itype,itype);
//itable = static_cast<int> (((rho[i]*rho[i]) - tb->innersq) * tb->invdelta);
itable = static_cast<int> (((rho[i]*rho[i]) - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
// NOTE(review): itable is neither range-checked (LOOKUP) nor clamped (LINEAR)
// before the table reads below - confirm an out-of-range density cannot reach
// this point without faulting.
//if (TABSTYLE == LOOKUP) evdwl = tb->e[itable];
if (TABSTYLE == LOOKUP) {
evdwl = d_table_const.e(tidx,itable);
} else if (TABSTYLE == LINEAR) {
if (itable >= tlm1){
k_error_flag.template view<DeviceType>()() = 2;
}
if(itable==0) fraction_i=0.0;
//else fraction_i = (((rho[i]*rho[i]) - tb->rsq[itable]) * tb->invdelta);
else fraction_i = (((rho[i]*rho[i]) - d_table_const.rsq(tidx,itable)) * d_table_const.invdelta(tidx));
//evdwl = tb->e[itable] + fraction_i*tb->de[itable];
evdwl = d_table_const.e(tidx,itable) + fraction_i*d_table_const.de(tidx,itable);
} else k_error_flag.template view<DeviceType>()() = 3;
// note: d_cutsq holds the squared cutoff, so this factor is pi*rcut^4/84
evdwl *=(pi*d_cutsq(itype,itype)*d_cutsq(itype,itype))/84.0;
// uCG receives the energy weighted with the old site-1 weight, uCGnew the
// one weighted with the current site-1 weight
evdwlOld = mixWtSite1old_i*evdwl;
evdwl = mixWtSite1_i*evdwl;
uCG[i] += evdwlOld;
uCGnew[i] += evdwl;
// only the old-weight energy enters the global tally
evdwl = evdwlOld;
//if (evflag) ev_tally(0,0,nlocal,newton_pair,evdwl,0.0,0.0,0.0,0.0,0.0);
// NOTE(review): the author's FIXME questions the 1.0/0.5 scaling below;
// verify it against the reference host call quoted in the comment above.
if (EVFLAG)
ev.evdwl += ((/*FIXME??? (NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && */ NEWTON_PAIR)?1.0:0.5)*evdwl;
}
// Overload without a reduction argument (used by parallel_for): forwards to
// the reducing overload and discards the accumulated energy/virial.
template<class DeviceType>
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int TABSTYLE>
KOKKOS_INLINE_FUNCTION
void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,TABSTYLE>, const int &ii) const {
  typedef TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,TABSTYLE> compute_tag;
  EV_FLOAT discarded;
  this->template operator()<NEIGHFLAG,NEWTON_PAIR,EVFLAG,TABSTYLE>(compute_tag(), ii, discarded);
}
/* ---------------------------------------------------------------------- */
// Compute the local density rho of every atom: zero rho, accumulate the
// neighbor contributions with the kernel matching the neighbor-list flavor,
// the Newton setting and the single-type shortcut, then exchange rho between
// processors.
template<class DeviceType>
void PairMultiLucyRXKokkos<DeviceType>::computeLocalDensity()
{
  // per-atom views for this execution space
  x = atomKK->k_x.view<DeviceType>();
  type = atomKK->k_type.view<DeviceType>();
  rho = atomKK->k_rho.view<DeviceType>();
  h_rho = atomKK->k_rho.h_view;
  nlocal = atom->nlocal;

  atomKK->sync(execution_space,X_MASK | TYPE_MASK | DPDRHO_MASK);

  // neighbor list data
  const int inum = list->inum;
  NeighListKokkos<DeviceType>* kk_list = static_cast<NeighListKokkos<DeviceType>*>(list);
  d_numneigh = kk_list->d_numneigh;
  d_neighbors = kk_list->d_neighbors;
  d_ilist = kk_list->d_ilist;

  const double pi = MathConst::MY_PI;
  const bool newton = force->newton_pair;
  const bool single_type = (atom->ntypes == 1);

  // Special cut-off values for when there's only one type.
  cutsq_type11 = cutsq[1][1];
  rcut_type11 = sqrt(cutsq_type11);
  factor_type11 = 84.0/(5.0*pi*rcut_type11*rcut_type11*rcut_type11);

  // zero out density (ghost atoms included when Newton is on)
  int nzero = nlocal;
  if (newton) nzero += atom->nghost;
  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXZero>(0,nzero),*this);

  // rho = density at each atom
  // loop over neighbors of my atoms
  if (neighflag == HALF) {
    if (newton) {
      if (single_type) {
        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALF,1,true> >(0,inum),*this);
      } else {
        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALF,1,false> >(0,inum),*this);
      }
    } else {
      if (single_type) {
        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALF,0,true> >(0,inum),*this);
      } else {
        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALF,0,false> >(0,inum),*this);
      }
    }
  } else if (neighflag == HALFTHREAD) {
    if (newton) {
      if (single_type) {
        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALFTHREAD,1,true> >(0,inum),*this);
      } else {
        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALFTHREAD,1,false> >(0,inum),*this);
      }
    } else {
      if (single_type) {
        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALFTHREAD,0,true> >(0,inum),*this);
      } else {
        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<HALFTHREAD,0,false> >(0,inum),*this);
      }
    }
  } else if (neighflag == FULL) {
    if (newton) {
      if (single_type) {
        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<FULL,1,true> >(0,inum),*this);
      } else {
        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<FULL,1,false> >(0,inum),*this);
      }
    } else {
      if (single_type) {
        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<FULL,0,true> >(0,inum),*this);
      } else {
        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXComputeLocalDensity<FULL,0,false> >(0,inum),*this);
      }
    }
  }

  atomKK->modified(execution_space,DPDRHO_MASK);

  // communicate and sum densities (on the host)
  if (newton) {
    comm->reverse_comm_pair(this);
  }
  comm->forward_comm_pair(this);
}
// Kernel body for TagPairMultiLucyRXZero: reset the density of atom i.
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXZero, const int &i) const {
  rho(i) = 0.0;
}
// Density kernel for one entry ii of d_ilist: sums the weight-function
// contribution of every neighbor inside the cutoff into rho[i] and, for
// half neighbor lists, also into rho[j]. ONE_TYPE selects the precomputed
// type-1/type-1 cutoff constants instead of the per-type-pair cutoff table.
template<class DeviceType>
template<int NEIGHFLAG, int NEWTON_PAIR, bool ONE_TYPE>
KOKKOS_INLINE_FUNCTION
void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXComputeLocalDensity<NEIGHFLAG,NEWTON_PAIR,ONE_TYPE>, const int &ii) const {

  // The rho array is atomic for Half/Thread neighbor style
  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_rho = rho;

  const int i = d_ilist[ii];
  const double xi = x(i,0);
  const double yi = x(i,1);
  const double zi = x(i,2);
  const int itype = type[i];
  const int jnum = d_numneigh[i];
  const double pi = MathConst::MY_PI;

  // half lists hold each pair once, so the partner must be updated as well
  const bool update_partner_list = (NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD);

  double rho_sum = 0.0;

  for (int jj = 0; jj < jnum; ++jj) {
    const int j = (d_neighbors(i,jj) & NEIGHMASK);
    const int jtype = type[j];

    const double dx = xi - x(j,0);
    const double dy = yi - x(j,1);
    const double dz = zi - x(j,2);
    const double rsq = dx*dx + dy*dy + dz*dz;

    double factor;
    if (ONE_TYPE) {
      if (!(rsq < cutsq_type11)) continue;
      const double r_over_rcut = sqrt(rsq) / rcut_type11;
      const double oneMinus = 1.0 - r_over_rcut;
      const double oneMinus4 = oneMinus*oneMinus*oneMinus*oneMinus;
      factor = factor_type11*(1.0 + 1.5*r_over_rcut)*oneMinus4;
    } else {
      if (!(rsq < d_cutsq(itype,jtype))) continue;
      const double rcut = sqrt(d_cutsq(itype,jtype));
      const double r = sqrt(rsq);
      const double oneMinus = 1.0-r/rcut;
      const double oneMinus4 = oneMinus*oneMinus*oneMinus*oneMinus;
      factor = (84.0/(5.0*pi*rcut*rcut*rcut))*(1.0+3.0*r/(2.0*rcut))*oneMinus4;
    }

    rho_sum += factor;
    if (update_partner_list && (NEWTON_PAIR || j < nlocal))
      a_rho[j] += factor;
  }

  a_rho[i] += rho_sum;
}
/* ---------------------------------------------------------------------- */
// Compute the mixing weights of atom `id` for the two interaction sites.
// dvector(s,id) holds the current amount of species s and
// dvector(s+nspecies,id) the old amount. A site flagged by isOneFluid()
// is built from all species other than isite1 and isite2. The results are
// fractions of the total when fractionalWeighting is set, molecule counts
// otherwise.
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairMultiLucyRXKokkos<DeviceType>::getMixingWeights(int id, double &mixWtSite1old, double &mixWtSite2old, double &mixWtSite1, double &mixWtSite2) const
{
  double fractionOFAold, fractionOFA;
  double fractionOld1, fraction1;
  double fractionOld2, fraction2;
  double nMoleculesOFAold, nMoleculesOFA;
  double nMoleculesOld1, nMolecules1;
  double nMoleculesOld2, nMolecules2;

  // totals over all species, current and old
  double nTotal = 0.0;
  double nTotalOld = 0.0;
  for (int s = 0; s < nspecies; ++s) {
    nTotal += dvector(s,id);
    nTotalOld += dvector(s+nspecies,id);
  }

  const bool site1OneFluid = isOneFluid(isite1);
  const bool site2OneFluid = isOneFluid(isite2);

  // ordinary sites: read the species amounts directly
  if (!site1OneFluid) {
    nMoleculesOld1 = dvector(isite1+nspecies,id);
    nMolecules1 = dvector(isite1,id);
    fractionOld1 = nMoleculesOld1/nTotalOld;
    fraction1 = nMolecules1/nTotal;
  }
  if (!site2OneFluid) {
    nMoleculesOld2 = dvector(isite2+nspecies,id);
    nMolecules2 = dvector(isite2,id);
    fractionOld2 = nMoleculesOld2/nTotalOld;
    fraction2 = nMolecules2/nTotal;
  }

  // one-fluid sites: accumulate every species except the two named sites
  if (site1OneFluid || site2OneFluid) {
    nMoleculesOFAold = 0.0;
    nMoleculesOFA = 0.0;
    fractionOFAold = 0.0;
    fractionOFA = 0.0;

    for (int s = 0; s < nspecies; ++s) {
      if (s != isite1 && s != isite2) {
        nMoleculesOFAold += dvector(s+nspecies,id);
        nMoleculesOFA += dvector(s,id);
        fractionOFAold += dvector(s+nspecies,id) / nTotalOld;
        fractionOFA += dvector(s,id) / nTotal;
      }
    }

    if (site1OneFluid) {
      nMoleculesOld1 = 1.0-(nTotalOld-nMoleculesOFAold);
      nMolecules1 = 1.0-(nTotal-nMoleculesOFA);
      fractionOld1 = fractionOFAold;
      fraction1 = fractionOFA;
    }
    if (site2OneFluid) {
      nMoleculesOld2 = 1.0-(nTotalOld-nMoleculesOFAold);
      nMolecules2 = 1.0-(nTotal-nMoleculesOFA);
      fractionOld2 = fractionOFAold;
      fraction2 = fractionOFA;
    }
  }

  // report fractions or molecule counts, depending on the weighting mode
  mixWtSite1old = fractionalWeighting ? fractionOld1 : nMoleculesOld1;
  mixWtSite1 = fractionalWeighting ? fraction1 : nMolecules1;
  mixWtSite2old = fractionalWeighting ? fractionOld2 : nMoleculesOld2;
  mixWtSite2 = fractionalWeighting ? fraction2 : nMolecules2;
}
/* ---------------------------------------------------------------------- */
// Pack the densities of the atoms listed in row `iswap_in` of the send list
// into `buf`, running in this class's execution space.
//
//   n          number of atoms to pack
//   k_sendlist dual view of send lists; the DeviceType side is used
//   iswap_in   row of the send list to read
//   buf        dual view receiving one value per packed atom
//   pbc_flag, pbc  unused here
//
// Returns the number of values packed (n).
template<class DeviceType>
int PairMultiLucyRXKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist, int iswap_in, DAT::tdual_xfloat_1d &buf,
                                                                int pbc_flag, int *pbc)
{
  atomKK->sync(execution_space,DPDRHO_MASK);

  d_sendlist = k_sendlist.view<DeviceType>();
  iswap = iswap_in;
  v_buf = buf.view<DeviceType>();

  // Launch in DeviceType's execution space: every view the functor touches
  // (d_sendlist, v_buf, rho) is a DeviceType view, so a hard-coded
  // LMPDeviceType policy would mismatch for the LMPHostType instantiation.
  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXPackForwardComm>(0,n),*this);
  return n;
}
// Kernel body for TagPairMultiLucyRXPackForwardComm: copy the density of the
// i-th atom in send-list row iswap into slot i of the buffer.
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXPackForwardComm, const int &i) const {
  v_buf[i] = rho[d_sendlist(iswap, i)];
}
/* ---------------------------------------------------------------------- */
// Unpack `n` received densities from `buf` into rho[first_in ..
// first_in+n-1], running in this class's execution space, and mark rho as
// modified there.
template<class DeviceType>
void PairMultiLucyRXKokkos<DeviceType>::unpack_forward_comm_kokkos(int n, int first_in, DAT::tdual_xfloat_1d &buf)
{
  first = first_in;
  v_buf = buf.view<DeviceType>();

  // Launch in DeviceType's execution space: v_buf and rho are DeviceType
  // views, so a hard-coded LMPDeviceType policy would mismatch for the
  // LMPHostType instantiation.
  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXUnpackForwardComm>(0,n),*this);

  atomKK->modified(execution_space,DPDRHO_MASK);
}
// Kernel body for TagPairMultiLucyRXUnpackForwardComm: store buffer slot i
// as the density of atom first + i.
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXUnpackForwardComm, const int &i) const {
  const int target = i + first;
  rho[target] = v_buf[i];
}
/* ---------------------------------------------------------------------- */
// Host-side forward-communication pack: after syncing rho to the host, copy
// the density of each listed atom into buf. Returns the number of values
// written.
template<class DeviceType>
int PairMultiLucyRXKokkos<DeviceType>::pack_forward_comm(int n, int *list, double *buf, int pbc_flag, int *pbc)
{
  atomKK->sync(Host,DPDRHO_MASK);

  int nwritten = 0;
  for (int k = 0; k < n; ++k)
    buf[nwritten++] = h_rho[list[k]];

  return nwritten;
}
/* ---------------------------------------------------------------------- */
// Host-side forward-communication unpack: overwrite the densities of atoms
// first .. first+n-1 with the received values and mark rho as modified on
// the host.
template<class DeviceType>
void PairMultiLucyRXKokkos<DeviceType>::unpack_forward_comm(int n, int first, double *buf)
{
  for (int k = 0; k < n; ++k)
    h_rho[first + k] = buf[k];

  atomKK->modified(Host,DPDRHO_MASK);
}
/* ---------------------------------------------------------------------- */
// Host-side reverse-communication pack: after syncing rho to the host, copy
// the densities of atoms first .. first+n-1 into buf. Returns the number of
// values written.
template<class DeviceType>
int PairMultiLucyRXKokkos<DeviceType>::pack_reverse_comm(int n, int first, double *buf)
{
  atomKK->sync(Host,DPDRHO_MASK);

  int nwritten = 0;
  for (int k = 0; k < n; ++k)
    buf[nwritten++] = h_rho[first + k];

  return nwritten;
}
/* ---------------------------------------------------------------------- */
// Host-side reverse-communication unpack: add each received value to the
// density of the corresponding listed atom and mark rho as modified on the
// host.
template<class DeviceType>
void PairMultiLucyRXKokkos<DeviceType>::unpack_reverse_comm(int n, int *list, double *buf)
{
  for (int k = 0; k < n; ++k)
    h_rho[list[k]] += buf[k];

  atomKK->modified(Host,DPDRHO_MASK);
}
/* ---------------------------------------------------------------------- */
// Tally the per-atom energy and the global/per-atom virial of one i-j pair.
// With a full neighbor list only atom i receives its half share; otherwise
// i and j each receive half, subject to NEWTON_PAIR or the atom index being
// below nlocal. The pair energy is only tallied per atom here; nothing is
// added to ev.evdwl.
template<class DeviceType>
template<int NEIGHFLAG, int NEWTON_PAIR>
KOKKOS_INLINE_FUNCTION
void PairMultiLucyRXKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &j,
      const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
                const F_FLOAT &dely, const F_FLOAT &delz) const
{
  // The eatom and vatom arrays are atomic for Half/Thread neighbor style
  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_eatom = k_eatom.view<DeviceType>();
  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>();

  // which of the two atoms receive a share
  const bool share_i = (NEIGHFLAG==FULL) || NEWTON_PAIR || i < nlocal;
  const bool share_j = (NEIGHFLAG!=FULL) && (NEWTON_PAIR || j < nlocal);

  if (eflag && eflag_atom) {
    const E_FLOAT epairhalf = 0.5 * epair;
    if (share_i) v_eatom[i] += epairhalf;
    if (share_j) v_eatom[j] += epairhalf;
  }

  if (vflag_either) {
    // virial components in the order xx, yy, zz, xy, xz, yz
    const E_FLOAT w[6] = {
      delx*delx*fpair,
      dely*dely*fpair,
      delz*delz*fpair,
      delx*dely*fpair,
      delx*delz*fpair,
      dely*delz*fpair
    };

    if (vflag_global) {
      if (share_i)
        for (int k = 0; k < 6; ++k) ev.v[k] += 0.5*w[k];
      if (share_j)
        for (int k = 0; k < 6; ++k) ev.v[k] += 0.5*w[k];
    }

    if (vflag_atom) {
      if (share_i)
        for (int k = 0; k < 6; ++k) v_vatom(i,k) += 0.5*w[k];
      if (share_j)
        for (int k = 0; k < 6; ++k) v_vatom(j,k) += 0.5*w[k];
    }
  }
}
/* ---------------------------------------------------------------------- */
// Build the Kokkos copies of the tabulated data: allocate the host and
// device views for the active table style, fill the host views from the
// `tables` array, copy them to the device, and publish the device views
// through d_table_const. Clears update_table when done.
template<class DeviceType>
void PairMultiLucyRXKokkos<DeviceType>::create_kokkos_tables()
{
  const int tlm1 = tablength-1;

  memory->create_kokkos(d_table->innersq,h_table->innersq,ntables,"Table::innersq");
  memory->create_kokkos(d_table->invdelta,h_table->invdelta,ntables,"Table::invdelta");

  // LOOKUP keeps only e and f; LINEAR additionally keeps rsq, de and df
  if (tabstyle == LOOKUP) {
    memory->create_kokkos(d_table->e,h_table->e,ntables,tlm1,"Table::e");
    memory->create_kokkos(d_table->f,h_table->f,ntables,tlm1,"Table::f");
  } else if (tabstyle == LINEAR) {
    memory->create_kokkos(d_table->rsq,h_table->rsq,ntables,tablength,"Table::rsq");
    memory->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e");
    memory->create_kokkos(d_table->f,h_table->f,ntables,tablength,"Table::f");
    memory->create_kokkos(d_table->de,h_table->de,ntables,tlm1,"Table::de");
    memory->create_kokkos(d_table->df,h_table->df,ntables,tlm1,"Table::df");
  }

  // number of columns actually allocated for each array
  const int ncol_rsq = static_cast<int>(h_table->rsq.dimension_1());
  const int ncol_e = static_cast<int>(h_table->e.dimension_1());
  const int ncol_de = static_cast<int>(h_table->de.dimension_1());
  const int ncol_f = static_cast<int>(h_table->f.dimension_1());
  const int ncol_df = static_cast<int>(h_table->df.dimension_1());

  // fill the host views, one row per table
  for (int t = 0; t < ntables; ++t) {
    const Table* src = &tables[t];

    h_table->innersq[t] = src->innersq;
    h_table->invdelta[t] = src->invdelta;

    for (int c = 0; c < ncol_rsq; ++c) h_table->rsq(t,c) = src->rsq[c];
    for (int c = 0; c < ncol_e; ++c) h_table->e(t,c) = src->e[c];
    for (int c = 0; c < ncol_de; ++c) h_table->de(t,c) = src->de[c];
    for (int c = 0; c < ncol_f; ++c) h_table->f(t,c) = src->f[c];
    for (int c = 0; c < ncol_df; ++c) h_table->df(t,c) = src->df[c];
  }

  // host -> device
  Kokkos::deep_copy(d_table->innersq,h_table->innersq);
  Kokkos::deep_copy(d_table->invdelta,h_table->invdelta);
  Kokkos::deep_copy(d_table->rsq,h_table->rsq);
  Kokkos::deep_copy(d_table->e,h_table->e);
  Kokkos::deep_copy(d_table->de,h_table->de);
  Kokkos::deep_copy(d_table->f,h_table->f);
  Kokkos::deep_copy(d_table->df,h_table->df);
  Kokkos::deep_copy(d_table->tabindex,h_table->tabindex);

  // views handed to the kernels
  d_table_const.innersq = d_table->innersq;
  d_table_const.invdelta = d_table->invdelta;
  d_table_const.rsq = d_table->rsq;
  d_table_const.e = d_table->e;
  d_table_const.de = d_table->de;
  d_table_const.f = d_table->f;
  d_table_const.df = d_table->df;

  update_table = 0;
}
/* ----------------------------------------------------------------------
allocate all arrays
------------------------------------------------------------------------- */
// Allocate the per-type-pair arrays (setflag, cutsq, tabindex), each of size
// (ntypes+1) x (ntypes+1), together with their Kokkos views, and zero them.
template<class DeviceType>
void PairMultiLucyRXKokkos<DeviceType>::allocate()
{
  allocated = 1;

  const int np1 = atom->ntypes + 1;
  const int ncells = np1*np1;

  memory->create(setflag,np1,np1,"pair:setflag");

  memory->create_kokkos(k_cutsq,cutsq,np1,np1,"pair:cutsq");
  d_cutsq = k_cutsq.template view<DeviceType>();
  k_cutsq.template modify<LMPHostType>();

  memory->create_kokkos(d_table->tabindex,h_table->tabindex,tabindex,np1,np1,"pair:tabindex");
  d_table_const.tabindex = d_table->tabindex;

  // zero the host storage of all three arrays
  memset(setflag[0],0,ncells*sizeof(int));
  memset(cutsq[0],0,ncells*sizeof(double));
  memset(tabindex[0],0,ncells*sizeof(int));
}
/* ----------------------------------------------------------------------
global settings
------------------------------------------------------------------------- */
// Parse the pair_style arguments: arg[0] is the table style ("lookup" or
// "linear"), arg[1] the table length (at least 2); any further argument
// must be "fractional" or "molecular" and selects the weighting mode.
// Existing tables and per-type arrays are released afterwards, because the
// settings cannot be changed in place.
template<class DeviceType>
void PairMultiLucyRXKokkos<DeviceType>::settings(int narg, char **arg)
{
  if (narg < 2) error->all(FLERR,"Illegal pair_style command");

  // new settings
  const char *style = arg[0];
  if (!strcmp(style,"lookup")) {
    tabstyle = LOOKUP;
  } else if (!strcmp(style,"linear")) {
    tabstyle = LINEAR;
  } else {
    error->all(FLERR,"Unknown table style in pair_style command");
  }

  tablength = force->inumeric(FLERR,arg[1]);
  if (tablength < 2) error->all(FLERR,"Illegal number of pair table entries");

  // optional keywords
  for (int iarg = 2; iarg < narg; ++iarg) {
    const char *keyword = arg[iarg];
    if (!strcmp(keyword,"fractional")) {
      fractionalWeighting = true;
    } else if (!strcmp(keyword,"molecular")) {
      fractionalWeighting = false;
    } else {
      error->all(FLERR,"Illegal pair_style command");
    }
  }

  // delete old tables, since cannot just change settings
  for (int m = 0; m < ntables; ++m) free_table(tables + m);
  memory->sfree(tables);

  if (allocated) {
    memory->destroy(setflag);

    // drop the tabindex views by assigning empty ones
    d_table_const.tabindex = d_table->tabindex = typename ArrayTypes<DeviceType>::t_int_2d();
    h_table->tabindex = typename ArrayTypes<LMPHostType>::t_int_2d();
  }

  allocated = 0;
  ntables = 0;
  tables = NULL;
}
/* ---------------------------------------------------------------------- */
// Explicit instantiations of the pair style. The LMPHostType instantiation
// is only emitted when KOKKOS_HAVE_CUDA is defined - presumably because the
// two types are otherwise the same; confirm against kokkos_type.h.
namespace LAMMPS_NS {
template class PairMultiLucyRXKokkos<LMPDeviceType>;
#ifdef KOKKOS_HAVE_CUDA
template class PairMultiLucyRXKokkos<LMPHostType>;
#endif
}

View File

@ -0,0 +1,266 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(multi/lucy/rx/kk,PairMultiLucyRXKokkos<LMPDeviceType>)
PairStyle(multi/lucy/rx/kk/device,PairMultiLucyRXKokkos<LMPDeviceType>)
PairStyle(multi/lucy/rx/kk/host,PairMultiLucyRXKokkos<LMPHostType>)
#else
#ifndef LMP_PAIR_MULTI_LUCY_RX_KOKKOS_H
#define LMP_PAIR_MULTI_LUCY_RX_KOKKOS_H
#include "pair_multi_lucy_rx.h"
#include "pair_kokkos.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
// Empty tag types that select which operator() overload of
// PairMultiLucyRXKokkos a Kokkos::RangePolicy dispatches to.

// pack / unpack of rho for forward communication
struct TagPairMultiLucyRXPackForwardComm{};
struct TagPairMultiLucyRXUnpackForwardComm{};
// per-atom mixing-weight evaluation
struct TagPairMultiLucyRXgetMixingWeights{};
// force/energy kernel, specialized on neighbor-list flavor, Newton setting,
// energy/virial tallying and table style
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int TABSTYLE>
struct TagPairMultiLucyRXCompute{};
// reset of rho
struct TagPairMultiLucyRXZero{};
// local-density kernel, specialized on neighbor-list flavor, Newton setting
// and the single-atom-type shortcut
template<int NEIGHFLAG, int NEWTON_PAIR, bool ONE_TYPE>
struct TagPairMultiLucyRXComputeLocalDensity{};
// Kokkos implementation of the multi/lucy/rx pair style, templated on the
// Kokkos device type it runs on. The class is its own Kokkos functor: the
// tagged operator() overloads below are the kernel bodies launched from the
// member functions.
template<class DeviceType>
class PairMultiLucyRXKokkos : public PairMultiLucyRX {
public:
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
// reduction value type used by the parallel_reduce launches
typedef EV_FLOAT value_type;
PairMultiLucyRXKokkos(class LAMMPS *);
virtual ~PairMultiLucyRXKokkos();
void compute(int, int);
void settings(int, char **);
// compute() specialized on the table style
template<int TABSTYLE>
void compute_style(int, int);
void init_style();
// forward communication of rho in the class's execution space
int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&,
int, int *);
void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&);
// host-side forward/reverse communication of rho
int pack_forward_comm(int, int *, double *, int, int *);
void unpack_forward_comm(int, int, double *);
int pack_reverse_comm(int, int, double *);
void unpack_reverse_comm(int, int *, double *);
// recompute rho for all atoms and communicate it
void computeLocalDensity();
// kernel bodies, selected by tag type
KOKKOS_INLINE_FUNCTION
void operator()(TagPairMultiLucyRXPackForwardComm, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPairMultiLucyRXUnpackForwardComm, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPairMultiLucyRXgetMixingWeights, const int&) const;
// force kernel with energy/virial reduction (parallel_reduce)
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int TABSTYLE>
KOKKOS_INLINE_FUNCTION
void operator()(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,TABSTYLE>, const int&, EV_FLOAT&) const;
// force kernel without reduction (parallel_for)
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int TABSTYLE>
KOKKOS_INLINE_FUNCTION
void operator()(TagPairMultiLucyRXCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,TABSTYLE>, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPairMultiLucyRXZero, const int&) const;
template<int NEIGHFLAG, int NEWTON_PAIR, bool ONE_TYPE>
KOKKOS_INLINE_FUNCTION
void operator()(TagPairMultiLucyRXComputeLocalDensity<NEIGHFLAG,NEWTON_PAIR,ONE_TYPE>, const int&) const;
// per-pair tally of per-atom energy and global/per-atom virial
template<int NEIGHFLAG, int NEWTON_PAIR>
KOKKOS_INLINE_FUNCTION
void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
const F_FLOAT &dely, const F_FLOAT &delz) const;
private:
int nlocal;
int neighflag;
int eflag,vflag;
// cutoff constants precomputed by computeLocalDensity() from cutsq[1][1],
// used by the density kernel when there is a single atom type
double cutsq_type11;
double rcut_type11;
double factor_type11;
// table styles; only LOOKUP and LINEAR are handled by the kernels
enum{LOOKUP,LINEAR,SPLINE,BITMAP};
//struct Table {
// int ninput,rflag,fpflag,match;
// double rlo,rhi,fplo,fphi,cut;
// double *rfile,*efile,*ffile;
// double *e2file,*f2file;
// double innersq,delta,invdelta,deltasq6;
// double *rsq,*drsq,*e,*de,*f,*df,*e2,*f2;
//};
/*struct TableDeviceConst {
typename AT::t_int_2d_randomread tabindex;
typename AT::t_ffloat_1d_randomread innersq,invdelta;
typename AT::t_ffloat_2d_randomread rsq,e,de,f,df;
};*/
//It's faster not to use texture fetch if the number of tables is less than 32!
// table views read by the kernels
struct TableDeviceConst {
typename AT::t_int_2d tabindex;
typename AT::t_ffloat_1d innersq,invdelta;
typename AT::t_ffloat_2d_randomread rsq,e,de,f,df;
};
// device-side table storage
struct TableDevice {
typename AT::t_int_2d tabindex;
typename AT::t_ffloat_1d innersq,invdelta;
typename AT::t_ffloat_2d rsq,e,de,f,df;
};
// host-side table storage, filled by create_kokkos_tables()
struct TableHost {
HAT::t_int_2d tabindex;
HAT::t_ffloat_1d innersq,invdelta;
HAT::t_ffloat_2d rsq,e,de,f,df;
};
TableDeviceConst d_table_const;
TableDevice* d_table;
TableHost* h_table;
// NOTE(review): m_cutsq is not referenced by the member functions visible
// in this chunk - confirm whether it is still needed
F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
void allocate();
// cleared by create_kokkos_tables() once the Kokkos tables are built
int update_table;
void create_kokkos_tables();
KOKKOS_INLINE_FUNCTION
void getMixingWeights(int, double &, double &, double &, double &) const;
// per-atom mixing weights (old and current, for both sites)
typename AT::t_float_1d d_mixWtSite1old,d_mixWtSite2old,d_mixWtSite1,d_mixWtSite2;
// per-atom data views
typename AT::t_x_array_randomread x;
typename AT::t_f_array f;
typename AT::t_int_1d_randomread type;
typename AT::t_efloat_1d rho;
typename HAT::t_efloat_1d h_rho;
typename AT::t_efloat_1d uCG, uCGnew;
typename AT::t_float_2d dvector;
// per-atom energy and virial accumulators
DAT::tdual_efloat_1d k_eatom;
DAT::tdual_virial_array k_vatom;
typename AT::t_efloat_1d d_eatom;
typename AT::t_virial_array d_vatom;
// neighbor list views
typename AT::t_neighbors_2d d_neighbors;
typename AT::t_int_1d_randomread d_ilist;
typename AT::t_int_1d_randomread d_numneigh;
// error code written by the kernels and checked on the host afterwards
DAT::tdual_int_scalar k_error_flag;
typename AT::tdual_ffloat_2d k_cutsq;
typename AT::t_ffloat_2d d_cutsq;
// state handed to the forward-communication kernels
int iswap;
int first;
typename AT::t_int_2d d_sendlist;
typename AT::t_xfloat_1d_um v_buf;
friend void pair_virial_fdotr_compute<PairMultiLucyRXKokkos>(PairMultiLucyRXKokkos*);
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: Pair multi/lucy/rx command requires atom_style with density (e.g. dpd, meso)
Self-explanatory
E: Density < table inner cutoff
The local density is smaller than the inner cutoff of the table
E: Density > table outer cutoff
The local density is greater than the outer cutoff of the table
E: Only LOOKUP and LINEAR table styles have been implemented for pair multi/lucy/rx
Self-explanatory
E: Illegal ... command
Self-explanatory. Check the input script syntax and compare to the
documentation for the command. You can use -echo screen as a
command-line option when running LAMMPS to see the offending line.
E: Unknown table style in pair_style command
Self-explanatory
E: Illegal number of pair table entries
There must be at least 2 table entries.
E: Illegal pair_coeff command
All pair coefficients must be set in the data file or by the
pair_coeff command before running a simulation.
E: PairMultiLucyRXKokkos requires a fix rx command
The fix rx command must come before the pair style command in the input file
E: There are no rx species specified
There must be at least one species specified through the fix rx command
E: Invalid pair table length
Length of read-in pair table is invalid
E: All pair coeffs are not set
All pair coefficients must be set in the data file or by the
pair_coeff command before running a simulation.
E: Cannot open file %s
The specified file cannot be opened. Check that the path and name are
correct.
E: Did not find keyword in table file
Keyword used in pair_coeff command was not found in table file.
E: Invalid keyword in pair table parameters
Keyword used in list of table parameters is not recognized.
E: Pair table parameters did not set N
List of pair table parameters must include N setting.
*/

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,122 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(table/rx/kk,PairTableRXKokkos<LMPDeviceType>)
PairStyle(table/rx/kk/device,PairTableRXKokkos<LMPDeviceType>)
PairStyle(table/rx/kk/host,PairTableRXKokkos<LMPHostType>)
#else
#ifndef LMP_PAIR_TABLE_RX_KOKKOS_H
#define LMP_PAIR_TABLE_RX_KOKKOS_H
#include "pair_table_kokkos.h"
#include "kokkos_few.h"
namespace LAMMPS_NS {
// Kokkos variant of the table/rx pair style, parameterized on the Kokkos
// device type it is instantiated for (the PairStyle() registrations above
// instantiate it with LMPDeviceType and LMPHostType).
// It derives from PairTable and redeclares the PairTableRX state it needs
// in the "PairTableRX members" section at the end of the class.
template<class DeviceType>
class PairTableRXKokkos : public PairTable {
public:
// Bitmask composed of the FULL, HALFTHREAD, HALF and N2 flags.
// NOTE(review): presumably the neighbor-list styles this pair style
// accepts -- confirm against the Kokkos pair helpers that read it.
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
typedef DeviceType device_type;
PairTableRXKokkos(class LAMMPS *);
virtual ~PairTableRXKokkos();
virtual void compute(int, int);
// Variant of compute() selected at compile time by the TABSTYLE constant.
template<int TABSTYLE>
void compute_style(int, int);
void settings(int, char **);
void coeff(int, char **);
double init_one(int, int);
virtual double single(int, int, int, int, double, double, double, double &);
void init_style();
// Table data held in DeviceType views. Field-for-field the same as
// TableDevice below, except that the 2-d table arrays (rsq ... f2) use the
// t_ffloat_2d_randomread view type instead of t_ffloat_2d.
struct TableDeviceConst {
typename ArrayTypes<DeviceType>::t_ffloat_2d cutsq;
typename ArrayTypes<DeviceType>::t_int_2d tabindex;
typename ArrayTypes<DeviceType>::t_int_1d nshiftbits,nmask;
typename ArrayTypes<DeviceType>::t_ffloat_1d innersq,invdelta,deltasq6;
typename ArrayTypes<DeviceType>::t_ffloat_2d_randomread rsq,drsq,e,de,f,df,e2,f2;
};
// Table data held in DeviceType views, with plain t_ffloat_2d table arrays.
struct TableDevice {
typename ArrayTypes<DeviceType>::t_ffloat_2d cutsq;
typename ArrayTypes<DeviceType>::t_int_2d tabindex;
typename ArrayTypes<DeviceType>::t_int_1d nshiftbits,nmask;
typename ArrayTypes<DeviceType>::t_ffloat_1d innersq,invdelta,deltasq6;
typename ArrayTypes<DeviceType>::t_ffloat_2d rsq,drsq,e,de,f,df,e2,f2;
};
// Same layout as TableDevice, but every view uses LMPHostType.
struct TableHost {
typename ArrayTypes<LMPHostType>::t_ffloat_2d cutsq;
typename ArrayTypes<LMPHostType>::t_int_2d tabindex;
typename ArrayTypes<LMPHostType>::t_int_1d nshiftbits,nmask;
typename ArrayTypes<LMPHostType>::t_ffloat_1d innersq,invdelta,deltasq6;
typename ArrayTypes<LMPHostType>::t_ffloat_2d rsq,drsq,e,de,f,df,e2,f2;
};
// One TableDeviceConst by value plus pointers to a TableDevice and a
// TableHost. NOTE(review): presumably filled by create_kokkos_tables();
// ownership of the two pointers is not visible here -- confirm in the .cpp.
TableDeviceConst d_table_const;
TableDevice* d_table;
TableHost* h_table;
// Fixed-size (MAX_TYPES_STACKPARAMS+1) x (MAX_TYPES_STACKPARAMS+1) array of
// F_FLOAT, alongside a DeviceType view with the same name stem.
Few<Few<F_FLOAT, MAX_TYPES_STACKPARAMS+1>, MAX_TYPES_STACKPARAMS+1> m_cutsq;
typename ArrayTypes<DeviceType>::t_ffloat_2d d_cutsq;
virtual void allocate();
void compute_table(Table *);
// Views named after the per-atom x and f arrays.
// NOTE(review): presumably atom positions and forces -- confirm in compute().
typename ArrayTypes<DeviceType>::t_x_array_randomread x;
typename ArrayTypes<DeviceType>::t_f_array f;
int neighflag;
int update_table;
void create_kokkos_tables();
void cleanup_copy();
// Grants the pair_virial_fdotr_compute helper access to this class.
friend void pair_virial_fdotr_compute<PairTableRXKokkos>(PairTableRXKokkos*);
/* PairTableRX members */
// One-dimensional double views on DeviceType for the site-1/site-2 mixing
// weights (an "old" and a current copy of each, going by the names).
Kokkos::View<double*, DeviceType> mixWtSite1old;
Kokkos::View<double*, DeviceType> mixWtSite2old;
Kokkos::View<double*, DeviceType> mixWtSite1;
Kokkos::View<double*, DeviceType> mixWtSite2;
int nspecies;
char *site1, *site2;
int isite1, isite2;
bool fractionalWeighting;
// Per-atom energy and virial storage: tdual_* members with matching
// DeviceType views. NOTE(review): presumably the d_* views alias the device
// side of the k_* dual views -- confirm where they are assigned.
typename ArrayTypes<DeviceType>::tdual_efloat_1d k_eatom;
typename ArrayTypes<DeviceType>::tdual_virial_array k_vatom;
typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -0,0 +1,72 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "comm.h"
#include "rand_pool_wrap_kokkos.h"
#include "lammps.h"
#include "kokkos.h"
#include "random_mars.h"
#include "update.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// Construct an empty pool: no generators exist until init() is called.
// The unnamed int argument is ignored; the thread count is read from the
// Kokkos settings of the given LAMMPS instance.
RandPoolWrap::RandPoolWrap(int, LAMMPS *lmp)
  : Pointers(lmp),
    random_thr(NULL),
    nthreads(lmp->kokkos->num_threads)
{
}
/* ---------------------------------------------------------------------- */
// Releases nothing: the body is empty, so the generator pool is only freed
// by an explicit call to destroy().
// NOTE(review): presumably intentional, so that destroying a copy of the
// wrapper does not free the pool the original still points at -- confirm.
RandPoolWrap::~RandPoolWrap()
{
}
// Free the generator pool, if one was allocated, and mark it as gone.
// Slot 0 is skipped on purpose: init() stores the caller's generator
// there, so it is not ours to delete.
void RandPoolWrap::destroy()
{
  if (!random_thr) return;

  for (int tid = 1; tid < nthreads; ++tid)
    delete random_thr[tid];

  delete[] random_thr;
  random_thr = NULL;
}
// (Re)build the pool of per-thread Marsaglia generators.
//   random : caller-owned generator installed in slot 0; never deleted here
//   seed   : base seed for the generators created for threads 1..nthreads-1
void RandPoolWrap::init(RanMars* random, int seed)
{
  // deallocate any previous pool through the shared teardown path.
  // destroy() walks the old nthreads and resets random_thr to NULL, so the
  // pointer never dangles if one of the allocations below fails.
  destroy();

  // allocate pool of RNGs
  // generate a random number generator instance for
  // all threads != 0. make sure we use unique seeds.
  nthreads = lmp->kokkos->num_threads;

  // value-initialize so every slot is NULL until it is filled; a partially
  // built pool can then still be handed to destroy() safely
  random_thr = new RanMars*[nthreads]();

  for (int tid = 1; tid < nthreads; ++tid) {
    random_thr[tid] = new RanMars(lmp, seed + comm->me
                                  + comm->nprocs*tid);
  }

  // to ensure full compatibility with the serial style
  // we use the serial random number generator instance for thread 0
  random_thr[0] = random;
}

View File

@ -0,0 +1,83 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifndef RAND_POOL_WRAP_H
#define RAND_POOL_WRAP_H
#include "pointers.h"
#include "kokkos_type.h"
#include "random_mars.h"
#include "error.h"
namespace LAMMPS_NS {
// Thin handle around a RanMars generator. It only forwards to the pointed-to
// generator; it starts out with no generator attached (rng == NULL).
struct RandWrap {
  class RanMars* rng;

  KOKKOS_INLINE_FUNCTION
  RandWrap() : rng(NULL) {}

  // value produced by the attached generator's uniform()
  KOKKOS_INLINE_FUNCTION
  double drand() { return rng->uniform(); }

  // value produced by the attached generator's gaussian()
  KOKKOS_INLINE_FUNCTION
  double normal() { return rng->gaussian(); }
};
// Pool of RanMars generators stored as one pointer per thread slot.
// get_state()/free_state() hand out and take back RandWrap handles; the
// pool itself is built by init() and released by destroy().
class RandPoolWrap : protected Pointers {
 public:
  RandPoolWrap(int, class LAMMPS *);
  ~RandPoolWrap();
  void destroy();
  void init(RanMars*, int);

  // Return a handle to the generator in the calling thread's slot.
  // When KOKKOS_HAVE_CUDA is defined an error is raised first and slot 0
  // is used; otherwise the slot is LMPDeviceType::hardware_thread_id().
  KOKKOS_INLINE_FUNCTION
  RandWrap get_state() const
  {
#ifdef KOKKOS_HAVE_CUDA
    error->all(FLERR,"Cannot use Marsaglia RNG with GPUs");
    const int tid = 0;
#else
    const int tid = LMPDeviceType::hardware_thread_id();
#endif

    RandWrap handle;
    handle.rng = random_thr[tid];
    return handle;
  }

  // Nothing to give back: the handle does not own its generator.
  KOKKOS_INLINE_FUNCTION
  void free_state(RandWrap) const
  {
  }

 private:
  class RanMars **random_thr;
  int nthreads;
};
}
#endif
/* ERROR/WARNING messages:
*/

View File

@ -67,7 +67,6 @@ void RegBlockKokkos<DeviceType>::match_all_kokkos(int groupbit_in, DAT::tdual_in
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagRegBlockMatchAll>(0,nlocal),*this);
DeviceType::fence();
copymode = 0;
k_match_in.template modify<DeviceType>();

View File

@ -0,0 +1,116 @@
# mpi = MPI with its default compiler
SHELL = /bin/sh
# ---------------------------------------------------------------------
# compiler/linker settings
# specify flags and libraries needed for your compiler
# CC and LINK both use the MPI C++ compiler wrapper.
CC = mpicxx
# -march=native generates code for the CPU of the build host.
# -frounding-math and -fsignaling-nans tell GCC not to assume the default
# floating-point rounding mode or the absence of trapping signaling NaNs.
CCFLAGS = -g -O3 -Wall -Wextra -frounding-math -fsignaling-nans -march=native
# SHFLAGS is added to every compile line by the %.o rule as well as to the
# shlib link line, so these flags apply to all object files.
SHFLAGS = -shared -MD -mcmodel=medium -fpic -fPIC
DEPFLAGS = -M
LINK = mpicxx
LINKFLAGS = -g -O
LIB =
# SIZE is run on the executable after linking; ARCHIVE/ARFLAGS build the
# static library in the lib target.
SIZE = size
ARCHIVE = ar
ARFLAGS = -rc
SHLIBFLAGS = -shared
# ---------------------------------------------------------------------
# LAMMPS-specific settings, all OPTIONAL
# specify settings for LAMMPS features you will use
# if you change any -D setting, do full re-compile after "make clean"
# LAMMPS ifdef settings
# see possible settings in Section 2.2 (step 4) of manual
LMP_INC = -DLAMMPS_GZIP
#LMP_INC += -DLAMMPS_JPEG
LMP_INC += -DLAMMPS_MEMALIGN=64
# MPI library
# see discussion in Section 2.2 (step 5) of manual
# MPI wrapper compiler/linker can provide this info
# can point to dummy MPI library in src/STUBS as in Makefile.serial
# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts
# INC = path for mpi.h, MPI compiler settings
# PATH = path for MPI library
# LIB = name of MPI library
MPI_INC = -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1
MPI_PATH =
MPI_LIB =
# FFT library
# see discussion in Section 2.2 (step 6) of manual
# can be left blank to use provided KISS FFT library
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
# PATH = path for FFT library
# LIB = name of FFT library
FFT_INC =
FFT_PATH =
FFT_LIB =
# JPEG and/or PNG library
# see discussion in Section 2.2 (step 7) of manual
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
# INC = path(s) for jpeglib.h and/or png.h
# PATH = path(s) for JPEG library and/or PNG library
# LIB = name(s) of JPEG library and/or PNG library
JPG_INC =
JPG_PATH =
JPG_LIB =
# ---------------------------------------------------------------------
# build rules and dependencies
# do not edit this section
include Makefile.package.settings
include Makefile.package
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS)
EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS)
# Path to src files
vpath %.cpp ..
vpath %.h ..
# Link target
$(EXE): $(OBJ) $(EXTRA_LINK_DEPENDS)
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
$(SIZE) $(EXE)
# Library targets
lib: $(OBJ) $(EXTRA_LINK_DEPENDS)
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
shlib: $(OBJ) $(EXTRA_LINK_DEPENDS)
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
$(OBJ) $(EXTRA_LIB) $(LIB)
# Compilation rules
%.o:%.cpp
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
# Individual dependencies
depend : fastdep.exe $(SRC)
@./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1
fastdep.exe: ../DEPEND/fastdep.c
cc -O -o $@ $<
sinclude .depend

View File

@ -542,8 +542,8 @@ void DumpCustomMPIIO::write_string(int n, double *mybuf)
#if defined(_OPENMP)
int nthreads = omp_get_max_threads();
if (nthreads > 1)
nsme = convert_string_omp(n,mybuf);
if ((nthreads > 1) && !(lmp->kokkos))
nsme = convert_string_omp(n,mybuf); // not (yet) compatible with Kokkos
else
nsme = convert_string(n,mybuf);
#else

View File

@ -16,6 +16,9 @@ style_region.h
style_neigh_bin.h
style_neigh_pair.h
style_neigh_stencil.h
# deleted on 5 September 2017
npair_halffull_newton_ssa.cpp
npair_halffull_newton_ssa.h
# deleted on 6 June 2017
pair_lj_sf.cpp
pair_lj_sf.h

View File

@ -34,6 +34,8 @@ FixDPDenergy::FixDPDenergy(LAMMPS *lmp, int narg, char **arg) :
pairDPDE = NULL;
pairDPDE = (PairDPDfdtEnergy *) force->pair_match("dpd/fdt/energy",1);
if (pairDPDE == NULL)
pairDPDE = (PairDPDfdtEnergy *) force->pair_match("dpd/fdt/energy/kk",1);
if (pairDPDE == NULL)
error->all(FLERR,"Must use pair_style dpd/fdt/energy with fix dpd/energy");

View File

@ -150,6 +150,8 @@ FixEOStableRX::FixEOStableRX(LAMMPS *lmp, int narg, char **arg) :
FixEOStableRX::~FixEOStableRX()
{
if (copymode) return;
for (int m = 0; m < ntables; m++) {
free_table(&tables[m]);
free_table(&tables2[m]);

View File

@ -220,6 +220,9 @@ FixRX::FixRX(LAMMPS *lmp, int narg, char **arg) :
FixRX::~FixRX()
{
//printf("Inside FixRX::~FixRX copymode= %d\n", copymode);
if (copymode) return;
// De-Allocate memory to prevent memory leak
for (int ii = 0; ii < nreactions; ii++){
delete [] stoich[ii];
@ -370,11 +373,11 @@ void FixRX::post_constructor()
newarg2[nspecies+3] = (char *) "ghost";
newarg2[nspecies+4] = (char *) "yes";
modify->add_fix(nspecies+5,newarg);
modify->add_fix(nspecies+5,newarg,1);
fix_species = (FixPropertyAtom *) modify->fix[modify->nfix-1];
restartFlag = modify->fix[modify->nfix-1]->restart_reset;
modify->add_fix(nspecies+5,newarg2);
modify->add_fix(nspecies+5,newarg2,1);
fix_species_old = (FixPropertyAtom *) modify->fix[modify->nfix-1];
if(nspecies==0) error->all(FLERR,"There are no rx species specified.");
@ -634,6 +637,9 @@ int FixRX::setmask()
void FixRX::init()
{
pairDPDE = (PairDPDfdtEnergy *) force->pair_match("dpd/fdt/energy",1);
if (pairDPDE == NULL)
pairDPDE = (PairDPDfdtEnergy *) force->pair_match("dpd/fdt/energy/kk",1);
if (pairDPDE == NULL)
error->all(FLERR,"Must use pair_style dpd/fdt/energy with fix rx");
@ -669,7 +675,17 @@ void FixRX::setup_pre_force(int vflag)
if(restartFlag){
restartFlag = 0;
} else {
}
else
{
int ode_counter[4] = {0};
UserRHSData userData;
userData.kFor = new double[nreactions];
userData.rxnRateLaw = new double[nreactions];
double *rwork = new double[8*nspecies];
if(localTempFlag){
int count = nlocal + (newton_pair ? nghost : 0);
dpdThetaLocal = new double[count];
@ -682,22 +698,27 @@ void FixRX::setup_pre_force(int vflag)
tmp = atom->dvector[ispecies][id];
atom->dvector[ispecies+nspecies][id] = tmp;
}
for (int i = 0; i < nlocal; i++)
if (mask[i] & groupbit){
// Set the reaction rate constants to zero: no reactions occur at step 0
for(int irxn=0;irxn<nreactions;irxn++)
kR[irxn] = 0.0;
userData.kFor[irxn] = 0.0;
if (odeIntegrationFlag == ODE_LAMMPS_RK4)
rk4(i,NULL);
rk4(i, rwork, &userData);
else if (odeIntegrationFlag == ODE_LAMMPS_RKF45)
rkf45(i,NULL);
rkf45(i, rwork, &userData, ode_counter);
}
// Communicate the updated momenta and velocities to all nodes
comm->forward_comm_fix(this);
if(localTempFlag) delete [] dpdThetaLocal;
delete [] userData.kFor;
delete [] userData.rxnRateLaw;
delete [] rwork;
}
}
@ -705,12 +726,13 @@ void FixRX::setup_pre_force(int vflag)
void FixRX::pre_force(int vflag)
{
TimerType timer_start = getTimeStamp();
int nlocal = atom->nlocal;
int nghost = atom->nghost;
int *mask = atom->mask;
double *dpdTheta = atom->dpdTheta;
int newton_pair = force->newton_pair;
double theta;
if(localTempFlag){
int count = nlocal + (newton_pair ? nghost : 0);
@ -722,7 +744,10 @@ void FixRX::pre_force(int vflag)
TimerType timer_localTemperature = getTimeStamp();
// Zero the counters for the ODE solvers.
this->nSteps = this->nIters = this->nFuncs = this->nFails = 0;
int nSteps = 0;
int nIters = 0;
int nFuncs = 0;
int nFails = 0;
if (odeIntegrationFlag == ODE_LAMMPS_RKF45 && diagnosticFrequency == 1)
{
@ -730,10 +755,23 @@ void FixRX::pre_force(int vflag)
memory->create( diagnosticCounterPerODE[FuncSum], nlocal, "FixRX::diagnosticCounterPerODE");
}
double *rwork = new double[8*nspecies + nreactions];
//#pragma omp parallel \
// reduction(+: nSteps, nIters, nFuncs, nFails )
{
double *rwork = new double[8*nspecies];
UserRHSData userData;
userData.kFor = new double[nreactions];
userData.rxnRateLaw = new double[nreactions];
int ode_counter[4] = { 0 };
//#pragma omp for schedule(runtime)
for (int i = 0; i < nlocal; i++)
if (mask[i] & groupbit){
{
if (mask[i] & groupbit)
{
double theta;
if (localTempFlag)
theta = dpdThetaLocal[i];
else
@ -741,24 +779,42 @@ void FixRX::pre_force(int vflag)
//Compute the reaction rate constants
for (int irxn = 0; irxn < nreactions; irxn++)
kR[irxn] = Arr[irxn]*pow(theta,nArr[irxn])*exp(-Ea[irxn]/force->boltz/theta);
userData.kFor[irxn] = Arr[irxn]*pow(theta,nArr[irxn])*exp(-Ea[irxn]/force->boltz/theta);
if (odeIntegrationFlag == ODE_LAMMPS_RK4)
rk4(i,rwork);
rk4(i, rwork, &userData);
else if (odeIntegrationFlag == ODE_LAMMPS_RKF45)
rkf45(i,rwork);
rkf45(i, rwork, &userData, ode_counter);
}
}
TimerType timer_ODE = getTimeStamp();
nSteps += ode_counter[0];
nIters += ode_counter[1];
nFuncs += ode_counter[2];
nFails += ode_counter[3];
delete [] rwork;
delete [] userData.kFor;
delete [] userData.rxnRateLaw;
} // end parallel region
TimerType timer_ODE = getTimeStamp();
// Communicate the updated momenta and velocities to all nodes
comm->forward_comm_fix(this);
if(localTempFlag) delete [] dpdThetaLocal;
TimerType timer_stop = getTimeStamp();
double time_ODE = getElapsedTime(timer_localTemperature, timer_ODE);
//printf("me= %d total= %g temp= %g ode= %g comm= %g nlocal= %d nfc= %d %d\n", comm->me,
// getElapsedTime(timer_start, timer_stop),
// getElapsedTime(timer_start, timer_localTemperature),
// getElapsedTime(timer_localTemperature, timer_ODE),
// getElapsedTime(timer_ODE, timer_stop), nlocal, nFuncs, nSteps);
// Warn the user if a failure was detected in the ODE solver.
if (nFails > 0){
char sbuf[128];
@ -954,21 +1010,15 @@ void FixRX::setupParams()
/* ---------------------------------------------------------------------- */
void FixRX::rk4(int id, double *rwork)
void FixRX::rk4(int id, double *rwork, void* v_params)
{
double *k1 = NULL;
if (rwork == NULL)
k1 = new double[6*nspecies + nreactions];
else
k1 = rwork;
double *k1 = rwork;
double *k2 = k1 + nspecies;
double *k3 = k2 + nspecies;
double *k4 = k3 + nspecies;
double *y = k4 + nspecies;
double *yp = y + nspecies;
double *dummyArray = yp + nspecies; // Passed to the rhs function.
const int numSteps = minSteps;
const double h = update->dt / double(numSteps);
@ -985,25 +1035,25 @@ void FixRX::rk4(int id, double *rwork)
for (int step = 0; step < numSteps; step++)
{
// k1
rhs(0.0,y,k1,dummyArray);
rhs(0.0,y,k1,v_params);
// k2
for (int ispecies = 0; ispecies < nspecies; ispecies++)
yp[ispecies] = y[ispecies] + 0.5*h*k1[ispecies];
rhs(0.0,yp,k2,dummyArray);
rhs(0.0,yp,k2,v_params);
// k3
for (int ispecies = 0; ispecies < nspecies; ispecies++)
yp[ispecies] = y[ispecies] + 0.5*h*k2[ispecies];
rhs(0.0,yp,k3,dummyArray);
rhs(0.0,yp,k3,v_params);
// k4
for (int ispecies = 0; ispecies < nspecies; ispecies++)
yp[ispecies] = y[ispecies] + h*k3[ispecies];
rhs(0.0,yp,k4,dummyArray);
rhs(0.0,yp,k4,v_params);
for (int ispecies = 0; ispecies < nspecies; ispecies++)
y[ispecies] += h*(k1[ispecies]/6.0 + k2[ispecies]/3.0 + k3[ispecies]/3.0 + k4[ispecies]/6.0);
@ -1018,9 +1068,6 @@ void FixRX::rk4(int id, double *rwork)
y[ispecies] = 0.0;
atom->dvector[ispecies][id] = y[ispecies];
}
if (rwork == NULL)
delete [] k1;
}
/* ---------------------------------------------------------------------- */
@ -1270,6 +1317,78 @@ void FixRX::odeDiagnostics(void)
double max_per_proc[numCounters];
double min_per_proc[numCounters];
if(1)
{
static bool firstStep = true;
static TimerType oldTimeStamp (-1);
TimerType now = getTimeStamp();
// Query the fix database and look for rx_weight for the balance fix.
int type_flag = -1;
int rx_weight_index = atom->find_custom( "rx_weight", /*0:int, 1:float*/ type_flag );
// Compute the average # of neighbors.
double averageNumNeighbors = 0;
{
const int inum = pairDPDE->list->inum;
const int* ilist = pairDPDE->list->ilist;
const int* numneigh = pairDPDE->list->numneigh;
for (int ii = 0; ii < inum; ++ii)
{
const int i = ilist[ii];
averageNumNeighbors += numneigh[i];
}
averageNumNeighbors /= inum;
}
printf("me= %d nst= %g nfc= %g time= %g nlocal= %g lmpnst= %g weight_idx= %d 1st= %d aveNeigh= %g\n", comm->me, this->diagnosticCounter[0], this->diagnosticCounter[1], this->diagnosticCounter[2], this->diagnosticCounter[3], this->diagnosticCounter[4], rx_weight_index, firstStep, averageNumNeighbors);
if (rx_weight_index != -1 && !firstStep && 0)
{
double *rx_weight = atom->dvector[rx_weight_index];
const int nlocal = atom->nlocal;
const int *mask = atom->mask;
if (odeIntegrationFlag == ODE_LAMMPS_RKF45 && diagnosticFrequency == 1)
{
const double total_time = getElapsedTime( oldTimeStamp, now );
const double fixrx_time = this->diagnosticCounter[TimeSum];
const double time_ratio = fixrx_time / total_time;
double tsum = 0.0;
double tmin = 100000, tmax = 0;
for (int i = 0; i < nlocal; ++i)
if (mask[i] & groupbit)
{
double nfunc_ratio = double( diagnosticCounterPerODE[FuncSum][i] ) / diagnosticCounter[FuncSum];
rx_weight[i] = nfunc_ratio * fixrx_time + (total_time - fixrx_time) / nlocal;
tmin = fmin( tmin, rx_weight[i] );
tmax = fmax( tmax, rx_weight[i] );
tsum += rx_weight[i];
//rx_weight[i] = (double) diagnosticCounterPerODE[FuncSum][i];
}
printf("me= %d total= %g fixrx= %g ratio= %g tsum= %g %g %g %g\n", comm->me, total_time, fixrx_time, time_ratio, tsum, (total_time - fixrx_time) / nlocal, tmin, tmax);
}
else
{
error->warning(FLERR, "Dynamic load balancing enabled but per-atom weights not available.");
for (int i = 0; i < nlocal; ++i)
if (mask[i] & groupbit)
rx_weight[i] = 1.0;
}
}
firstStep = false;
oldTimeStamp = now;
}
// Compute counters per dpd time-step.
for (int i = 0; i < numCounters; ++i){
my_vals[i] = this->diagnosticCounter[i] / nTimes;
@ -1343,7 +1462,7 @@ void FixRX::odeDiagnostics(void)
if (screen) fprintf(screen,"%s\n", smesg); \
if (logfile) fprintf(logfile,"%s\n", smesg); }
sprintf(smesg, "FixRX::ODE Diagnostics: # of steps |# of rhs evals| run-time (sec)");
sprintf(smesg, "FixRX::ODE Diagnostics: # of iters |# of rhs evals| run-time (sec) | # atoms");
print_mesg(smesg);
sprintf(smesg, " AVG per ODE : %-12.5g | %-12.5g | %-12.5g", avg_per_atom[0], avg_per_atom[1], avg_per_atom[2]);
@ -1365,7 +1484,7 @@ void FixRX::odeDiagnostics(void)
print_mesg(smesg);
}
sprintf(smesg, " AVG per Proc : %-12.5g | %-12.5g | %-12.5g", avg_per_proc[0], avg_per_proc[1], avg_per_proc[2]);
sprintf(smesg, " AVG per Proc : %-12.5g | %-12.5g | %-12.5g | %-12.5g", avg_per_proc[StepSum], avg_per_proc[FuncSum], avg_per_proc[TimeSum], avg_per_proc[AtomSum]);
print_mesg(smesg);
if (comm->nprocs > 1){
@ -1373,13 +1492,13 @@ void FixRX::odeDiagnostics(void)
for (int i = 0; i < numCounters; ++i)
rms_per_proc[i] = sqrt( sum_sq[i] / comm->nprocs );
sprintf(smesg, " RMS per Proc : %-12.5g | %-12.5g | %-12.5g", rms_per_proc[0], rms_per_proc[1], rms_per_proc[2]);
sprintf(smesg, " RMS per Proc : %-12.5g | %-12.5g | %-12.5g | %-12.5g", rms_per_proc[0], rms_per_proc[1], rms_per_proc[2], rms_per_proc[AtomSum]);
print_mesg(smesg);
sprintf(smesg, " MAX per Proc : %-12.5g | %-12.5g | %-12.5g", max_per_proc[0], max_per_proc[1], max_per_proc[2]);
sprintf(smesg, " MAX per Proc : %-12.5g | %-12.5g | %-12.5g | %-12.5g", max_per_proc[0], max_per_proc[1], max_per_proc[2], max_per_proc[AtomSum]);
print_mesg(smesg);
sprintf(smesg, " MIN per Proc : %-12.5g | %-12.5g | %-12.5g", min_per_proc[0], min_per_proc[1], min_per_proc[2]);
sprintf(smesg, " MIN per Proc : %-12.5g | %-12.5g | %-12.5g | %-12.5g", min_per_proc[0], min_per_proc[1], min_per_proc[2], min_per_proc[AtomSum]);
print_mesg(smesg);
}
@ -1399,7 +1518,7 @@ void FixRX::odeDiagnostics(void)
return;
}
void FixRX::rkf45(int id, double *rwork)
void FixRX::rkf45(int id, double *rwork, void *v_param, int ode_counter[])
{
// Rounding coefficient.
const double uround = DBL_EPSILON;
@ -1408,12 +1527,7 @@ void FixRX::rkf45(int id, double *rwork)
const double adaption_limit = 4.0;
//double *y = new double[8*nspecies + nreactions];
double *y = NULL;
if (rwork == NULL)
y = new double[8*nspecies + nreactions];
else
y = rwork;
double *rhstmp = y + 8*nspecies;
double *y = rwork;
const int neq = nspecies;
@ -1450,7 +1564,7 @@ void FixRX::rkf45(int id, double *rwork)
if (h < h_min){
//fprintf(stderr,"hin not implemented yet\n");
//exit(-1);
nfe = rkf45_h0 (neq, t, t_stop, h_min, h_max, h, y, y + neq, rhstmp);
nfe = rkf45_h0 (neq, t, t_stop, h_min, h_max, h, y, y + neq, v_param);
}
//printf("t= %e t_stop= %e h= %e\n", t, t_stop, h);
@ -1461,7 +1575,7 @@ void FixRX::rkf45(int id, double *rwork)
double *eout = yout + neq;
// Take a trial step.
rkf45_step (neq, h, y, yout, eout, rhstmp);
rkf45_step (neq, h, y, yout, eout, v_param);
// Estimate the solution error.
// ... weighted 2-norm of the error.
@ -1509,16 +1623,17 @@ void FixRX::rkf45(int id, double *rwork)
if (maxIters && nit > maxIters){
//fprintf(stderr,"atom[%d] took too many iterations in rkf45 %d %e %e\n", id, nit, t, t_stop);
nFails ++;
//nFails ++;
ode_counter[3] ++;
break;
// We should set an error here so that the solution is not used!
}
} // end while
nSteps += nst;
nIters += nit;
nFuncs += nfe;
ode_counter[0] += nst;
ode_counter[1] += nit;
ode_counter[2] += nfe;
//if (diagnosticFrequency == 1 && diagnosticCounterPerODE[StepSum] != NULL)
if (diagnosticCounterPerODE[StepSum] != NULL){
@ -1535,9 +1650,6 @@ void FixRX::rkf45(int id, double *rwork)
y[ispecies] = 0.0;
atom->dvector[ispecies][id] = y[ispecies];
}
if (rwork == NULL)
delete [] y;
}
/* ---------------------------------------------------------------------- */
@ -1555,21 +1667,23 @@ int FixRX::rhs(double t, const double *y, double *dydt, void *params)
int FixRX::rhs_dense(double t, const double *y, double *dydt, void *params)
{
double rxnRateLawForward;
double *rxnRateLaw = (double *) params;
double VDPD = domain->xprd * domain->yprd * domain->zprd / atom->natoms;
double concentration;
int nspecies = atom->nspecies_dpd;
UserRHSData *userData = (UserRHSData *) params;
double *rxnRateLaw = userData->rxnRateLaw;
double *kFor = userData->kFor;
const double VDPD = domain->xprd * domain->yprd * domain->zprd / atom->natoms;
const int nspecies = atom->nspecies_dpd;
for(int ispecies=0; ispecies<nspecies; ispecies++)
dydt[ispecies] = 0.0;
// Construct the reaction rate laws
for(int jrxn=0; jrxn<nreactions; jrxn++){
rxnRateLawForward = kR[jrxn];
double rxnRateLawForward = kFor[jrxn];
for(int ispecies=0; ispecies<nspecies; ispecies++){
concentration = y[ispecies]/VDPD;
const double concentration = y[ispecies]/VDPD;
rxnRateLawForward *= pow(concentration,stoichReactants[jrxn][ispecies]);
}
rxnRateLaw[jrxn] = rxnRateLawForward;
@ -1587,13 +1701,13 @@ int FixRX::rhs_dense(double t, const double *y, double *dydt, void *params)
int FixRX::rhs_sparse(double t, const double *y, double *dydt, void *v_params) const
{
double *_rxnRateLaw = (double *) v_params;
UserRHSData *userData = (UserRHSData *) v_params;
const double VDPD = domain->xprd * domain->yprd * domain->zprd / atom->natoms;
#define kFor (this->kR)
#define kFor (userData->kFor)
#define kRev (NULL)
#define rxnRateLaw (_rxnRateLaw)
#define rxnRateLaw (userData->rxnRateLaw)
#define conc (dydt)
#define maxReactants (this->sparseKinetics_maxReactants)
#define maxSpecies (this->sparseKinetics_maxSpecies)

View File

@ -66,14 +66,14 @@ class FixRX : public Fix {
double *kR;
//!< Classic Runge-Kutta 4th-order stepper.
void rk4(int,double*);
void rk4(int, double*, void*);
//!< Runge-Kutta-Fehlberg ODE Solver.
void rkf45(int,double*);
void rkf45(int, double*, void*, int ode_counter[]);
//!< Runge-Kutta-Fehlberg ODE stepper function.
void rkf45_step (const int neq, const double h, double y[], double y_out[],
double rwk[], void* v_param);
double rwk[], void *);
//!< Initial step size estimation for the Runge-Kutta-Fehlberg ODE solver.
int rkf45_h0 (const int neq, const double t, const double t_stop,
@ -90,6 +90,13 @@ class FixRX : public Fix {
int rhs(double, const double *, double *, void *);
int rhs_dense (double, const double *, double *, void *);
// User-defined data container needed in rhs.
struct UserRHSData
{
double *kFor;
double *rxnRateLaw;
};
// Sparse stoichiometric matrix storage format and methods.
bool useSparseKinetics;
//SparseKinetics sparseKinetics;
@ -116,10 +123,10 @@ class FixRX : public Fix {
double relTol, absTol; //!< Relative and absolute tolerances for the ODE solver(s).
// ODE Diagnostics
int nSteps; //!< # of accepted steps taken over all atoms.
int nIters; //!< # of attemped steps for all atoms.
int nFuncs; //!< # of RHS evaluations for all atoms.
int nFails; //!< # of ODE systems that failed (for some reason).
//int nSteps; //!< # of accepted steps taken over all atoms.
//int nIters; //!< # of attempted steps for all atoms.
//int nFuncs; //!< # of RHS evaluations for all atoms.
//int nFails; //!< # of ODE systems that failed (for some reason).
int diagnosticFrequency; //!< Frequency (LMP steps) that run-time diagnostics will be printed to the log.
enum { numDiagnosticCounters = 5 };

View File

@ -55,6 +55,7 @@
#include "pair_dpd_fdt.h"
#include "pair_dpd_fdt_energy.h"
#include "pair.h"
#include "npair_half_bin_newton_ssa.h"
#include "citeme.h"
using namespace LAMMPS_NS;
@ -95,6 +96,8 @@ FixShardlow::FixShardlow(LAMMPS *lmp, int narg, char **arg) :
pairDPDE = NULL;
pairDPD = (PairDPDfdt *) force->pair_match("dpd/fdt",1);
pairDPDE = (PairDPDfdtEnergy *) force->pair_match("dpd/fdt/energy",1);
if (pairDPDE == NULL)
pairDPDE = (PairDPDfdtEnergy *) force->pair_match("dpd/fdt/energy/kk",1);
if(pairDPDE){
comm_forward = 3;
@ -107,26 +110,12 @@ FixShardlow::FixShardlow(LAMMPS *lmp, int narg, char **arg) :
if(pairDPD == NULL && pairDPDE == NULL)
error->all(FLERR,"Must use pair_style dpd/fdt or dpd/fdt/energy with fix shardlow");
// Setup the ssaAIR array
atom->ssaAIR = NULL;
grow_arrays(atom->nmax);
memset(atom->ssaAIR, 0, sizeof(int)*atom->nlocal);
// Setup callbacks for maintaining atom->ssaAIR[]
atom->add_callback(0); // grow (aka exchange)
atom->add_callback(1); // restart
atom->add_callback(2); // border
}
/* ---------------------------------------------------------------------- */
FixShardlow::~FixShardlow()
{
atom->delete_callback(id, 0);
atom->delete_callback(id, 1);
atom->delete_callback(id, 2);
memory->destroy(atom->ssaAIR);
}
/* ---------------------------------------------------------------------- */
@ -135,7 +124,6 @@ int FixShardlow::setmask()
{
int mask = 0;
mask |= INITIAL_INTEGRATE;
mask |= PRE_EXCHANGE | MIN_PRE_EXCHANGE;
return mask;
}
@ -146,7 +134,9 @@ void FixShardlow::init()
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->pair = 0;
neighbor->requests[irequest]->fix = 1;
neighbor->requests[irequest]->ghost = 1;
neighbor->requests[irequest]->ssa = 1;
neighbor->requests[irequest]->newton = 1; // SSA requires newton on
}
/* ---------------------------------------------------------------------- */
@ -158,27 +148,6 @@ void FixShardlow::init_list(int id, NeighList *ptr)
/* ---------------------------------------------------------------------- */
void FixShardlow::pre_exchange()
{
memset(atom->ssaAIR, 0, sizeof(int)*atom->nlocal);
}
/* ---------------------------------------------------------------------- */
void FixShardlow::setup_pre_exchange()
{
memset(atom->ssaAIR, 0, sizeof(int)*atom->nlocal);
}
/* ---------------------------------------------------------------------- */
void FixShardlow::min_pre_exchange()
{
memset(atom->ssaAIR, 0, sizeof(int)*atom->nlocal);
}
/* ---------------------------------------------------------------------- */
void FixShardlow::setup(int vflag)
{
bool fixShardlow = false;
@ -243,6 +212,10 @@ void FixShardlow::ssa_update_dpd(
const double mass_i = (rmass) ? rmass[i] : mass[itype];
const double massinv_i = 1.0 / mass_i;
#ifdef DEBUG_SSA_PAIR_CT
const int nlocal = atom->nlocal;
#endif
// Loop over Directional Neighbors only
for (int jj = 0; jj < jlen; jj++) {
int j = jlist[jj] & NEIGHMASK;
@ -252,9 +225,23 @@ void FixShardlow::ssa_update_dpd(
double dely = ytmp - x[j][1];
double delz = ztmp - x[j][2];
double rsq = delx*delx + dely*dely + delz*delz;
#ifdef DEBUG_SSA_PAIR_CT
if ((i < nlocal) && (j < nlocal)) ++(counters[0][0]);
else ++(counters[0][1]);
++(counters[0][2]);
int rsqi = rsq / 8;
if (rsqi < 0) rsqi = 0;
else if (rsqi > 31) rsqi = 31;
++(hist[rsqi]);
#endif
// NOTE: r can be 0.0 in DPD systems, so do EPSILON_SQUARED test
if ((rsq < cut2_i[jtype]) && (rsq >= EPSILON_SQUARED)) {
#ifdef DEBUG_SSA_PAIR_CT
if ((i < nlocal) && (j < nlocal)) ++(counters[1][0]);
else ++(counters[1][1]);
++(counters[1][2]);
#endif
double r = sqrt(rsq);
double rinv = 1.0/r;
double delx_rinv = delx*rinv;
@ -382,6 +369,10 @@ void FixShardlow::ssa_update_dpde(
const double massinv_i = 1.0 / mass_i;
const double mass_i_div_neg4_ftm2v = mass_i*(-0.25)/ftm2v;
#ifdef DEBUG_SSA_PAIR_CT
const int nlocal = atom->nlocal;
#endif
// Loop over Directional Neighbors only
for (int jj = 0; jj < jlen; jj++) {
int j = jlist[jj] & NEIGHMASK;
@ -391,9 +382,23 @@ void FixShardlow::ssa_update_dpde(
double dely = ytmp - x[j][1];
double delz = ztmp - x[j][2];
double rsq = delx*delx + dely*dely + delz*delz;
#ifdef DEBUG_SSA_PAIR_CT
if ((i < nlocal) && (j < nlocal)) ++(counters[0][0]);
else ++(counters[0][1]);
++(counters[0][2]);
int rsqi = rsq / 8;
if (rsqi < 0) rsqi = 0;
else if (rsqi > 31) rsqi = 31;
++(hist[rsqi]);
#endif
// NOTE: r can be 0.0 in DPD systems, so do EPSILON_SQUARED test
if ((rsq < cut2_i[jtype]) && (rsq >= EPSILON_SQUARED)) {
#ifdef DEBUG_SSA_PAIR_CT
if ((i < nlocal) && (j < nlocal)) ++(counters[1][0]);
else ++(counters[1][1]);
++(counters[1][2]);
#endif
double r = sqrt(rsq);
double rinv = 1.0/r;
double delx_rinv = delx*rinv;
@ -518,7 +523,19 @@ void FixShardlow::initial_integrate(int vflag)
error->all(FLERR,"Fix shardlow does not yet support triclinic geometries");
if(rcut >= bbx || rcut >= bby || rcut>= bbz )
error->all(FLERR,"Shardlow algorithm requires sub-domain length > 2*(rcut+skin). Either reduce the number of processors requested, or change the cutoff/skin\n");
{
char fmt[] = {"Shardlow algorithm requires sub-domain length > 2*(rcut+skin). Either reduce the number of processors requested, or change the cutoff/skin: rcut= %e bbx= %e bby= %e bbz= %e\n"};
char *msg = (char *) malloc(sizeof(fmt) + 4*15);
sprintf(msg, fmt, rcut, bbx, bby, bbz);
error->one(FLERR, msg);
}
#ifdef DEBUG_SSA_PAIR_CT
for (int i = 0; i < 2; ++i)
for (int j = 0; j < 3; ++j)
counters[i][j] = 0;
for (int i = 0; i < 32; ++i) hist[i] = 0;
#endif
// Allocate memory for v_t0 to hold the initial velocities for the ghosts
v_t0 = (double (*)[3]) memory->smalloc(sizeof(double)*3*nghost, "FixShardlow:v_t0");
@ -528,10 +545,37 @@ void FixShardlow::initial_integrate(int vflag)
dtsqrt = sqrt(update->dt);
//Loop over all 14 directions (8 stages)
for (airnum = 1; airnum <=8; airnum++){
NPairHalfBinNewtonSSA *np_ssa = dynamic_cast<NPairHalfBinNewtonSSA*>(list->np);
if (!np_ssa) error->one(FLERR, "NPair wasn't a NPairHalfBinNewtonSSA object");
int ssa_phaseCt = np_ssa->ssa_phaseCt;
int *ssa_phaseLen = np_ssa->ssa_phaseLen;
int **ssa_itemLoc = np_ssa->ssa_itemLoc;
int **ssa_itemLen = np_ssa->ssa_itemLen;
// process neighbors in the local AIR
for (int workPhase = 0; workPhase < ssa_phaseCt; ++workPhase) {
int workItemCt = ssa_phaseLen[workPhase];
for (int workItem = 0; workItem < workItemCt; ++workItem) {
int ct = ssa_itemLen[workPhase][workItem];
ii = ssa_itemLoc[workPhase][workItem];
while (ct-- > 0) {
int len = list->numneigh[ii];
if (len > 0) {
if (useDPDE) ssa_update_dpde(ilist[ii], list->firstneigh[ii], len);
else ssa_update_dpd(ilist[ii], list->firstneigh[ii], len);
}
ii++;
}
}
}
ii = inum;
//Loop over all 13 outward directions (7 stages)
for (airnum = 1; airnum <=7; airnum++){
int ct = list->AIRct_ssa[airnum];
if (airnum > 1) {
// Communicate the updated velocities to all nodes
comm->forward_comm_fix(this);
@ -540,24 +584,30 @@ void FixShardlow::initial_integrate(int vflag)
memset(&(atom->uCond[nlocal]), 0, sizeof(double)*nghost);
memset(&(atom->uMech[nlocal]), 0, sizeof(double)*nghost);
}
}
// Loop over neighbors of my atoms
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
int start = (airnum < 2) ? 0 : list->ndxAIR_ssa[i][airnum - 2];
int len = list->ndxAIR_ssa[i][airnum - 1] - start;
if (len > 0) {
if (useDPDE) ssa_update_dpde(i, &(list->firstneigh[i][start]), len);
else ssa_update_dpd(i, &(list->firstneigh[i][start]), len);
}
// process neighbors in this AIR
while (ct-- > 0) {
int len = list->numneigh[ii];
if (useDPDE) ssa_update_dpde(ilist[ii], list->firstneigh[ii], len);
else ssa_update_dpd(ilist[ii], list->firstneigh[ii], len);
ii++;
}
// Communicate the ghost deltas to the atom owners
if (airnum > 1) comm->reverse_comm_fix(this);
comm->reverse_comm_fix(this);
} //End Loop over all directions For airnum = Top, Top-Right, Right, Bottom-Right, Back
#ifdef DEBUG_SSA_PAIR_CT
for (int i = 0; i < 32; ++i) fprintf(stdout, "%8d", hist[i]);
fprintf(stdout, "\n%6d %6d,%6d %6d: "
,counters[0][2]
,counters[1][2]
,counters[0][1]
,counters[1][1]
);
#endif
memory->sfree(v_t0);
v_t0 = NULL;
}
@ -643,91 +693,11 @@ void FixShardlow::unpack_reverse_comm(int n, int *list, double *buf)
}
}
/* ----------------------------------------------------------------------
convert atom coords into the ssa active interaction region number
------------------------------------------------------------------------- */
int FixShardlow::coord2ssaAIR(double *x)
{
  // Classify each coordinate against this processor's sub-domain:
  //   -1 : below sublo,  0 : inside [sublo,subhi),  +1 : at or above subhi.
  // The subhi test is checked first so it wins if both tests were to hold.
  int side[3];
  for (int dim = 0; dim < 3; ++dim) {
    if (x[dim] >= domain->subhi[dim]) side[dim] = 1;
    else if (x[dim] < domain->sublo[dim]) side[dim] = -1;
    else side[dim] = 0;
  }
  const int ix = side[0];
  const int iy = side[1];
  const int iz = side[2];

  // Everything below the sub-domain in z is outside any AIR.
  if (iz < 0) return -1;

  if (iz == 0) {
    if (iy < 0) return -1;            // bottom left/middle/right
    if (iy == 0) {
      if (ix < 0) return -1;          // left atoms
      return (ix == 0) ? 0 : 3;       // locally owned atoms : right atoms
    }
    // iy > 0
    return (ix == 0) ? 2 : 4;         // top-middle : top-right and top-left atoms
  }

  // iz > 0
  if (ix == 0) return (iy == 0) ? 5 : 6;  // back : top-back and bottom-back atoms
  return (iy == 0) ? 7 : 8;               // left/right-back : back corner atoms
}
/* ---------------------------------------------------------------------- */
// Grow the per-atom atom->ssaAIR array through memory->grow(), passing nmax
// as the requested size and "fix_shardlow:ssaAIR" as the allocation label.
void FixShardlow::grow_arrays(int nmax)
{
memory->grow(atom->ssaAIR,nmax,"fix_shardlow:ssaAIR");
}
// Copy the ssaAIR value of atom i onto atom j.
// The delflag argument is not used by this implementation.
void FixShardlow::copy_arrays(int i, int j, int delflag)
{
atom->ssaAIR[j] = atom->ssaAIR[i];
}
// Reset the ssaAIR value of atom i to 0, the value coord2ssaAIR() returns
// for a locally owned atom; the coordinate lookup itself is left commented out.
void FixShardlow::set_arrays(int i)
{
atom->ssaAIR[i] = 0; /* coord2ssaAIR(x[i]) */
}
int FixShardlow::pack_border(int n, int *list, double *buf)
{
  // Each atom named in list[0..n-1] whose ssaAIR value is still 0 gets
  // promoted to 1: it is not purely local anymore.
  // Nothing is written to buf, and the return value is always 0.
  for (int m = 0; m < n; ++m) {
    int &airTag = atom->ssaAIR[list[m]];
    if (airTag == 0) airTag = 1;
  }
  return 0;
}
int FixShardlow::unpack_border(int n, int first, double *buf)
{
  // Recompute the ssaAIR value of atoms first .. first+n-1 from their
  // coordinates. buf is not read, and the return value is always 0.
  const int last = first + n;
  int idx = first;
  while (idx < last) {
    atom->ssaAIR[idx] = coord2ssaAIR(atom->x[idx]);
    ++idx;
  }
  return 0;
}
// Reset the ssaAIR value of atom i to 0 (the value coord2ssaAIR() returns for
// a locally owned atom). buf is not read and 0 is returned
// (presumably the number of buffer values consumed -- confirm against caller).
int FixShardlow::unpack_exchange(int i, double *buf)
{
atom->ssaAIR[i] = 0; /* coord2ssaAIR(x[i]) */
return 0;
}
// Reset the ssaAIR value of atom i to 0 (the value coord2ssaAIR() returns for
// a locally owned atom). The nth argument is not used by this implementation.
void FixShardlow::unpack_restart(int i, int nth)
{
atom->ssaAIR[i] = 0; /* coord2ssaAIR(x[i]) */
}
// Return this fix's memory footprint as a double: the memory->usage() figure
// for atom->ssaAIR over atom->nmax entries, plus three doubles per ghost atom
// for the v_t0 velocity buffer.
double FixShardlow::memory_usage()
{
double bytes = 0.0;
bytes += memory->usage(atom->ssaAIR,atom->nmax);
bytes += sizeof(double)*3*atom->nghost; // v_t0[]
return bytes;
}

View File

@ -35,21 +35,14 @@ class FixShardlow : public Fix {
virtual void init_list(int, class NeighList *);
virtual void setup(int);
virtual void initial_integrate(int);
void setup_pre_exchange();
void pre_exchange();
void min_pre_exchange();
void grow_arrays(int);
void copy_arrays(int, int, int);
void set_arrays(int);
int pack_border(int, int *, double *);
int unpack_border(int, int, double *);
int unpack_exchange(int, double *);
void unpack_restart(int, int);
double memory_usage();
#ifdef DEBUG_SSA_PAIR_CT
int counters[2][3];
int hist[32];
#endif
protected:
int pack_reverse_comm(int, int, double *);
void unpack_reverse_comm(int, int *, double *);
@ -63,7 +56,6 @@ class FixShardlow : public Fix {
private:
double dtsqrt; // = sqrt(update->dt);
int coord2ssaAIR(double *); // map atom coord to an AIR number
void ssa_update_dpd(int, int *, int); // Constant Temperature
void ssa_update_dpde(int, int *, int); // Constant Energy

View File

@ -20,6 +20,7 @@
#include "atom.h"
#include "update.h"
#include "group.h"
#include "domain.h"
#include "memory.h"
#include "error.h"
@ -29,24 +30,19 @@ using namespace LAMMPS_NS;
NBinSSA::NBinSSA(LAMMPS *lmp) : NBinStandard(lmp)
{
maxbin_ssa = 0;
bins_ssa = NULL;
maxhead_ssa = 0;
binhead_ssa = NULL;
gbinhead_ssa = NULL;
for (int i = 0; i < 8; i++) {
gairhead_ssa[i] = -1;
}
}
NBinSSA::~NBinSSA()
{
memory->destroy(bins_ssa);
memory->destroy(binhead_ssa);
memory->destroy(gbinhead_ssa);
}
/* ----------------------------------------------------------------------
bin owned and ghost atoms for the Shardlow Splitting Algorithm (SSA)
local atoms are in distinct bins (binhead_ssa) from the ghosts
ghost atoms are in distinct bins (gbinhead_ssa) from the locals
local atoms are in distinct bins (binhead[]) from the ghosts
ghost atoms are "binned" in gairhead_ssa[] instead
ghosts which are not in an Active Interaction Region (AIR) are skipped
------------------------------------------------------------------------- */
@ -58,13 +54,19 @@ void NBinSSA::bin_atoms()
if (includegroup) nlocal = atom->nfirst;
double **x = atom->x;
int *mask = atom->mask;
int *ssaAIR = atom->ssaAIR;
int xbin,ybin,zbin;
last_bin = update->ntimestep;
bboxlo_[0] = bboxlo[0]; bboxlo_[1] = bboxlo[1]; bboxlo_[2] = bboxlo[2];
bboxhi_[0] = bboxhi[0]; bboxhi_[1] = bboxhi[1]; bboxhi_[2] = bboxhi[2];
for (i = 0; i < 8; i++) {
gairhead_ssa[i] = -1;
}
for (i = 0; i < mbins; i++) {
gbinhead_ssa[i] = -1;
binhead_ssa[i] = -1;
binhead[i] = -1;
}
// bin in reverse order so linked list will be in forward order
@ -73,29 +75,34 @@ void NBinSSA::bin_atoms()
int bitmask = group->bitmask[includegroup];
int nowned = atom->nlocal; // NOTE: nlocal was set to atom->nfirst above
for (i = nall-1; i >= nowned; i--) {
if (ssaAIR[i] < 2) continue; // skip ghost atoms not in AIR
ibin = coord2ssaAIR(x[i]);
if (ibin < 1) continue; // skip ghost atoms not in AIR
if (mask[i] & bitmask) {
ibin = coord2bin(x[i]);
atom2bin[i] = ibin;
bins_ssa[i] = gbinhead_ssa[ibin];
gbinhead_ssa[ibin] = i;
bins[i] = gairhead_ssa[ibin];
gairhead_ssa[ibin] = i;
}
}
} else {
for (i = nall-1; i >= nlocal; i--) {
if (ssaAIR[i] < 2) continue; // skip ghost atoms not in AIR
ibin = coord2bin(x[i]);
atom2bin[i] = ibin;
bins_ssa[i] = gbinhead_ssa[ibin];
gbinhead_ssa[ibin] = i;
ibin = coord2ssaAIR(x[i]);
if (ibin < 1) continue; // skip ghost atoms not in AIR
bins[i] = gairhead_ssa[ibin];
gairhead_ssa[ibin] = i;
}
}
for (i = nlocal-1; i >= 0; i--) {
ibin = coord2bin(x[i]);
atom2bin[i] = ibin;
bins_ssa[i] = binhead_ssa[ibin];
binhead_ssa[ibin] = i;
ibin = coord2bin(x[i][0], x[i][1], x[i][2], xbin, ybin, zbin);
// Find the bounding box of the local atoms in the bins
if (xbin < lbinxlo) lbinxlo = xbin;
if (xbin >= lbinxhi) lbinxhi = xbin + 1;
if (ybin < lbinylo) lbinylo = ybin;
if (ybin >= lbinyhi) lbinyhi = ybin + 1;
if (zbin < lbinzlo) lbinzlo = zbin;
if (zbin >= lbinzhi) lbinzhi = zbin + 1;
bins[i] = binhead[ibin];
binhead[ibin] = i;
}
}
/* ---------------------------------------------------------------------- */
@ -104,19 +111,13 @@ void NBinSSA::bin_atoms_setup(int nall)
{
NBinStandard::bin_atoms_setup(nall); // Setup the parent class's data too
if (mbins > maxhead_ssa) {
maxhead_ssa = mbins;
memory->destroy(gbinhead_ssa);
memory->destroy(binhead_ssa);
memory->create(binhead_ssa,maxhead_ssa,"binhead_ssa");
memory->create(gbinhead_ssa,maxhead_ssa,"gbinhead_ssa");
}
if (nall > maxbin_ssa) {
maxbin_ssa = nall;
memory->destroy(bins_ssa);
memory->create(bins_ssa,maxbin_ssa,"bins_ssa");
}
// Clear the local bin extent bounding box.
lbinxlo = mbinx - 1; // Safe to = stencil->sx + 1
lbinylo = mbiny - 1; // Safe to = stencil->sy + 1
lbinzlo = mbinz - 1; // Safe to = stencil->sz + 1
lbinxhi = 0; // Safe to = mbinx - stencil->sx - 1
lbinyhi = 0; // Safe to = mbiny - stencil->sy - 1
lbinzhi = 0; // Safe to = mbinz - stencil->sz - 1
}
/* ---------------------------------------------------------------------- */
@ -125,10 +126,39 @@ bigint NBinSSA::memory_usage()
{
bigint bytes = NBinStandard::memory_usage(); // Count the parent's usage too
if (maxbin_ssa) bytes += memory->usage(bins_ssa,maxbin_ssa);
if (maxhead_ssa) {
bytes += memory->usage(binhead_ssa,maxhead_ssa);
bytes += memory->usage(gbinhead_ssa,maxhead_ssa);
}
return bytes;
}
/* ----------------------------------------------------------------------
convert atom coords into the ssa active interaction region number
------------------------------------------------------------------------- */
int NBinSSA::coord2ssaAIR(const double *x)
{
  // Per-dimension position relative to this processor's sub-domain:
  //   -1 : below sublo,  0 : inside [sublo,subhi),  +1 : at or above subhi.
  // The subhi test is checked first so it wins if both tests were to hold.
  int rel[3];
  for (int d = 0; d < 3; ++d) {
    if (x[d] >= domain->subhi[d]) rel[d] = 1;
    else if (x[d] < domain->sublo[d]) rel[d] = -1;
    else rel[d] = 0;
  }
  const int ix = rel[0];
  const int iy = rel[1];
  const int iz = rel[2];

  // Below the sub-domain in z: never part of an AIR.
  if (iz < 0) return -1;

  if (iz > 0) {
    if (ix == 0) return (iy == 0) ? 4 : 5;  // back : top-back and bottom-back atoms
    return (iy == 0) ? 6 : 7;               // left/right-back : back corner atoms
  }

  // iz == 0
  if (iy < 0) return -1;                    // bottom left/middle/right
  if (iy > 0) return (ix == 0) ? 1 : 3;     // top-middle : top-right and top-left atoms

  // iz == 0 and iy == 0
  if (ix < 0) return -1;                    // left atoms
  return (ix == 0) ? 0 : 2;                 // locally owned atoms : right atoms
}

View File

@ -29,11 +29,15 @@ namespace LAMMPS_NS {
class NBinSSA : public NBinStandard {
public:
int *bins_ssa; // index of next atom in each bin
int maxbin_ssa; // size of bins_ssa array
int *binhead_ssa; // index of 1st local atom in each bin
int *gbinhead_ssa; // index of 1st ghost atom in each bin
int maxhead_ssa; // size of binhead_ssa and gbinhead_ssa arrays
int gairhead_ssa[8]; // index of 1st ghost atom in each AIR
// Bounds of the local atoms in the binhead array
int lbinxlo; // lowest local bin x-dim coordinate
int lbinylo; // lowest local bin y-dim coordinate
int lbinzlo; // lowest local bin z-dim coordinate
int lbinxhi; // highest local bin x-dim coordinate
int lbinyhi; // highest local bin y-dim coordinate
int lbinzhi; // highest local bin z-dim coordinate
NBinSSA(class LAMMPS *);
~NBinSSA();
@ -42,6 +46,115 @@ class NBinSSA : public NBinStandard {
void bin_atoms();
bigint memory_usage();
inline
int coord2bin(const double & x,const double & y,const double & z) const
{
  // Map a coordinate to its flattened bin index.
  // Per dimension there are three cases:
  //   at/above bboxhi_ : measure from the upper bound and offset by nbin*
  //   inside the box   : measure from bboxlo_, clamped to the last bin
  //   below bboxlo_    : the truncated value is shifted down by one
  int ix;
  if (x >= bboxhi_[0]) {
    ix = nbinx + static_cast<int> ((x-bboxhi_[0])*bininvx);
  } else if (x >= bboxlo_[0]) {
    const int rawx = static_cast<int> ((x-bboxlo_[0])*bininvx);
    ix = MIN(rawx,nbinx-1);
  } else {
    ix = static_cast<int> ((x-bboxlo_[0])*bininvx) - 1;
  }

  int iy;
  if (y >= bboxhi_[1]) {
    iy = nbiny + static_cast<int> ((y-bboxhi_[1])*bininvy);
  } else if (y >= bboxlo_[1]) {
    const int rawy = static_cast<int> ((y-bboxlo_[1])*bininvy);
    iy = MIN(rawy,nbiny-1);
  } else {
    iy = static_cast<int> ((y-bboxlo_[1])*bininvy) - 1;
  }

  int iz;
  if (z >= bboxhi_[2]) {
    iz = nbinz + static_cast<int> ((z-bboxhi_[2])*bininvz);
  } else if (z >= bboxlo_[2]) {
    const int rawz = static_cast<int> ((z-bboxlo_[2])*bininvz);
    iz = MIN(rawz,nbinz-1);
  } else {
    iz = static_cast<int> ((z-bboxlo_[2])*bininvz) - 1;
  }

  // Shift by the mbin*lo offsets and flatten with x varying fastest.
  const int bx = ix - mbinxlo;
  const int by = iy - mbinylo;
  const int bz = iz - mbinzlo;
  return bz*mbiny*mbinx + by*mbinx + bx;
}
inline
int coord2bin(const double & x,const double & y,const double & z, int* i) const
{
  // Map a coordinate to its flattened bin index and also store the
  // per-dimension (offset-shifted) bin coordinates in i[0], i[1], i[2].
  // Per dimension there are three cases:
  //   at/above bboxhi_ : measure from the upper bound and offset by nbin*
  //   inside the box   : measure from bboxlo_, clamped to the last bin
  //   below bboxlo_    : the truncated value is shifted down by one
  int ix;
  if (x >= bboxhi_[0]) {
    ix = nbinx + static_cast<int> ((x-bboxhi_[0])*bininvx);
  } else if (x >= bboxlo_[0]) {
    const int rawx = static_cast<int> ((x-bboxlo_[0])*bininvx);
    ix = MIN(rawx,nbinx-1);
  } else {
    ix = static_cast<int> ((x-bboxlo_[0])*bininvx) - 1;
  }

  int iy;
  if (y >= bboxhi_[1]) {
    iy = nbiny + static_cast<int> ((y-bboxhi_[1])*bininvy);
  } else if (y >= bboxlo_[1]) {
    const int rawy = static_cast<int> ((y-bboxlo_[1])*bininvy);
    iy = MIN(rawy,nbiny-1);
  } else {
    iy = static_cast<int> ((y-bboxlo_[1])*bininvy) - 1;
  }

  int iz;
  if (z >= bboxhi_[2]) {
    iz = nbinz + static_cast<int> ((z-bboxhi_[2])*bininvz);
  } else if (z >= bboxlo_[2]) {
    const int rawz = static_cast<int> ((z-bboxlo_[2])*bininvz);
    iz = MIN(rawz,nbinz-1);
  } else {
    iz = static_cast<int> ((z-bboxlo_[2])*bininvz) - 1;
  }

  // Report the shifted per-dimension bin coordinates to the caller.
  const int bx = ix - mbinxlo;
  const int by = iy - mbinylo;
  const int bz = iz - mbinzlo;
  i[0] = bx;
  i[1] = by;
  i[2] = bz;

  // Flatten with x varying fastest.
  return bz*mbiny*mbinx + by*mbinx + bx;
}
inline
int coord2bin(const double & x,const double & y,const double & z, int &ixo, int &iyo, int &izo) const
{
  // Map a coordinate to its flattened bin index and also return the
  // per-dimension (offset-shifted) bin coordinates through ixo, iyo, izo.
  // Per dimension there are three cases:
  //   at/above bboxhi_ : measure from the upper bound and offset by nbin*
  //   inside the box   : measure from bboxlo_, clamped to the last bin
  //   below bboxlo_    : the truncated value is shifted down by one
  int ix;
  if (x >= bboxhi_[0]) {
    ix = nbinx + static_cast<int> ((x-bboxhi_[0])*bininvx);
  } else if (x >= bboxlo_[0]) {
    const int rawx = static_cast<int> ((x-bboxlo_[0])*bininvx);
    ix = MIN(rawx,nbinx-1);
  } else {
    ix = static_cast<int> ((x-bboxlo_[0])*bininvx) - 1;
  }

  int iy;
  if (y >= bboxhi_[1]) {
    iy = nbiny + static_cast<int> ((y-bboxhi_[1])*bininvy);
  } else if (y >= bboxlo_[1]) {
    const int rawy = static_cast<int> ((y-bboxlo_[1])*bininvy);
    iy = MIN(rawy,nbiny-1);
  } else {
    iy = static_cast<int> ((y-bboxlo_[1])*bininvy) - 1;
  }

  int iz;
  if (z >= bboxhi_[2]) {
    iz = nbinz + static_cast<int> ((z-bboxhi_[2])*bininvz);
  } else if (z >= bboxlo_[2]) {
    const int rawz = static_cast<int> ((z-bboxlo_[2])*bininvz);
    iz = MIN(rawz,nbinz-1);
  } else {
    iz = static_cast<int> ((z-bboxlo_[2])*bininvz) - 1;
  }

  // Hand the shifted per-dimension bin coordinates back to the caller.
  ixo = ix - mbinxlo;
  iyo = iy - mbinylo;
  izo = iz - mbinzlo;

  // Flatten with x varying fastest.
  return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
private:
int coord2ssaAIR(const double *); // map atom coord to an AIR number
double bboxlo_[3],bboxhi_[3];
};
}

View File

@ -32,15 +32,29 @@
using namespace LAMMPS_NS;
// allocate space for static class variable
// prototype for non-class function
/* ---------------------------------------------------------------------- */
static int *ssaAIRptr;
static int cmp_ssaAIR(const void *, const void *);
// Start with an empty SSA work plan: zero phases, zero recorded capacities,
// and no phase/item arrays allocated yet.
NPairHalfBinNewtonSSA::NPairHalfBinNewtonSSA(LAMMPS *lmp) :
  NPair(lmp),
  ssa_phaseCt(0),
  ssa_phaseLen(NULL),
  ssa_itemLoc(NULL),
  ssa_itemLen(NULL),
  ssa_maxPhaseCt(0),
  ssa_maxPhaseLen(0)
{
}
/* ---------------------------------------------------------------------- */
NPairHalfBinNewtonSSA::NPairHalfBinNewtonSSA(LAMMPS *lmp) : NPair(lmp) {}
NPairHalfBinNewtonSSA::~NPairHalfBinNewtonSSA()
{
  // Hand the work plan arrays back to the memory manager ...
  memory->destroy(ssa_itemLen);
  memory->destroy(ssa_itemLoc);
  memory->destroy(ssa_phaseLen);

  // ... and zero the bookkeeping that described them.
  ssa_phaseCt = 0;
  ssa_maxPhaseLen = 0;
  ssa_maxPhaseCt = 0;
}
/* ----------------------------------------------------------------------
binned neighbor list construction with full Newton's 3rd law
@ -65,7 +79,6 @@ void NPairHalfBinNewtonSSA::build(NeighList *list)
int **nspecial = atom->nspecial;
int nlocal = atom->nlocal;
if (includegroup) nlocal = atom->nfirst;
int *ssaAIR = atom->ssaAIR;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
@ -81,26 +94,83 @@ void NPairHalfBinNewtonSSA::build(NeighList *list)
NStencilSSA *ns_ssa = dynamic_cast<NStencilSSA*>(ns);
if (!ns_ssa) error->one(FLERR, "NStencil wasn't a NStencilSSA object");
int nstencil_half = ns_ssa->nstencil_half;
int *nstencil_ssa = &(ns_ssa->nstencil_ssa[0]);
int nstencil_full = ns_ssa->nstencil;
NBinSSA *nb_ssa = dynamic_cast<NBinSSA*>(nb);
if (!nb_ssa) error->one(FLERR, "NBin wasn't a NBinSSA object");
int *bins_ssa = nb_ssa->bins_ssa;
int *binhead_ssa = nb_ssa->binhead_ssa;
int *gbinhead_ssa = nb_ssa->gbinhead_ssa;
int *bins = nb_ssa->bins;
int *binhead = nb_ssa->binhead;
int *gairhead_ssa = &(nb_ssa->gairhead_ssa[0]);
int inum = 0;
int gnum = 0;
int xbin,ybin,zbin,xbin2,ybin2,zbin2;
int **stencilxyz = ns_ssa->stencilxyz;
int lbinxlo = nb_ssa->lbinxlo;
int lbinxhi = nb_ssa->lbinxhi;
int lbinylo = nb_ssa->lbinylo;
int lbinyhi = nb_ssa->lbinyhi;
int lbinzlo = nb_ssa->lbinzlo;
int lbinzhi = nb_ssa->lbinzhi;
int sx1 = ns_ssa->sx + 1;
int sy1 = ns_ssa->sy + 1;
int sz1 = ns_ssa->sz + 1;
ssa_phaseCt = sz1*sy1*sx1;
xbin = (lbinxhi - lbinxlo + sx1 - 1) / sx1 + 1;
ybin = (lbinyhi - lbinylo + sy1 - 1) / sy1 + 1;
zbin = (lbinzhi - lbinzlo + sz1 - 1) / sz1 + 1;
int phaseLenEstimate = xbin*ybin*zbin;
if (ssa_phaseCt > ssa_maxPhaseCt) {
ssa_maxPhaseCt = ssa_phaseCt;
ssa_maxPhaseLen = 0;
memory->destroy(ssa_phaseLen);
memory->destroy(ssa_itemLoc);
memory->destroy(ssa_itemLen);
memory->create(ssa_phaseLen,ssa_maxPhaseCt,"NPairHalfBinNewtonSSA:ssa_phaseLen");
}
if (phaseLenEstimate > ssa_maxPhaseLen) {
ssa_maxPhaseLen = phaseLenEstimate;
memory->destroy(ssa_itemLoc);
memory->destroy(ssa_itemLen);
memory->create(ssa_itemLoc,ssa_maxPhaseCt,ssa_maxPhaseLen,"NPairHalfBinNewtonSSA:ssa_itemLoc");
memory->create(ssa_itemLen,ssa_maxPhaseCt,ssa_maxPhaseLen,"NPairHalfBinNewtonSSA:ssa_itemLen");
}
ipage->reset();
// loop over owned atoms, storing half of the neighbors
int workPhase = 0;
// loop over bins with local atoms, storing half of the neighbors
for (int zoff = ns_ssa->sz; zoff >= 0; --zoff) {
for (int yoff = ns_ssa->sy; yoff >= 0; --yoff) {
for (int xoff = ns_ssa->sx; xoff >= 0; --xoff) {
int workItem = 0;
for (zbin = lbinzlo + zoff; zbin < lbinzhi; zbin += sz1) {
for (ybin = lbinylo + yoff - ns_ssa->sy; ybin < lbinyhi; ybin += sy1) {
for (xbin = lbinxlo + xoff - ns_ssa->sx; xbin < lbinxhi; xbin += sx1) {
if (workItem >= phaseLenEstimate) error->one(FLERR,"phaseLenEstimate was too small");
ssa_itemLoc[workPhase][workItem] = inum; // record where workItem starts in ilist
for (i = 0; i < nlocal; i++) {
int AIRct[8] = { 0 };
for (int subphase = 0; subphase < 4; subphase++) {
int s_ybin = ybin + ((subphase & 0x2) ? ns_ssa->sy : 0);
int s_xbin = xbin + ((subphase & 0x1) ? ns_ssa->sx : 0);
int ibin, ct;
if ((s_ybin < lbinylo) || (s_ybin >= lbinyhi)) continue;
if ((s_xbin < lbinxlo) || (s_xbin >= lbinxhi)) continue;
ibin = zbin*nb_ssa->mbiny*nb_ssa->mbinx
+ s_ybin*nb_ssa->mbinx
+ s_xbin;
for (i = binhead[ibin]; i >= 0; i = bins[i]) {
n = 0;
neighptr = ipage->vget();
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
@ -111,52 +181,18 @@ void NPairHalfBinNewtonSSA::build(NeighList *list)
tagprev = tag[i] - iatom - 1;
}
// loop over rest of local atoms in i's bin
// just store them, since j is beyond i in linked list
for (j = bins_ssa[i]; j >= 0; j = bins_ssa[j]) {
// loop over all local atoms in the current stencil "subphase"
for (k = nstencil_ssa[subphase]; k < nstencil_ssa[subphase+1]; k++) {
const int jbin = ibin+stencil[k];
if (jbin != ibin) j = binhead[jbin];
else j = bins[i]; // same bin as i, so start just past i in the bin
for (; j >= 0; j = bins[j]) {
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >= 0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
ibin = atom2bin[i];
// loop over all local atoms in other bins in "half" stencil
for (k = 0; k < nstencil_half; k++) {
for (j = binhead_ssa[ibin+stencil[k]]; j >= 0;
j = bins_ssa[j]) {
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
@ -174,86 +210,102 @@ void NPairHalfBinNewtonSSA::build(NeighList *list)
}
}
}
AIRct[0] = n;
// loop over AIR ghost atoms in all bins in "full" stencil
// Note: the non-AIR ghost atoms have already been filtered out
// That is a significant time savings because of the "full" stencil
// Note2: only non-pure locals can have ghosts as neighbors
if (ssaAIR[i] == 1) for (k = 0; k < nstencil_full; k++) {
for (j = gbinhead_ssa[ibin+stencil[k]]; j >= 0;
j = bins_ssa[j]) {
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >= 0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) {
neighptr[n++] = j;
++(AIRct[ssaAIR[j] - 1]);
} else if (domain->minimum_image_check(delx,dely,delz)) {
neighptr[n++] = j;
++(AIRct[ssaAIR[j] - 1]);
} else if (which > 0) {
neighptr[n++] = j ^ (which << SBBITS);
++(AIRct[ssaAIR[j] - 1]);
}
} else {
neighptr[n++] = j;
++(AIRct[ssaAIR[j] - 1]);
}
}
}
}
if (n > 0) {
firstneigh[inum] = neighptr;
numneigh[inum] = n;
ilist[inum++] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
}
ipage->vgot(n);
if (ipage->status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
// sort the ghosts in the neighbor list by their ssaAIR number
ssaAIRptr = atom->ssaAIR;
qsort(&(neighptr[AIRct[0]]), n - AIRct[0], sizeof(int), cmp_ssaAIR);
// do a prefix sum on the counts to turn them into indexes
list->ndxAIR_ssa[i][0] = AIRct[0];
for (int ndx = 1; ndx < 8; ++ndx) {
list->ndxAIR_ssa[i][ndx] = AIRct[ndx] + list->ndxAIR_ssa[i][ndx - 1];
}
}
// record where workItem ends in ilist
ssa_itemLen[workPhase][workItem] = inum - ssa_itemLoc[workPhase][workItem];
if (ssa_itemLen[workPhase][workItem] > 0) workItem++;
}
}
}
list->inum = inum;
// record where workPhase ends
ssa_phaseLen[workPhase++] = workItem;
}
}
}
/* ----------------------------------------------------------------------
comparison function invoked by qsort()
accesses static class member ssaAIRptr, set before call to qsort()
------------------------------------------------------------------------- */
if (ssa_phaseCt != workPhase) error->one(FLERR,"ssa_phaseCt was wrong");
static int cmp_ssaAIR(const void *iptr, const void *jptr)
{
int i = NEIGHMASK & *((int *) iptr);
int j = NEIGHMASK & *((int *) jptr);
if (ssaAIRptr[i] < ssaAIRptr[j]) return -1;
if (ssaAIRptr[i] > ssaAIRptr[j]) return 1;
return 0;
list->AIRct_ssa[0] = list->inum = inum;
// loop over AIR ghost atoms, storing their local neighbors
// since these are ghosts, must check if stencil bin is out of bounds
for (int airnum = 1; airnum <= 7; airnum++) {
int locAIRct = 0;
for (i = gairhead_ssa[airnum]; i >= 0; i = bins[i]) {
n = 0;
neighptr = ipage->vget();
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
ibin = coord2bin(x[i],xbin,ybin,zbin);
// loop over AIR ghost atoms in all bins in "full" stencil
// Note: the non-AIR ghost atoms have already been filtered out
for (k = 0; k < nstencil_full; k++) {
xbin2 = xbin + stencilxyz[k][0];
ybin2 = ybin + stencilxyz[k][1];
zbin2 = zbin + stencilxyz[k][2];
// Skip it if this bin is outside the extent of local bins
if (xbin2 < lbinxlo || xbin2 >= lbinxhi ||
ybin2 < lbinylo || ybin2 >= lbinyhi ||
zbin2 < lbinzlo || zbin2 >= lbinzhi) continue;
for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[j],nspecial[j],tag[i]);
else {
int jmol = molindex[j];
if (jmol >= 0) {
int jatom = molatom[j];
which = find_special(onemols[jmol]->special[jatom],
onemols[jmol]->nspecial[jatom],
tag[i] - (tag[j] - jatom - 1));
} else which = 0;
}
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
}
if (n > 0) {
firstneigh[inum + gnum] = neighptr;
numneigh[inum + gnum] = n;
ilist[inum + (gnum++)] = i;
++locAIRct;
}
ipage->vgot(n);
if (ipage->status())
error->one(FLERR,"Neighbor (ghost) list overflow, boost neigh_modify one");
}
list->AIRct_ssa[airnum] = locAIRct;
}
list->gnum = gnum;
}

View File

@ -15,7 +15,7 @@
NPairStyle(half/bin/newton/ssa,
NPairHalfBinNewtonSSA,
NP_HALF | NP_BIN | NP_NEWTON | NP_ORTHO | NP_SSA)
NP_HALF | NP_BIN | NP_NEWTON | NP_ORTHO | NP_SSA | NP_GHOST)
#else
@ -28,9 +28,18 @@ namespace LAMMPS_NS {
class NPairHalfBinNewtonSSA : public NPair {
public:
// SSA Work plan data structures
int ssa_phaseCt;
int *ssa_phaseLen;
int **ssa_itemLoc;
int **ssa_itemLen;
NPairHalfBinNewtonSSA(class LAMMPS *);
~NPairHalfBinNewtonSSA() {}
~NPairHalfBinNewtonSSA();
void build(class NeighList *);
private:
int ssa_maxPhaseCt;
int ssa_maxPhaseLen;
};
}

View File

@ -1,132 +0,0 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors:
James Larentzos and Timothy I. Mattox (Engility Corporation)
------------------------------------------------------------------------- */
#include "npair_halffull_newton_ssa.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "atom.h"
#include "atom_vec.h"
#include "molecule.h"
#include "domain.h"
#include "my_page.h"
#include "error.h"
using namespace LAMMPS_NS;
// allocate space for static class variable
// prototype for non-class function
static int *ssaAIRptr;
static int cmp_ssaAIR(const void *, const void *);
/* ---------------------------------------------------------------------- */
// Construct by forwarding the LAMMPS instance pointer to the NPair base class;
// the constructor body itself sets up no additional state.
NPairHalffullNewtonSSA::NPairHalffullNewtonSSA(LAMMPS *lmp) : NPair(lmp) {}
/* ----------------------------------------------------------------------
build half list from full list for use by Shardlow Splitting Algorithm
pair stored once if i,j are both owned and i < j
if j is ghost, only store if j coords are "above and to the right" of i
works if full list is a skip list
------------------------------------------------------------------------- */
void NPairHalffullNewtonSSA::build(NeighList *list)
{
  const int nlocal = atom->nlocal;
  int *ssaAIR = atom->ssaAIR;

  // destination: the half list being built
  int *ilist = list->ilist;
  int *numneigh = list->numneigh;
  int **firstneigh = list->firstneigh;
  MyPage<int> *ipage = list->ipage;

  // source: the parent full list
  int *ilist_full = list->listfull->ilist;
  int *numneigh_full = list->listfull->numneigh;
  int **firstneigh_full = list->listfull->firstneigh;
  const int inum_full = list->listfull->inum;

  int inum = 0;
  ipage->reset();

  // walk every atom of the parent full list
  for (int ii = 0; ii < inum_full; ++ii) {
    int AIRct[8] = { 0 };   // neighbors kept per AIR; slot 0 counts owned atoms
    int n = 0;
    int *neighptr = ipage->vget();

    const int i = ilist_full[ii];
    int *jlist = firstneigh_full[i];
    const int jnum = numneigh_full[i];

    // filter i's full neighbor list down to the half list
    for (int jj = 0; jj < jnum; ++jj) {
      const int joriginal = jlist[jj];
      const int j = joriginal & NEIGHMASK;

      int slot;
      if (j < nlocal) {
        if (i > j) continue;             // owned pair is kept only from the lower index
        slot = 0;
      } else {
        if (ssaAIR[j] < 2) continue;     // skip ghost atoms not in AIR
        slot = ssaAIR[j] - 1;
      }
      ++(AIRct[slot]);
      neighptr[n++] = joriginal;         // keep the original (unmasked) entry
    }

    ilist[inum++] = i;
    firstneigh[i] = neighptr;
    numneigh[i] = n;
    ipage->vgot(n);
    if (ipage->status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");

    // order the kept neighbors (locals and ghosts) by their ssaAIR number;
    // the comparator reads the array through the ssaAIRptr static
    ssaAIRptr = atom->ssaAIR;
    qsort(neighptr, n, sizeof(int), cmp_ssaAIR);

    // turn the per-AIR counts into running end indexes (prefix sum)
    list->ndxAIR_ssa[i][0] = AIRct[0];
    for (int air = 1; air < 8; ++air) {
      list->ndxAIR_ssa[i][air] = AIRct[air] + list->ndxAIR_ssa[i][air - 1];
    }
  }

  list->inum = inum;
}
/* ----------------------------------------------------------------------
comparison function invoked by qsort()
accesses file-scope static ssaAIRptr, set before call to qsort()
------------------------------------------------------------------------- */
static int cmp_ssaAIR(const void *iptr, const void *jptr)
{
  // Each element is a neighbor-list entry; mask it with NEIGHMASK to get the
  // atom index, then order the two entries by that atom's ssaAIR value.
  const int atomA = *static_cast<const int *>(iptr) & NEIGHMASK;
  const int atomB = *static_cast<const int *>(jptr) & NEIGHMASK;
  const int airA = ssaAIRptr[atomA];
  const int airB = ssaAIRptr[atomB];

  // -1 if A sorts first, +1 if B sorts first, 0 when they tie
  return (airA > airB) - (airA < airB);
}

View File

@ -42,23 +42,72 @@ NStencilHalfBin2dNewtonSSA::NStencilHalfBin2dNewtonSSA(LAMMPS *lmp) :
// Build the 2d SSA stencil.  Every (i,j) bin offset whose bin_distance()
// is below cutneighmaxsq is appended to stencil[] as the flattened offset
// j*mbinx + i; the per-subphase loops also store the offset components in
// stencilxyz[].  nstencil_ssa[] receives the value of pos at each subphase
// boundary and nstencil the final number of entries.
// NOTE(review): this span is a rendered diff hunk.  The loops that write
// only stencil[] and set nstencil_half appear to be the pre-change code
// listed next to its per-subphase replacement -- confirm against the
// committed file before relying on the statement order shown here.
void NStencilHalfBin2dNewtonSSA::create()
{
int i,j,pos = 0;
nstencil_ssa[0] = 0; // redundant info, but saves a conditional
// Include the centroid at the start.
// It will be handled as part of Subphase 0.
stencilxyz[pos][0] = 0;
stencilxyz[pos][1] = 0;
stencilxyz[pos][2] = 0;
stencil[pos++] = 0;
// Subphase 0: upper right front bins (red)
for (j = 0; j <= sy; j++)
for (i = -sx; i <= sx; i++)
if (j > 0 || (j == 0 && i > 0))
if (bin_distance(i,j,0) < cutneighmaxsq)
stencil[pos++] = j*mbinx + i;
nstencil_half = pos; // record where normal half stencil ends
// include additional bins for AIR ghosts only
for (j = -sy; j <= 0; j++)
for (i = -sx; i <= sx; i++) {
if (j == 0 && i > 0) continue;
if (bin_distance(i,j,0) < cutneighmaxsq)
for (i = 0; i <= sx; i++)
if (j > 0 || i > 0) // skip the centroid
if (bin_distance(i,j,0) < cutneighmaxsq) {
stencilxyz[pos][0] = i;
stencilxyz[pos][1] = j;
stencilxyz[pos][2] = 0;
stencil[pos++] = j*mbinx + i;
}
nstencil_ssa[1] = pos;
// Subphase 1: upper left front bins (light blue)
for (j = 1; j <= sy; j++)
for (i = -sx; i < 0; i++)
if (bin_distance(i,j,0) < cutneighmaxsq) {
stencilxyz[pos][0] = i;
stencilxyz[pos][1] = j;
stencilxyz[pos][2] = 0;
stencil[pos++] = j*mbinx + i;
}
nstencil_ssa[2] = pos;
// Subphase 2: lower right front bins (yellow)
// no bins are appended for subphases 2 and 3 in this 2d version, so
// nstencil_ssa[2], [3] and [4] all receive the same value of pos
nstencil_ssa[3] = pos;
// Subphase 3: lower left front bins (blue)
nstencil_ssa[4] = pos; // record end of half stencil
// Now include additional bins for AIR ghosts, and impure-to-pure locals
// Subphase 4: upper right back bins (pink)
// nstencil_ssa[5] = pos;
// Subphase 5: upper left back bins (light green)
// nstencil_ssa[6] = pos;
// Subphase 6: lower right back bins (white)
// bins with j < 0 and i >= 0
for (j = -sy; j < 0; j++)
for (i = 0; i <= sx; i++)
if (bin_distance(i,j,0) < cutneighmaxsq) {
stencilxyz[pos][0] = i;
stencilxyz[pos][1] = j;
stencilxyz[pos][2] = 0;
stencil[pos++] = j*mbinx + i;
}
// nstencil_ssa[7] = pos;
// Subphase 7: lower left back bins (purple)
// bins with j <= 0 and i < 0
for (j = -sy; j <= 0; j++)
for (i = -sx; i < 0; i++)
if (bin_distance(i,j,0) < cutneighmaxsq) {
stencilxyz[pos][0] = i;
stencilxyz[pos][1] = j;
stencilxyz[pos][2] = 0;
stencil[pos++] = j*mbinx + i;
}
// nstencil_ssa[8] = pos;
nstencil = pos; // record where full stencil ends
}

View File

@ -15,7 +15,7 @@
NStencilStyle(half/bin/2d/newton/ssa,
NStencilHalfBin2dNewtonSSA,
NS_HALF | NS_BIN | NS_2D | NS_NEWTON | NS_SSA | NS_ORTHO)
NS_HALF | NS_BIN | NS_2D | NS_NEWTON | NS_SSA | NS_ORTHO | NS_GHOST)
#else

View File

@ -42,33 +42,112 @@ NStencilHalfBin3dNewtonSSA::NStencilHalfBin3dNewtonSSA(LAMMPS *lmp) :
// Build the 3d SSA stencil.  Every (i,j,k) bin offset whose bin_distance()
// is below cutneighmaxsq is appended to stencil[] as the flattened offset
// k*mbiny*mbinx + j*mbinx + i; the per-subphase loops also store the offset
// components in stencilxyz[].  nstencil_ssa[] receives the value of pos at
// each subphase boundary and nstencil the final number of entries.
// NOTE(review): this span is a rendered diff hunk.  The loops that write
// only stencil[] and set nstencil_half appear to be the pre-change code
// listed next to its per-subphase replacement -- confirm against the
// committed file before relying on the statement order shown here.
void NStencilHalfBin3dNewtonSSA::create()
{
int i,j,k,pos = 0;
nstencil_ssa[0] = 0; // redundant info, but saves a conditional
// Include the centroid at the start.
// It will be handled as part of Subphase 0.
stencilxyz[pos][0] = 0;
stencilxyz[pos][1] = 0;
stencilxyz[pos][2] = 0;
stencil[pos++] = 0;
// Subphase 0: upper right front bins (red)
for (k = 0; k <= sz; k++)
for (j = -sy; j <= sy; j++)
for (i = -sx; i <= sx; i++)
if (k > 0 || j > 0 || (j == 0 && i > 0))
if (bin_distance(i,j,k) < cutneighmaxsq)
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
nstencil_half = pos; // record where normal half stencil ends
// include additional bins for AIR ghosts only
for (k = -sz; k < 0; k++)
for (j = -sy; j <= sy; j++)
for (i = -sx; i <= sx; i++)
if (bin_distance(i,j,k) < cutneighmaxsq)
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
// For k==0, make sure to skip already included bins
k = 0;
for (j = -sy; j <= 0; j++)
for (i = -sx; i <= sx; i++) {
if (j == 0 && i > 0) continue;
if (bin_distance(i,j,k) < cutneighmaxsq)
for (j = 0; j <= sy; j++)
for (i = 0; i <= sx; i++)
if (k > 0 || j > 0 || i > 0) // skip the centroid
if (bin_distance(i,j,k) < cutneighmaxsq) {
stencilxyz[pos][0] = i;
stencilxyz[pos][1] = j;
stencilxyz[pos][2] = k;
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
}
nstencil_ssa[1] = pos;
// Subphase 1: upper left front bins (light blue)
// bins with k >= 0, j > 0 and i < 0
for (k = 0; k <= sz; k++)
for (j = 1; j <= sy; j++)
for (i = -sx; i < 0; i++)
if (bin_distance(i,j,k) < cutneighmaxsq) {
stencilxyz[pos][0] = i;
stencilxyz[pos][1] = j;
stencilxyz[pos][2] = k;
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
}
nstencil_ssa[2] = pos;
// Subphase 2: lower right front bins (yellow)
// bins with k > 0, j < 0 and i >= 0
for (k = 1; k <= sz; k++)
for (j = -sy; j < 0; j++)
for (i = 0; i <= sx; i++)
if (bin_distance(i,j,k) < cutneighmaxsq) {
stencilxyz[pos][0] = i;
stencilxyz[pos][1] = j;
stencilxyz[pos][2] = k;
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
}
nstencil_ssa[3] = pos;
// Subphase 3: lower left front bins (blue)
// bins with k > 0, j <= 0 and i < 0
for (k = 1; k <= sz; k++)
for (j = -sy; j <= 0; j++)
for (i = -sx; i < 0; i++)
if (bin_distance(i,j,k) < cutneighmaxsq) {
stencilxyz[pos][0] = i;
stencilxyz[pos][1] = j;
stencilxyz[pos][2] = k;
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
}
nstencil_ssa[4] = pos; // record end of half stencil
// Now include additional bins for AIR ghosts, and impure-to-pure locals
// (the boundary assignments for subphases 4-7 below are left commented
// out; only nstencil_ssa[0..4] and nstencil are written by this function)
// Subphase 4: upper right back bins (pink)
for (k = -sz; k < 0; k++)
for (j = 0; j <= sy; j++)
for (i = 0; i <= sx; i++)
if (bin_distance(i,j,k) < cutneighmaxsq) {
stencilxyz[pos][0] = i;
stencilxyz[pos][1] = j;
stencilxyz[pos][2] = k;
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
}
// nstencil_ssa[5] = pos;
// Subphase 5: upper left back bins (light green)
for (k = -sz; k < 0; k++)
for (j = 1; j <= sy; j++)
for (i = -sx; i < 0; i++)
if (bin_distance(i,j,k) < cutneighmaxsq) {
stencilxyz[pos][0] = i;
stencilxyz[pos][1] = j;
stencilxyz[pos][2] = k;
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
}
// nstencil_ssa[6] = pos;
// Subphase 6: lower right back bins (white)
for (k = -sz; k <= 0; k++)
for (j = -sy; j < 0; j++)
for (i = 0; i <= sx; i++)
if (bin_distance(i,j,k) < cutneighmaxsq) {
stencilxyz[pos][0] = i;
stencilxyz[pos][1] = j;
stencilxyz[pos][2] = k;
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
}
// nstencil_ssa[7] = pos;
// Subphase 7: lower left back bins (purple)
for (k = -sz; k <= 0; k++)
for (j = -sy; j <= 0; j++)
for (i = -sx; i < 0; i++)
if (bin_distance(i,j,k) < cutneighmaxsq) {
stencilxyz[pos][0] = i;
stencilxyz[pos][1] = j;
stencilxyz[pos][2] = k;
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
}
//nstencil_ssa[8] = pos;
nstencil = pos; // record where full stencil ends
}

View File

@ -15,7 +15,7 @@
NStencilStyle(half/bin/3d/newton/ssa,
NStencilHalfBin3dNewtonSSA,
NS_HALF | NS_BIN | NS_3D | NS_NEWTON | NS_SSA | NS_ORTHO)
NS_HALF | NS_BIN | NS_3D | NS_NEWTON | NS_SSA | NS_ORTHO | NS_GHOST)
#else

View File

@ -20,11 +20,12 @@ namespace LAMMPS_NS {
// Abstract base class for the SSA neighbor stencils: it adds the SSA
// bookkeeping members to NStencil and leaves create() to derived classes.
// NOTE(review): two constructor variants are listed below (rendered diff);
// the second one additionally sets xyzflag = 1 -- presumably the flag that
// enables the stencilxyz[] storage, confirm in NStencil.
class NStencilSSA : public NStencil {
public:
NStencilSSA(class LAMMPS *lmp) : NStencil(lmp) { }
NStencilSSA(class LAMMPS *lmp) : NStencil(lmp) { xyzflag = 1; }
~NStencilSSA() {}
// pure virtual: each concrete SSA stencil fills in its own bin list
virtual void create() = 0;
int nstencil_half; // where the half stencil ends
// first stencil index for each subphase, with last index at end
int nstencil_ssa[5];
};
}

View File

@ -316,18 +316,17 @@ void PairDPDfdt::init_style()
if (comm->ghost_velocity == 0)
error->all(FLERR,"Pair dpd/fdt requires ghost atoms store velocity");
// if newton off, forces between atoms ij will be double computed
// using different random numbers
if (force->newton_pair == 0 && comm->me == 0) error->warning(FLERR,
"Pair dpd/fdt requires newton pair on");
splitFDT_flag = false;
int irequest = neighbor->request(this,instance_me);
for (int i = 0; i < modify->nfix; i++)
if (strcmp(modify->fix[i]->style,"shardlow") == 0){
if (strncmp(modify->fix[i]->style,"shardlow", 8) == 0){
splitFDT_flag = true;
}
// if newton off, forces between atoms ij will be double computed
// using different random numbers if splitFDT_flag is false
if (!splitFDT_flag && (force->newton_pair == 0) && (comm->me == 0)) error->warning(FLERR,
"Pair dpd/fdt requires newton pair on if not also using fix shardlow");
}
/* ----------------------------------------------------------------------

View File

@ -55,6 +55,8 @@ PairDPDfdtEnergy::PairDPDfdtEnergy(LAMMPS *lmp) : Pair(lmp)
PairDPDfdtEnergy::~PairDPDfdtEnergy()
{
if (copymode) return;
if (allocated) {
memory->destroy(setflag);
memory->destroy(cutsq);
@ -403,19 +405,18 @@ void PairDPDfdtEnergy::init_style()
if (comm->ghost_velocity == 0)
error->all(FLERR,"Pair dpd/fdt/energy requires ghost atoms store velocity");
// if newton off, forces between atoms ij will be double computed
// using different random numbers
if (force->newton_pair == 0 && comm->me == 0) error->warning(FLERR,
"Pair dpd/fdt/energy requires newton pair on");
splitFDT_flag = false;
int irequest = neighbor->request(this,instance_me);
for (int i = 0; i < modify->nfix; i++)
if (strcmp(modify->fix[i]->style,"shardlow") == 0){
if (strncmp(modify->fix[i]->style,"shardlow", 8) == 0){
splitFDT_flag = true;
}
// if newton off, forces between atoms ij will be double computed
// using different random numbers if splitFDT_flag is false
if (!splitFDT_flag && (force->newton_pair == 0) && (comm->me == 0)) error->warning(FLERR,
"Pair dpd/fdt/energy requires newton pair on if not also using fix shardlow");
bool eos_flag = false;
for (int i = 0; i < modify->nfix; i++)
if (strncmp(modify->fix[i]->style,"eos",3) == 0) eos_flag = true;

View File

@ -31,8 +31,8 @@ class PairDPDfdtEnergy : public Pair {
virtual void compute(int, int);
virtual void settings(int, char **);
virtual void coeff(int, char **);
void init_style();
double init_one(int, int);
virtual void init_style();
virtual double init_one(int, int);
void write_restart(FILE *);
void read_restart(FILE *);
virtual void write_restart_settings(FILE *);
@ -46,15 +46,15 @@ class PairDPDfdtEnergy : public Pair {
double **sigma,**kappa;
double *duCond,*duMech;
int seed;
class RanMars *random;
protected:
double cut_global;
int seed;
bool splitFDT_flag;
bool a0_is_zero;
void allocate();
virtual void allocate();
};

View File

@ -84,11 +84,15 @@ PairExp6rx::PairExp6rx(LAMMPS *lmp) : Pair(lmp)
PairExp6rx::~PairExp6rx()
{
if (copymode) return;
if (params != NULL) {
for (int i=0; i < nparams; ++i) {
delete[] params[i].name;
delete[] params[i].potential;
}
memory->destroy(params);
}
memory->destroy(mol2param);
if (allocated) {

View File

@ -30,13 +30,21 @@ class PairExp6rx : public Pair {
virtual ~PairExp6rx();
virtual void compute(int, int);
void settings(int, char **);
void coeff(int, char **);
virtual void coeff(int, char **);
double init_one(int, int);
void write_restart(FILE *);
void read_restart(FILE *);
void write_restart_settings(FILE *);
void read_restart_settings(FILE *);
struct Param {
double epsilon,rm,alpha;
int ispecies;
char *name, *potential; // names of unique molecules and interaction type
char *tablename; // name of interaction table
int potentialType; // enumerated interaction potential type.
};
protected:
enum{LINEAR};
enum{NONE,EXPONENT,POLYNOMIAL};
@ -45,21 +53,14 @@ class PairExp6rx : public Pair {
double **epsilon,**rm,**alpha;
double **rminv,**buck1,**buck2,**offset;
void allocate();
virtual void allocate();
int *mol2param; // mapping from molecule to parameters
int nparams; // # of stored parameter sets
int maxparam; // max # of parameter sets
struct Param {
double epsilon,rm,alpha;
int ispecies;
char *name, *potential; // names of unique molecules and interaction type
char *tablename; // name of interaction table
int potentialType; // enumerated interaction potential type.
};
Param *params; // parameter set for an I-J-K interaction
int nspecies;
void read_file(char *);
virtual void read_file(char *);
void read_file2(char *);
void setup();

View File

@ -85,6 +85,8 @@ PairMultiLucyRX::PairMultiLucyRX(LAMMPS *lmp) : Pair(lmp),
PairMultiLucyRX::~PairMultiLucyRX()
{
if (copymode) return;
for (int m = 0; m < ntables; m++) free_table(&tables[m]);
memory->sfree(tables);

View File

@ -30,17 +30,17 @@ class PairMultiLucyRX : public Pair {
virtual ~PairMultiLucyRX();
virtual void compute(int, int);
void settings(int, char **);
virtual void settings(int, char **);
void coeff(int, char **);
double init_one(int, int);
void write_restart(FILE *);
void read_restart(FILE *);
void write_restart_settings(FILE *);
void read_restart_settings(FILE *);
int pack_forward_comm(int, int *, double *, int, int *);
void unpack_forward_comm(int, int, double *);
int pack_reverse_comm(int, int, double *);
void unpack_reverse_comm(int, int *, double *);
virtual int pack_forward_comm(int, int *, double *, int, int *);
virtual void unpack_forward_comm(int, int, double *);
virtual int pack_reverse_comm(int, int, double *);
virtual void unpack_reverse_comm(int, int *, double *);
void computeLocalDensity();
double rho_0;
@ -64,7 +64,7 @@ class PairMultiLucyRX : public Pair {
int **tabindex;
void allocate();
virtual void allocate();
void read_table(Table *, char *, char *);
void param_extract(Table *, char *);
void bcast_table(Table *);

View File

@ -33,8 +33,6 @@ using namespace LAMMPS_NS;
enum{NONE,RLINEAR,RSQ,BMP};
#define MAXLINE 1024
#ifdef DBL_EPSILON
#define MY_EPSILON (10.0*DBL_EPSILON)
#else
@ -46,25 +44,19 @@ enum{NONE,RLINEAR,RSQ,BMP};
/* ---------------------------------------------------------------------- */
// Constructor: starts with no tables (ntables = 0, tables = NULL),
// fractionalWeighting switched on and both site name pointers unset.
// NOTE(review): two initializer lines are listed (rendered diff), one
// chaining to Pair and one to PairTable -- only one can be in the real file.
PairTableRX::PairTableRX(LAMMPS *lmp) : Pair(lmp)
PairTableRX::PairTableRX(LAMMPS *lmp) : PairTable(lmp)
{
ntables = 0;
tables = NULL;
fractionalWeighting = true;
site1 = NULL;
site2 = NULL;
}
/* ---------------------------------------------------------------------- */
// Destructor: frees every table and the table array, the per-type arrays
// (only if allocate() ran, as tracked by `allocated`), and the two site
// name strings.
// NOTE(review): this is a rendered diff hunk; some of these statements may
// belong only to the pre-change version -- confirm against the committed file.
PairTableRX::~PairTableRX()
{
// release the arrays held by each table, then the table array itself
for (int m = 0; m < ntables; m++) free_table(&tables[m]);
memory->sfree(tables);
if (allocated) {
memory->destroy(setflag);
memory->destroy(cutsq);
memory->destroy(tabindex);
}
// delete [] on a NULL pointer is a no-op, so unset names are fine here
delete [] site1;
delete [] site2;
}
/* ---------------------------------------------------------------------- */
@ -252,24 +244,6 @@ void PairTableRX::compute(int eflag, int vflag)
memory->destroy(mixWtSite2);
}
/* ----------------------------------------------------------------------
allocate all arrays
------------------------------------------------------------------------- */
// Create the (ntypes+1) x (ntypes+1) per-type-pair arrays setflag, cutsq
// and tabindex, zero them, and mark the pair style as allocated.
void PairTableRX::allocate()
{
  const int ntp1 = atom->ntypes + 1;
  const int ncells = ntp1*ntp1;

  memory->create(setflag,ntp1,ntp1,"pair:setflag");
  memory->create(cutsq,ntp1,ntp1,"pair:cutsq");
  memory->create(tabindex,ntp1,ntp1,"pair:tabindex");

  // the 2d arrays are cleared through the address of their first element
  memset(&setflag[0][0],0,ncells*sizeof(int));
  memset(&cutsq[0][0],0,ncells*sizeof(double));
  memset(&tabindex[0][0],0,ncells*sizeof(int));

  allocated = 1;
}
/* ----------------------------------------------------------------------
global settings
------------------------------------------------------------------------- */
@ -462,602 +436,6 @@ void PairTableRX::coeff(int narg, char **arg)
}
/* ----------------------------------------------------------------------
init for one type pair i,j and corresponding j,i
------------------------------------------------------------------------- */
double PairTableRX::init_one(int i, int j)
{
if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
tabindex[j][i] = tabindex[i][j];
return tables[tabindex[i][j]].cut;
}
/* ----------------------------------------------------------------------
read a table section from a tabulated potential file
only called by proc 0
this function sets these values in Table:
ninput,rfile,efile,ffile,rflag,rlo,rhi,fpflag,fplo,fphi,ntablebits
------------------------------------------------------------------------- */
// Read the section of a tabulated potential file that starts with `keyword`.
//
//   tb      : table to fill; ninput and the R/RSQ/BITMAP and FP settings are
//             set by param_extract(), rfile/efile/ffile are allocated and
//             filled here, ntablebits is set for bitmapped input
//   file    : name of the potential file, opened via force->open_potential()
//   keyword : first word of the header line of the wanted section
//
// Calls error->one() if the file cannot be opened, if the keyword is not
// found, or if the file ends inside the wanted section.  Data lines that do
// not provide all four fields (index, r, e, f) are counted and reported with
// a warning, since the corresponding e/f entries are then not set.
void PairTableRX::read_table(Table *tb, char *file, char *keyword)
{
  char line[MAXLINE];
  const char *nokeyword = "Did not find keyword in table file";
  const char *truncated = "Premature end of file in pair table";

  // open file

  FILE *fp = force->open_potential(file);
  if (fp == NULL) {
    // bounded formatting: a long file name must not overrun the buffer
    char str[128];
    snprintf(str,sizeof(str),"Cannot open file %s",file);
    error->one(FLERR,str);
  }

  // loop until section found with matching keyword

  while (1) {
    if (fgets(line,MAXLINE,fp) == NULL) error->one(FLERR,nokeyword);
    if (strspn(line," \t\n\r") == strlen(line)) continue;  // blank line
    if (line[0] == '#') continue;                          // comment
    char *word = strtok(line," \t\n\r");
    if (strcmp(word,keyword) == 0) break;           // matching keyword

    // no match: skip the parameter line, the line after it and all data
    // lines of this section; hitting end-of-file while skipping means the
    // keyword is not in the file (never re-parse a stale buffer)

    if (fgets(line,MAXLINE,fp) == NULL) error->one(FLERR,nokeyword);
    param_extract(tb,line);
    if (fgets(line,MAXLINE,fp) == NULL) error->one(FLERR,nokeyword);
    for (int i = 0; i < tb->ninput; i++)
      if (fgets(line,MAXLINE,fp) == NULL) error->one(FLERR,nokeyword);
  }

  // read args on 2nd line of section
  // allocate table arrays for file values

  if (fgets(line,MAXLINE,fp) == NULL) error->one(FLERR,truncated);
  param_extract(tb,line);
  memory->create(tb->rfile,tb->ninput,"pair:rfile");
  memory->create(tb->efile,tb->ninput,"pair:efile");
  memory->create(tb->ffile,tb->ninput,"pair:ffile");

  // setup bitmap parameters for table to read in

  tb->ntablebits = 0;
  int masklo,maskhi,nmask,nshiftbits;
  if (tb->rflag == BMP) {
    while (1 << tb->ntablebits < tb->ninput) tb->ntablebits++;
    if (1 << tb->ntablebits != tb->ninput)
      error->one(FLERR,"Bitmapped table is incorrect length in table file");
    init_bitmap(tb->rlo,tb->rhi,tb->ntablebits,masklo,maskhi,nmask,nshiftbits);
  }

  // read r,e,f table values from file
  // if rflag set, compute r
  // if rflag not set, use r from file

  int itmp;
  double rtmp;
  union_int_float_t rsq_lookup;
  int nbad = 0;

  // skip the line between the parameter line and the first data line
  if (fgets(line,MAXLINE,fp) == NULL) error->one(FLERR,truncated);

  for (int i = 0; i < tb->ninput; i++) {
    if (fgets(line,MAXLINE,fp) == NULL) error->one(FLERR,truncated);
    if (sscanf(line,"%d %lg %lg %lg",
               &itmp,&rtmp,&tb->efile[i],&tb->ffile[i]) != 4) nbad++;

    if (tb->rflag == RLINEAR)
      rtmp = tb->rlo + (tb->rhi - tb->rlo)*i/(tb->ninput-1);
    else if (tb->rflag == RSQ) {
      rtmp = tb->rlo*tb->rlo +
        (tb->rhi*tb->rhi - tb->rlo*tb->rlo)*i/(tb->ninput-1);
      rtmp = sqrt(rtmp);
    } else if (tb->rflag == BMP) {
      rsq_lookup.i = i << nshiftbits;
      rsq_lookup.i |= masklo;
      if (rsq_lookup.f < tb->rlo*tb->rlo) {
        rsq_lookup.i = i << nshiftbits;
        rsq_lookup.i |= maskhi;
      }
      rtmp = sqrtf(rsq_lookup.f);
    }

    tb->rfile[i] = rtmp;
  }

  // close file

  fclose(fp);

  if (nbad)
    error->warning(FLERR,"Incomplete or unparseable data lines in pair table file");
}
/* ----------------------------------------------------------------------
broadcast read-in table info from proc 0 to other procs
this function communicates these values in Table:
ninput,rfile,efile,ffile,rflag,rlo,rhi,fpflag,fplo,fphi
------------------------------------------------------------------------- */
// Send the table data read on rank 0 to all other ranks of `world`.
// Ranks other than 0 allocate rfile/efile/ffile once the length is known.
// The rlo/rhi and fplo/fphi pairs are only sent when their flag is set.
void PairTableRX::bcast_table(Table *tb)
{
  // length first, so the receiving ranks can size their buffers
  MPI_Bcast(&tb->ninput,1,MPI_INT,0,world);

  int rank;
  MPI_Comm_rank(world,&rank);
  if (rank > 0) {
    memory->create(tb->rfile,tb->ninput,"pair:rfile");
    memory->create(tb->efile,tb->ninput,"pair:efile");
    memory->create(tb->ffile,tb->ninput,"pair:ffile");
  }

  // the three columns of file values
  MPI_Bcast(tb->rfile,tb->ninput,MPI_DOUBLE,0,world);
  MPI_Bcast(tb->efile,tb->ninput,MPI_DOUBLE,0,world);
  MPI_Bcast(tb->ffile,tb->ninput,MPI_DOUBLE,0,world);

  // optional r range
  MPI_Bcast(&tb->rflag,1,MPI_INT,0,world);
  if (tb->rflag != 0) {
    MPI_Bcast(&tb->rlo,1,MPI_DOUBLE,0,world);
    MPI_Bcast(&tb->rhi,1,MPI_DOUBLE,0,world);
  }

  // optional end-point derivatives of the force
  MPI_Bcast(&tb->fpflag,1,MPI_INT,0,world);
  if (tb->fpflag != 0) {
    MPI_Bcast(&tb->fplo,1,MPI_DOUBLE,0,world);
    MPI_Bcast(&tb->fphi,1,MPI_DOUBLE,0,world);
  }
}
/* ----------------------------------------------------------------------
build spline representation of e,f over entire range of read-in table
this function sets these values in Table: e2file,f2file
------------------------------------------------------------------------- */
// Allocate e2file/f2file and fill them with the spline coefficients of the
// energy and force columns over the whole read-in table.
void PairTableRX::spline_table(Table *tb)
{
  memory->create(tb->e2file,tb->ninput,"pair:e2file");
  memory->create(tb->f2file,tb->ninput,"pair:f2file");

  const int last = tb->ninput-1;

  // energy: the end-point slopes are the negated first and last force values
  double ep0 = - tb->ffile[0];
  double epn = - tb->ffile[last];
  spline(tb->rfile,tb->efile,tb->ninput,ep0,epn,tb->e2file);

  // force: if no FP values were given, use one-sided finite differences
  // of the first and last pair of table points as end-point slopes
  if (!tb->fpflag) {
    tb->fplo = (tb->ffile[1] - tb->ffile[0]) / (tb->rfile[1] - tb->rfile[0]);
    tb->fphi = (tb->ffile[last] - tb->ffile[last-1]) /
      (tb->rfile[last] - tb->rfile[last-1]);
  }

  spline(tb->rfile,tb->ffile,tb->ninput,tb->fplo,tb->fphi,tb->f2file);
}
/* ----------------------------------------------------------------------
extract attributes from parameter line in table section
format of line: N value R/RSQ/BITMAP lo hi FP fplo fphi
N is required, other params are optional
------------------------------------------------------------------------- */
// Parse the parameter line of a table section.
//
//   tb   : table whose ninput, rflag, rlo, rhi, fpflag, fplo, fphi are set
//   line : the parameter line; it is tokenized in place by strtok()
//
// Calls error->one() on an unknown keyword, on a keyword whose value(s) are
// missing from the line, and when no positive N was given.
void PairTableRX::param_extract(Table *tb, char *line)
{
  const char *sep = " \t\n\r\f";
  const char *missing = "Missing value in pair table parameters";

  tb->ninput = 0;
  tb->rflag = NONE;
  tb->fpflag = 0;

  char *word = strtok(line,sep);
  while (word) {
    if (strcmp(word,"N") == 0) {
      // strtok() returns NULL at end of line: never hand that to atoi/atof
      word = strtok(NULL,sep);
      if (word == NULL) error->one(FLERR,missing);
      tb->ninput = atoi(word);
    } else if (strcmp(word,"R") == 0 || strcmp(word,"RSQ") == 0 ||
               strcmp(word,"BITMAP") == 0) {
      if (strcmp(word,"R") == 0) tb->rflag = RLINEAR;
      else if (strcmp(word,"RSQ") == 0) tb->rflag = RSQ;
      else if (strcmp(word,"BITMAP") == 0) tb->rflag = BMP;
      word = strtok(NULL,sep);
      if (word == NULL) error->one(FLERR,missing);
      tb->rlo = atof(word);
      word = strtok(NULL,sep);
      if (word == NULL) error->one(FLERR,missing);
      tb->rhi = atof(word);
    } else if (strcmp(word,"FP") == 0) {
      tb->fpflag = 1;
      word = strtok(NULL,sep);
      if (word == NULL) error->one(FLERR,missing);
      tb->fplo = atof(word);
      word = strtok(NULL,sep);
      if (word == NULL) error->one(FLERR,missing);
      tb->fphi = atof(word);
    } else {
      printf("WORD: %s\n",word);
      error->one(FLERR,"Invalid keyword in pair table parameters");
    }
    word = strtok(NULL,sep);
  }

  // a missing, zero or negative N cannot size the table arrays
  if (tb->ninput <= 0) error->one(FLERR,"Pair table parameters did not set N");
}
/* ----------------------------------------------------------------------
compute r,e,f vectors from splined values
------------------------------------------------------------------------- */
// Build the run-time lookup arrays of one table from its file values and
// their spline coefficients.  Which arrays are created depends on the
// class-wide tabstyle (LOOKUP, LINEAR, SPLINE or BITMAP); tablength gives
// the number of table points (for BITMAP: the number of bits, i.e. the
// table has 1 << tablength entries).  Also sets tb->innersq, tb->delta and
// tb->invdelta, and for SPLINE tb->deltasq6.
void PairTableRX::compute_table(Table *tb)
{
int tlm1 = tablength-1;
// inner = inner table bound
// cut = outer table bound
// delta = table spacing in rsq for N-1 bins
double inner;
if (tb->rflag) inner = tb->rlo;
else inner = tb->rfile[0];
tb->innersq = double(inner)*double(inner);
tb->delta = double(tb->cut*tb->cut - double(tb->innersq)) / double(tlm1);
tb->invdelta = 1.0/double(tb->delta);
// direct lookup tables
// N-1 evenly spaced bins in rsq from inner to cut
// e,f = value at midpt of bin
// e,f are N-1 in length since store 1 value at bin midpt
// f is converted to f/r when stored in f[i]
// e,f are never a match to read-in values, always computed via spline interp
if (tabstyle == LOOKUP) {
memory->create(tb->e,tlm1,"pair:e");
memory->create(tb->f,tlm1,"pair:f");
double r,rsq;
for (int i = 0; i < tlm1; i++) {
rsq = tb->innersq + (i+0.5)*tb->delta;
r = sqrt(rsq);
tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r);
tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r;
}
}
// linear tables
// N-1 evenly spaced bins in rsq from inner to cut
// rsq,e,f = value at lower edge of bin
// de,df values = delta from lower edge to upper edge of bin
// rsq,e,f are N in length so de,df arrays can compute difference
// f is converted to f/r when stored in f[i]
// e,f can match read-in values, else compute via spline interp
if (tabstyle == LINEAR) {
memory->create(tb->rsq,tablength,"pair:rsq");
memory->create(tb->e,tablength,"pair:e");
memory->create(tb->f,tablength,"pair:f");
memory->create(tb->de,tlm1,"pair:de");
memory->create(tb->df,tlm1,"pair:df");
double r,rsq;
for (int i = 0; i < tablength; i++) {
rsq = tb->innersq + i*tb->delta;
r = sqrt(rsq);
tb->rsq[i] = rsq;
if (tb->match) {
tb->e[i] = tb->efile[i];
tb->f[i] = tb->ffile[i]/r;
} else {
tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r);
tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r;
}
}
for (int i = 0; i < tlm1; i++) {
tb->de[i] = tb->e[i+1] - tb->e[i];
tb->df[i] = tb->f[i+1] - tb->f[i];
}
}
// cubic spline tables
// N-1 evenly spaced bins in rsq from inner to cut
// rsq,e,f = value at lower edge of bin
// e2,f2 = spline coefficient for each bin
// rsq,e,f,e2,f2 are N in length so have N-1 spline bins
// f is converted to f/r after e is splined
// e,f can match read-in values, else compute via spline interp
if (tabstyle == SPLINE) {
memory->create(tb->rsq,tablength,"pair:rsq");
memory->create(tb->e,tablength,"pair:e");
memory->create(tb->f,tablength,"pair:f");
memory->create(tb->e2,tablength,"pair:e2");
memory->create(tb->f2,tablength,"pair:f2");
tb->deltasq6 = tb->delta*tb->delta / 6.0;
double r,rsq;
for (int i = 0; i < tablength; i++) {
rsq = tb->innersq + i*tb->delta;
r = sqrt(rsq);
tb->rsq[i] = rsq;
// NOTE(review): in the match branch f[i] is stored as ffile[i]/r, while
// the spline branch stores plain f; the loop further down divides every
// f[i] by sqrt(rsq[i]) once more, and ep0/epn/fp0/fpn below read f[0] and
// f[tlm1] as if they held plain f -- confirm the match branch is intended.
if (tb->match) {
tb->e[i] = tb->efile[i];
tb->f[i] = tb->ffile[i]/r;
} else {
tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r);
tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r);
}
}
// ep0,epn = dh/dg at inner and at cut
// h(r) = e(r) and g(r) = r^2
// dh/dg = (de/dr) / 2r = -f/2r
double ep0 = - tb->f[0] / (2.0 * sqrt(tb->innersq));
double epn = - tb->f[tlm1] / (2.0 * tb->cut);
spline(tb->rsq,tb->e,tablength,ep0,epn,tb->e2);
// fp0,fpn = dh/dg at inner and at cut
// h(r) = f(r)/r and g(r) = r^2
// dh/dg = (1/r df/dr - f/r^2) / 2r
// dh/dg in secant approx = (f(r2)/r2 - f(r1)/r1) / (g(r2) - g(r1))
double fp0,fpn;
// the secant step is this fraction of one table spacing in rsq
double secant_factor = 0.1;
if (tb->fpflag) fp0 = (tb->fplo/sqrt(tb->innersq) - tb->f[0]/tb->innersq) /
(2.0 * sqrt(tb->innersq));
else {
double rsq1 = tb->innersq;
double rsq2 = rsq1 + secant_factor*tb->delta;
fp0 = (splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,sqrt(rsq2)) /
sqrt(rsq2) - tb->f[0] / sqrt(rsq1)) / (secant_factor*tb->delta);
}
// the user-supplied fphi is only used when cut coincides with the last
// r value of the file; otherwise the secant estimate is taken at cut
if (tb->fpflag && tb->cut == tb->rfile[tb->ninput-1]) fpn =
(tb->fphi/tb->cut - tb->f[tlm1]/(tb->cut*tb->cut)) / (2.0 * tb->cut);
else {
double rsq2 = tb->cut * tb->cut;
double rsq1 = rsq2 - secant_factor*tb->delta;
fpn = (tb->f[tlm1] / sqrt(rsq2) -
splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,sqrt(rsq1)) /
sqrt(rsq1)) / (secant_factor*tb->delta);
}
for (int i = 0; i < tablength; i++) tb->f[i] /= sqrt(tb->rsq[i]);
spline(tb->rsq,tb->f,tablength,fp0,fpn,tb->f2);
}
// bitmapped linear tables
// 2^N bins from inner to cut, spaced in bitmapped manner
// f is converted to f/r when stored in f[i]
// e,f can match read-in values, else compute via spline interp
if (tabstyle == BITMAP) {
double r;
union_int_float_t rsq_lookup;
int masklo,maskhi;
// linear lookup tables of length ntable = 2^n
// stored value = value at lower edge of bin
init_bitmap(inner,tb->cut,tablength,masklo,maskhi,tb->nmask,tb->nshiftbits);
int ntable = 1 << tablength;
int ntablem1 = ntable - 1;
memory->create(tb->rsq,ntable,"pair:rsq");
memory->create(tb->e,ntable,"pair:e");
memory->create(tb->f,ntable,"pair:f");
memory->create(tb->de,ntable,"pair:de");
memory->create(tb->df,ntable,"pair:df");
memory->create(tb->drsq,ntable,"pair:drsq");
// running minimum of the rsq values generated below; it starts from the
// pattern of index 0 combined with maskhi
union_int_float_t minrsq_lookup;
minrsq_lookup.i = 0 << tb->nshiftbits;
minrsq_lookup.i |= maskhi;
for (int i = 0; i < ntable; i++) {
// build the float whose bit pattern is the table index shifted into
// place plus masklo; if that value lies below innersq use maskhi instead
rsq_lookup.i = i << tb->nshiftbits;
rsq_lookup.i |= masklo;
if (rsq_lookup.f < tb->innersq) {
rsq_lookup.i = i << tb->nshiftbits;
rsq_lookup.i |= maskhi;
}
r = sqrtf(rsq_lookup.f);
tb->rsq[i] = rsq_lookup.f;
if (tb->match) {
tb->e[i] = tb->efile[i];
tb->f[i] = tb->ffile[i]/r;
} else {
tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r);
tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r;
}
minrsq_lookup.f = MIN(minrsq_lookup.f,rsq_lookup.f);
}
tb->innersq = minrsq_lookup.f;
for (int i = 0; i < ntablem1; i++) {
tb->de[i] = tb->e[i+1] - tb->e[i];
tb->df[i] = tb->f[i+1] - tb->f[i];
tb->drsq[i] = 1.0/(tb->rsq[i+1] - tb->rsq[i]);
}
// get the delta values for the last table entries
// tables are connected periodically between 0 and ntablem1
tb->de[ntablem1] = tb->e[0] - tb->e[ntablem1];
tb->df[ntablem1] = tb->f[0] - tb->f[ntablem1];
tb->drsq[ntablem1] = 1.0/(tb->rsq[0] - tb->rsq[ntablem1]);
// get the correct delta values at itablemax
// smallest r is in bin itablemin
// largest r is in bin itablemax, which is itablemin-1,
// or ntablem1 if itablemin=0
// deltas at itablemax only needed if corresponding rsq < cut*cut
// if so, compute deltas between rsq and cut*cut
// if tb->match, data at cut*cut is unavailable, so we'll take
// deltas at itablemax-1 as a good approximation
double e_tmp,f_tmp;
int itablemin = minrsq_lookup.i & tb->nmask;
itablemin >>= tb->nshiftbits;
int itablemax = itablemin - 1;
if (itablemin == 0) itablemax = ntablem1;
int itablemaxm1 = itablemax - 1;
if (itablemax == 0) itablemaxm1 = ntablem1;
rsq_lookup.i = itablemax << tb->nshiftbits;
rsq_lookup.i |= maskhi;
if (rsq_lookup.f < tb->cut*tb->cut) {
if (tb->match) {
tb->de[itablemax] = tb->de[itablemaxm1];
tb->df[itablemax] = tb->df[itablemaxm1];
tb->drsq[itablemax] = tb->drsq[itablemaxm1];
} else {
rsq_lookup.f = tb->cut*tb->cut;
r = sqrtf(rsq_lookup.f);
e_tmp = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r);
f_tmp = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r;
tb->de[itablemax] = e_tmp - tb->e[itablemax];
tb->df[itablemax] = f_tmp - tb->f[itablemax];
tb->drsq[itablemax] = 1.0/(rsq_lookup.f - tb->rsq[itablemax]);
}
}
}
}
/* ----------------------------------------------------------------------
set all ptrs in a table to NULL, so can be freed safely
------------------------------------------------------------------------- */
// Reset every array pointer of the table to NULL so that free_table() can
// later be called on it regardless of which arrays were actually created.
void PairTableRX::null_table(Table *tb)
{
  // values read from the file and their spline coefficients
  tb->ffile = NULL;
  tb->efile = NULL;
  tb->rfile = NULL;
  tb->f2file = NULL;
  tb->e2file = NULL;

  // run-time lookup arrays
  tb->de = NULL;
  tb->e = NULL;
  tb->drsq = NULL;
  tb->rsq = NULL;
  tb->f2 = NULL;
  tb->e2 = NULL;
  tb->df = NULL;
  tb->f = NULL;
}
/* ----------------------------------------------------------------------
free all arrays in a table
------------------------------------------------------------------------- */
// Release all thirteen arrays owned by a table through memory->destroy(),
// in the same order as they are listed in the table structure.
void PairTableRX::free_table(Table *tb)
{
  double **arrays[] = {
    &tb->rfile, &tb->efile, &tb->ffile,       // file values
    &tb->e2file, &tb->f2file,                 // their spline coefficients
    &tb->rsq, &tb->drsq,                      // run-time tables
    &tb->e, &tb->de,
    &tb->f, &tb->df,
    &tb->e2, &tb->f2
  };
  const int narrays = sizeof(arrays)/sizeof(arrays[0]);

  for (int k = 0; k < narrays; k++) memory->destroy(*arrays[k]);
}
/* ----------------------------------------------------------------------
spline and splint routines modified from Numerical Recipes
------------------------------------------------------------------------- */
// Compute the second-derivative coefficients y2[0..n-1] of the cubic spline
// through the points (x[i],y[i]).
//   yp1, ypn : first derivative at the first / last point; a value above
//              0.99e30 selects a zero second derivative at that end instead
//   y2       : output array of length n
void PairTableRX::spline(double *x, double *y, int n,
                         double yp1, double ypn, double *y2)
{
  double *work = new double[n];

  // lower boundary condition
  if (yp1 > 0.99e30) {
    y2[0] = work[0] = 0.0;
  } else {
    y2[0] = -0.5;
    work[0] = (3.0/(x[1]-x[0])) * ((y[1]-y[0]) / (x[1]-x[0]) - yp1);
  }

  // forward sweep over the interior points
  for (int m = 1; m < n-1; m++) {
    const double sig = (x[m]-x[m-1]) / (x[m+1]-x[m-1]);
    const double p = sig*y2[m-1] + 2.0;
    y2[m] = (sig-1.0) / p;
    work[m] = (y[m+1]-y[m]) / (x[m+1]-x[m]) - (y[m]-y[m-1]) / (x[m]-x[m-1]);
    work[m] = (6.0*work[m] / (x[m+1]-x[m-1]) - sig*work[m-1]) / p;
  }

  // upper boundary condition
  double qn = 0.0;
  double un = 0.0;
  if (!(ypn > 0.99e30)) {
    qn = 0.5;
    un = (3.0/(x[n-1]-x[n-2])) * (ypn - (y[n-1]-y[n-2]) / (x[n-1]-x[n-2]));
  }
  y2[n-1] = (un-qn*work[n-2]) / (qn*y2[n-2] + 1.0);

  // back-substitution from the second-to-last point down to the first
  int k = n-2;
  while (k >= 0) {
    y2[k] = y2[k]*y2[k+1] + work[k];
    --k;
  }

  delete [] work;
}
/* ---------------------------------------------------------------------- */
// Evaluate at x the cubic spline defined by the n points (xa[i],ya[i]) and
// the second-derivative coefficients y2a[] produced by spline().
double PairTableRX::splint(double *xa, double *ya, double *y2a, int n, double x)
{
  // bisection: shrink [lo,hi] until the two indices are neighbors
  int lo = 0;
  int hi = n-1;
  while (hi-lo > 1) {
    const int mid = (hi+lo) >> 1;
    if (xa[mid] > x) hi = mid;
    else lo = mid;
  }

  // cubic interpolation inside the bracketing interval
  const double h = xa[hi]-xa[lo];
  const double a = (xa[hi]-x) / h;
  const double b = (x-xa[lo]) / h;
  const double y = a*ya[lo] + b*ya[hi] +
    ((a*a*a-a)*y2a[lo] + (b*b*b-b)*y2a[hi]) * (h*h)/6.0;
  return y;
}
/* ----------------------------------------------------------------------
proc 0 writes to restart file
------------------------------------------------------------------------- */
// Write this pair style's restart data to fp.  Only the global settings are
// stored: the function delegates entirely to write_restart_settings().
void PairTableRX::write_restart(FILE *fp)
{
write_restart_settings(fp);
}
/* ----------------------------------------------------------------------
proc 0 reads from restart file, bcasts
------------------------------------------------------------------------- */
// Restore this pair style from a restart file: read the global settings via
// read_restart_settings(), then create the per-type arrays via allocate().
// No table data is read here, matching what write_restart() stores.
void PairTableRX::read_restart(FILE *fp)
{
read_restart_settings(fp);
allocate();
}
/* ----------------------------------------------------------------------
proc 0 writes to restart file
------------------------------------------------------------------------- */
// Write the seven integer settings of this pair style to fp as raw ints,
// in the fixed order that read_restart_settings() expects.
void PairTableRX::write_restart_settings(FILE *fp)
{
  const int *settings[] = {
    &tabstyle, &tablength,
    &ewaldflag, &pppmflag, &msmflag, &dispersionflag, &tip4pflag
  };
  const int nsettings = sizeof(settings)/sizeof(settings[0]);

  for (int k = 0; k < nsettings; k++)
    fwrite(settings[k],sizeof(int),1,fp);
}
/* ----------------------------------------------------------------------
proc 0 reads from restart file, bcasts
------------------------------------------------------------------------- */
// Read the seven integer settings written by write_restart_settings() on
// rank 0 (comm->me == 0), then broadcast each of them to all ranks.
void PairTableRX::read_restart_settings(FILE *fp)
{
  int *settings[] = {
    &tabstyle, &tablength,
    &ewaldflag, &pppmflag, &msmflag, &dispersionflag, &tip4pflag
  };
  const int nsettings = sizeof(settings)/sizeof(settings[0]);

  // only rank 0 touches the file
  if (comm->me == 0) {
    for (int k = 0; k < nsettings; k++)
      fread(settings[k],sizeof(int),1,fp);
  }

  for (int k = 0; k < nsettings; k++)
    MPI_Bcast(settings[k],1,MPI_INT,0,world);
}
/* ---------------------------------------------------------------------- */
double PairTableRX::single(int i, int j, int itype, int jtype, double rsq,
@ -1129,26 +507,6 @@ double PairTableRX::single(int i, int j, int itype, int jtype, double rsq,
return factor_lj*phi;
}
/* ----------------------------------------------------------------------
return the Coulomb cutoff for tabled potentials
called by KSpace solvers which require that all pairwise cutoffs be the same
loop over all tables not just those indexed by tabindex[i][j] since
no way to know which tables are active since pair::init() not yet called
------------------------------------------------------------------------- */
// Give access to the common table cutoff under the name "cut_coul".
//   str : name of the requested quantity; anything else returns NULL
//   dim : set to 0 when the cutoff is returned
// Errors out if no tables exist yet or if the tables do not all share the
// same cutoff.  Returns the address of the first table's cutoff.
void *PairTableRX::extract(const char *str, int &dim)
{
  const bool wants_cut_coul = (strcmp(str,"cut_coul") == 0);
  if (!wants_cut_coul) return NULL;

  if (ntables == 0) error->all(FLERR,"All pair coeffs are not set");

  // every table must agree with the cutoff of the first one
  const double reference = tables[0].cut;
  int m = 1;
  while (m < ntables) {
    if (tables[m].cut != reference)
      error->all(FLERR,"Pair table cutoffs must all be equal to use with KSpace");
    m++;
  }

  dim = 0;
  return &tables[0].cut;
}
/* ---------------------------------------------------------------------- */
void PairTableRX::getMixingWeights(int id, double &mixWtSite1old, double &mixWtSite2old, double &mixWtSite1, double &mixWtSite2)

View File

@ -20,11 +20,11 @@ PairStyle(table/rx,PairTableRX)
#ifndef LMP_PAIR_TABLE_RX_H
#define LMP_PAIR_TABLE_RX_H
#include "pair.h"
#include "pair_table.h"
namespace LAMMPS_NS {
class PairTableRX : public Pair {
class PairTableRX : public PairTable {
public:
PairTableRX(class LAMMPS *);
virtual ~PairTableRX();
@ -32,42 +32,9 @@ class PairTableRX : public Pair {
virtual void compute(int, int);
void settings(int, char **);
void coeff(int, char **);
double init_one(int, int);
void write_restart(FILE *);
void read_restart(FILE *);
void write_restart_settings(FILE *);
void read_restart_settings(FILE *);
double single(int, int, int, int, double, double, double, double &);
void *extract(const char *, int &);
virtual double single(int, int, int, int, double, double, double, double &);
protected:
enum{LOOKUP,LINEAR,SPLINE,BITMAP};
int tabstyle,tablength;
struct Table {
int ninput,rflag,fpflag,match,ntablebits;
int nshiftbits,nmask;
double rlo,rhi,fplo,fphi,cut;
double *rfile,*efile,*ffile;
double *e2file,*f2file;
double innersq,delta,invdelta,deltasq6;
double *rsq,*drsq,*e,*de,*f,*df,*e2,*f2;
};
int ntables;
Table *tables;
int **tabindex;
void allocate();
void read_table(Table *, char *, char *);
void param_extract(Table *, char *);
void bcast_table(Table *);
void spline_table(Table *);
void compute_table(Table *);
void null_table(Table *);
void free_table(Table *);
void spline(double *, double *, int, double, double, double *);
double splint(double *, double *, double *, int, double);
int nspecies;
char *site1, *site2;

View File

@ -103,7 +103,6 @@ Atom::Atom(LAMMPS *lmp) : Pointers(lmp)
uCond = uMech = uChem = uCG = uCGnew = NULL;
duChem = NULL;
dpdTheta = NULL;
ssaAIR = NULL;
// USER-MESO
@ -305,7 +304,6 @@ Atom::~Atom()
memory->destroy(uCG);
memory->destroy(uCGnew);
memory->destroy(duChem);
memory->destroy(ssaAIR);
memory->destroy(cc);
memory->destroy(cc_flux);
@ -346,10 +344,12 @@ Atom::~Atom()
delete [] iname[i];
memory->destroy(ivector[i]);
}
if (dvector != NULL) {
for (int i = 0; i < ndvector; i++) {
delete [] dname[i];
memory->destroy(dvector[i]);
}
}
memory->sfree(iname);
memory->sfree(dname);

View File

@ -93,7 +93,6 @@ class Atom : protected Pointers {
double *duChem;
double *dpdTheta;
int nspecies_dpd;
int *ssaAIR; // Shardlow Splitting Algorithm Active Interaction Region number
// USER-MESO package
@ -262,8 +261,8 @@ class Atom : protected Pointers {
void update_callback(int);
int find_custom(const char *, int &);
int add_custom(const char *, int);
void remove_custom(int, int);
virtual int add_custom(const char *, int);
virtual void remove_custom(int, int);
virtual void sync_modify(ExecutionSpace, unsigned int, unsigned int) {}

View File

@ -42,6 +42,18 @@
#define ENERGY_MASK 0x00010000
#define VIRIAL_MASK 0x00020000
// DPD
// each mask below has exactly one bit set, on consecutive bits
// from 0x00040000 (DPDRHO_MASK) up to 0x04000000 (DVECTOR_MASK)
// NOTE(review): UCOND_MASK has the same value (0x00100000) as RADIUS_MASK
//   in the granular group that follows -- confirm this overlap is intended
//   and that the two groups are never combined in a single mask
#define DPDRHO_MASK 0x00040000
#define DPDTHETA_MASK 0x00080000
#define UCOND_MASK 0x00100000
#define UMECH_MASK 0x00200000
#define UCHEM_MASK 0x00400000
#define UCG_MASK 0x00800000
#define UCGNEW_MASK 0x01000000
#define DUCHEM_MASK 0x02000000
#define DVECTOR_MASK 0x04000000
// granular
#define RADIUS_MASK 0x00100000

View File

@ -134,7 +134,6 @@ FixPropertyAtom::FixPropertyAtom(LAMMPS *lmp, int narg, char **arg) :
// register with Atom class
nmax_old = 0;
grow_arrays(atom->nmax);
atom->add_callback(0);
atom->add_callback(1);
if (border) atom->add_callback(2);
@ -190,6 +189,8 @@ int FixPropertyAtom::setmask()
void FixPropertyAtom::init()
{
grow_arrays(atom->nmax);
// error if atom style has changed since fix was defined
// don't allow this b/c user could change to style that defines molecule,q

View File

@ -27,7 +27,7 @@ namespace LAMMPS_NS {
class FixPropertyAtom : public Fix {
public:
FixPropertyAtom(class LAMMPS *, int, char **);
~FixPropertyAtom();
virtual ~FixPropertyAtom();
int setmask();
void init();
@ -38,7 +38,7 @@ class FixPropertyAtom : public Fix {
void write_data_section_keyword(int, FILE *);
void write_data_section(int, FILE *, int, double **, int);
void grow_arrays(int);
virtual void grow_arrays(int);
void copy_arrays(int, int, int);
int pack_border(int, int *, double *);
int unpack_border(int, int, double *);
@ -50,7 +50,7 @@ class FixPropertyAtom : public Fix {
int maxsize_restart();
double memory_usage();
private:
protected:
int nvalue,border;
int molecule_flag,q_flag,rmass_flag;
int *style,*index;

View File

@ -201,6 +201,8 @@ FixWall::FixWall(LAMMPS *lmp, int narg, char **arg) :
FixWall::~FixWall()
{
if (copymode) return;
for (int m = 0; m < nwall; m++) {
delete [] xstr[m];
delete [] estr[m];

View File

@ -28,9 +28,9 @@ class FixWallLJ93 : public FixWall {
public:
FixWallLJ93(class LAMMPS *, int, char **);
void precompute(int);
void wall_particle(int, int, double);
virtual void wall_particle(int, int, double);
private:
protected:
double coeff1[6],coeff2[6],coeff3[6],coeff4[6],offset[6];
};

View File

@ -79,7 +79,8 @@ NeighList::NeighList(LAMMPS *lmp) : Pointers(lmp)
// USER-DPD package
ndxAIR_ssa = NULL;
for (int i = 0; i < 8; i++) AIRct_ssa[i] = 0;
np = NULL;
}
/* ---------------------------------------------------------------------- */
@ -99,10 +100,6 @@ NeighList::~NeighList()
delete [] iskip;
memory->destroy(ijskip);
if (ssa) {
memory->sfree(ndxAIR_ssa);
}
}
/* ----------------------------------------------------------------------
@ -203,14 +200,16 @@ void NeighList::grow(int nlocal, int nall)
if (listmiddle) listmiddle->grow(nlocal,nall);
// skip if data structs are already big enough
if (ghost) {
if (ssa) {
if ((nlocal * 3) + nall <= maxatom) return;
} else if (ghost) {
if (nall <= maxatom) return;
} else {
if (nlocal <= maxatom) return;
}
maxatom = atom->nmax;
if (ssa) maxatom = (nlocal * 3) + nall;
else maxatom = atom->nmax;
memory->destroy(ilist);
memory->destroy(numneigh);
@ -224,12 +223,6 @@ void NeighList::grow(int nlocal, int nall)
firstdouble = (double **) memory->smalloc(maxatom*sizeof(double *),
"neighlist:firstdouble");
}
if (ssa) {
if (ndxAIR_ssa) memory->sfree(ndxAIR_ssa);
ndxAIR_ssa = (uint16_t (*)[8]) memory->smalloc(sizeof(uint16_t)*8*maxatom,
"neighlist:ndxAIR_ssa");
}
}
/* ----------------------------------------------------------------------
@ -306,7 +299,5 @@ bigint NeighList::memory_usage()
}
}
if (ndxAIR_ssa) bytes += sizeof(uint16_t) * 8 * maxatom;
return bytes;
}

Some files were not shown because too many files have changed in this diff Show More